document-extraction-tools 0.0.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- document_extraction_tools/__init__.py +0 -0
- document_extraction_tools/base/__init__.py +27 -0
- document_extraction_tools/base/converter/__init__.py +0 -0
- document_extraction_tools/base/converter/base_converter.py +40 -0
- document_extraction_tools/base/evaluator/__init__.py +0 -0
- document_extraction_tools/base/evaluator/base_evaluator.py +40 -0
- document_extraction_tools/base/exporter/__init__.py +0 -0
- document_extraction_tools/base/exporter/base_evaluation_exporter.py +43 -0
- document_extraction_tools/base/exporter/base_extraction_exporter.py +41 -0
- document_extraction_tools/base/extractor/__init__.py +0 -0
- document_extraction_tools/base/extractor/base_extractor.py +41 -0
- document_extraction_tools/base/file_lister/__init__.py +0 -0
- document_extraction_tools/base/file_lister/base_file_lister.py +37 -0
- document_extraction_tools/base/reader/__init__.py +0 -0
- document_extraction_tools/base/reader/base_reader.py +36 -0
- document_extraction_tools/base/test_data_loader/__init__.py +0 -0
- document_extraction_tools/base/test_data_loader/base_test_data_loader.py +44 -0
- document_extraction_tools/config/__init__.py +51 -0
- document_extraction_tools/config/base_converter_config.py +14 -0
- document_extraction_tools/config/base_evaluation_exporter_config.py +14 -0
- document_extraction_tools/config/base_evaluator_config.py +14 -0
- document_extraction_tools/config/base_extraction_exporter_config.py +14 -0
- document_extraction_tools/config/base_extractor_config.py +14 -0
- document_extraction_tools/config/base_file_lister_config.py +14 -0
- document_extraction_tools/config/base_reader_config.py +14 -0
- document_extraction_tools/config/base_test_data_loader_config.py +14 -0
- document_extraction_tools/config/config_loader.py +201 -0
- document_extraction_tools/config/evaluation_orchestrator_config.py +20 -0
- document_extraction_tools/config/evaluation_pipeline_config.py +32 -0
- document_extraction_tools/config/extraction_orchestrator_config.py +20 -0
- document_extraction_tools/config/extraction_pipeline_config.py +30 -0
- document_extraction_tools/py.typed +0 -0
- document_extraction_tools/runners/__init__.py +10 -0
- document_extraction_tools/runners/evaluation/__init__.py +0 -0
- document_extraction_tools/runners/evaluation/evaluation_orchestrator.py +260 -0
- document_extraction_tools/runners/extraction/__init__.py +0 -0
- document_extraction_tools/runners/extraction/extraction_orchestrator.py +202 -0
- document_extraction_tools/types/__init__.py +20 -0
- document_extraction_tools/types/document.py +79 -0
- document_extraction_tools/types/document_bytes.py +27 -0
- document_extraction_tools/types/evaluation_example.py +21 -0
- document_extraction_tools/types/evaluation_result.py +16 -0
- document_extraction_tools/types/path_identifier.py +16 -0
- document_extraction_tools/types/schema.py +7 -0
- document_extraction_tools-0.0.1rc1.dist-info/METADATA +15 -0
- document_extraction_tools-0.0.1rc1.dist-info/RECORD +47 -0
- document_extraction_tools-0.0.1rc1.dist-info/WHEEL +4 -0
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Public base component interfaces."""
|
|
2
|
+
|
|
3
|
+
from document_extraction_tools.base.converter.base_converter import BaseConverter
|
|
4
|
+
from document_extraction_tools.base.evaluator.base_evaluator import BaseEvaluator
|
|
5
|
+
from document_extraction_tools.base.exporter.base_evaluation_exporter import (
|
|
6
|
+
BaseEvaluationExporter,
|
|
7
|
+
)
|
|
8
|
+
from document_extraction_tools.base.exporter.base_extraction_exporter import (
|
|
9
|
+
BaseExtractionExporter,
|
|
10
|
+
)
|
|
11
|
+
from document_extraction_tools.base.extractor.base_extractor import BaseExtractor
|
|
12
|
+
from document_extraction_tools.base.file_lister.base_file_lister import BaseFileLister
|
|
13
|
+
from document_extraction_tools.base.reader.base_reader import BaseReader
|
|
14
|
+
from document_extraction_tools.base.test_data_loader.base_test_data_loader import (
|
|
15
|
+
BaseTestDataLoader,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"BaseConverter",
|
|
20
|
+
"BaseEvaluationExporter",
|
|
21
|
+
"BaseEvaluator",
|
|
22
|
+
"BaseExtractionExporter",
|
|
23
|
+
"BaseExtractor",
|
|
24
|
+
"BaseFileLister",
|
|
25
|
+
"BaseReader",
|
|
26
|
+
"BaseTestDataLoader",
|
|
27
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Abstract Base Class for Document Converters.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all converter implementations must satisfy.
|
|
4
|
+
Converters are responsible for transforming raw binary data (DocumentBytes)
|
|
5
|
+
into a structured Document object containing pages and content.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_converter_config import BaseConverterConfig
|
|
11
|
+
from document_extraction_tools.types.document import Document
|
|
12
|
+
from document_extraction_tools.types.document_bytes import DocumentBytes
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseConverter(ABC):
    """Contract that every document converter implementation must fulfil.

    A converter turns raw binary input (DocumentBytes) into a structured
    Document. Subclasses provide the actual logic in ``convert``.
    """

    def __init__(self, config: BaseConverterConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseConverterConfig): Settings for the concrete converter;
                stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    def convert(self, document_bytes: DocumentBytes) -> Document:
        """Build a structured Document from raw document bytes.

        Implementations are expected to perform the parsing and to carry the
        metadata of the input over to the returned document.

        Args:
            document_bytes (DocumentBytes): Standardized raw input holding the
                file bytes and the source metadata.

        Returns:
            Document: The structured document model, ready for extraction.
        """
|
|
File without changes
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Abstract Base Class for Evaluators.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all evaluator implementations must satisfy.
|
|
4
|
+
Evaluators are responsible for computing evaluation metrics by comparing
|
|
5
|
+
predicted data against ground-truth data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Generic
|
|
10
|
+
|
|
11
|
+
from document_extraction_tools.config.base_evaluator_config import BaseEvaluatorConfig
|
|
12
|
+
from document_extraction_tools.types.evaluation_result import EvaluationResult
|
|
13
|
+
from document_extraction_tools.types.schema import ExtractionSchema
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseEvaluator(ABC, Generic[ExtractionSchema]):
    """Contract that every evaluator implementation must fulfil.

    An evaluator compares predicted data with ground-truth data of the same
    schema type and reports the outcome as an EvaluationResult.
    """

    def __init__(self, config: BaseEvaluatorConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseEvaluatorConfig): Settings for the concrete evaluator;
                stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    def evaluate(
        self,
        true: ExtractionSchema,
        pred: ExtractionSchema,
    ) -> EvaluationResult:
        """Score one document's prediction against its ground truth.

        Args:
            true (ExtractionSchema): The ground-truth data.
            pred (ExtractionSchema): The predicted data.

        Returns:
            EvaluationResult: The metric result for this single document.
        """
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Abstract Base Class for Evaluation Exporters.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all evaluation exporter implementations must satisfy.
|
|
4
|
+
Evaluation Exporters are responsible for taking evaluation results and persisting them
|
|
5
|
+
to a target destination.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_evaluation_exporter_config import (
|
|
11
|
+
BaseEvaluationExporterConfig,
|
|
12
|
+
)
|
|
13
|
+
from document_extraction_tools.types.document import Document
|
|
14
|
+
from document_extraction_tools.types.evaluation_result import EvaluationResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseEvaluationExporter(ABC):
    """Contract that every evaluation exporter implementation must fulfil.

    An evaluation exporter takes evaluation results and persists them to a
    target destination.
    """

    def __init__(self, config: BaseEvaluationExporterConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseEvaluationExporterConfig): Settings for the concrete
                evaluation exporter; stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    async def export(
        self,
        results: list[tuple[Document, list[EvaluationResult]]],
    ) -> None:
        """Write a batch of evaluation results to the target destination.

        Declared as a coroutine so implementations can use non-blocking I/O.

        Args:
            results (list[tuple[Document, list[EvaluationResult]]]):
                Pairs of a document and the evaluation results that belong
                to it.

        Returns:
            None: Implementations should raise an exception if the export
                fails.
        """
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Abstract Base Class for Extraction Exporters.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all exporter implementations must satisfy.
|
|
4
|
+
Extraction Exporters are responsible for taking the extracted, structured Pydantic data
|
|
5
|
+
and persisting it to a target destination.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_extraction_exporter_config import (
|
|
11
|
+
BaseExtractionExporterConfig,
|
|
12
|
+
)
|
|
13
|
+
from document_extraction_tools.types.document import Document
|
|
14
|
+
from document_extraction_tools.types.schema import ExtractionSchema
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseExtractionExporter(ABC):
    """Contract that every extraction exporter implementation must fulfil.

    An extraction exporter takes the extracted, structured Pydantic data and
    persists it to a target destination.
    """

    def __init__(self, config: BaseExtractionExporterConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseExtractionExporterConfig): Settings for the concrete
                extraction exporter; stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    async def export(self, document: Document, data: ExtractionSchema) -> None:
        """Write the data extracted from one document to the destination.

        Declared as a coroutine so implementations can use non-blocking I/O.

        Args:
            document (Document): The source document of this extraction.
            data (ExtractionSchema): The populated Pydantic model holding the
                extracted information.

        Returns:
            None: Implementations should raise an exception if the export
                fails.
        """
|
|
File without changes
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Abstract Base Class for Information Extractors.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all extractor implementations must satisfy.
|
|
4
|
+
Extractors are responsible for analyzing the structured Document
|
|
5
|
+
and populating a target Pydantic schema with specific data points.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_extractor_config import BaseExtractorConfig
|
|
11
|
+
from document_extraction_tools.types.document import Document
|
|
12
|
+
from document_extraction_tools.types.schema import ExtractionSchema
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseExtractor(ABC):
    """Contract that every information extractor implementation must fulfil.

    An extractor analyzes a structured Document and populates a target
    Pydantic schema with specific data points.
    """

    def __init__(self, config: BaseExtractorConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseExtractorConfig): Settings for the concrete extractor;
                stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    async def extract(
        self,
        document: Document,
        schema: type[ExtractionSchema],
    ) -> ExtractionSchema:
        """Produce an instance of ``schema`` filled from ``document``.

        Declared as a coroutine to support I/O-bound extraction work.

        Args:
            document (Document): The fully parsed document.
            schema (type[ExtractionSchema]): The Pydantic model class that
                defines the target structure.

        Returns:
            ExtractionSchema: An instance of the schema populated with the
                extracted data.
        """
|
|
File without changes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Abstract Base Class for File Listers.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all file lister implementations must satisfy.
|
|
4
|
+
File Listers are responsible for scanning a source
|
|
5
|
+
and returning a list of standardized identifiers to be processed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_file_lister_config import (
|
|
11
|
+
BaseFileListerConfig,
|
|
12
|
+
)
|
|
13
|
+
from document_extraction_tools.types.path_identifier import PathIdentifier
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseFileLister(ABC):
    """Contract that every file lister implementation must fulfil.

    A file lister scans a source and returns the standardized identifiers of
    the items to be processed.
    """

    def __init__(self, config: BaseFileListerConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseFileListerConfig): Settings for the concrete file
                lister; stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    def list_files(self) -> list[PathIdentifier]:
        """Scan the target source and collect the file identifiers found.

        Implementations own whatever logic is needed to hand back a clean
        list of work items.

        Returns:
            list[PathIdentifier]: Standardized objects containing the path and
                any necessary execution context.
        """
|
|
File without changes
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Abstract Base Class for Document Readers.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all reader implementations must satisfy.
|
|
4
|
+
Readers are responsible for fetching raw file content from a source
|
|
5
|
+
and returning it as a standardized DocumentBytes object.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_reader_config import BaseReaderConfig
|
|
11
|
+
from document_extraction_tools.types.document_bytes import DocumentBytes
|
|
12
|
+
from document_extraction_tools.types.path_identifier import PathIdentifier
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseReader(ABC):
    """Contract that every document reader implementation must fulfil.

    A reader fetches raw file content from a source and returns it as a
    standardized DocumentBytes object.
    """

    def __init__(self, config: BaseReaderConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseReaderConfig): Settings for the concrete reader;
                stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    def read(self, path_identifier: PathIdentifier) -> DocumentBytes:
        """Fetch one document from its source as raw bytes.

        Args:
            path_identifier (PathIdentifier): Identifies the file to read.

        Returns:
            DocumentBytes: Standardized container with the raw bytes and the
                source metadata.
        """
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Abstract Base Class for Test Data Loaders.
|
|
2
|
+
|
|
3
|
+
This module defines the interface that all test data loader implementations must satisfy.
|
|
4
|
+
Test Data Loaders are responsible for loading evaluation test examples from a specified source.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Generic
|
|
9
|
+
|
|
10
|
+
from document_extraction_tools.config.base_test_data_loader_config import (
|
|
11
|
+
BaseTestDataLoaderConfig,
|
|
12
|
+
)
|
|
13
|
+
from document_extraction_tools.types.evaluation_example import EvaluationExample
|
|
14
|
+
from document_extraction_tools.types.path_identifier import PathIdentifier
|
|
15
|
+
from document_extraction_tools.types.schema import ExtractionSchema
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaseTestDataLoader(ABC, Generic[ExtractionSchema]):
    """Contract that every test data loader implementation must fulfil.

    A test data loader loads evaluation test examples from a specified
    source.
    """

    def __init__(self, config: BaseTestDataLoaderConfig) -> None:
        """Keep the supplied configuration on the instance.

        Args:
            config (BaseTestDataLoaderConfig): Settings for the concrete test
                data loader; stored unchanged as ``self.config``.
        """
        self.config = config

    @abstractmethod
    def load_test_data(
        self,
        path_identifier: PathIdentifier,
    ) -> list[EvaluationExample[ExtractionSchema]]:
        """Retrieve the evaluation examples found at ``path_identifier``.

        Args:
            path_identifier (PathIdentifier): The source location to load the
                evaluation examples from.

        Returns:
            list[EvaluationExample[ExtractionSchema]]: The examples to be used
                for evaluation.
        """
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Public config helpers and models."""
|
|
2
|
+
|
|
3
|
+
from document_extraction_tools.config.base_converter_config import BaseConverterConfig
|
|
4
|
+
from document_extraction_tools.config.base_evaluation_exporter_config import (
|
|
5
|
+
BaseEvaluationExporterConfig,
|
|
6
|
+
)
|
|
7
|
+
from document_extraction_tools.config.base_evaluator_config import BaseEvaluatorConfig
|
|
8
|
+
from document_extraction_tools.config.base_extraction_exporter_config import (
|
|
9
|
+
BaseExtractionExporterConfig,
|
|
10
|
+
)
|
|
11
|
+
from document_extraction_tools.config.base_extractor_config import BaseExtractorConfig
|
|
12
|
+
from document_extraction_tools.config.base_file_lister_config import (
|
|
13
|
+
BaseFileListerConfig,
|
|
14
|
+
)
|
|
15
|
+
from document_extraction_tools.config.base_reader_config import BaseReaderConfig
|
|
16
|
+
from document_extraction_tools.config.base_test_data_loader_config import (
|
|
17
|
+
BaseTestDataLoaderConfig,
|
|
18
|
+
)
|
|
19
|
+
from document_extraction_tools.config.config_loader import (
|
|
20
|
+
load_config,
|
|
21
|
+
load_evaluation_config,
|
|
22
|
+
)
|
|
23
|
+
from document_extraction_tools.config.evaluation_orchestrator_config import (
|
|
24
|
+
EvaluationOrchestratorConfig,
|
|
25
|
+
)
|
|
26
|
+
from document_extraction_tools.config.evaluation_pipeline_config import (
|
|
27
|
+
EvaluationPipelineConfig,
|
|
28
|
+
)
|
|
29
|
+
from document_extraction_tools.config.extraction_orchestrator_config import (
|
|
30
|
+
ExtractionOrchestratorConfig,
|
|
31
|
+
)
|
|
32
|
+
from document_extraction_tools.config.extraction_pipeline_config import (
|
|
33
|
+
ExtractionPipelineConfig,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"BaseConverterConfig",
|
|
38
|
+
"BaseEvaluationExporterConfig",
|
|
39
|
+
"BaseEvaluatorConfig",
|
|
40
|
+
"BaseExtractionExporterConfig",
|
|
41
|
+
"BaseExtractorConfig",
|
|
42
|
+
"BaseFileListerConfig",
|
|
43
|
+
"BaseReaderConfig",
|
|
44
|
+
"BaseTestDataLoaderConfig",
|
|
45
|
+
"EvaluationOrchestratorConfig",
|
|
46
|
+
"EvaluationPipelineConfig",
|
|
47
|
+
"ExtractionOrchestratorConfig",
|
|
48
|
+
"ExtractionPipelineConfig",
|
|
49
|
+
"load_config",
|
|
50
|
+
"load_evaluation_config",
|
|
51
|
+
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Converter components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseConverterConfig(BaseModel):
    """Root configuration model for Converter components.

    Concrete converters subclass this model and add their own parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "converter.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Evaluation Exporter components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseEvaluationExporterConfig(BaseModel):
    """Root configuration model for Evaluation Exporter components.

    Concrete evaluation exporters subclass this model and add their own
    parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "evaluation_exporter.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Evaluator components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseEvaluatorConfig(BaseModel):
    """Root configuration model for Evaluator components.

    Concrete evaluators subclass this model and add their own parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "evaluator.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Extraction Exporter components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseExtractionExporterConfig(BaseModel):
    """Root configuration model for Extraction Exporter components.

    Concrete extraction exporters subclass this model and add their own
    parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "extraction_exporter.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Extractor components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseExtractorConfig(BaseModel):
    """Root configuration model for Extractor components.

    Concrete extractors subclass this model and add their own parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "extractor.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for File Lister components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseFileListerConfig(BaseModel):
    """Root configuration model for File Lister components.

    Concrete file listers subclass this model and add their own parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "file_lister.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Reader components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseReaderConfig(BaseModel):
    """Root configuration model for Reader components.

    Concrete readers subclass this model and add their own parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "reader.yaml"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Configuration for Test Data Loader components."""
|
|
2
|
+
|
|
3
|
+
from typing import ClassVar
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseTestDataLoaderConfig(BaseModel):
    """Root configuration model for Test Data Loader components.

    Concrete test data loaders subclass this model and add their own
    parameters.
    """

    # Annotated as ClassVar, so this is a class-level constant rather than a
    # per-instance model field.
    # NOTE(review): presumably the YAML file the config loader reads for this
    # component -- confirm against config_loader.
    filename: ClassVar[str] = "test_data_loader.yaml"
|