sibi-flux 2025.12.0__tar.gz → 2026.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/PKG-INFO +2 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/pyproject.toml +14 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/__init__.py +4 -4
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet_engine/executor.py +1 -1
- sibi_flux-2026.1.1/src/sibi_flux/config/__init__.py +3 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/async_core.py +1 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/client_manager.py +5 -2
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/core.py +3 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/datacube/_data_cube.py +12 -3
- sibi_flux-2026.1.1/src/sibi_flux/datacube/cli.py +1247 -0
- sibi_flux-2026.1.1/src/sibi_flux/datacube/config_engine.py +219 -0
- sibi_flux-2026.1.1/src/sibi_flux/datacube/field_factory.py +131 -0
- sibi_flux-2026.1.1/src/sibi_flux/datacube/field_mapper.py +243 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/datacube/field_registry.py +2 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/datacube/generator.py +322 -90
- sibi_flux-2026.1.1/src/sibi_flux/datacube/orchestrator.py +297 -0
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_cube_router.py → sibi_flux-2026.1.1/src/sibi_flux/datacube/router.py +2 -3
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dataset/_dataset.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/_df_helper.py +2 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/_params.py +6 -6
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_validator/_df_validator.py +5 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/mcp/__init__.py +10 -2
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/mcp/router.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/osmnx_helper/__init__.py +12 -4
- sibi_flux-2026.1.1/src/sibi_flux/parquet/__init__.py +8 -0
- sibi_flux-2026.1.1/src/sibi_flux/parquet/readers/__init__.py +4 -0
- {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.1/src/sibi_flux/parquet}/readers/base.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/pipelines/base.py +1 -2
- sibi_flux-2026.1.1/src/sibi_flux/readers/__init__.py +3 -0
- sibi_flux-2026.1.1/src/sibi_flux/readers/base.py +3 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.1/src/sibi_flux}/storage/_storage_manager.py +4 -4
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.1/src/sibi_flux}/storage/factory.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/__init__.py +1 -2
- sibi_flux-2025.12.0/src/sibi_flux/datacube/config_engine.py +0 -152
- sibi_flux-2025.12.0/src/sibi_flux/datacube/field_factory.py +0 -48
- sibi_flux-2025.12.0/src/sibi_flux/datacube/orchestrator.py +0 -171
- sibi_flux-2025.12.0/src/sibi_flux/readers/__init__.py +0 -3
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/__init__.py +0 -19
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_data_cube.py +0 -132
- sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/__init__.py +0 -3
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/README.md +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_dst/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/base.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet_engine/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet_engine/manifest.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet_engine/planner.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/_config_manager.py → /sibi_flux-2026.1.1/src/sibi_flux/config/manager.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/conf → sibi_flux-2026.1.1/src/sibi_flux/config}/settings.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/core/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/core/managed_resource/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/core/managed_resource/_managed_resource.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/core/type_maps/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/exceptions.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dask_cluster/utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/datacube/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/dataset/__init__.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/hybrid_data_loader.py → /sibi_flux-2026.1.1/src/sibi_flux/dataset/hybrid_loader.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_enricher/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_enricher/async_enricher.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_attacher.py → /sibi_flux-2026.1.1/src/sibi_flux/df_enricher/attacher.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_enricher/merger.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_enricher/specs.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_enricher/types.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/_strategies.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/http/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/http/_http_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/backends/utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/core/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/core/_defaults.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/core/_filter_handler.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/core/_params_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_helper/core/_query_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/df_validator/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/logger/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/logger/_logger.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/mcp/client.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/orchestration/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/orchestration/_artifact_orchestrator.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/orchestration/_pipeline_executor.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/osmnx_helper/_pbf_handler.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/osmnx_helper/graph_loader.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/osmnx_helper/utils.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.1/src/sibi_flux/parquet}/readers/parquet.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.1/src/sibi_flux/parquet/saver}/__init__.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.1/src/sibi_flux/parquet/saver}/_parquet_saver.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.1/src/sibi_flux/parquet/saver}/_write_gatekeeper.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/pipelines/__init__.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_pipeline_template.py → /sibi_flux-2026.1.1/src/sibi_flux/pipelines/template.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/py.typed +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.1/src/sibi_flux}/storage/__init__.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.1/src/sibi_flux}/storage/_fs_registry.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/clickhouse_writer/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/common.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/dask_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/data_utils/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/data_utils/_data_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/dataframe_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/date_utils/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/date_utils/_business_days.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/date_utils/_date_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/date_utils/_file_age_checker.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/file_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/filepath_generator/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/filepath_generator/_filepath_generator.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/utils/retry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sibi-flux
|
|
3
|
-
Version: 2025.12.0
|
|
3
|
+
Version: 2026.1.1
|
|
4
4
|
Summary: Sibi Toolkit: A collection of tools for Data Analysis/Engineering.
|
|
5
5
|
Author: Luis Valverde
|
|
6
6
|
Author-email: Luis Valverde <lvalverdeb@gmail.com>
|
|
@@ -27,6 +27,7 @@ Requires-Dist: httpx>=0.28.1
|
|
|
27
27
|
Requires-Dist: opentelemetry-api>=1.38.0
|
|
28
28
|
Requires-Dist: opentelemetry-exporter-otlp>=1.38.0
|
|
29
29
|
Requires-Dist: opentelemetry-sdk>=1.38.0
|
|
30
|
+
Requires-Dist: deep-translator>=1.11.4
|
|
30
31
|
Requires-Dist: sibi-flux[distributed,geospatial,mcp] ; extra == 'complete'
|
|
31
32
|
Requires-Dist: distributed>=2025.11.0 ; extra == 'distributed'
|
|
32
33
|
Requires-Dist: osmnx>=2.0.7 ; extra == 'geospatial'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sibi-flux"
|
|
3
|
-
version = "2025.12.0"
|
|
3
|
+
version = "2026.1.1"
|
|
4
4
|
description = "Sibi Toolkit: A collection of tools for Data Analysis/Engineering."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -33,6 +33,7 @@ dependencies = [
|
|
|
33
33
|
"opentelemetry-api>=1.38.0",
|
|
34
34
|
"opentelemetry-exporter-otlp>=1.38.0",
|
|
35
35
|
"opentelemetry-sdk>=1.38.0",
|
|
36
|
+
"deep-translator>=1.11.4",
|
|
36
37
|
]
|
|
37
38
|
|
|
38
39
|
[project.optional-dependencies]
|
|
@@ -97,6 +98,7 @@ filterwarnings = ["ignore::DeprecationWarning"]
|
|
|
97
98
|
|
|
98
99
|
|
|
99
100
|
[tool.poe.tasks]
|
|
101
|
+
publish = { cmd = "uv publish", envfile = ".env" }
|
|
100
102
|
dev = """
|
|
101
103
|
uvicorn solutions.main:app
|
|
102
104
|
--reload
|
|
@@ -108,6 +110,17 @@ uvicorn solutions.main:app
|
|
|
108
110
|
"""
|
|
109
111
|
test = { cmd = "pytest tests/"}
|
|
110
112
|
lint = "black src/"
|
|
113
|
+
build = "uv build"
|
|
114
|
+
dc-sync = "python solutions/generators/datacubes/gen_dc.py sync"
|
|
115
|
+
dc-init = "python solutions/generators/datacubes/gen_dc.py init"
|
|
116
|
+
dc-discover = "python solutions/generators/datacubes/gen_dc.py discover"
|
|
117
|
+
dc-scan = "python solutions/generators/datacubes/gen_dc.py scan"
|
|
118
|
+
dc-match = "python solutions/generators/datacubes/gen_dc.py match"
|
|
119
|
+
dc-map = "python solutions/generators/datacubes/gen_dc.py map"
|
|
120
|
+
|
|
121
|
+
[tool.poe.tasks.release]
|
|
122
|
+
sequence = ["build","publish"]
|
|
123
|
+
envfile = ".env" # Loads the token for the whole sequence
|
|
111
124
|
|
|
112
125
|
[tool.commitizen]
|
|
113
126
|
name = "cz_conventional_commits"
|
|
@@ -21,11 +21,12 @@ from sibi_flux.df_validator._df_validator import DfValidator
|
|
|
21
21
|
|
|
22
22
|
# Artifacts
|
|
23
23
|
from sibi_flux.artifacts import ParquetArtifact, BaseArtifact as Artifact
|
|
24
|
-
from sibi_flux.
|
|
24
|
+
from sibi_flux.parquet import ParquetReader
|
|
25
25
|
|
|
26
26
|
# Utilities (Sub-packages)
|
|
27
27
|
from sibi_flux import dask_cluster
|
|
28
|
-
from sibi_flux.utils import
|
|
28
|
+
from sibi_flux.utils import clickhouse_writer
|
|
29
|
+
from sibi_flux import parquet
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
__all__ = [
|
|
@@ -43,7 +44,6 @@ __all__ = [
|
|
|
43
44
|
"Artifact",
|
|
44
45
|
"ParquetReader",
|
|
45
46
|
"dask_cluster",
|
|
46
|
-
"
|
|
47
|
-
"parquet_saver",
|
|
47
|
+
"parquet",
|
|
48
48
|
"clickhouse_writer",
|
|
49
49
|
]
|
{sibi_flux-2025.12.0 → sibi_flux-2026.1.1}/src/sibi_flux/artifacts/parquet_engine/executor.py
RENAMED
|
@@ -13,7 +13,7 @@ import functools
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
import pandas as pd
|
|
15
15
|
from sibi_flux.core import ManagedResource
|
|
16
|
-
from sibi_flux.
|
|
16
|
+
from sibi_flux.parquet import ParquetSaver
|
|
17
17
|
|
|
18
18
|
from sibi_flux.utils import ensure_slash
|
|
19
19
|
from sibi_flux.utils.retry import with_retry
|
|
@@ -23,6 +23,7 @@ import shutil
|
|
|
23
23
|
|
|
24
24
|
try:
|
|
25
25
|
from dask.distributed import Client, LocalCluster, get_client
|
|
26
|
+
|
|
26
27
|
HAS_DISTRIBUTED = True
|
|
27
28
|
except ImportError:
|
|
28
29
|
Client = object
|
|
@@ -231,8 +232,10 @@ class DaskClientMixin:
|
|
|
231
232
|
def _init_dask_client(self, **kwargs) -> None:
|
|
232
233
|
self._init_params = kwargs
|
|
233
234
|
if not HAS_DISTRIBUTED:
|
|
234
|
-
|
|
235
|
-
|
|
235
|
+
self.logger.info(
|
|
236
|
+
"Dask Distributed not installed. Skipping cluster initialization."
|
|
237
|
+
)
|
|
238
|
+
return
|
|
236
239
|
|
|
237
240
|
if kwargs.get("dask_client"):
|
|
238
241
|
self.dask_client = kwargs["dask_client"]
|
|
@@ -14,15 +14,18 @@ from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
|
14
14
|
import dask
|
|
15
15
|
import dask.dataframe as dd
|
|
16
16
|
import pandas as pd
|
|
17
|
+
|
|
17
18
|
try:
|
|
18
19
|
from dask.distributed import Client, Future
|
|
19
20
|
from dask.distributed import wait as dask_wait
|
|
20
21
|
except ImportError:
|
|
21
22
|
Client = object
|
|
22
23
|
Future = object
|
|
24
|
+
|
|
23
25
|
def dask_wait(*args, **kwargs):
|
|
24
26
|
pass
|
|
25
27
|
|
|
28
|
+
|
|
26
29
|
# Project-specific imports
|
|
27
30
|
from .client_manager import get_persistent_client
|
|
28
31
|
from .exceptions import RECOVERABLE_COMMS
|
|
@@ -175,9 +175,10 @@ class Datacube(DfHelper):
|
|
|
175
175
|
# but we log it for debugging.
|
|
176
176
|
self.logger.debug(f"Schema inference skipped: {e}")
|
|
177
177
|
|
|
178
|
-
def
|
|
178
|
+
def validate_data(self, df: DataFrameType) -> DataFrameType:
|
|
179
179
|
"""
|
|
180
180
|
Runs DfValidator if a schema is configured.
|
|
181
|
+
Overrides BaseDatacube hook.
|
|
181
182
|
"""
|
|
182
183
|
schema = self.config.get("validation_schema")
|
|
183
184
|
if not schema:
|
|
@@ -203,6 +204,14 @@ class Datacube(DfHelper):
|
|
|
203
204
|
|
|
204
205
|
return validator.get_df()
|
|
205
206
|
|
|
207
|
+
async def avalidate_data(self, df: DataFrameType) -> DataFrameType:
|
|
208
|
+
"""
|
|
209
|
+
Asynchronous validation hook.
|
|
210
|
+
Offloads synchronous validation (CPU bound) to a thread.
|
|
211
|
+
"""
|
|
212
|
+
import asyncio
|
|
213
|
+
return await asyncio.to_thread(self.validate_data, df)
|
|
214
|
+
|
|
206
215
|
def get_ddl(self, table_name: Optional[str] = None) -> str:
|
|
207
216
|
"""
|
|
208
217
|
Generates ClickHouse DDL for the current cube.
|
|
@@ -230,7 +239,7 @@ class Datacube(DfHelper):
|
|
|
230
239
|
# 3. Apply Transform Hook
|
|
231
240
|
df = self.fix_data(df, **kwargs)
|
|
232
241
|
# 4. Validate
|
|
233
|
-
df = self.
|
|
242
|
+
df = self.validate_data(df)
|
|
234
243
|
else:
|
|
235
244
|
self.logger.debug(f"No data loaded by {self.__class__.__name__}")
|
|
236
245
|
|
|
@@ -254,7 +263,7 @@ class Datacube(DfHelper):
|
|
|
254
263
|
# 3. Apply Async Transform Hook
|
|
255
264
|
df = await self.afix_data(df, **kwargs)
|
|
256
265
|
# 4. Validate (CPU bound)
|
|
257
|
-
df = await
|
|
266
|
+
df = await self.avalidate_data(df)
|
|
258
267
|
else:
|
|
259
268
|
self.logger.debug(f"No data loaded by {self.__class__.__name__}")
|
|
260
269
|
|