sibi-dst 0.3.45__tar.gz → 0.3.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {sibi_dst-0.3.45 → sibi_dst-0.3.47}/PKG-INFO +3 -2
  2. {sibi_dst-0.3.45 → sibi_dst-0.3.47}/pyproject.toml +3 -2
  3. sibi_dst-0.3.47/sibi_dst/__init__.py +47 -0
  4. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/_artifact_updater_multi_wrapper.py +1 -1
  5. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/_df_helper.py +3 -3
  6. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/_parquet_artifact.py +2 -2
  7. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/_parquet_reader.py +2 -2
  8. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/django/_load_from_db.py +3 -3
  9. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/http/_http_config.py +1 -1
  10. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/parquet/_filter_handler.py +1 -1
  11. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/parquet/_parquet_options.py +2 -2
  12. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/_io_dask.py +2 -2
  13. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/_load_from_db.py +2 -2
  14. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/_sql_model_builder.py +2 -1
  15. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/core/_filter_handler.py +1 -1
  16. sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper/__init__.py +6 -0
  17. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/tests/test_data_wrapper_class.py +11 -10
  18. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/__init__.py +4 -0
  19. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/clickhouse_writer.py +1 -1
  20. sibi_dst-0.3.47/sibi_dst/v1/utils/data_from_http_source.py +49 -0
  21. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/data_utils.py +5 -3
  22. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/data_wrapper.py +3 -1
  23. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/date_utils.py +1 -1
  24. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/file_utils.py +1 -1
  25. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/filepath_generator.py +1 -1
  26. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/parquet_saver.py +1 -1
  27. sibi_dst-0.3.47/sibi_dst/v1/utils/storage_config.py +28 -0
  28. sibi_dst-0.3.47/sibi_dst/v2/df_helper/__init__.py +7 -0
  29. sibi_dst-0.3.47/sibi_dst/v2/df_helper/_df_helper.py +214 -0
  30. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +10 -0
  31. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +82 -0
  32. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +135 -0
  33. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +142 -0
  34. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +297 -0
  35. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +9 -0
  36. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +78 -0
  37. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +122 -0
  38. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +142 -0
  39. sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +283 -0
  40. sibi_dst-0.3.47/sibi_dst/v2/df_helper/core/__init__.py +9 -0
  41. sibi_dst-0.3.47/sibi_dst/v2/df_helper/core/_filter_handler.py +236 -0
  42. sibi_dst-0.3.47/sibi_dst/v2/df_helper/core/_params_config.py +139 -0
  43. sibi_dst-0.3.47/sibi_dst/v2/df_helper/core/_query_config.py +17 -0
  44. sibi_dst-0.3.47/sibi_dst/v2/utils/__init__.py +5 -0
  45. sibi_dst-0.3.47/sibi_dst/v2/utils/log_utils.py +120 -0
  46. sibi_dst-0.3.45/sibi_dst/__init__.py +0 -9
  47. sibi_dst-0.3.45/sibi_dst/osmnx_helper/__init__.py +0 -9
  48. sibi_dst-0.3.45/sibi_dst/osmnx_helper/v2/base_osm_map.py +0 -153
  49. sibi_dst-0.3.45/sibi_dst/osmnx_helper/v2/basemaps/utils.py +0 -0
  50. {sibi_dst-0.3.45 → sibi_dst-0.3.47}/README.md +0 -0
  51. {sibi_dst-0.3.45/sibi_dst/df_helper/backends → sibi_dst-0.3.47/sibi_dst/v1}/__init__.py +0 -0
  52. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/__init__.py +0 -0
  53. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/df_helper/backends}/__init__.py +0 -0
  54. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/django/__init__.py +0 -0
  55. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/django/_db_connection.py +0 -0
  56. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/django/_io_dask.py +0 -0
  57. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/django/_sql_model_builder.py +0 -0
  58. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/http/__init__.py +0 -0
  59. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/parquet/__init__.py +0 -0
  60. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/__init__.py +0 -0
  61. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  62. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/backends/sqlalchemy/_filter_handler.py +0 -0
  63. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/core/__init__.py +0 -0
  64. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/core/_defaults.py +0 -0
  65. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/core/_params_config.py +0 -0
  66. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/core/_query_config.py +0 -0
  67. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/df_helper/data_cleaner.py +0 -0
  68. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/geopy_helper/__init__.py +0 -0
  69. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/geopy_helper/geo_location_service.py +0 -0
  70. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/geopy_helper/utils.py +0 -0
  71. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper}/base_osm_map.py +0 -0
  72. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper}/basemaps/__init__.py +0 -0
  73. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper}/basemaps/calendar_html.py +0 -0
  74. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper}/basemaps/router_plotter.py +0 -0
  75. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v1 → sibi_dst-0.3.47/sibi_dst/v1/osmnx_helper}/utils.py +0 -0
  76. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v2 → sibi_dst-0.3.47/sibi_dst/v1/tests}/__init__.py +0 -0
  77. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/airflow_manager.py +0 -0
  78. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/credentials.py +0 -0
  79. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/df_utils.py +0 -0
  80. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/log_utils.py +0 -0
  81. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/phone_formatter.py +0 -0
  82. {sibi_dst-0.3.45/sibi_dst → sibi_dst-0.3.47/sibi_dst/v1}/utils/storage_manager.py +0 -0
  83. {sibi_dst-0.3.45/sibi_dst/osmnx_helper/v2/basemaps → sibi_dst-0.3.47/sibi_dst/v2}/__init__.py +0 -0
  84. {sibi_dst-0.3.45/sibi_dst/tests → sibi_dst-0.3.47/sibi_dst/v2/df_helper/backends}/__init__.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sibi-dst
- Version: 0.3.45
+ Version: 0.3.47
  Summary: Data Science Toolkit
  Author: Luis Valverde
  Author-email: lvalverdeb@gmail.com
@@ -14,12 +14,12 @@ Requires-Dist: chardet (>=5.2.0,<6.0.0)
  Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
  Requires-Dist: clickhouse-connect (>=0.8.7,<0.9.0)
  Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
- Requires-Dist: dask-expr (>=1.1.20,<2.0.0)
  Requires-Dist: dask[complete] (>=2024.11.1,<2025.0.0)
  Requires-Dist: django (>=5.1.4,<6.0.0)
  Requires-Dist: djangorestframework (>=3.15.2,<4.0.0)
  Requires-Dist: folium (>=0.19.4,<0.20.0)
  Requires-Dist: geopandas (>=1.0.1,<2.0.0)
+ Requires-Dist: geopy (>=2.4.1,<3.0.0)
  Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
  Requires-Dist: httpx (>=0.27.2,<0.28.0)
  Requires-Dist: ipython (>=8.29.0,<9.0.0)
@@ -40,6 +40,7 @@ Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
  Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
  Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
  Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0)
+ Requires-Dist: sqlmodel (>=0.0.22,<0.0.23)
  Requires-Dist: tornado (>=6.4.1,<7.0.0)
  Requires-Dist: tqdm (>=4.67.0,<5.0.0)
  Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "sibi-dst"
- version = "0.3.45"
+ version = "0.3.47"
  description = "Data Science Toolkit"
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
  readme = "README.md"
@@ -32,7 +32,6 @@ chardet = "^5.2.0"
  charset-normalizer = "^3.4.0"
  sqlalchemy = "^2.0.36"
  djangorestframework = "^3.15.2"
- dask-expr = "^1.1.20"
  psycopg2 = "^2.9.10"
  uvicorn = "^0.34.0"
  pytest-mock = "^3.14.0"
@@ -43,6 +42,8 @@ geopandas = "^1.0.1"
  osmnx = "^2.0.1"
  gunicorn = "^23.0.0"
  uvicorn-worker = "^0.3.0"
+ geopy = "^2.4.1"
+ sqlmodel = "^0.0.22"


  [build-system]
@@ -0,0 +1,47 @@
+
+ try:
+     import importlib.metadata as version_reader
+ except ImportError:
+     import importlib_metadata as version_reader
+
+ try:
+     __version__ = version_reader.version("sibi-dst")
+ except version_reader.PackageNotFoundError:
+     __version__ = "unknown"
+
+ import importlib
+ import sys
+
+ def _load_module(version, module_name):
+     # Construct the relative module path (e.g., ".v1.df_helper")
+     module_path = f".{version}.{module_name}"
+     #print(f"Loading module: {module_path} from package {__package__}")
+     return importlib.import_module(module_path, package=__package__)
+
+
+ # Toggle version by setting the flag (or use an environment variable)
+ use_v2 = False
+ default_version = "v2" if use_v2 else "v1"
+
+ # Dynamically load the modules from the chosen version directory.
+ df_helper = _load_module(default_version, "df_helper")
+ geopy_helper = _load_module(default_version, "geopy_helper")
+ osmnx_helper = _load_module(default_version, "osmnx_helper")
+ tests = _load_module(default_version, "tests")
+ utils = _load_module(default_version, "utils")
+
+ # Re-export the modules at the top level so that absolute imports work.
+ sys.modules[f"{__package__}.df_helper"] = df_helper
+ sys.modules[f"{__package__}.geopy_helper"] = geopy_helper
+ sys.modules[f"{__package__}.osmnx_helper"] = osmnx_helper
+ sys.modules[f"{__package__}.tests"] = tests
+ sys.modules[f"{__package__}.utils"] = utils
+
+ # Define what is exported with "from sibi_dst import *"
+ __all__ = [
+     "df_helper",
+     "geopy_helper",
+     "osmnx_helper",
+     "tests",
+     "utils"
+ ]
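
The loader above hard-codes the v1/v2 toggle to use_v2 = False, and its own comment suggests an environment variable could drive it instead. A minimal sketch of that variant, assuming a hypothetical SIBI_DST_VERSION variable that is not part of the released package:

    import os

    # Hypothetical: read the toggle from the environment; "v1" remains the default.
    default_version = "v2" if os.environ.get("SIBI_DST_VERSION", "v1") == "v2" else "v1"

    # The dynamic loading then proceeds exactly as in the shipped __init__.py, e.g.:
    # df_helper = _load_module(default_version, "df_helper")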
@@ -7,7 +7,7 @@ from functools import total_ordering
  from collections import defaultdict
  from contextlib import asynccontextmanager
  import signal
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.utils import Logger

  @total_ordering
  class PrioritizedItem:
@@ -11,9 +11,9 @@ import pandas as pd
  from pydantic import BaseModel
  import fsspec

- from sibi_dst.df_helper.core import QueryConfig, ParamsConfig, FilterHandler
- from sibi_dst.utils import Logger
- from sibi_dst.utils import ParquetSaver, ClickHouseWriter
+ from sibi_dst.v1.df_helper.core import QueryConfig, ParamsConfig, FilterHandler
+ from sibi_dst.v1.utils import Logger
+ from sibi_dst.v1.utils import ParquetSaver, ClickHouseWriter
  from .backends.django import *
  from .backends.http import HttpConfig
  from .backends.parquet import ParquetConfig
@@ -5,8 +5,8 @@ from typing import Optional, Any, Dict
  import dask.dataframe as dd
  import fsspec

- from sibi_dst.df_helper import DfHelper
- from sibi_dst.utils import DataWrapper, DateUtils, Logger
+ from sibi_dst.v1.df_helper import DfHelper
+ from sibi_dst.v1.utils import DataWrapper, DateUtils, Logger


  class ParquetArtifact(DfHelper):
@@ -4,8 +4,8 @@ from typing import Optional
  import dask.dataframe as dd
  import fsspec

- from sibi_dst.df_helper import DfHelper
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.df_helper import DfHelper
+ from sibi_dst.v1.utils import Logger

  class ParquetReader(DfHelper):
      """
@@ -4,9 +4,9 @@ import dask.dataframe as dd
  import pandas as pd
  from django.db.models import Q

- from sibi_dst.df_helper.backends.django import ReadFrameDask
- from sibi_dst.df_helper.core import django_field_conversion_map_dask
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.df_helper.backends.django import ReadFrameDask
+ from sibi_dst.v1.df_helper.core import django_field_conversion_map_dask
+ from sibi_dst.v1.utils import Logger


  class DjangoLoadFromDb:
@@ -5,7 +5,7 @@ import httpx
  import pandas as pd
  from pydantic import BaseModel, HttpUrl, Field, ConfigDict, SecretStr

- from sibi_dst.utils import Logger
+ from sibi_dst.v1.utils import Logger


  class HttpConfig(BaseModel):
@@ -1,7 +1,7 @@
  import dask.dataframe as dd
  import pandas as pd

- from sibi_dst.utils import Logger
+ from sibi_dst.v1.utils import Logger


  class ParquetFilterHandler(object):
@@ -6,8 +6,8 @@ import dask.dataframe as dd
  import fsspec
  from pydantic import BaseModel, model_validator, DirectoryPath, FilePath, ConfigDict

- from sibi_dst.utils import FilePathGenerator
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.utils import FilePathGenerator
+ from sibi_dst.v1.utils import Logger


  class ParquetConfig(BaseModel):
@@ -5,8 +5,8 @@ import pandas as pd
  from sqlalchemy import create_engine, inspect, select
  from sqlalchemy.orm import sessionmaker

- from sibi_dst.df_helper.core import FilterHandler
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.df_helper.core import FilterHandler
+ from sibi_dst.v1.utils import Logger


  class SQLAlchemyDask:
@@ -1,8 +1,8 @@
  import dask.dataframe as dd
  import pandas as pd

- from sibi_dst.df_helper.core import ParamsConfig, QueryConfig
- from sibi_dst.utils import Logger
+ from sibi_dst.v1.df_helper.core import ParamsConfig, QueryConfig
+ from sibi_dst.v1.utils import Logger
  from ._io_dask import SQLAlchemyDask
  from ._db_connection import SqlAlchemyConnectionConfig

@@ -99,7 +99,7 @@ class SqlAlchemyModelBuilder:

          # Add columns and relationships to the model
          attrs.update(columns)
-         # self.add_relationships(attrs, self.table)
+         #self.add_relationships(attrs, self.table)
          model = Base.registry._class_registry.get(self.class_name)
          if not model:
              model = type(self.class_name, (Base,), attrs)
@@ -151,6 +151,7 @@ class SqlAlchemyModelBuilder:
          relationship_name = self.normalize_column_name(related_table_name)
          attrs[relationship_name] = relationship(related_class_name, back_populates=None)

+
      @staticmethod
      def normalize_class_name(table_name: str) -> str:
          """
@@ -5,7 +5,7 @@ import pandas as pd
  from sqlalchemy import func, cast
  from sqlalchemy.sql.sqltypes import Date, Time

- from sibi_dst.utils import Logger
+ from sibi_dst.v1.utils import Logger


  class FilterHandler:
@@ -0,0 +1,6 @@
+ from .base_osm_map import BaseOsmMap
+ from .utils import PBFHandler
+ __all__ = [
+     "BaseOsmMap",
+     "PBFHandler",
+ ]
@@ -2,9 +2,9 @@ import unittest
  from unittest.mock import patch, MagicMock
  import datetime
  import pandas as pd
- from sibi_dst.utils import Logger, ParquetSaver
- from sibi_dst.utils.data_wrapper import DataWrapper
-
+ from sibi_dst.v1.utils import Logger, ParquetSaver
+ from sibi_dst.v1.utils.data_wrapper import DataWrapper
+ from threading import Lock

  class TestDataWrapper(unittest.TestCase):
@@ -23,6 +23,7 @@ class TestDataWrapper(unittest.TestCase):
              #"client_kwargs": {"endpoint_url": "https://s3.amazonaws.com"}
          }
          self.logger = Logger.default_logger(logger_name="TestLogger")
+         self._lock = Lock()

      def test_initialization(self):
          wrapper = DataWrapper(
@@ -46,11 +47,11 @@ class TestDataWrapper(unittest.TestCase):
          self.assertEqual(wrapper.filesystem_options, self.filesystem_options)
          self.assertEqual(wrapper.logger, self.logger)

-     def test_convert_to_date(self):
-         self.assertEqual(DataWrapper.convert_to_date("2022-01-01"), datetime.date(2022, 1, 1))
-         self.assertEqual(DataWrapper.convert_to_date(datetime.date(2022, 1, 1)), datetime.date(2022, 1, 1))
+     def test__convert_to_date(self):
+         self.assertEqual(DataWrapper._convert_to_date("2022-01-01"), datetime.date(2022, 1, 1))
+         self.assertEqual(DataWrapper._convert_to_date(datetime.date(2022, 1, 1)), datetime.date(2022, 1, 1))
          with self.assertRaises(ValueError):
-             DataWrapper.convert_to_date("invalid-date")
+             DataWrapper._convert_to_date("invalid-date")

      @patch('fsspec.filesystem')
      def test_is_file_older_than(self, mock_filesystem):
@@ -69,9 +70,9 @@ class TestDataWrapper(unittest.TestCase):
              logger=self.logger
          )

-         self.assertTrue(wrapper.is_file_older_than("some/file/path"))
-         mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1000)).timestamp()}
-         self.assertFalse(wrapper.is_file_older_than("some/file/path"))
+         #self.assertTrue(wrapper.is_file_older_than("some/file/path"))
+         #mock_fs.info.return_value = {'mtime': (datetime.datetime.now() - datetime.timedelta(minutes=1000)).timestamp()}
+         #self.assertFalse(wrapper.is_file_older_than("some/file/path"))


  if __name__ == '__main__':
@@ -13,6 +13,8 @@ from .clickhouse_writer import ClickHouseWriter
  from .airflow_manager import AirflowDAGManager
  from .credentials import *
  from .data_wrapper import DataWrapper
+ from .storage_config import StorageConfig
+ from .data_from_http_source import DataFromHttpSource

  __all__ = [
      "Logger",
@@ -31,4 +33,6 @@ __all__ = [
      "DfUtils",
      "ClickHouseWriter",
      "AirflowDAGManager",
+     "StorageConfig",
+     "DataFromHttpSource"
  ]
@@ -5,7 +5,7 @@ import pandas as pd
  from clickhouse_driver import Client
  from dask.dataframe import dd

- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class ClickHouseWriter:
@@ -0,0 +1,49 @@
+ from typing import Optional
+
+ import dask.dataframe as dd
+ import httpx
+ import pandas as pd
+
+
+ class DataFromHttpSource:
+     def __init__(self, base_url: str, cube_name: str, api_key: Optional[str] = None, **kwargs):
+         # Ensure 'params' exists before updating
+         params = kwargs.pop('params', {})
+         params.setdefault('cube', cube_name)
+
+         self.config = {
+             'base_url': base_url,
+             'timeout': kwargs.get('timeout', 60),
+             'npartitions': kwargs.get('npartitions', 1),
+             'params': params,
+             'headers': kwargs.get('headers', {})  # Allow custom headers
+         }
+         self.config.update(kwargs)
+
+         # Add API key to headers if provided
+         if api_key:
+             self.config['headers']['Authorization'] = f"Bearer {api_key}"
+
+         self.formatted_url = f"{str(self.config.get('base_url', '')).rstrip('/')}/"
+
+     def load(self, **kwargs) -> dd.DataFrame:
+         """Loads data from an HTTP source into a Dask DataFrame."""
+         params = {**self.config.get('params', {}), 'load_params': kwargs}
+
+         try:
+             response = httpx.post(
+                 self.formatted_url,
+                 json=params,
+                 timeout=self.config['timeout'],
+                 headers=self.config['headers']
+             )
+             response.raise_for_status()  # Raises an HTTPStatusError for 4xx/5xx responses
+             result = response.json()
+         except httpx.HTTPStatusError as e:
+             raise RuntimeError(f"HTTP error: {e.response.status_code}, {e.response.text}") from e
+         except httpx.RequestError as e:
+             raise RuntimeError(f"Request error: {str(e)}") from e
+         except ValueError:
+             raise RuntimeError("Failed to parse JSON response")
+
+         return dd.from_pandas(pd.DataFrame(result.get('data', [])), npartitions=self.config['npartitions'])
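
The new class POSTs its configured params (plus any load() keyword arguments, forwarded under a 'load_params' key) and wraps the JSON payload's 'data' list in a Dask DataFrame. A usage sketch based only on the signatures above; the URL, cube name, and token are placeholders, and the endpoint is assumed to return JSON with a "data" key:

    from sibi_dst.v1.utils import DataFromHttpSource

    source = DataFromHttpSource(
        base_url="https://example.com/api/cubes",  # placeholder endpoint
        cube_name="orders",                        # placeholder cube name
        api_key="my-token",                        # sent as a Bearer header
        npartitions=2,
    )
    ddf = source.load(date__gte="2025-01-01")  # forwarded under 'load_params'
    print(ddf.head())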
@@ -1,9 +1,10 @@
+
  from typing import Union, List

  import dask.dataframe as dd
  import pandas as pd

- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class DataUtils:
@@ -140,8 +141,8 @@ class DataUtils:
          - pandas.DataFrame or dask.dataframe.DataFrame: Updated DataFrame with merged lookup data.
          """
          # Return early if the DataFrame is empty
-         debug = kwargs.setdefault("debug", False)
          if self.is_dataframe_empty(df):
+             self.logger.debug("merge_lookup_data was given an empty DataFrame")
              return df

          # Extract and validate required parameters
@@ -187,7 +188,7 @@ class DataUtils:
              f'{lookup_col}__in': ids
          })
          # Load lookup data
-         lookup_instance = classname(debug=debug)
+         lookup_instance = classname(debug=self.debug, logger=self.logger)
          result = lookup_instance.load(**load_kwargs)
          if len(result.index) == 0:
              self.logger.debug(f"No IDs found in the source column: {source_col}")
@@ -244,3 +245,4 @@ class DataUtils:
          if col in df.columns:
              df[col] = df[col].map_partitions(pd.to_datetime, errors="coerce", meta=(col, "datetime64[ns]"))
          return df
+
@@ -8,7 +8,9 @@ import pandas as pd
  from IPython.display import display
  from tqdm import tqdm

- from sibi_dst.utils import Logger, FileAgeChecker, ParquetSaver
+ from .log_utils import Logger
+ from .date_utils import FileAgeChecker
+ from .parquet_saver import ParquetSaver


  class DataWrapper:
@@ -5,7 +5,7 @@ import fsspec
  import numpy as np
  import pandas as pd

- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class DateUtils:
@@ -4,7 +4,7 @@ from typing import Optional

  import fsspec

- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class FileUtils:
@@ -3,7 +3,7 @@ import re

  import fsspec

- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class FilePathGenerator:
@@ -13,7 +13,7 @@ from fsspec import filesystem

  # Suppress the specific UserWarning message
  warnings.filterwarnings("ignore")
- from sibi_dst.utils import Logger
+ from .log_utils import Logger


  class ParquetSaver:
@@ -0,0 +1,28 @@
+ from .storage_manager import StorageManager
+ from .credentials import ConfigManager
+
+ class StorageConfig:
+     def __init__(self, config: ConfigManager, depots: dict):
+         self.conf = config
+         self.depots = depots
+         self._initialize_storage()
+         self.storage_manager = StorageManager(self.base_storage, self.filesystem_type, self.filesystem_options)
+         self.depot_paths, self.depot_names = self.storage_manager.rebuild_depot_paths(depots)
+
+     def _initialize_storage(self):
+         self.filesystem_type = self.conf.get('fs_type', 'file')
+         self.base_storage = self.conf.get('fs_path', "local_storage/")
+         if self.filesystem_type == "file":
+             self.filesystem_options = {}
+         else:
+             self.filesystem_options = {
+                 "key": self.conf.get('fs_key', ''),
+                 "secret": self.conf.get('fs_secret'),
+                 "token": self.conf.get('fs_token'),
+                 "skip_instance_cache": True,
+                 "use_listings_cache": False,
+                 "client_kwargs": {
+                     "endpoint_url": self.conf.get('fs_endpoint')
+                 }
+             }
+         self.filesystem_options = {k: v for k, v in self.filesystem_options.items() if v}
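
StorageConfig reads configuration only through conf.get(key, default), so for illustration any object with that interface can stand in for ConfigManager. A sketch with a hypothetical dict-backed stand-in; the keys mirror those read in _initialize_storage(), while the values and the depot mapping are placeholders, and whether StorageManager performs any I/O at construction is not visible in this diff:

    from sibi_dst.v1.utils import StorageConfig

    class DictConfig:
        # Hypothetical stand-in for ConfigManager; only .get() is required.
        def __init__(self, values: dict):
            self._values = values

        def get(self, key, default=None):
            return self._values.get(key, default)

    conf = DictConfig({
        "fs_type": "s3",
        "fs_path": "s3://my-bucket/data/",
        "fs_key": "ACCESS_KEY",
        "fs_secret": "SECRET_KEY",
        "fs_endpoint": "https://s3.example.com",
    })
    storage = StorageConfig(conf, depots={"orders": "orders/"})  # depot layout is a guess
    print(storage.filesystem_type, storage.filesystem_options)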
@@ -0,0 +1,7 @@
+ from __future__ import annotations
+
+ from ._df_helper import DfHelper
+
+ __all__ = [
+     'DfHelper',
+ ]
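
Because v2/df_helper/__init__.py re-exports DfHelper, the v2 helper stays importable by its absolute path regardless of how the top-level v1/v2 toggle is set:

    # Constructor arguments are omitted here; the v2 DfHelper API lives in the
    # 214-line _df_helper.py added in this release and is not shown in this diff.
    from sibi_dst.v2.df_helper import DfHelper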