sibi-dst 0.3.53__tar.gz → 0.3.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. sibi_dst-0.3.55/PKG-INFO +90 -0
  2. sibi_dst-0.3.55/README.md +36 -0
  3. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/pyproject.toml +1 -1
  4. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +10 -2
  5. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/data_wrapper.py +4 -2
  6. sibi_dst-0.3.53/PKG-INFO +0 -199
  7. sibi_dst-0.3.53/README.md +0 -145
  8. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/__init__.py +0 -0
  9. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/__init__.py +0 -0
  10. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +0 -0
  11. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/_df_helper.py +0 -0
  12. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
  13. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  14. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/__init__.py +0 -0
  15. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/django/__init__.py +0 -0
  16. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/django/_db_connection.py +0 -0
  17. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/django/_io_dask.py +0 -0
  18. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/django/_load_from_db.py +0 -0
  19. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/django/_sql_model_builder.py +0 -0
  20. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  21. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  22. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  23. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/parquet/_filter_handler.py +0 -0
  24. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  25. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  26. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/_filter_handler.py +0 -0
  27. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  28. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  29. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  30. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/core/__init__.py +0 -0
  31. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/core/_defaults.py +0 -0
  32. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  33. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/core/_params_config.py +0 -0
  34. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/core/_query_config.py +0 -0
  35. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/df_helper/data_cleaner.py +0 -0
  36. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/geopy_helper/__init__.py +0 -0
  37. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  38. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/geopy_helper/utils.py +0 -0
  39. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/__init__.py +0 -0
  40. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  41. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  42. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  43. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  44. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/osmnx_helper/utils.py +0 -0
  45. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/tests/__init__.py +0 -0
  46. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  47. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/__init__.py +0 -0
  48. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/airflow_manager.py +0 -0
  49. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/clickhouse_writer.py +0 -0
  50. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/credentials.py +0 -0
  51. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/data_from_http_source.py +0 -0
  52. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/data_utils.py +0 -0
  53. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/date_utils.py +0 -0
  54. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/df_utils.py +0 -0
  55. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/file_utils.py +0 -0
  56. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/filepath_generator.py +0 -0
  57. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/log_utils.py +0 -0
  58. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/parquet_saver.py +0 -0
  59. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/phone_formatter.py +0 -0
  60. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/storage_config.py +0 -0
  61. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/storage_manager.py +0 -0
  62. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/utils/webdav_client.py +0 -0
  63. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/__init__.py +0 -0
  64. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/__init__.py +0 -0
  65. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  66. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  67. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  68. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  69. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  70. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  71. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  72. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  73. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  74. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  75. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  76. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  77. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  78. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  79. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  80. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  81. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/utils/__init__.py +0 -0
  82. {sibi_dst-0.3.53 → sibi_dst-0.3.55}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.1
2
+ Name: sibi-dst
3
+ Version: 0.3.55
4
+ Summary: Data Science Toolkit
5
+ Author: Luis Valverde
6
+ Author-email: lvalverdeb@gmail.com
7
+ Requires-Python: >=3.11,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Provides-Extra: complete
13
+ Provides-Extra: df-helper
14
+ Provides-Extra: geospatial
15
+ Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
16
+ Requires-Dist: chardet (>=5.2.0,<6.0.0)
17
+ Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
18
+ Requires-Dist: clickhouse-connect (>=0.8.7,<0.9.0)
19
+ Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
20
+ Requires-Dist: dask[complete] (>=2025.3.0,<2026.0.0)
21
+ Requires-Dist: django (>=5.1.4,<6.0.0) ; extra == "df-helper" or extra == "complete"
22
+ Requires-Dist: djangorestframework (>=3.15.2,<4.0.0) ; extra == "df-helper" or extra == "complete"
23
+ Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "geospatial" or extra == "complete"
24
+ Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "geospatial" or extra == "complete"
25
+ Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
26
+ Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
27
+ Requires-Dist: httpx (>=0.27.2,<0.28.0)
28
+ Requires-Dist: ipython (>=8.29.0,<9.0.0)
29
+ Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
30
+ Requires-Dist: mysqlclient (>=2.2.6,<3.0.0) ; extra == "df-helper" or extra == "complete"
31
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
32
+ Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
33
+ Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
34
+ Requires-Dist: pandas (>=2.2.3,<3.0.0)
35
+ Requires-Dist: paramiko (>=3.5.0,<4.0.0)
36
+ Requires-Dist: psutil (>=6.1.0,<7.0.0)
37
+ Requires-Dist: psycopg2 (>=2.9.10,<3.0.0) ; extra == "df-helper" or extra == "complete"
38
+ Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
39
+ Requires-Dist: pydantic (>=2.9.2,<3.0.0)
40
+ Requires-Dist: pymysql (>=1.1.1,<2.0.0) ; extra == "df-helper" or extra == "complete"
41
+ Requires-Dist: pytest (>=8.3.3,<9.0.0)
42
+ Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
43
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
44
+ Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
45
+ Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "df-helper" or extra == "complete"
46
+ Requires-Dist: sqlmodel (>=0.0.22,<0.0.23) ; extra == "df-helper" or extra == "complete"
47
+ Requires-Dist: tornado (>=6.4.1,<7.0.0)
48
+ Requires-Dist: tqdm (>=4.67.0,<5.0.0)
49
+ Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
50
+ Requires-Dist: uvicorn-worker (>=0.3.0,<0.4.0)
51
+ Requires-Dist: webdav4[fsspec] (>=0.10.0,<0.11.0)
52
+ Description-Content-Type: text/markdown
53
+
54
+ # SIBI-DST
55
+
56
+ Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMap, NetworkX, SQLAlchemy, GeoPandas, and Folium.
57
+
58
+ ## Example Use Cases
59
+
60
+ 1. **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs**.
61
+ 2. **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
62
+ 3. **Flexible Data Sharing** with client applications by writing to **Data Warehouses in ClickHouse, local filesystems, and cloud storage platforms** such as **S3**.
63
+ 4. **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`)** for high-performance data exchange.
64
+ 5. **Geospatial Analysis** – Utilize **OpenStreetMap** and **GeoPandas** for advanced geospatial data processing and visualization.
65
+
66
+ ## Supported Technologies
67
+
68
+ - **Data Processing**: Pandas, Dask
69
+ - **Databases & Storage**: SQLAlchemy, Parquet, S3, ClickHouse
70
+ - **Mapping & Geospatial Analysis**: OpenStreetMap, OSMnx, Geopy
71
+ - **API Development**: Django REST Framework, FastAPI
72
+
73
+ ## Installation
74
+
75
+ ```bash
76
+ # with pip
77
+
78
+ pip install "sibi-dst[complete]" # Install all dependencies
79
+ pip install "sibi-dst[df_helper]" # Install only df_helper dependencies
80
+ pip install "sibi-dst[geospatial]" # Install only geospatial dependencies
81
+
82
+ # with poetry
83
+
84
+ poetry add "sibi-dst[complete]" # Install all dependencies
85
+ poetry add "sibi-dst[df_helper]" # Install only df_helper dependencies
86
+ poetry add "sibi-dst[geospatial]" # Install only geospatial dependencies
87
+
88
+
89
+ ```
90
+
@@ -0,0 +1,36 @@
1
+ # SIBI-DST
2
+
3
+ Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMap, NetworkX, SQLAlchemy, GeoPandas, and Folium.
4
+
5
+ ## Example Use Cases
6
+
7
+ 1. **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs**.
8
+ 2. **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
9
+ 3. **Flexible Data Sharing** with client applications by writing to **Data Warehouses in ClickHouse, local filesystems, and cloud storage platforms** such as **S3**.
10
+ 4. **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`)** for high-performance data exchange.
11
+ 5. **Geospatial Analysis** – Utilize **OpenStreetMap** and **GeoPandas** for advanced geospatial data processing and visualization.
12
+
13
+ ## Supported Technologies
14
+
15
+ - **Data Processing**: Pandas, Dask
16
+ - **Databases & Storage**: SQLAlchemy, Parquet, S3, ClickHouse
17
+ - **Mapping & Geospatial Analysis**: OpenStreetMap, OSMnx, Geopy
18
+ - **API Development**: Django REST Framework, FastAPI
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ # with pip
24
+
25
+ pip install "sibi-dst[complete]" # Install all dependencies
26
+ pip install "sibi-dst[df_helper]" # Install only df_helper dependencies
27
+ pip install "sibi-dst[geospatial]" # Install only geospatial dependencies
28
+
29
+ # with poetry
30
+
31
+ poetry add "sibi-dst[complete]" # Install all dependencies
32
+ poetry add "sibi-dst[df_helper]" # Install only df_helper dependencies
33
+ poetry add "sibi-dst[geospatial]" # Install only geospatial dependencies
34
+
35
+
36
+ ```
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sibi-dst"
3
- version = "0.3.53"
3
+ version = "0.3.55"
4
4
  description = "Data Science Toolkit"
5
5
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
6
6
  readme = "README.md"
@@ -29,7 +29,11 @@ class SqlAlchemyConnectionConfig(BaseModel):
29
29
  connection_url: str
30
30
  table: Optional[str] = None
31
31
  model: Any = None
32
- engine: Optional[Any] = None # Save engine to reuse it
32
+ engine: Optional[Any] = None
33
+ pool_size: int = 10
34
+ max_overflow: int = 5
35
+ pool_timeout: int = 30
36
+ pool_recycle:int = 3600
33
37
 
34
38
  @model_validator(mode="after")
35
39
  def validate_and_initialize(self):
@@ -41,7 +45,11 @@ class SqlAlchemyConnectionConfig(BaseModel):
41
45
  raise ValueError("`connection_url` must be provided.")
42
46
 
43
47
  # Initialize the engine
44
- self.engine = create_engine(self.connection_url)
48
+ self.engine = create_engine(self.connection_url,
49
+ pool_size=self.pool_size,
50
+ max_overflow=self.max_overflow,
51
+ pool_timeout=self.pool_timeout,
52
+ pool_recycle=self.pool_recycle)
45
53
 
46
54
  # Validate the connection
47
55
  self.validate_connection()
@@ -78,6 +78,7 @@ class DataWrapper:
78
78
  self._lock = Lock()
79
79
  self.processed_dates = []
80
80
  self.age_checker = FileAgeChecker(logger=self.logger)
81
+ self.data_class_instance = self.dataclass(**self.class_params) or None
81
82
 
82
83
  def _init_filesystem(self) -> fsspec.AbstractFileSystem:
83
84
  with self._lock:
@@ -265,8 +266,9 @@ class DataWrapper:
265
266
  try:
266
267
  self.logger.debug(f"Class Params: {self.class_params}")
267
268
  self.logger.debug(f"Load Params: {self.load_params}")
268
- data = self.dataclass(**self.class_params)
269
- df = data.load_period(
269
+
270
+ #data = self.dataclass(**self.class_params)
271
+ df = self.data_class_instance.load_period(
270
272
  dt_field=self.date_field,
271
273
  start=date,
272
274
  end=date,
sibi_dst-0.3.53/PKG-INFO DELETED
@@ -1,199 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: sibi-dst
3
- Version: 0.3.53
4
- Summary: Data Science Toolkit
5
- Author: Luis Valverde
6
- Author-email: lvalverdeb@gmail.com
7
- Requires-Python: >=3.11,<4.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.11
10
- Classifier: Programming Language :: Python :: 3.12
11
- Classifier: Programming Language :: Python :: 3.13
12
- Provides-Extra: complete
13
- Provides-Extra: df-helper
14
- Provides-Extra: geospatial
15
- Requires-Dist: apache-airflow-client (>=2.10.0,<3.0.0)
16
- Requires-Dist: chardet (>=5.2.0,<6.0.0)
17
- Requires-Dist: charset-normalizer (>=3.4.0,<4.0.0)
18
- Requires-Dist: clickhouse-connect (>=0.8.7,<0.9.0)
19
- Requires-Dist: clickhouse-driver (>=0.2.9,<0.3.0)
20
- Requires-Dist: dask[complete] (>=2025.3.0,<2026.0.0)
21
- Requires-Dist: django (>=5.1.4,<6.0.0) ; extra == "df-helper" or extra == "complete"
22
- Requires-Dist: djangorestframework (>=3.15.2,<4.0.0) ; extra == "df-helper" or extra == "complete"
23
- Requires-Dist: folium (>=0.19.4,<0.20.0) ; extra == "geospatial" or extra == "complete"
24
- Requires-Dist: geopandas (>=1.0.1,<2.0.0) ; extra == "geospatial" or extra == "complete"
25
- Requires-Dist: geopy (>=2.4.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
26
- Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
27
- Requires-Dist: httpx (>=0.27.2,<0.28.0)
28
- Requires-Dist: ipython (>=8.29.0,<9.0.0)
29
- Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
30
- Requires-Dist: mysqlclient (>=2.2.6,<3.0.0) ; extra == "df-helper" or extra == "complete"
31
- Requires-Dist: nltk (>=3.9.1,<4.0.0)
32
- Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
33
- Requires-Dist: osmnx (>=2.0.1,<3.0.0) ; extra == "geospatial" or extra == "complete"
34
- Requires-Dist: pandas (>=2.2.3,<3.0.0)
35
- Requires-Dist: paramiko (>=3.5.0,<4.0.0)
36
- Requires-Dist: psutil (>=6.1.0,<7.0.0)
37
- Requires-Dist: psycopg2 (>=2.9.10,<3.0.0) ; extra == "df-helper" or extra == "complete"
38
- Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
39
- Requires-Dist: pydantic (>=2.9.2,<3.0.0)
40
- Requires-Dist: pymysql (>=1.1.1,<2.0.0) ; extra == "df-helper" or extra == "complete"
41
- Requires-Dist: pytest (>=8.3.3,<9.0.0)
42
- Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
43
- Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
44
- Requires-Dist: s3fs (>=2024.12.0,<2025.0.0)
45
- Requires-Dist: sqlalchemy (>=2.0.36,<3.0.0) ; extra == "df-helper" or extra == "complete"
46
- Requires-Dist: sqlmodel (>=0.0.22,<0.0.23) ; extra == "df-helper" or extra == "complete"
47
- Requires-Dist: tornado (>=6.4.1,<7.0.0)
48
- Requires-Dist: tqdm (>=4.67.0,<5.0.0)
49
- Requires-Dist: uvicorn (>=0.34.0,<0.35.0)
50
- Requires-Dist: uvicorn-worker (>=0.3.0,<0.4.0)
51
- Requires-Dist: webdav4[fsspec] (>=0.10.0,<0.11.0)
52
- Description-Content-Type: text/markdown
53
-
54
- # sibi-dst
55
-
56
- Data Science Toolkit
57
- ---------------------
58
- Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMaps, Scikit-Learn, XGBOOST, Django ORM, SQLAlchemy, DjangoRestFramework, FastAPI
59
-
60
- Major Functionality
61
- --------------------
62
- 1) **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs (`JSON API REST`)**.
63
- 2) **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
64
- 3) **Flexible Data Sharing** with client applications by writing to **Data Warehouses, local filesystems, and cloud storage platforms** such as **Amazon S3, Google Cloud Storage (GCS), and Azure Blob Storage**.
65
- 4) **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`) and gRPC** for high-performance data exchange.
66
-
67
- Supported Technologies
68
- --------------------
69
- - **Data Processing**: Pandas, Dask
70
- - **Machine Learning**: Scikit-Learn, XGBoost
71
- - **Databases & Storage**: SQLAlchemy, Django ORM, Parquet, Amazon S3, GCS, Azure Blob Storage
72
- - **Mapping & Geospatial Analysis**: OpenStreetMaps, OSMnx, Geopy
73
- - **API Development**: Django REST Framework, gRPC, FastAPI
74
-
75
- Installation
76
- ---------------------
77
- ```bash
78
- pip install sibi-dst
79
- ```
80
-
81
- Usage
82
- ---------------------
83
- ### Loading Data from SQLAlchemy
84
- ```python
85
- from sibi_dst.df_helper import DfHelper
86
- from conf.transforms.fields.crm import customer_fields
87
- from conf.credentials import replica_db_conf
88
- from conf.storage import get_fs_instance
89
-
90
- config = {
91
- 'backend': 'sqlalchemy',
92
- 'connection_url': replica_db_conf.get('db_url'),
93
- 'table': 'crm_clientes_archivo',
94
- 'field_map': customer_fields,
95
- 'legacy_filters': True,
96
- 'fs': get_fs_instance()
97
- }
98
-
99
- df_helper = DfHelper(**config)
100
- result = df_helper.load(id__gte=1)
101
- ```
102
-
103
- ### Saving Data to ClickHouse
104
- ```python
105
- clk_creds = {
106
- 'host': '192.168.3.171',
107
- 'port': 18123,
108
- 'user': 'username',
109
- 'database': 'xxxxxxx',
110
- 'table': 'customer_file',
111
- 'order_by': 'id'
112
- }
113
-
114
- df_helper.save_to_clickhouse(**clk_creds)
115
- ```
116
-
117
- ### Saving Data to Parquet
118
- ```python
119
- df_helper.save_to_parquet(
120
- parquet_filename='filename.parquet',
121
- parquet_storage_path='/path/to/my/files/'
122
- )
123
- ```
124
-
125
- Backends Supported
126
- ---------------------
127
- | Backend | Description |
128
- |--------------|-------------|
129
- | `sqlalchemy` | Load data from SQL databases using SQLAlchemy. |
130
- | `django_db` | Load data from Django ORM models. |
131
- | `parquet` | Load and save data from Parquet files. |
132
- | `http` | Fetch data from HTTP endpoints. |
133
- | `osmnx` | Geospatial mapping and routing using OpenStreetMap. |
134
- | `geopy` | Geolocation services for address lookup and reverse geocoding. |
135
-
136
- Geospatial Utilities
137
- ---------------------
138
- ### **OSMnx Helper (`sibi_dst.osmnx_helper`)**
139
-
140
- Provides **OpenStreetMap-based mapping utilities** using `osmnx` and `folium`.
141
-
142
- #### 🔹 Key Features
143
- - **BaseOsmMap**: Manages interactive Folium-based maps.
144
- - **PBFHandler**: Loads `.pbf` (Protocolbuffer Binary Format) files for network graphs.
145
-
146
- #### Example: Generating an OSM Map
147
- ```python
148
- from sibi_dst.osmnx_helper import BaseOsmMap
149
- osm_map = BaseOsmMap(osmnx_graph=my_graph, df=my_dataframe)
150
- osm_map.generate_map()
151
- ```
152
-
153
- ### **Geopy Helper (`sibi_dst.geopy_helper`)**
154
-
155
- Provides **geolocation services** using `Geopy` for forward and reverse geocoding.
156
-
157
- #### 🔹 Key Features
158
- - **GeolocationService**: Interfaces with `Nominatim` API for geocoding.
159
- - **Error Handling**: Manages `GeocoderTimedOut` and `GeocoderServiceError` gracefully.
160
- - **Singleton Geolocator**: Efficiently reuses a global geolocator instance.
161
-
162
- #### Example: Reverse Geocoding
163
- ```python
164
- from sibi_dst.geopy_helper import GeolocationService
165
- gs = GeolocationService()
166
- location = gs.reverse((9.935,-84.091))
167
- print(location)
168
- ```
169
-
170
- Advanced Features
171
- ---------------------
172
- ### Querying with Custom Filters
173
- Filters can be applied dynamically using Django-style syntax:
174
- ```python
175
- result = df_helper.load(date__gte='2023-01-01', status='active')
176
- ```
177
-
178
- ### Parallel Processing
179
- Leverage Dask for parallel execution:
180
- ```python
181
- result = df_helper.load_parallel(status='active')
182
- ```
183
-
184
- Testing
185
- ---------------------
186
- To run unit tests, use:
187
- ```bash
188
- pytest tests/
189
- ```
190
-
191
- Contributing
192
- ---------------------
193
- Contributions are welcome! Please submit pull requests or open issues for discussions.
194
-
195
- License
196
- ---------------------
197
- sibi-dst is licensed under the MIT License.
198
-
199
-
sibi_dst-0.3.53/README.md DELETED
@@ -1,145 +0,0 @@
1
- # sibi-dst
2
-
3
- Data Science Toolkit
4
- ---------------------
5
- Data Science Toolkit built with Python, Pandas, Dask, OpenStreetMaps, Scikit-Learn, XGBOOST, Django ORM, SQLAlchemy, DjangoRestFramework, FastAPI
6
-
7
- Major Functionality
8
- --------------------
9
- 1) **Build DataCubes, DataSets, and DataObjects** from diverse data sources, including **relational databases, Parquet files, Excel (`.xlsx`), delimited tables (`.csv`, `.tsv`), JSON, and RESTful APIs (`JSON API REST`)**.
10
- 2) **Comprehensive DataFrame Management** utilities for efficient data handling, transformation, and optimization using **Pandas** and **Dask**.
11
- 3) **Flexible Data Sharing** with client applications by writing to **Data Warehouses, local filesystems, and cloud storage platforms** such as **Amazon S3, Google Cloud Storage (GCS), and Azure Blob Storage**.
12
- 4) **Microservices for Data Access** – Build scalable **API-driven services** using **RESTful APIs (`Django REST Framework`, `FastAPI`) and gRPC** for high-performance data exchange.
13
-
14
- Supported Technologies
15
- --------------------
16
- - **Data Processing**: Pandas, Dask
17
- - **Machine Learning**: Scikit-Learn, XGBoost
18
- - **Databases & Storage**: SQLAlchemy, Django ORM, Parquet, Amazon S3, GCS, Azure Blob Storage
19
- - **Mapping & Geospatial Analysis**: OpenStreetMaps, OSMnx, Geopy
20
- - **API Development**: Django REST Framework, gRPC, FastAPI
21
-
22
- Installation
23
- ---------------------
24
- ```bash
25
- pip install sibi-dst
26
- ```
27
-
28
- Usage
29
- ---------------------
30
- ### Loading Data from SQLAlchemy
31
- ```python
32
- from sibi_dst.df_helper import DfHelper
33
- from conf.transforms.fields.crm import customer_fields
34
- from conf.credentials import replica_db_conf
35
- from conf.storage import get_fs_instance
36
-
37
- config = {
38
- 'backend': 'sqlalchemy',
39
- 'connection_url': replica_db_conf.get('db_url'),
40
- 'table': 'crm_clientes_archivo',
41
- 'field_map': customer_fields,
42
- 'legacy_filters': True,
43
- 'fs': get_fs_instance()
44
- }
45
-
46
- df_helper = DfHelper(**config)
47
- result = df_helper.load(id__gte=1)
48
- ```
49
-
50
- ### Saving Data to ClickHouse
51
- ```python
52
- clk_creds = {
53
- 'host': '192.168.3.171',
54
- 'port': 18123,
55
- 'user': 'username',
56
- 'database': 'xxxxxxx',
57
- 'table': 'customer_file',
58
- 'order_by': 'id'
59
- }
60
-
61
- df_helper.save_to_clickhouse(**clk_creds)
62
- ```
63
-
64
- ### Saving Data to Parquet
65
- ```python
66
- df_helper.save_to_parquet(
67
- parquet_filename='filename.parquet',
68
- parquet_storage_path='/path/to/my/files/'
69
- )
70
- ```
71
-
72
- Backends Supported
73
- ---------------------
74
- | Backend | Description |
75
- |--------------|-------------|
76
- | `sqlalchemy` | Load data from SQL databases using SQLAlchemy. |
77
- | `django_db` | Load data from Django ORM models. |
78
- | `parquet` | Load and save data from Parquet files. |
79
- | `http` | Fetch data from HTTP endpoints. |
80
- | `osmnx` | Geospatial mapping and routing using OpenStreetMap. |
81
- | `geopy` | Geolocation services for address lookup and reverse geocoding. |
82
-
83
- Geospatial Utilities
84
- ---------------------
85
- ### **OSMnx Helper (`sibi_dst.osmnx_helper`)**
86
-
87
- Provides **OpenStreetMap-based mapping utilities** using `osmnx` and `folium`.
88
-
89
- #### 🔹 Key Features
90
- - **BaseOsmMap**: Manages interactive Folium-based maps.
91
- - **PBFHandler**: Loads `.pbf` (Protocolbuffer Binary Format) files for network graphs.
92
-
93
- #### Example: Generating an OSM Map
94
- ```python
95
- from sibi_dst.osmnx_helper import BaseOsmMap
96
- osm_map = BaseOsmMap(osmnx_graph=my_graph, df=my_dataframe)
97
- osm_map.generate_map()
98
- ```
99
-
100
- ### **Geopy Helper (`sibi_dst.geopy_helper`)**
101
-
102
- Provides **geolocation services** using `Geopy` for forward and reverse geocoding.
103
-
104
- #### 🔹 Key Features
105
- - **GeolocationService**: Interfaces with `Nominatim` API for geocoding.
106
- - **Error Handling**: Manages `GeocoderTimedOut` and `GeocoderServiceError` gracefully.
107
- - **Singleton Geolocator**: Efficiently reuses a global geolocator instance.
108
-
109
- #### Example: Reverse Geocoding
110
- ```python
111
- from sibi_dst.geopy_helper import GeolocationService
112
- gs = GeolocationService()
113
- location = gs.reverse((9.935,-84.091))
114
- print(location)
115
- ```
116
-
117
- Advanced Features
118
- ---------------------
119
- ### Querying with Custom Filters
120
- Filters can be applied dynamically using Django-style syntax:
121
- ```python
122
- result = df_helper.load(date__gte='2023-01-01', status='active')
123
- ```
124
-
125
- ### Parallel Processing
126
- Leverage Dask for parallel execution:
127
- ```python
128
- result = df_helper.load_parallel(status='active')
129
- ```
130
-
131
- Testing
132
- ---------------------
133
- To run unit tests, use:
134
- ```bash
135
- pytest tests/
136
- ```
137
-
138
- Contributing
139
- ---------------------
140
- Contributions are welcome! Please submit pull requests or open issues for discussions.
141
-
142
- License
143
- ---------------------
144
- sibi-dst is licensed under the MIT License.
145
-