giga-spatial 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.5.dist-info}/METADATA +2 -1
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.5.dist-info}/RECORD +24 -21
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +29 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/database.py +316 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +228 -43
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +221 -64
- gigaspatial/generators/zonal/geometry.py +74 -31
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/boundaries.py +43 -18
- gigaspatial/handlers/ghsl.py +79 -14
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +87 -25
- gigaspatial/processing/tif_processor.py +220 -45
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.5.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.5.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.5.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: giga-spatial
|
3
|
-
Version: 0.6.4
|
3
|
+
Version: 0.6.5
|
4
4
|
Summary: A package for spatial data download & processing
|
5
5
|
Home-page: https://github.com/unicef/giga-spatial
|
6
6
|
Author: Utku Can Ozturk
|
@@ -31,6 +31,7 @@ Requires-Dist: OWSLib==0.32.1
|
|
31
31
|
Requires-Dist: pydantic-settings>=2.7.1
|
32
32
|
Requires-Dist: hdx-python-api>=6.3.8
|
33
33
|
Requires-Dist: bs4==0.0.2
|
34
|
+
Requires-Dist: sqlalchemy-trino==0.5.0
|
34
35
|
Dynamic: author
|
35
36
|
Dynamic: author-email
|
36
37
|
Dynamic: classifier
|
@@ -1,28 +1,30 @@
|
|
1
|
-
giga_spatial-0.6.
|
2
|
-
gigaspatial/__init__.py,sha256=
|
3
|
-
gigaspatial/config.py,sha256=
|
1
|
+
giga_spatial-0.6.5.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
2
|
+
gigaspatial/__init__.py,sha256=KDgkBrBsBSUzbLgrOZ89YsNN06fU4j5bmcuEwo6q5pg,22
|
3
|
+
gigaspatial/config.py,sha256=pLbxGc08OHT2IfTBzZVuIJTPR2vvg3KTFfvciOtRswk,9304
|
4
4
|
gigaspatial/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
-
gigaspatial/core/io/__init__.py,sha256=
|
5
|
+
gigaspatial/core/io/__init__.py,sha256=stlpgEeHf5KIb2ZW8yEbdJK5iq6n_wX4DPmKyR9PK-w,317
|
6
6
|
gigaspatial/core/io/adls_data_store.py,sha256=Zv-D_8d_2h57HnCUTJb0JWWjXqR_0XH4F8Nu_UFZK9E,11975
|
7
7
|
gigaspatial/core/io/data_api.py,sha256=3HMstau3zH3JPRUW0t83DZt74N39bt-jsfAyrUUFMoc,3944
|
8
8
|
gigaspatial/core/io/data_store.py,sha256=mi8fy78Dtwj4dpKkyDM6kTlna1lfCQ5ro2hUAOFr83A,3223
|
9
|
+
gigaspatial/core/io/database.py,sha256=zoOQ1j6bNarngQL8vS8adrWYi9P1NRUytZEzHd08F30,11303
|
9
10
|
gigaspatial/core/io/local_data_store.py,sha256=hcu7DNYa3AL6sEPMqguzxWal_bnP7CIpbwpoiyf5TCw,2933
|
10
11
|
gigaspatial/core/io/readers.py,sha256=gqFKGRCsAP_EBXipqGtT8MEV-x0u6SrCqaSiOC5YPTA,9284
|
11
12
|
gigaspatial/core/io/writers.py,sha256=asb56ZHQEWO2rdilIq7QywDRk8yfebecWv1KwzUpaXI,4367
|
12
13
|
gigaspatial/core/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
14
|
gigaspatial/core/schemas/entity.py,sha256=QAhEW0-JgdWh9pjKGbB5ArvqtVK85ayYZJPgjdb-IKw,8590
|
14
|
-
gigaspatial/generators/__init__.py,sha256=
|
15
|
-
gigaspatial/generators/poi.py,sha256=
|
16
|
-
gigaspatial/generators/zonal/__init__.py,sha256=
|
17
|
-
gigaspatial/generators/zonal/
|
18
|
-
gigaspatial/generators/zonal/
|
19
|
-
gigaspatial/generators/zonal/
|
20
|
-
gigaspatial/
|
21
|
-
gigaspatial/grid/
|
15
|
+
gigaspatial/generators/__init__.py,sha256=cKbMTW7Eh-oDPtM9OfGP14_ekVwc2_7Je7n_pr_anig,223
|
16
|
+
gigaspatial/generators/poi.py,sha256=cPMH1-0V_P7orGpIcsarZ6rWZiGOfcSxQNxvQJgAuM8,33628
|
17
|
+
gigaspatial/generators/zonal/__init__.py,sha256=egnpvGVeIOS2Zg516AT84tJnIqS4owxmMLLmBQJmK7Y,301
|
18
|
+
gigaspatial/generators/zonal/admin.py,sha256=rgOyQX3f_g9qnXqrf-NkR2GEdwOqjNuPNe1H7AUVsfg,3698
|
19
|
+
gigaspatial/generators/zonal/base.py,sha256=DzIXcOMnUhlCZQ8AxCyGvVuTY8oTCw66xhyWn7tSO-U,22862
|
20
|
+
gigaspatial/generators/zonal/geometry.py,sha256=P1vKJNvYpBQj_g-B-OzeJy4KCt8f_SI7h3H4WKTWXiU,19605
|
21
|
+
gigaspatial/generators/zonal/mercator.py,sha256=fA02j30PWB5BVjrbNGCMjiOw-ds182yK7R27z8mWFug,5291
|
22
|
+
gigaspatial/grid/__init__.py,sha256=ypSSyZ4fYtMNc4IG7chSD7NkUfS2bv9KWRsKR1D9pDI,80
|
23
|
+
gigaspatial/grid/mercator_tiles.py,sha256=mAYZDBJ1U0l3z9i4rh5OqiPhOGWcBYzUOI1cvQG_Ff4,11240
|
22
24
|
gigaspatial/handlers/__init__.py,sha256=R2rugXR5kF4lLkSO1fjpVDYK_jWdD8U2NbXbW71Ezv8,1523
|
23
25
|
gigaspatial/handlers/base.py,sha256=rL94c3wDjsqzLp4na8FfYXW6tNjVGX6v4M-Ce4LrAro,26413
|
24
|
-
gigaspatial/handlers/boundaries.py,sha256=
|
25
|
-
gigaspatial/handlers/ghsl.py,sha256=
|
26
|
+
gigaspatial/handlers/boundaries.py,sha256=TfqjtLE4VdJlUt7APLX16hzGuR1EH2MuRBn8u6R0k9A,18705
|
27
|
+
gigaspatial/handlers/ghsl.py,sha256=NFjUSQrv-YrlfnX2erzd7r88PZhhyezLg3HzIikZwaM,30170
|
26
28
|
gigaspatial/handlers/giga.py,sha256=F5ZfcE37a24X-c6Xhyt72C9eZZbyN_gV7w_InxKFMQQ,28348
|
27
29
|
gigaspatial/handlers/google_open_buildings.py,sha256=Liqk7qJhDtB4Ia4uhBe44LFcf-XVKBjRfj-pWlE5erY,16594
|
28
30
|
gigaspatial/handlers/hdx.py,sha256=LTEs_xZF1yPhD8dAdZ_YN8Vcan7iB5_tZ8NjF_ip6u0,18001
|
@@ -33,15 +35,16 @@ gigaspatial/handlers/ookla_speedtest.py,sha256=EcvSAxJZ9GPfzYnT_C85Qgy2ecc9ndf70
|
|
33
35
|
gigaspatial/handlers/opencellid.py,sha256=KuJqd-5-RO5ZzyDaBSrTgCK2ib5N_m3RUcPlX5heWwI,10683
|
34
36
|
gigaspatial/handlers/osm.py,sha256=sLNMkOVh1v50jrWw7Z0-HILY5QTQjgKCHCeAfXj5jA8,14084
|
35
37
|
gigaspatial/handlers/overture.py,sha256=lKeNw00v5Qia7LdWORuYihnlKEqxE9m38tdeRrvag9k,4218
|
36
|
-
gigaspatial/handlers/rwi.py,sha256=
|
38
|
+
gigaspatial/handlers/rwi.py,sha256=eAaplDysVeBhghJusYUKZYbKL5hW-klWvi8pWhILQkY,4962
|
37
39
|
gigaspatial/handlers/unicef_georepo.py,sha256=ODYNvkU_UKgOHXT--0MqmJ4Uk6U1_mp9xgehbTzKpX8,31924
|
38
40
|
gigaspatial/handlers/worldpop.py,sha256=oJ39NGajXi0rn829ZoFiaeG4_wavyPvljdActpxs12I,9850
|
39
41
|
gigaspatial/processing/__init__.py,sha256=QDVL-QbLCrIb19lrajP7LrHNdGdnsLeGcvAs_jQpdRM,183
|
40
|
-
gigaspatial/processing/
|
42
|
+
gigaspatial/processing/algorithms.py,sha256=6fBCwbZrI_ISWJ7UpkH6moq1vw-7dBy14yXSLHZprqY,6591
|
43
|
+
gigaspatial/processing/geo.py,sha256=tAykXH5UwrXtfnyZ9CClvejo9Ae7Yw6ij7EbF-7WLhQ,40091
|
41
44
|
gigaspatial/processing/sat_images.py,sha256=YUbH5MFNzl6NX49Obk14WaFcr1s3SyGJIOk-kRpbBNg,1429
|
42
|
-
gigaspatial/processing/tif_processor.py,sha256=
|
45
|
+
gigaspatial/processing/tif_processor.py,sha256=QLln9D-_zBhdYQL9NAL_bmo0bmmxE3sxDUQEglYQK94,27490
|
43
46
|
gigaspatial/processing/utils.py,sha256=HC85vGKQakxlkoQAkZmeAXWHsenAwTIRn7jPKUA7x20,1500
|
44
|
-
giga_spatial-0.6.
|
45
|
-
giga_spatial-0.6.
|
46
|
-
giga_spatial-0.6.
|
47
|
-
giga_spatial-0.6.
|
47
|
+
giga_spatial-0.6.5.dist-info/METADATA,sha256=eZHvzPKaNla-npj8PwfruUNp0chqxxst_mugbde6OL8,7506
|
48
|
+
giga_spatial-0.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
49
|
+
giga_spatial-0.6.5.dist-info/top_level.txt,sha256=LZsccgw6H4zXT7m6Y4XChm-Y5LjHAwZ2hkGN_B3ExmI,12
|
50
|
+
giga_spatial-0.6.5.dist-info/RECORD,,
|
gigaspatial/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.6.4"
|
1
|
+
__version__ = "0.6.5"
|
gigaspatial/config.py
CHANGED
@@ -70,11 +70,12 @@ class Config(BaseSettings):
|
|
70
70
|
description="Directory for temporary/cache files",
|
71
71
|
alias="CACHE_DIR",
|
72
72
|
)
|
73
|
-
ADMIN_BOUNDARIES_DATA_DIR: Path = Field(
|
74
|
-
default=
|
73
|
+
ADMIN_BOUNDARIES_DATA_DIR: Optional[Path] = Field(
|
74
|
+
default=None,
|
75
75
|
description="Root directory for administrative boundary data",
|
76
76
|
alias="ADMIN_BOUNDARIES_DIR",
|
77
77
|
)
|
78
|
+
DB_CONFIG: Optional[Dict] = Field(default=None, alias="DB_CONFIG")
|
78
79
|
|
79
80
|
DATA_TYPES: Dict[str, str] = Field(
|
80
81
|
default={
|
@@ -156,6 +157,11 @@ class Config(BaseSettings):
|
|
156
157
|
) -> Path:
|
157
158
|
"""Dynamic path construction for administrative boundary data based on admin level."""
|
158
159
|
base_dir = getattr(self, "ADMIN_BOUNDARIES_DATA_DIR")
|
160
|
+
if base_dir is None:
|
161
|
+
raise ValueError(
|
162
|
+
"ADMIN_BOUNDARIES_DATA_DIR is not configured. "
|
163
|
+
"Please set the ADMIN_BOUNDARIES_DIR environment variable."
|
164
|
+
)
|
159
165
|
level_dir = f"admin{admin_level}"
|
160
166
|
file = f"{country_code}_{level_dir}{file_suffix}"
|
161
167
|
|
@@ -174,7 +180,6 @@ class Config(BaseSettings):
|
|
174
180
|
"SILVER_DATA_DIR",
|
175
181
|
"GOLD_DATA_DIR",
|
176
182
|
"CACHE_DIR",
|
177
|
-
"ADMIN_BOUNDARIES_DATA_DIR",
|
178
183
|
mode="before",
|
179
184
|
)
|
180
185
|
def resolve_and_validate_paths(
|
@@ -192,10 +197,30 @@ class Config(BaseSettings):
|
|
192
197
|
resolved = path.expanduser().resolve()
|
193
198
|
return resolved if resolve else path
|
194
199
|
|
200
|
+
@field_validator("ADMIN_BOUNDARIES_DATA_DIR", mode="before")
|
201
|
+
def validate_admin_boundaries_dir(
|
202
|
+
cls, value: Union[str, Path, None]
|
203
|
+
) -> Optional[Path]:
|
204
|
+
"""Validator for ADMIN_BOUNDARIES_DATA_DIR that handles None and string values."""
|
205
|
+
if value is None:
|
206
|
+
return None
|
207
|
+
if isinstance(value, str):
|
208
|
+
return Path(value)
|
209
|
+
elif isinstance(value, Path):
|
210
|
+
return value
|
211
|
+
else:
|
212
|
+
raise ValueError(
|
213
|
+
f"Invalid path type for ADMIN_BOUNDARIES_DATA_DIR: {type(value)}"
|
214
|
+
)
|
215
|
+
|
195
216
|
def ensure_directories_exist(self, create: bool = False) -> None:
|
196
217
|
"""Ensures all configured directories exist."""
|
197
218
|
for field_name, field_value in self.__dict__.items():
|
198
|
-
if
|
219
|
+
if (
|
220
|
+
isinstance(field_value, Path)
|
221
|
+
and field_value is not None
|
222
|
+
and not field_value.exists()
|
223
|
+
):
|
199
224
|
if create:
|
200
225
|
field_value.mkdir(parents=True, exist_ok=True)
|
201
226
|
else:
|
gigaspatial/core/io/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from gigaspatial.core.io.adls_data_store import ADLSDataStore
|
2
2
|
from gigaspatial.core.io.local_data_store import LocalDataStore
|
3
3
|
from gigaspatial.core.io.data_api import GigaDataAPI
|
4
|
+
from gigaspatial.core.io.database import DBConnection
|
4
5
|
from gigaspatial.core.io.readers import *
|
5
6
|
from gigaspatial.core.io.writers import *
|
@@ -0,0 +1,316 @@
|
|
1
|
+
from typing import List, Dict, Optional, Union, Literal
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import dask.dataframe as dd
|
5
|
+
|
6
|
+
from sqlalchemy import inspect, MetaData, Table, select, create_engine, event, text
|
7
|
+
from sqlalchemy.engine import Engine
|
8
|
+
from sqlalchemy.exc import SQLAlchemyError
|
9
|
+
from urllib.parse import quote_plus
|
10
|
+
import warnings
|
11
|
+
|
12
|
+
from gigaspatial.config import config as global_config
|
13
|
+
|
14
|
+
|
15
|
+
class DBConnection:
|
16
|
+
"""
|
17
|
+
A unified database connection class supporting both Trino and PostgreSQL.
|
18
|
+
"""
|
19
|
+
|
20
|
+
DB_CONFIG = global_config.DB_CONFIG or {}
|
21
|
+
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
db_type: Literal["postgresql", "trino"] = DB_CONFIG.get(
|
25
|
+
"db_type", "postgresql"
|
26
|
+
),
|
27
|
+
host: Optional[str] = DB_CONFIG.get("host", None),
|
28
|
+
port: Union[int, str] = DB_CONFIG.get("port", None), # type: ignore
|
29
|
+
user: Optional[str] = DB_CONFIG.get("user", None),
|
30
|
+
password: Optional[str] = DB_CONFIG.get("password", None),
|
31
|
+
catalog: Optional[str] = DB_CONFIG.get("catalog", None), # For Trino
|
32
|
+
database: Optional[str] = DB_CONFIG.get("database", None), # For PostgreSQL
|
33
|
+
schema: str = DB_CONFIG.get("schema", "public"), # Default for PostgreSQL
|
34
|
+
http_scheme: str = DB_CONFIG.get("http_scheme", "https"), # For Trino
|
35
|
+
sslmode: str = DB_CONFIG.get("sslmode", "require"), # For PostgreSQL
|
36
|
+
**kwargs,
|
37
|
+
):
|
38
|
+
"""
|
39
|
+
Initialize a database connection for either Trino or PostgreSQL.
|
40
|
+
|
41
|
+
Args:
|
42
|
+
db_type: Either "trino" or "postgresql"
|
43
|
+
host: Database server host
|
44
|
+
port: Database server port
|
45
|
+
user: Username
|
46
|
+
password: Password
|
47
|
+
catalog: Trino catalog name
|
48
|
+
database: PostgreSQL database name
|
49
|
+
schema: Default schema name
|
50
|
+
http_scheme: For Trino ("http" or "https")
|
51
|
+
sslmode: For PostgreSQL (e.g., "require", "verify-full")
|
52
|
+
**kwargs: Additional connection parameters
|
53
|
+
"""
|
54
|
+
self.db_type = db_type.lower()
|
55
|
+
self.host = host
|
56
|
+
self.port = str(port) if port else None
|
57
|
+
self.user = user
|
58
|
+
self.password = quote_plus(password) if password else None
|
59
|
+
self.default_schema = schema
|
60
|
+
|
61
|
+
if self.db_type == "trino":
|
62
|
+
self.catalog = catalog
|
63
|
+
self.http_scheme = http_scheme
|
64
|
+
self.engine = self._create_trino_engine(**kwargs)
|
65
|
+
elif self.db_type == "postgresql":
|
66
|
+
self.database = database
|
67
|
+
self.sslmode = sslmode
|
68
|
+
self.engine = self._create_postgresql_engine(**kwargs)
|
69
|
+
else:
|
70
|
+
raise ValueError(f"Unsupported database type: {db_type}")
|
71
|
+
|
72
|
+
self._add_event_listener()
|
73
|
+
|
74
|
+
def _create_trino_engine(self, **kwargs) -> Engine:
|
75
|
+
"""Create a Trino SQLAlchemy engine."""
|
76
|
+
self._connection_string = (
|
77
|
+
f"trino://{self.user}:{self.password}@{self.host}:{self.port}/"
|
78
|
+
f"{self.catalog}/{self.default_schema}"
|
79
|
+
)
|
80
|
+
return create_engine(
|
81
|
+
self._connection_string,
|
82
|
+
connect_args={"http_scheme": self.http_scheme},
|
83
|
+
**kwargs,
|
84
|
+
)
|
85
|
+
|
86
|
+
def _create_postgresql_engine(self, **kwargs) -> Engine:
|
87
|
+
"""Create a PostgreSQL SQLAlchemy engine."""
|
88
|
+
self._connection_string = (
|
89
|
+
f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/"
|
90
|
+
f"{self.database}?sslmode={self.sslmode}"
|
91
|
+
)
|
92
|
+
return create_engine(self._connection_string, **kwargs)
|
93
|
+
|
94
|
+
def _add_event_listener(self):
|
95
|
+
"""Add event listeners for schema setting."""
|
96
|
+
if self.db_type == "trino":
|
97
|
+
|
98
|
+
@event.listens_for(self.engine, "connect", insert=True)
|
99
|
+
def set_current_schema(dbapi_connection, connection_record):
|
100
|
+
cursor_obj = dbapi_connection.cursor()
|
101
|
+
try:
|
102
|
+
cursor_obj.execute(f"USE {self.default_schema}")
|
103
|
+
except Exception as e:
|
104
|
+
warnings.warn(f"Could not set schema to {self.default_schema}: {e}")
|
105
|
+
finally:
|
106
|
+
cursor_obj.close()
|
107
|
+
|
108
|
+
def get_connection_string(self) -> str:
|
109
|
+
"""
|
110
|
+
Returns the connection string used to create the engine.
|
111
|
+
|
112
|
+
Returns:
|
113
|
+
str: The connection string.
|
114
|
+
"""
|
115
|
+
return self._connection_string
|
116
|
+
|
117
|
+
def get_schema_names(self) -> List[str]:
|
118
|
+
"""Get list of all schema names."""
|
119
|
+
inspector = inspect(self.engine)
|
120
|
+
return inspector.get_schema_names()
|
121
|
+
|
122
|
+
def get_table_names(self, schema: Optional[str] = None) -> List[str]:
|
123
|
+
"""Get list of table names in a schema."""
|
124
|
+
schema = schema or self.default_schema
|
125
|
+
inspector = inspect(self.engine)
|
126
|
+
return inspector.get_table_names(schema=schema)
|
127
|
+
|
128
|
+
def get_view_names(self, schema: Optional[str] = None) -> List[str]:
|
129
|
+
"""Get list of view names in a schema."""
|
130
|
+
schema = schema or self.default_schema
|
131
|
+
inspector = inspect(self.engine)
|
132
|
+
return inspector.get_view_names(schema=schema)
|
133
|
+
|
134
|
+
def get_column_names(
|
135
|
+
self, table_name: str, schema: Optional[str] = None
|
136
|
+
) -> List[str]:
|
137
|
+
"""Get column names for a specific table."""
|
138
|
+
if "." in table_name:
|
139
|
+
schema, table_name = table_name.split(".")
|
140
|
+
else:
|
141
|
+
schema = schema or self.default_schema
|
142
|
+
|
143
|
+
inspector = inspect(self.engine)
|
144
|
+
columns = inspector.get_columns(table_name, schema=schema)
|
145
|
+
return [col["name"] for col in columns]
|
146
|
+
|
147
|
+
def get_table_info(
|
148
|
+
self, table_name: str, schema: Optional[str] = None
|
149
|
+
) -> List[Dict]:
|
150
|
+
"""Get detailed column information for a table."""
|
151
|
+
if "." in table_name:
|
152
|
+
schema, table_name = table_name.split(".")
|
153
|
+
else:
|
154
|
+
schema = schema or self.default_schema
|
155
|
+
|
156
|
+
inspector = inspect(self.engine)
|
157
|
+
return inspector.get_columns(table_name, schema=schema)
|
158
|
+
|
159
|
+
def get_primary_keys(
|
160
|
+
self, table_name: str, schema: Optional[str] = None
|
161
|
+
) -> List[str]:
|
162
|
+
"""Get primary key columns for a table."""
|
163
|
+
if "." in table_name:
|
164
|
+
schema, table_name = table_name.split(".")
|
165
|
+
else:
|
166
|
+
schema = schema or self.default_schema
|
167
|
+
|
168
|
+
inspector = inspect(self.engine)
|
169
|
+
try:
|
170
|
+
return inspector.get_pk_constraint(table_name, schema=schema)[
|
171
|
+
"constrained_columns"
|
172
|
+
]
|
173
|
+
except:
|
174
|
+
return [] # Some databases may not support PK constraints
|
175
|
+
|
176
|
+
def table_exists(self, table_name: str, schema: Optional[str] = None) -> bool:
|
177
|
+
"""Check if a table exists."""
|
178
|
+
if "." in table_name:
|
179
|
+
schema, table_name = table_name.split(".")
|
180
|
+
else:
|
181
|
+
schema = schema or self.default_schema
|
182
|
+
|
183
|
+
return table_name in self.get_table_names(schema=schema)
|
184
|
+
|
185
|
+
# PostgreSQL-specific methods
|
186
|
+
def get_extensions(self) -> List[str]:
|
187
|
+
"""Get list of installed PostgreSQL extensions (PostgreSQL only)."""
|
188
|
+
if self.db_type != "postgresql":
|
189
|
+
raise NotImplementedError(
|
190
|
+
"This method is only available for PostgreSQL connections"
|
191
|
+
)
|
192
|
+
|
193
|
+
with self.engine.connect() as conn:
|
194
|
+
result = conn.execute("SELECT extname FROM pg_extension")
|
195
|
+
return [row[0] for row in result]
|
196
|
+
|
197
|
+
def execute_query(
|
198
|
+
self, query: str, fetch_results: bool = True, params: Optional[Dict] = None
|
199
|
+
) -> Union[List[tuple], None]:
|
200
|
+
"""
|
201
|
+
Executes a SQL query (works for both PostgreSQL and Trino).
|
202
|
+
|
203
|
+
Args:
|
204
|
+
query: SQL query to execute
|
205
|
+
fetch_results: Whether to fetch results
|
206
|
+
params: Parameters for parameterized queries
|
207
|
+
|
208
|
+
Returns:
|
209
|
+
Results as list of tuples or None
|
210
|
+
"""
|
211
|
+
try:
|
212
|
+
with self.engine.connect() as connection:
|
213
|
+
stmt = text(query)
|
214
|
+
result = (
|
215
|
+
connection.execute(stmt, params)
|
216
|
+
if params
|
217
|
+
else connection.execute(stmt)
|
218
|
+
)
|
219
|
+
|
220
|
+
if fetch_results and result.returns_rows:
|
221
|
+
return result.fetchall()
|
222
|
+
return None
|
223
|
+
except SQLAlchemyError as e:
|
224
|
+
print(f"Error executing query: {e}")
|
225
|
+
raise
|
226
|
+
|
227
|
+
def test_connection(self) -> bool:
|
228
|
+
"""
|
229
|
+
Tests the database connection (works for both PostgreSQL and Trino).
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
True if connection successful, False otherwise
|
233
|
+
"""
|
234
|
+
test_query = (
|
235
|
+
"SELECT 1"
|
236
|
+
if self.db_type == "postgresql"
|
237
|
+
else "SELECT 1 AS connection_test"
|
238
|
+
)
|
239
|
+
|
240
|
+
try:
|
241
|
+
print(
|
242
|
+
f"Attempting to connect to {self.db_type} at {self.host}:{self.port}..."
|
243
|
+
)
|
244
|
+
with self.engine.connect() as conn:
|
245
|
+
conn.execute(text(test_query))
|
246
|
+
print(f"Successfully connected to {self.db_type.upper()}.")
|
247
|
+
return True
|
248
|
+
except Exception as e:
|
249
|
+
print(f"Failed to connect to {self.db_type.upper()}: {e}")
|
250
|
+
return False
|
251
|
+
|
252
|
+
def read_sql_to_dataframe(
|
253
|
+
self, query: str, params: Optional[Dict] = None
|
254
|
+
) -> pd.DataFrame:
|
255
|
+
"""
|
256
|
+
Executes query and returns results as pandas DataFrame (works for both).
|
257
|
+
|
258
|
+
Args:
|
259
|
+
query: SQL query to execute
|
260
|
+
params: Parameters for parameterized queries
|
261
|
+
|
262
|
+
Returns:
|
263
|
+
pandas DataFrame with results
|
264
|
+
"""
|
265
|
+
try:
|
266
|
+
with self.engine.connect() as connection:
|
267
|
+
return pd.read_sql_query(text(query), connection, params=params)
|
268
|
+
except SQLAlchemyError as e:
|
269
|
+
print(f"Error reading SQL to DataFrame: {e}")
|
270
|
+
raise
|
271
|
+
|
272
|
+
def read_sql_to_dask_dataframe(
|
273
|
+
self,
|
274
|
+
table_name: str,
|
275
|
+
columns: Optional[List[str]] = None,
|
276
|
+
limit: Optional[int] = None,
|
277
|
+
**kwargs,
|
278
|
+
) -> pd.DataFrame:
|
279
|
+
"""
|
280
|
+
Reads data to Dask DataFrame (works for both, but connection string differs).
|
281
|
+
|
282
|
+
Args:
|
283
|
+
table_name: Table name (schema.table or just table)
|
284
|
+
columns: List of columns to select
|
285
|
+
limit: Maximum rows to return
|
286
|
+
**kwargs: Additional arguments
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
Dask DataFrame with results
|
290
|
+
"""
|
291
|
+
try:
|
292
|
+
connection_string = self.get_connection_string()
|
293
|
+
|
294
|
+
# Handle schema.table format
|
295
|
+
if "." in table_name:
|
296
|
+
schema, table = table_name.split(".")
|
297
|
+
else:
|
298
|
+
schema = self.default_schema
|
299
|
+
table = table_name
|
300
|
+
|
301
|
+
metadata = MetaData()
|
302
|
+
table_obj = Table(table, metadata, schema=schema, autoload_with=self.engine)
|
303
|
+
|
304
|
+
# Build query
|
305
|
+
query = (
|
306
|
+
select(*[table_obj.c[col] for col in columns])
|
307
|
+
if columns
|
308
|
+
else select(table_obj)
|
309
|
+
)
|
310
|
+
if limit:
|
311
|
+
query = query.limit(limit)
|
312
|
+
|
313
|
+
return dd.read_sql_query(sql=query, con=connection_string, **kwargs)
|
314
|
+
except Exception as e:
|
315
|
+
print(f"Error reading SQL to Dask DataFrame: {e}")
|
316
|
+
raise ValueError(f"Failed to read SQL to Dask DataFrame: {e}") from e
|
@@ -1,2 +1,6 @@
|
|
1
1
|
from gigaspatial.generators.poi import PoiViewGenerator, PoiViewGeneratorConfig
|
2
|
-
from gigaspatial.generators.zonal import
|
2
|
+
from gigaspatial.generators.zonal import (
|
3
|
+
GeometryBasedZonalViewGenerator,
|
4
|
+
MercatorViewGenerator,
|
5
|
+
AdminBoundariesViewGenerator,
|
6
|
+
)
|