giga-spatial 0.6.4__tar.gz → 0.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/.env_sample +1 -0
  2. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/CHANGELOG.md +82 -0
  3. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/PKG-INFO +2 -1
  4. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/getting-started/quickstart.md +10 -17
  5. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/user-guide/configuration.md +1 -1
  6. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/giga_spatial.egg-info/PKG-INFO +2 -1
  7. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/giga_spatial.egg-info/SOURCES.txt +3 -0
  8. giga_spatial-0.6.4/requirements.txt → giga_spatial-0.6.5/giga_spatial.egg-info/requires.txt +2 -1
  9. giga_spatial-0.6.5/gigaspatial/__init__.py +1 -0
  10. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/config.py +29 -4
  11. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/__init__.py +1 -0
  12. giga_spatial-0.6.5/gigaspatial/core/io/database.py +316 -0
  13. giga_spatial-0.6.5/gigaspatial/generators/__init__.py +6 -0
  14. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/generators/poi.py +228 -43
  15. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/__init__.py +2 -1
  16. giga_spatial-0.6.5/gigaspatial/generators/zonal/admin.py +84 -0
  17. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/base.py +221 -64
  18. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/generators/zonal/geometry.py +74 -31
  19. giga_spatial-0.6.5/gigaspatial/generators/zonal/mercator.py +109 -0
  20. giga_spatial-0.6.5/gigaspatial/grid/__init__.py +1 -0
  21. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/grid/mercator_tiles.py +33 -10
  22. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/boundaries.py +43 -18
  23. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/ghsl.py +79 -14
  24. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/rwi.py +5 -2
  25. giga_spatial-0.6.5/gigaspatial/processing/algorithms.py +188 -0
  26. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/processing/geo.py +87 -25
  27. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/processing/tif_processor.py +220 -45
  28. giga_spatial-0.6.4/giga_spatial.egg-info/requires.txt → giga_spatial-0.6.5/requirements.txt +1 -0
  29. giga_spatial-0.6.4/gigaspatial/__init__.py +0 -1
  30. giga_spatial-0.6.4/gigaspatial/generators/__init__.py +0 -2
  31. giga_spatial-0.6.4/gigaspatial/generators/zonal/mercator.py +0 -78
  32. giga_spatial-0.6.4/gigaspatial/grid/__init__.py +0 -1
  33. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/CODE_OF_CONDUCT.md +0 -0
  34. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/CONTRIBUTING.md +0 -0
  35. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/LICENSE +0 -0
  36. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/MANIFEST.in +0 -0
  37. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/PULL_REQUEST_TEMPLATE.md +0 -0
  38. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/README.md +0 -0
  39. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/.DS_Store +0 -0
  40. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/core.md +0 -0
  41. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/generators.md +0 -0
  42. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/grid.md +0 -0
  43. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/handlers.md +0 -0
  44. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/index.md +0 -0
  45. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/api/processing.md +0 -0
  46. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/assets/GIGA_horizontal_notext_white.webp +0 -0
  47. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/assets/datasets.png +0 -0
  48. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/assets/logo.png +0 -0
  49. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/changelog.md +0 -0
  50. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/contributing.md +0 -0
  51. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/advanced.md +0 -0
  52. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/basic.md +0 -0
  53. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/downloading/ghsl.md +0 -0
  54. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/downloading/osm.md +0 -0
  55. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/index.md +0 -0
  56. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/processing/tif.md +0 -0
  57. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/examples/use-cases.md +0 -0
  58. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/getting-started/installation.md +0 -0
  59. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/index.md +0 -0
  60. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/license.md +0 -0
  61. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/stylesheets/extra.css +0 -0
  62. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/docs/user-guide/index.md +0 -0
  63. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/giga_spatial.egg-info/dependency_links.txt +0 -0
  64. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/giga_spatial.egg-info/top_level.txt +0 -0
  65. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/__init__.py +0 -0
  66. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/adls_data_store.py +0 -0
  67. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/data_api.py +0 -0
  68. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/data_store.py +0 -0
  69. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/local_data_store.py +0 -0
  70. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/readers.py +0 -0
  71. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/io/writers.py +0 -0
  72. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/schemas/__init__.py +0 -0
  73. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/core/schemas/entity.py +0 -0
  74. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/__init__.py +0 -0
  75. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/base.py +0 -0
  76. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/giga.py +0 -0
  77. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/google_open_buildings.py +0 -0
  78. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/hdx.py +0 -0
  79. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/mapbox_image.py +0 -0
  80. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/maxar_image.py +0 -0
  81. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/microsoft_global_buildings.py +0 -0
  82. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/ookla_speedtest.py +0 -0
  83. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/opencellid.py +0 -0
  84. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/osm.py +0 -0
  85. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/overture.py +0 -0
  86. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/unicef_georepo.py +0 -0
  87. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/handlers/worldpop.py +0 -0
  88. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/processing/__init__.py +0 -0
  89. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/processing/sat_images.py +0 -0
  90. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/gigaspatial/processing/utils.py +0 -0
  91. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/pyproject.toml +0 -0
  92. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/setup.cfg +0 -0
  93. {giga_spatial-0.6.4 → giga_spatial-0.6.5}/setup.py +0 -0
@@ -4,6 +4,7 @@ export GOOGLE_SERVICE_ACCOUNT=""
4
4
  export API_PROFILE_FILE_PATH=""
5
5
  export API_SHARE_NAME=""
6
6
  export API_SCHEMA_NAME=""
7
+ export DB_CONFIG='{}'
7
8
  export MAPBOX_ACCESS_TOKEN=""
8
9
  export MAXAR_USERNAME=""
9
10
  export MAXAR_PASSWORD=""
@@ -2,6 +2,88 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [v0.6.5] - 2025-07-01
6
+
7
+ ### Added
8
+
9
+ - **`MercatorTiles.get_quadkeys_from_points()`**
10
+ New static method for efficient 1:1 point-to-quadkey mapping using coordinate-based logic, improving performance over spatial joins.
11
+
12
+ - **`AdminBoundariesViewGenerator`**
13
+ New generator class for producing zonal views based on administrative boundaries (e.g., districts, provinces) with flexible source and admin level support.
14
+
15
+ - **Zonal View Generator Enhancements**
16
+ - `_view`: Internal attribute for accumulating mapped statistics.
17
+ - `view`: Exposes current state of zonal view.
18
+ - `add_variable_to_view()`: Adds mapped data from `map_points`, `map_polygons`, or `map_rasters` with robust validation and zone alignment.
19
+ - `to_dataframe()` and `to_geodataframe()` methods added for exporting current view in tabular or spatial formats.
20
+
21
+ - **`PoiViewGenerator` Enhancements**
22
+ - Consistent `_view` DataFrame for storing mapped results.
23
+ - `_update_view()`: Central method to update POI data.
24
+ - `save_view()`: Improved format handling (CSV, Parquet, GeoJSON, etc.) with geometry recovery.
25
+ - `to_dataframe()` and `to_geodataframe()` methods added for convenient export of enriched POI view.
26
+ - Robust duplicate ID detection and CRS validation in `map_zonal_stats`.
27
+
28
+ - **`TifProcessor` Enhancements**
29
+ - `sample_by_polygons_batched()`: Parallel polygon sampling.
30
+ - Enhanced `sample_by_polygons()` with nodata masking and multiple stats.
31
+ - `warn_on_error`: Flag to suppress sampling warnings.
32
+
33
+ - **GeoTIFF Multi-Band Support**
34
+ - `multi` mode added for multi-band raster support.
35
+ - Auto-detects band names via metadata.
36
+ - Strict validation of band count based on mode (`single`, `rgb`, `rgba`, `multi`).
37
+
38
+ - **Spatial Distance Graph Algorithm**
39
+ - `build_distance_graph()` added for fast KD-tree-based spatial matching.
40
+ - Supports both `DataFrame` and `GeoDataFrame` inputs.
41
+ - Outputs a `networkx.Graph` with optional DataFrame of matches.
42
+ - Handles projections, self-match exclusion, and includes verbose stats/logs.
43
+
44
+ - **Database Integration (Experimental)**
45
+ - Added `DBConnection` class in `core/io/database.py` for unified Trino and PostgreSQL access.
46
+ - Supports schema/table introspection, query execution, and reading into `pandas` or `dask`.
47
+ - Handles connection creation, credential management, and diagnostics.
48
+ - Utility methods for schema/view/table/column listings and parameterized queries.
49
+
50
+ - **GHSL Population Mapping**
51
+ - `map_ghsl_pop()` method added to `GeometryBasedZonalViewGenerator`.
52
+ - Aggregates GHSL population rasters to user-defined zones.
53
+ - Supports `intersects` and `fractional` predicates (latter for 1000m resolution only).
54
+ - Returns population statistics (e.g., `sum`) with customizable column prefix.
55
+
56
+ ### Changed
57
+
58
+ - **`MercatorTiles.from_points()`** now internally uses `get_quadkeys_from_points()` for better performance.
59
+
60
+ - **`map_points()` and `map_rasters()`** now return `Dict[zone_id, value]` to support direct usage with `add_variable_to_view()`.
61
+
62
+ - **Refactored `aggregate_polygons_to_zones()`**
63
+ - `area_weighted` deprecated in favor of `predicate`.
64
+ - Supports flexible predicates like `"within"`, `"fractional"` for spatial aggregation.
65
+ - `map_polygons()` updated to reflect this change.
66
+
67
+ - **Optional Admin Boundaries Configuration**
68
+ - `ADMIN_BOUNDARIES_DATA_DIR` is now optional.
69
+ - `AdminBoundaries.create()` only attempts to load if explicitly configured or path is provided.
70
+ - Improved documentation and fallback behavior for missing configs.
71
+
72
+ ### Fixed
73
+
74
+ - **GHSL Downloader**
75
+ - ZIP files are now downloaded into a temporary cache directory using `requests.get()`.
76
+ - Avoids unnecessary writes and ensures cleanup.
77
+
78
+ - **`TifProcessor`**
79
+ - Removed polygon sampling warnings unless explicitly enabled.
80
+
81
+ ### Deprecated
82
+
83
+ - `TifProcessor.tabular` → use `to_dataframe()` instead.
84
+ - `TifProcessor.get_zoned_geodataframe()` → use `to_geodataframe()` instead.
85
+ - `area_weighted` → use `predicate` in aggregation methods instead.
86
+
5
87
  ## [v0.6.4] - 2025-06-19
6
88
 
7
89
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.4
3
+ Version: 0.6.5
4
4
  Summary: A package for spatial data download & processing
5
5
  Home-page: https://github.com/unicef/giga-spatial
6
6
  Author: Utku Can Ozturk
@@ -31,6 +31,7 @@ Requires-Dist: OWSLib==0.32.1
31
31
  Requires-Dist: pydantic-settings>=2.7.1
32
32
  Requires-Dist: hdx-python-api>=6.3.8
33
33
  Requires-Dist: bs4==0.0.2
34
+ Requires-Dist: sqlalchemy-trino==0.5.0
34
35
  Dynamic: author
35
36
  Dynamic: author-email
36
37
  Dynamic: classifier
@@ -16,43 +16,36 @@ import gigaspatial as gs
16
16
 
17
17
  ## Setting Up Configuration
18
18
 
19
- The `gigaspatial` package uses a configuration file (`config.py`) to manage paths, API keys, and other settings. You can customize the configuration as needed.
19
+ The `gigaspatial` package uses a unified configuration system to manage paths, API keys, and other settings.
20
20
 
21
- ### Using Environment Variables
21
+ - **Environment Variables:** Most configuration is handled via environment variables, which can be set in a `.env` file at the project root. For a full list of supported variables and their descriptions, see the [Configuration Guide](../user-guide/configuration.md).
22
+ - **Defaults:** If not set, sensible defaults are used for all paths and keys.
23
+ - **Manual Overrides:** You can override data directory paths in your code using `config.set_path`.
22
24
 
23
- The package can read configuration settings from an environment file (e.g., `.env`). Here's an example of how to set up the `.env` file based on the `env_sample`:
25
+ ### Example `.env` File
24
26
 
25
27
  ```bash
26
- # Paths for different data types
27
28
  BRONZE_DIR=/path/to/your/bronze_tier_data
28
29
  SILVER_DIR=/path/to/your/silver_tier_data
29
30
  GOLD_DIR=/path/to/your/gold_tier_data
30
31
  VIEWS_DIR=/path/to/your/views_data
31
- ADMIN_BOUNDARIES_DIR=/path/to/your/admin_boundaries_data
32
-
33
- # API keys and tokens
32
+ CACHE_DIR=/path/to/your/cache
33
+ ADMIN_BOUNDARIES_DIR=/path/to/your/admin_boundaries
34
34
  MAPBOX_ACCESS_TOKEN=your_mapbox_token_here
35
- MAXAR_USERNAME=your_maxar_username_here
36
- MAXAR_PASSWORD=your_maxar_password_here
37
- MAXAR_CONNECTION_STRING=your_maxar_key_here
35
+ # ... other keys ...
38
36
  ```
39
37
 
40
- The `config.py` file will automatically read these environment variables and set the paths and keys accordingly.
41
-
42
- ### Setting Paths Manually
43
-
44
- You can also set paths manually in your code:
38
+ ### Setting Paths Programmatically
45
39
 
46
40
  ```python
47
41
  from gigaspatial.config import config
48
42
 
49
- # Example: Setting custom data storage paths
50
43
  config.set_path("bronze", "/path/to/your/bronze_tier_data")
51
44
  config.set_path("gold", "/path/to/your/gold_tier_data")
52
45
  config.set_path("views", "/path/to/your/views_data")
53
46
  ```
54
47
 
55
- API keys and tokens should be set through environment variables.
48
+ > For more details and troubleshooting, see the [full configuration guide](../user-guide/configuration.md).
56
49
 
57
50
  ## Downloading and Processing Geospatial Data
58
51
 
@@ -123,7 +123,7 @@ print(config)
123
123
 
124
124
  ## Next Steps
125
125
 
126
- Once configuration is set up, proceed to the [Data Handling Guide](data-handling/downloading.md) to start using `gigaspatial`.
126
+ Once configuration is set up, proceed to the [Data Handling Guide](data-handling/downloading.md) *(Coming Soon)* to start using `gigaspatial`.
127
127
 
128
128
  ---
129
129
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: giga-spatial
3
- Version: 0.6.4
3
+ Version: 0.6.5
4
4
  Summary: A package for spatial data download & processing
5
5
  Home-page: https://github.com/unicef/giga-spatial
6
6
  Author: Utku Can Ozturk
@@ -31,6 +31,7 @@ Requires-Dist: OWSLib==0.32.1
31
31
  Requires-Dist: pydantic-settings>=2.7.1
32
32
  Requires-Dist: hdx-python-api>=6.3.8
33
33
  Requires-Dist: bs4==0.0.2
34
+ Requires-Dist: sqlalchemy-trino==0.5.0
34
35
  Dynamic: author
35
36
  Dynamic: author-email
36
37
  Dynamic: classifier
@@ -47,6 +47,7 @@ gigaspatial/core/io/__init__.py
47
47
  gigaspatial/core/io/adls_data_store.py
48
48
  gigaspatial/core/io/data_api.py
49
49
  gigaspatial/core/io/data_store.py
50
+ gigaspatial/core/io/database.py
50
51
  gigaspatial/core/io/local_data_store.py
51
52
  gigaspatial/core/io/readers.py
52
53
  gigaspatial/core/io/writers.py
@@ -55,6 +56,7 @@ gigaspatial/core/schemas/entity.py
55
56
  gigaspatial/generators/__init__.py
56
57
  gigaspatial/generators/poi.py
57
58
  gigaspatial/generators/zonal/__init__.py
59
+ gigaspatial/generators/zonal/admin.py
58
60
  gigaspatial/generators/zonal/base.py
59
61
  gigaspatial/generators/zonal/geometry.py
60
62
  gigaspatial/generators/zonal/mercator.py
@@ -78,6 +80,7 @@ gigaspatial/handlers/rwi.py
78
80
  gigaspatial/handlers/unicef_georepo.py
79
81
  gigaspatial/handlers/worldpop.py
80
82
  gigaspatial/processing/__init__.py
83
+ gigaspatial/processing/algorithms.py
81
84
  gigaspatial/processing/geo.py
82
85
  gigaspatial/processing/sat_images.py
83
86
  gigaspatial/processing/tif_processor.py
@@ -15,4 +15,5 @@ tqdm==4.65.0
15
15
  OWSLib==0.32.1
16
16
  pydantic-settings>=2.7.1
17
17
  hdx-python-api>=6.3.8
18
- bs4==0.0.2
18
+ bs4==0.0.2
19
+ sqlalchemy-trino==0.5.0
@@ -0,0 +1 @@
1
+ __version__ = "0.6.5"
@@ -70,11 +70,12 @@ class Config(BaseSettings):
70
70
  description="Directory for temporary/cache files",
71
71
  alias="CACHE_DIR",
72
72
  )
73
- ADMIN_BOUNDARIES_DATA_DIR: Path = Field(
74
- default=Path("admin_boundaries"),
73
+ ADMIN_BOUNDARIES_DATA_DIR: Optional[Path] = Field(
74
+ default=None,
75
75
  description="Root directory for administrative boundary data",
76
76
  alias="ADMIN_BOUNDARIES_DIR",
77
77
  )
78
+ DB_CONFIG: Optional[Dict] = Field(default=None, alias="DB_CONFIG")
78
79
 
79
80
  DATA_TYPES: Dict[str, str] = Field(
80
81
  default={
@@ -156,6 +157,11 @@ class Config(BaseSettings):
156
157
  ) -> Path:
157
158
  """Dynamic path construction for administrative boundary data based on admin level."""
158
159
  base_dir = getattr(self, "ADMIN_BOUNDARIES_DATA_DIR")
160
+ if base_dir is None:
161
+ raise ValueError(
162
+ "ADMIN_BOUNDARIES_DATA_DIR is not configured. "
163
+ "Please set the ADMIN_BOUNDARIES_DIR environment variable."
164
+ )
159
165
  level_dir = f"admin{admin_level}"
160
166
  file = f"{country_code}_{level_dir}{file_suffix}"
161
167
 
@@ -174,7 +180,6 @@ class Config(BaseSettings):
174
180
  "SILVER_DATA_DIR",
175
181
  "GOLD_DATA_DIR",
176
182
  "CACHE_DIR",
177
- "ADMIN_BOUNDARIES_DATA_DIR",
178
183
  mode="before",
179
184
  )
180
185
  def resolve_and_validate_paths(
@@ -192,10 +197,30 @@ class Config(BaseSettings):
192
197
  resolved = path.expanduser().resolve()
193
198
  return resolved if resolve else path
194
199
 
200
+ @field_validator("ADMIN_BOUNDARIES_DATA_DIR", mode="before")
201
+ def validate_admin_boundaries_dir(
202
+ cls, value: Union[str, Path, None]
203
+ ) -> Optional[Path]:
204
+ """Validator for ADMIN_BOUNDARIES_DATA_DIR that handles None and string values."""
205
+ if value is None:
206
+ return None
207
+ if isinstance(value, str):
208
+ return Path(value)
209
+ elif isinstance(value, Path):
210
+ return value
211
+ else:
212
+ raise ValueError(
213
+ f"Invalid path type for ADMIN_BOUNDARIES_DATA_DIR: {type(value)}"
214
+ )
215
+
195
216
  def ensure_directories_exist(self, create: bool = False) -> None:
196
217
  """Ensures all configured directories exist."""
197
218
  for field_name, field_value in self.__dict__.items():
198
- if isinstance(field_value, Path) and not field_value.exists():
219
+ if (
220
+ isinstance(field_value, Path)
221
+ and field_value is not None
222
+ and not field_value.exists()
223
+ ):
199
224
  if create:
200
225
  field_value.mkdir(parents=True, exist_ok=True)
201
226
  else:
@@ -1,5 +1,6 @@
1
1
  from gigaspatial.core.io.adls_data_store import ADLSDataStore
2
2
  from gigaspatial.core.io.local_data_store import LocalDataStore
3
3
  from gigaspatial.core.io.data_api import GigaDataAPI
4
+ from gigaspatial.core.io.database import DBConnection
4
5
  from gigaspatial.core.io.readers import *
5
6
  from gigaspatial.core.io.writers import *
@@ -0,0 +1,316 @@
1
+ from typing import List, Dict, Optional, Union, Literal
2
+
3
+ import pandas as pd
4
+ import dask.dataframe as dd
5
+
6
+ from sqlalchemy import inspect, MetaData, Table, select, create_engine, event, text
7
+ from sqlalchemy.engine import Engine
8
+ from sqlalchemy.exc import SQLAlchemyError
9
+ from urllib.parse import quote_plus
10
+ import warnings
11
+
12
+ from gigaspatial.config import config as global_config
13
+
14
+
15
class DBConnection:
    """
    A unified database connection supporting both Trino and PostgreSQL.

    Builds a SQLAlchemy engine for either backend, exposes schema/table
    introspection helpers, and reads query results into pandas or Dask
    DataFrames. Constructor defaults are read from ``config.DB_CONFIG``.
    """

    # NOTE: evaluated once at class-definition time — later mutations of
    # global_config.DB_CONFIG are NOT picked up by the __init__ defaults below.
    DB_CONFIG = global_config.DB_CONFIG or {}

    def __init__(
        self,
        db_type: Literal["postgresql", "trino"] = DB_CONFIG.get(
            "db_type", "postgresql"
        ),
        host: Optional[str] = DB_CONFIG.get("host", None),
        port: Union[int, str] = DB_CONFIG.get("port", None),  # type: ignore
        user: Optional[str] = DB_CONFIG.get("user", None),
        password: Optional[str] = DB_CONFIG.get("password", None),
        catalog: Optional[str] = DB_CONFIG.get("catalog", None),  # For Trino
        database: Optional[str] = DB_CONFIG.get("database", None),  # For PostgreSQL
        schema: str = DB_CONFIG.get("schema", "public"),  # Default for PostgreSQL
        http_scheme: str = DB_CONFIG.get("http_scheme", "https"),  # For Trino
        sslmode: str = DB_CONFIG.get("sslmode", "require"),  # For PostgreSQL
        **kwargs,
    ):
        """
        Initialize a database connection for either Trino or PostgreSQL.

        Args:
            db_type: Either "trino" or "postgresql"
            host: Database server host
            port: Database server port
            user: Username
            password: Password (URL-quoted before being embedded in the DSN)
            catalog: Trino catalog name
            database: PostgreSQL database name
            schema: Default schema name
            http_scheme: For Trino ("http" or "https")
            sslmode: For PostgreSQL (e.g., "require", "verify-full")
            **kwargs: Additional parameters forwarded to ``create_engine``

        Raises:
            ValueError: If ``db_type`` is neither "trino" nor "postgresql".
        """
        self.db_type = db_type.lower()
        self.host = host
        self.port = str(port) if port else None
        self.user = user
        # quote_plus protects special characters when the password is
        # interpolated into the connection URL.
        self.password = quote_plus(password) if password else None
        self.default_schema = schema

        if self.db_type == "trino":
            self.catalog = catalog
            self.http_scheme = http_scheme
            self.engine = self._create_trino_engine(**kwargs)
        elif self.db_type == "postgresql":
            self.database = database
            self.sslmode = sslmode
            self.engine = self._create_postgresql_engine(**kwargs)
        else:
            raise ValueError(f"Unsupported database type: {db_type}")

        self._add_event_listener()

    def _create_trino_engine(self, **kwargs) -> Engine:
        """Create a Trino SQLAlchemy engine."""
        self._connection_string = (
            f"trino://{self.user}:{self.password}@{self.host}:{self.port}/"
            f"{self.catalog}/{self.default_schema}"
        )
        return create_engine(
            self._connection_string,
            connect_args={"http_scheme": self.http_scheme},
            **kwargs,
        )

    def _create_postgresql_engine(self, **kwargs) -> Engine:
        """Create a PostgreSQL SQLAlchemy engine."""
        self._connection_string = (
            f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/"
            f"{self.database}?sslmode={self.sslmode}"
        )
        return create_engine(self._connection_string, **kwargs)

    def _add_event_listener(self):
        """Add event listeners for schema setting (Trino only)."""
        if self.db_type == "trino":

            @event.listens_for(self.engine, "connect", insert=True)
            def set_current_schema(dbapi_connection, connection_record):
                # Trino does not honor the schema from the URL on every
                # connection, so issue an explicit USE on connect.
                cursor_obj = dbapi_connection.cursor()
                try:
                    cursor_obj.execute(f"USE {self.default_schema}")
                except Exception as e:
                    warnings.warn(f"Could not set schema to {self.default_schema}: {e}")
                finally:
                    cursor_obj.close()

    def _resolve_schema_and_table(
        self, table_name: str, schema: Optional[str]
    ) -> tuple:
        """
        Split an optionally schema-qualified name into ``(schema, table)``.

        A dotted name ("schema.table") overrides the *schema* argument;
        otherwise the given schema or the connection default is used.
        """
        if "." in table_name:
            schema, table_name = table_name.split(".")
        else:
            schema = schema or self.default_schema
        return schema, table_name

    def get_connection_string(self) -> str:
        """
        Returns the connection string used to create the engine.

        Returns:
            str: The connection string.
        """
        return self._connection_string

    def get_schema_names(self) -> List[str]:
        """Get list of all schema names."""
        inspector = inspect(self.engine)
        return inspector.get_schema_names()

    def get_table_names(self, schema: Optional[str] = None) -> List[str]:
        """Get list of table names in a schema."""
        schema = schema or self.default_schema
        inspector = inspect(self.engine)
        return inspector.get_table_names(schema=schema)

    def get_view_names(self, schema: Optional[str] = None) -> List[str]:
        """Get list of view names in a schema."""
        schema = schema or self.default_schema
        inspector = inspect(self.engine)
        return inspector.get_view_names(schema=schema)

    def get_column_names(
        self, table_name: str, schema: Optional[str] = None
    ) -> List[str]:
        """Get column names for a specific table."""
        schema, table_name = self._resolve_schema_and_table(table_name, schema)
        inspector = inspect(self.engine)
        columns = inspector.get_columns(table_name, schema=schema)
        return [col["name"] for col in columns]

    def get_table_info(
        self, table_name: str, schema: Optional[str] = None
    ) -> List[Dict]:
        """Get detailed column information for a table."""
        schema, table_name = self._resolve_schema_and_table(table_name, schema)
        inspector = inspect(self.engine)
        return inspector.get_columns(table_name, schema=schema)

    def get_primary_keys(
        self, table_name: str, schema: Optional[str] = None
    ) -> List[str]:
        """Get primary key columns for a table (empty list if unsupported)."""
        schema, table_name = self._resolve_schema_and_table(table_name, schema)
        inspector = inspect(self.engine)
        try:
            return inspector.get_pk_constraint(table_name, schema=schema)[
                "constrained_columns"
            ]
        except Exception:
            # Some dialects (e.g. Trino) do not support PK constraints.
            return []

    def table_exists(self, table_name: str, schema: Optional[str] = None) -> bool:
        """Check if a table exists."""
        schema, table_name = self._resolve_schema_and_table(table_name, schema)
        return table_name in self.get_table_names(schema=schema)

    # PostgreSQL-specific methods
    def get_extensions(self) -> List[str]:
        """Get list of installed PostgreSQL extensions (PostgreSQL only)."""
        if self.db_type != "postgresql":
            raise NotImplementedError(
                "This method is only available for PostgreSQL connections"
            )

        with self.engine.connect() as conn:
            # text() is required: SQLAlchemy 1.4+/2.x no longer accepts
            # raw SQL strings in Connection.execute().
            result = conn.execute(text("SELECT extname FROM pg_extension"))
            return [row[0] for row in result]

    def execute_query(
        self, query: str, fetch_results: bool = True, params: Optional[Dict] = None
    ) -> Union[List[tuple], None]:
        """
        Executes a SQL query (works for both PostgreSQL and Trino).

        Args:
            query: SQL query to execute
            fetch_results: Whether to fetch results
            params: Parameters for parameterized queries

        Returns:
            Results as list of tuples or None
        """
        try:
            with self.engine.connect() as connection:
                stmt = text(query)
                result = (
                    connection.execute(stmt, params)
                    if params
                    else connection.execute(stmt)
                )

                if fetch_results and result.returns_rows:
                    return result.fetchall()
                return None
        except SQLAlchemyError as e:
            print(f"Error executing query: {e}")
            raise

    def test_connection(self) -> bool:
        """
        Tests the database connection (works for both PostgreSQL and Trino).

        Returns:
            True if connection successful, False otherwise
        """
        test_query = (
            "SELECT 1"
            if self.db_type == "postgresql"
            else "SELECT 1 AS connection_test"
        )

        try:
            print(
                f"Attempting to connect to {self.db_type} at {self.host}:{self.port}..."
            )
            with self.engine.connect() as conn:
                conn.execute(text(test_query))
            print(f"Successfully connected to {self.db_type.upper()}.")
            return True
        except Exception as e:
            print(f"Failed to connect to {self.db_type.upper()}: {e}")
            return False

    def read_sql_to_dataframe(
        self, query: str, params: Optional[Dict] = None
    ) -> pd.DataFrame:
        """
        Executes query and returns results as pandas DataFrame (works for both).

        Args:
            query: SQL query to execute
            params: Parameters for parameterized queries

        Returns:
            pandas DataFrame with results
        """
        try:
            with self.engine.connect() as connection:
                return pd.read_sql_query(text(query), connection, params=params)
        except SQLAlchemyError as e:
            print(f"Error reading SQL to DataFrame: {e}")
            raise

    def read_sql_to_dask_dataframe(
        self,
        table_name: str,
        columns: Optional[List[str]] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> dd.DataFrame:
        """
        Reads data to Dask DataFrame (works for both, but connection string differs).

        Args:
            table_name: Table name (schema.table or just table)
            columns: List of columns to select
            limit: Maximum rows to return
            **kwargs: Additional arguments forwarded to ``dd.read_sql_query``

        Returns:
            Dask DataFrame with results

        Raises:
            ValueError: If reading fails for any reason (original error chained).
        """
        try:
            connection_string = self.get_connection_string()

            # Handle schema.table format
            schema, table = self._resolve_schema_and_table(table_name, None)

            metadata = MetaData()
            table_obj = Table(table, metadata, schema=schema, autoload_with=self.engine)

            # Build query
            query = (
                select(*[table_obj.c[col] for col in columns])
                if columns
                else select(table_obj)
            )
            if limit:
                query = query.limit(limit)

            return dd.read_sql_query(sql=query, con=connection_string, **kwargs)
        except Exception as e:
            print(f"Error reading SQL to Dask DataFrame: {e}")
            raise ValueError(f"Failed to read SQL to Dask DataFrame: {e}") from e
@@ -0,0 +1,6 @@
1
+ from gigaspatial.generators.poi import PoiViewGenerator, PoiViewGeneratorConfig
2
+ from gigaspatial.generators.zonal import (
3
+ GeometryBasedZonalViewGenerator,
4
+ MercatorViewGenerator,
5
+ AdminBoundariesViewGenerator,
6
+ )