giga-spatial 0.6.4__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +3 -1
- giga_spatial-0.6.6.dist-info/RECORD +50 -0
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +29 -4
- gigaspatial/core/io/__init__.py +1 -0
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +319 -0
- gigaspatial/generators/__init__.py +5 -1
- gigaspatial/generators/poi.py +300 -52
- gigaspatial/generators/zonal/__init__.py +2 -1
- gigaspatial/generators/zonal/admin.py +84 -0
- gigaspatial/generators/zonal/base.py +237 -81
- gigaspatial/generators/zonal/geometry.py +151 -53
- gigaspatial/generators/zonal/mercator.py +50 -19
- gigaspatial/grid/__init__.py +1 -1
- gigaspatial/grid/mercator_tiles.py +33 -10
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +93 -18
- gigaspatial/handlers/ghsl.py +92 -15
- gigaspatial/handlers/rwi.py +5 -2
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/algorithms.py +188 -0
- gigaspatial/processing/geo.py +204 -102
- gigaspatial/processing/tif_processor.py +220 -45
- giga_spatial-0.6.4.dist-info/RECORD +0 -47
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.4.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
gigaspatial/generators/zonal/base.py

@@ -13,7 +13,6 @@ from gigaspatial.core.io.local_data_store import LocalDataStore
 from gigaspatial.core.io.writers import write_dataset
 from gigaspatial.config import config as global_config
 from gigaspatial.processing.geo import (
-    convert_to_geodataframe,
     aggregate_polygons_to_zones,
     aggregate_points_to_zones,
 )
@@ -77,6 +76,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
         self.config = config or ZonalViewGeneratorConfig()
         self.data_store = data_store or LocalDataStore()
         self.logger = logger or global_config.get_logger(self.__class__.__name__)
+        self._view: Optional[pd.DataFrame] = None
 
     @abstractmethod
     def get_zonal_geometries(self) -> List[Polygon]:
@@ -103,7 +103,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
         """
         pass
 
-    def
+    def get_zone_geodataframe(self) -> gpd.GeoDataFrame:
         """Convert zones to a GeoDataFrame.
 
         Creates a GeoDataFrame containing zone identifiers and their corresponding
@@ -131,9 +131,77 @@ class ZonalViewGenerator(ABC, Generic[T]):
         and identifiers.
         """
         if not hasattr(self, "_zone_gdf"):
-            self._zone_gdf = self.
+            self._zone_gdf = self.get_zone_geodataframe()
         return self._zone_gdf
 
+    @property
+    def view(self) -> pd.DataFrame:
+        """The DataFrame representing the current zonal view.
+
+        Returns:
+            pd.DataFrame: The DataFrame containing zone IDs, and
+                any added variables. If no variables have been added,
+                it returns the base `zone_gdf` without geometries.
+        """
+        if self._view is None:
+            self._view = self.zone_gdf.drop(columns="geometry")
+        return self._view
+
+    def add_variable_to_view(self, data_dict: Dict, column_name: str) -> None:
+        """
+        Adds a new variable (column) to the zonal view GeoDataFrame.
+
+        This method takes a dictionary (typically the result of map_points or map_polygons)
+        and adds its values as a new column to the internal `_view` (or `zone_gdf` if not yet initialized).
+        The dictionary keys are expected to be the `zone_id` values.
+
+        Args:
+            data_dict (Dict): A dictionary where keys are `zone_id`s and values are
+                the data to be added.
+            column_name (str): The name of the new column to be added to the GeoDataFrame.
+        Raises:
+            ValueError: If the `data_dict` keys do not match the `zone_id`s in the zonal view.
+                If the `column_name` already exists in the zonal view.
+        """
+        if self._view is None:
+            self._view = self.zone_gdf.drop(columns="geometry")
+
+        if column_name in self._view.columns:
+            raise ValueError(
+                f"Column '{column_name}' already exists in the zonal view."
+            )
+
+        # Create a pandas Series from the dictionary, aligning by index (zone_id)
+        new_series = pd.Series(data_dict, name=column_name)
+
+        # Before merging, ensure the zone_ids in data_dict match those in _view
+        missing_zones_in_data = set(self._view["zone_id"]) - set(new_series.index)
+        extra_zones_in_data = set(new_series.index) - set(self._view["zone_id"])
+
+        if missing_zones_in_data:
+            self.logger.warning(
+                f"Warning: {len(missing_zones_in_data)} zone(s) from the zonal view "
+                f"are missing in the provided data_dict for column '{column_name}'. "
+                f"These zones will have NaN values for '{column_name}'. Missing: {list(missing_zones_in_data)[:5]}..."
+            )
+        if extra_zones_in_data:
+            self.logger.warning(
+                f"Warning: {len(extra_zones_in_data)} zone(s) in the provided data_dict "
+                f"are not present in the zonal view for column '{column_name}'. "
+                f"These will be ignored. Extra: {list(extra_zones_in_data)[:5]}..."
+            )
+
+        # Merge the new series with the _view based on 'zone_id'
+        # Using .set_index() for efficient alignment
+        original_index_name = self._view.index.name
+        self._view = self._view.set_index("zone_id").join(new_series).reset_index()
+        if original_index_name:  # Restore original index name if it existed
+            self._view.index.name = original_index_name
+        else:  # If it was a default integer index, ensure it's not named 'index'
+            self._view.index.name = None
+
+        self.logger.info(f"Added variable '{column_name}' to the zonal view.")
+
     def map_points(
         self,
         points: Union[pd.DataFrame, gpd.GeoDataFrame],
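The hunk above adds a `_view` DataFrame that accumulates per-zone variables separately from the zone geometries, plus the `add_variable_to_view` method for attaching mapping results by `zone_id`. The sketch below illustrates the intended round trip with a toy concrete subclass; the subclass, its fixed zones, the sample points, and the assumption that `get_zonal_geometries` and `get_zone_identifiers` are the only members a subclass must provide are illustrative, not part of this diff.

```python
# Illustrative sketch only: the toy subclass and sample data are assumptions,
# not code from the giga-spatial package.
from typing import List

import geopandas as gpd
from shapely.geometry import Point, Polygon, box

from gigaspatial.generators.zonal.base import ZonalViewGenerator


class ToyGridViewGenerator(ZonalViewGenerator):
    """Four fixed 1x1 degree zones, just enough to exercise the view API."""

    def get_zonal_geometries(self) -> List[Polygon]:
        return [box(i, 0.0, i + 1, 1.0) for i in range(4)]

    def get_zone_identifiers(self) -> List[str]:
        return [f"cell_{i}" for i in range(4)]


generator = ToyGridViewGenerator()

# Three sample points: two fall in cell_0, one in cell_1.
points = gpd.GeoDataFrame(
    {"school_id": [1, 2, 3]},
    geometry=[Point(0.5, 0.5), Point(0.7, 0.2), Point(1.5, 0.5)],
    crs="EPSG:4326",
)

# With no value_columns, map_points returns {zone_id: point_count}.
counts = generator.map_points(points)

# Attach the counts to the zonal view under a named column.
generator.add_variable_to_view(counts, "school_count")

# The view is a plain DataFrame: zone_id plus added variables, no geometry.
print(generator.view)
```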
@@ -173,98 +241,144 @@ class ZonalViewGenerator(ABC, Generic[T]):
         if mapping_function is not None:
             return mapping_function(self, points, **mapping_kwargs)
 
-
+        self.logger.warning(
+            "Using default points mapping implementation. Consider creating a specialized mapping function."
+        )
+        result = aggregate_points_to_zones(
+            points=points,
+            zones=self.zone_gdf,
+            value_columns=value_columns,
+            aggregation=aggregation,
+            point_zone_predicate=predicate,
+            zone_id_column="zone_id",
+            output_suffix=output_suffix,
+        )
+
+        if isinstance(value_columns, str):
+            return result.set_index("zone_id")[value_columns].to_dict()
+        elif isinstance(value_columns, list):
+            # If multiple value columns, return a dictionary of dictionaries
+            # Or, if preferred, a dictionary where values are lists/tuples of results
+            # For now, let's return a dict of series, which is common.
+            # The previous version implied a single dictionary result from map_points/polygons
+            # but with multiple columns, it's usually {zone_id: {col1: val1, col2: val2}}
+            # or {col_name: {zone_id: val}}
+            # In this version, it'll return a dictionary for each column.
+            return {
+                col: result.set_index("zone_id")[col].to_dict() for col in value_columns
+            }
+        else:  # If value_columns is None, it should return point_count
             self.logger.warning(
-                "
+                "No `value_columns` provided. Mapping point counts. Consider passing `value_columns` and `aggregation` or `mapping_function`."
             )
-        result
-            points=points,
-            zones=self.zone_gdf,
-            value_columns=value_columns,
-            aggregation=aggregation,
-            point_zone_predicate=predicate,
-            zone_id_column="zone_id",
-            output_suffix=output_suffix,
-        )
-
-        if not value_columns:
-            return result["point_count"].to_dict()
-
-        return result[value_columns].to_dict()
+            return result.set_index("zone_id")["point_count"].to_dict()
 
     def map_polygons(
         self,
-        polygons
+        polygons,
         value_columns: Optional[Union[str, List[str]]] = None,
-        aggregation: Union[str, Dict[str, str]] = "
-
-
-        mapping_function: Optional[Callable] = None,
-        **mapping_kwargs,
+        aggregation: Union[str, Dict[str, str]] = "count",
+        predicate: str = "intersects",
+        **kwargs,
     ) -> Dict:
-        """
+        """
+        Maps polygon data to the instance's zones and aggregates values.
 
-
-
+        This method leverages `aggregate_polygons_to_zones` to perform a spatial
+        aggregation of polygon data onto the zones stored within this object instance.
+        It can count polygons, or aggregate their values, based on different spatial
+        relationships defined by the `predicate`.
 
         Args:
-            polygons (Union[pd.DataFrame, gpd.GeoDataFrame]):
-                Must contain geometry information if
-
-
-
-
-
-
-
-
-
-
-
-
-
+            polygons (Union[pd.DataFrame, gpd.GeoDataFrame]):
+                The polygon data to map. Must contain geometry information if a
+                DataFrame.
+            value_columns (Union[str, List[str]], optional):
+                The column name(s) from the `polygons` data to aggregate. If `None`,
+                the method will automatically count the number of polygons that
+                match the given `predicate` for each zone.
+            aggregation (Union[str, Dict[str, str]], optional):
+                The aggregation method(s) to use. Can be a single string (e.g., "sum",
+                "mean", "max") or a dictionary mapping column names to specific
+                aggregation methods. This is ignored and set to "count" if
+                `value_columns` is `None`. Defaults to "count".
+            predicate (Literal["intersects", "within", "fractional"], optional):
+                The spatial relationship to use for aggregation:
+                - "intersects": Counts or aggregates values for any polygon that
+                  intersects a zone.
+                - "within": Counts or aggregates values for polygons that are
+                  entirely contained within a zone.
+                - "fractional": Performs area-weighted aggregation. The value of a
+                  polygon is distributed proportionally to the area of its overlap
+                  with each zone.
+                Defaults to "intersects".
+            **kwargs:
+                Additional keyword arguments to be passed to the underlying
+                `aggregate_polygons_to_zones_new` function.
 
         Returns:
-            Dict:
-
+            Dict:
+                A dictionary or a nested dictionary containing the aggregated values,
+                with zone IDs as keys. If `value_columns` is a single string, the
+                return value is a dictionary mapping zone ID to the aggregated value.
+                If `value_columns` is a list, the return value is a nested dictionary
+                mapping each column name to its own dictionary of aggregated values.
 
         Raises:
-
+            ValueError: If `value_columns` is of an unexpected type after processing.
+
+        Example:
+            >>> # Assuming 'self' is an object with a 'zone_gdf' attribute
+            >>> # Count all land parcels that intersect each zone
+            >>> parcel_counts = self.map_polygons(landuse_polygons)
+            >>>
+            >>> # Aggregate total population within zones using area weighting
+            >>> population_by_zone = self.map_polygons(
+            ...     landuse_polygons,
+            ...     value_columns="population",
+            ...     predicate="fractional",
+            ...     aggregation="sum"
+            ... )
+            >>>
+            >>> # Get the sum of residential area and count of buildings within each zone
+            >>> residential_stats = self.map_polygons(
+            ...     building_polygons,
+            ...     value_columns=["residential_area_sqm", "building_id"],
+            ...     aggregation={"residential_area_sqm": "sum", "building_id": "count"},
+            ...     predicate="intersects"
+            ... )
         """
-        if mapping_function is not None:
-            return mapping_function(self, polygons, **mapping_kwargs)
-
-        if area_column not in polygons_gdf:
-            if not isinstance(polygons, gpd.GeoDataFrame):
-                try:
-                    polygons_gdf = convert_to_geodataframe(polygons)
-                except:
-                    raise TypeError(
-                        "polygons must be a GeoDataFrame or convertible to one"
-                    )
-            else:
-                polygons_gdf = polygons.copy()
-
-            polygons_gdf[area_column] = polygons_gdf.to_crs(
-                polygons_gdf.estimate_utm_crs()
-            ).geometry.area
 
         if value_columns is None:
             self.logger.warning(
-                "
+                f"No value_columns specified. Defaulting to counting polygons with {predicate} predicate."
             )
-
+            temp_value_col = "_temp_polygon_count_dummy"
+            polygons[temp_value_col] = 1
+            actual_value_columns = temp_value_col
+            aggregation = "count"  # Force count if no value columns
+        else:
+            actual_value_columns = value_columns
 
         result = aggregate_polygons_to_zones(
-            polygons=
+            polygons=polygons,
             zones=self.zone_gdf,
-            value_columns=
+            value_columns=actual_value_columns,
             aggregation=aggregation,
-
+            predicate=predicate,
             zone_id_column="zone_id",
         )
 
-
+        # Convert the result GeoDataFrame to the expected dictionary format
+        if isinstance(actual_value_columns, str):
+            return result.set_index("zone_id")[actual_value_columns].to_dict()
+        elif isinstance(actual_value_columns, list):
+            return {
+                col: result.set_index("zone_id")[col].to_dict()
+                for col in actual_value_columns
+            }
+        else:
+            raise ValueError("Unexpected type for actual_value_columns.")
 
     def map_rasters(
         self,
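With this hunk, `map_polygons` drops the `mapping_function`/`mapping_kwargs` parameters, always delegates to `aggregate_polygons_to_zones`, and selects rows by `predicate`. A short sketch of the new call shape and return structures follows; it continues the toy `generator` from the earlier example, and the building data are made up.

```python
# Continuation of the toy sketch above; all values are illustrative assumptions.
import geopandas as gpd
from shapely.geometry import box

buildings = gpd.GeoDataFrame(
    {"floor_area": [120.0, 80.0, 300.0]},
    geometry=[
        box(0.1, 0.1, 0.2, 0.2),
        box(0.6, 0.6, 0.8, 0.8),
        box(0.9, 0.4, 1.2, 0.6),  # straddles cell_0 and cell_1
    ],
    crs="EPSG:4326",
)

# No value_columns: each zone gets a count of polygons matching the predicate.
building_counts = generator.map_polygons(buildings)

# Single value column with area weighting: returns {zone_id: weighted_sum}.
area_by_zone = generator.map_polygons(
    buildings, value_columns="floor_area", aggregation="sum", predicate="fractional"
)

# A list of value columns returns a nested dict: {column: {zone_id: value}}.
stats = generator.map_polygons(
    buildings, value_columns=["floor_area"], aggregation={"floor_area": "mean"}
)

# Flat {zone_id: value} results can feed add_variable_to_view directly.
generator.add_variable_to_view(area_by_zone, "building_area_sum")
```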
@@ -291,7 +405,7 @@ class ZonalViewGenerator(ABC, Generic[T]):
 
         Returns:
             Union[np.ndarray, Dict]: By default, returns a NumPy array of sampled values
-                with shape (n_zones,
+                with shape (n_zones, 1), taking the first non-nodata value encountered.
                 Custom mapping functions may return different data structures.
 
         Note:
@@ -301,10 +415,6 @@ class ZonalViewGenerator(ABC, Generic[T]):
         if mapping_function is not None:
             return mapping_function(self, tif_processors, **mapping_kwargs)
 
-        self.logger.warning(
-            "Using default raster mapping implementation. Consider creating a specialized mapping function."
-        )
-
         raster_crs = tif_processors[0].crs
 
         if raster_crs != self.zone_gdf.crs:
@@ -318,7 +428,9 @@ class ZonalViewGenerator(ABC, Generic[T]):
             tif_processors=tif_processors, polygon_list=zone_geoms, stat=stat
         )
 
-
+        zone_ids = self.get_zone_identifiers()
+
+        return {zone_id: value for zone_id, value in zip(zone_ids, sampled_values)}
 
     @lru_cache(maxsize=32)
     def _get_transformed_geometries(self, target_crs):
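The default raster mapping now returns a dict keyed by zone identifier rather than a bare array of sampled values, so its output can be attached to the view like any other mapping result. A minimal sketch, assuming `processors` is a list of `TifProcessor` objects prepared elsewhere and that `stat="mean"` is an accepted statistic (neither is shown in this hunk):

```python
# Sketch only: `processors` and the "mean" stat are assumptions about APIs
# defined outside this hunk (gigaspatial/processing/tif_processor.py).
values_by_zone = generator.map_rasters(tif_processors=processors, stat="mean")

# {zone_id: sampled_value} plugs straight into the zonal view.
generator.add_variable_to_view(values_by_zone, "raster_mean")
```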
@@ -337,34 +449,78 @@ class ZonalViewGenerator(ABC, Generic[T]):
 
     def save_view(
         self,
-        view_data: gpd.GeoDataFrame,
         name: str,
         output_format: Optional[str] = None,
     ) -> Path:
         """Save the generated zonal view to disk.
 
         Args:
-            view_data (gpd.GeoDataFrame): The zonal view data to save.
             name (str): Base name for the output file (without extension).
             output_format (str, optional): File format to save in (e.g., "parquet",
-                "geojson", "shp"). If None, uses the format specified in
+                "geojson", "shp"). If None, uses the format specified in config.
 
         Returns:
             Path: The full path where the view was saved.
 
         Note:
-            The output directory is determined by the
+            The output directory is determined by the config.base_path setting.
             The file extension is automatically added based on the output format.
+            This method now saves the internal `self.view`.
         """
+        if self._view is None:
+            self.logger.warning(
+                "No variables have been added to the zonal view. Saving the base zone_gdf."
+            )
+            view_to_save = self.zone_gdf
+        else:
+            view_to_save = self._view
+
         format_to_use = output_format or self.config.output_format
         output_path = self.config.base_path / f"{name}.{format_to_use}"
 
         self.logger.info(f"Saving zonal view to {output_path}")
+
+        if format_to_use in ["geojson", "shp", "gpkg"]:
+            self.logger.warning(
+                f"Saving to {format_to_use} requires converting back to GeoDataFrame. Geometry column will be re-added."
+            )
+            # Re-add geometry for saving to geospatial formats
+            view_to_save = self.view.merge(
+                self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
+            )
+
         write_dataset(
-
+            data=view_to_save,
             path=str(output_path),
             data_store=self.data_store,
-            format=format_to_use,
         )
 
         return output_path
+
+    def to_dataframe(self) -> pd.DataFrame:
+        """
+        Returns the current zonal view as a DataFrame.
+
+        This method combines all accumulated variables in the view
+
+        Returns:
+            pd.DataFrame: The current view.
+        """
+        return self.view
+
+    def to_geodataframe(self) -> gpd.GeoDataFrame:
+        """
+        Returns the current zonal view merged with zone geometries as a GeoDataFrame.
+
+        This method combines all accumulated variables in the view with the corresponding
+        zone geometries, providing a spatially-enabled DataFrame for further analysis or export.
+
+        Returns:
+            gpd.GeoDataFrame: The current view merged with zone geometries.
+        """
+        return gpd.GeoDataFrame(
+            (self.view).merge(
+                self.zone_gdf[["zone_id", "geometry"]], on="zone_id", how="left"
+            ),
+            crs=self.zone_gdf.crs,
+        )
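End to end, the generator now owns its view: `save_view` no longer takes a `view_data` argument, and the new `to_dataframe`/`to_geodataframe` methods export the accumulated variables with or without geometry. A closing sketch, continuing the toy generator from the earlier examples (the output names are illustrative):

```python
# Sketch: continues the ToyGridViewGenerator example above.

# Tabular export of the accumulated view (zone_id plus added columns).
df = generator.to_dataframe()

# Spatial export: view columns joined back onto the zone geometries.
gdf = generator.to_geodataframe()

# save_view writes self.view (or zone_gdf if nothing was added); geometry is
# re-attached automatically for geospatial formats such as geojson/shp/gpkg.
parquet_path = generator.save_view("zonal_view")  # uses config.output_format
geojson_path = generator.save_view("zonal_view", output_format="geojson")
```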