giga-spatial 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/METADATA +1 -1
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/RECORD +18 -17
- gigaspatial/__init__.py +1 -1
- gigaspatial/config.py +1 -0
- gigaspatial/generators/poi.py +226 -82
- gigaspatial/generators/zonal/base.py +41 -28
- gigaspatial/generators/zonal/geometry.py +79 -36
- gigaspatial/handlers/base.py +22 -8
- gigaspatial/handlers/ghsl.py +22 -8
- gigaspatial/handlers/giga.py +9 -4
- gigaspatial/handlers/healthsites.py +350 -0
- gigaspatial/handlers/osm.py +325 -105
- gigaspatial/handlers/worldpop.py +228 -9
- gigaspatial/processing/geo.py +1 -1
- gigaspatial/processing/tif_processor.py +831 -300
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/WHEEL +0 -0
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.7.0.dist-info → giga_spatial-0.7.1.dist-info}/top_level.txt +0 -0
gigaspatial/generators/zonal/base.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from pydantic import BaseModel, Field
|
3
3
|
from pathlib import Path
|
4
|
-
from typing import Dict, List, Optional, Union,
|
4
|
+
from typing import Dict, List, Optional, Union, TypeVar, Generic
|
5
5
|
from shapely.geometry import Polygon
|
6
6
|
|
7
7
|
import geopandas as gpd
|
@@ -16,10 +16,8 @@ from gigaspatial.processing.geo import (
|
|
16
16
|
aggregate_polygons_to_zones,
|
17
17
|
aggregate_points_to_zones,
|
18
18
|
)
|
19
|
-
from gigaspatial.processing.tif_processor import
|
20
|
-
|
21
|
-
sample_multiple_tifs_by_polygons,
|
22
|
-
)
|
19
|
+
from gigaspatial.processing.tif_processor import TifProcessor
|
20
|
+
|
23
21
|
from functools import lru_cache
|
24
22
|
import logging
|
25
23
|
|
@@ -209,8 +207,6 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
209
207
|
aggregation: Union[str, Dict[str, str]] = "count",
|
210
208
|
predicate: str = "within",
|
211
209
|
output_suffix: str = "",
|
212
|
-
mapping_function: Optional[Callable] = None,
|
213
|
-
**mapping_kwargs,
|
214
210
|
) -> Dict:
|
215
211
|
"""Map point data to zones with spatial aggregation.
|
216
212
|
|
@@ -228,18 +224,12 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
228
224
|
predicate (str): Spatial predicate for point-to-zone relationship.
|
229
225
|
Options include "within", "intersects", "contains". Defaults to "within".
|
230
226
|
output_suffix (str): Suffix to add to output column names. Defaults to empty string.
|
231
|
-
mapping_function (Callable, optional): Custom function for mapping points to zones.
|
232
|
-
If provided, signature should be mapping_function(self, points, **mapping_kwargs).
|
233
|
-
When used, all other parameters except mapping_kwargs are ignored.
|
234
|
-
**mapping_kwargs: Additional keyword arguments passed to the mapping function.
|
235
227
|
|
236
228
|
Returns:
|
237
229
|
Dict: Dictionary with zone IDs as keys and aggregated values as values.
|
238
230
|
If value_columns is None, returns point counts per zone.
|
239
231
|
If value_columns is specified, returns aggregated values per zone.
|
240
232
|
"""
|
241
|
-
if mapping_function is not None:
|
242
|
-
return mapping_function(self, points, **mapping_kwargs)
|
243
233
|
|
244
234
|
self.logger.warning(
|
245
235
|
"Using default points mapping implementation. Consider creating a specialized mapping function."
|
@@ -382,40 +372,63 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
382
372
|
|
383
373
|
def map_rasters(
|
384
374
|
self,
|
385
|
-
|
386
|
-
mapping_function: Optional[Callable] = None,
|
375
|
+
raster_data: Union[TifProcessor, List[TifProcessor]],
|
387
376
|
stat: str = "mean",
|
388
|
-
**
|
389
|
-
) ->
|
377
|
+
**kwargs,
|
378
|
+
) -> Dict:
|
390
379
|
"""Map raster data to zones using zonal statistics.
|
391
380
|
|
392
381
|
Samples raster values within each zone and computes statistics. Automatically
|
393
382
|
handles coordinate reference system transformations between raster and zone data.
|
394
383
|
|
395
384
|
Args:
|
396
|
-
|
397
|
-
|
385
|
+
raster_data (Union[TifProcessor, List[TifProcessor]]):
|
386
|
+
Either a TifProcessor object or a list of TifProcessor objects (which will be merged
|
387
|
+
into a single TifProcessor for processing).
|
398
388
|
mapping_function (Callable, optional): Custom function for mapping rasters
|
399
389
|
to zones. If provided, signature should be mapping_function(self, tif_processors, **mapping_kwargs).
|
400
390
|
When used, stat and other parameters except mapping_kwargs are ignored.
|
401
391
|
stat (str): Statistic to calculate when aggregating raster values within
|
402
392
|
each zone. Options include "mean", "sum", "min", "max", "std", etc.
|
403
393
|
Defaults to "mean".
|
404
|
-
**mapping_kwargs: Additional keyword arguments
|
394
|
+
**mapping_kwargs: Additional keyword arguments for raster data.
|
405
395
|
|
406
396
|
Returns:
|
407
|
-
|
408
|
-
with
|
409
|
-
Custom mapping functions may return different data structures.
|
397
|
+
Dict: By default, returns a dictionary of sampled values
|
398
|
+
with zone IDs as keys.
|
410
399
|
|
411
400
|
Note:
|
412
401
|
If the coordinate reference system of the rasters differs from the zones,
|
413
402
|
the zone geometries will be automatically transformed to match the raster CRS.
|
414
403
|
"""
|
415
|
-
|
416
|
-
|
404
|
+
raster_processor: Optional[TifProcessor] = None
|
405
|
+
|
406
|
+
if isinstance(raster_data, TifProcessor):
|
407
|
+
raster_processor = raster_data
|
408
|
+
elif isinstance(raster_data, list) and all(
|
409
|
+
isinstance(x, TifProcessor) for x in raster_data
|
410
|
+
):
|
411
|
+
if not raster_data:
|
412
|
+
self.logger.info("No valid raster data provided")
|
413
|
+
return self.view
|
414
|
+
|
415
|
+
if len(raster_data) > 1:
|
416
|
+
all_source_paths = [tp.dataset_path for tp in raster_data]
|
417
|
+
|
418
|
+
self.logger.info(
|
419
|
+
f"Merging {len(all_source_paths)} rasters into a single TifProcessor for zonal statistics."
|
420
|
+
)
|
421
|
+
raster_processor = TifProcessor(
|
422
|
+
dataset_path=all_source_paths, data_store=self.data_store, **kwargs
|
423
|
+
)
|
424
|
+
else:
|
425
|
+
raster_processor = raster_data[0]
|
426
|
+
else:
|
427
|
+
raise ValueError(
|
428
|
+
"raster_data must be a TifProcessor object or a list of TifProcessor objects."
|
429
|
+
)
|
417
430
|
|
418
|
-
raster_crs =
|
431
|
+
raster_crs = raster_processor.crs
|
419
432
|
|
420
433
|
if raster_crs != self.zone_gdf.crs:
|
421
434
|
self.logger.info(f"Projecting zones to raster CRS: {raster_crs}")
|
@@ -424,8 +437,8 @@ class ZonalViewGenerator(ABC, Generic[T]):
|
|
424
437
|
zone_geoms = self.get_zonal_geometries()
|
425
438
|
|
426
439
|
# Sample raster values
|
427
|
-
sampled_values =
|
428
|
-
|
440
|
+
sampled_values = raster_processor.sample_by_polygons(
|
441
|
+
polygon_list=zone_geoms, stat=stat
|
429
442
|
)
|
430
443
|
|
431
444
|
zone_ids = self.get_zone_identifiers()
|
gigaspatial/generators/zonal/geometry.py
CHANGED
@@ -261,13 +261,16 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
261
261
|
f"Mapping {handler.config.product} data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
|
262
262
|
)
|
263
263
|
tif_processors = handler.load_data(
|
264
|
-
self.zone_gdf,
|
264
|
+
self.zone_gdf,
|
265
|
+
ensure_available=self.config.ensure_available,
|
266
|
+
merge_rasters=True,
|
267
|
+
**kwargs,
|
265
268
|
)
|
266
269
|
|
267
270
|
self.logger.info(
|
268
271
|
f"Sampling {handler.config.product} data using '{stat}' statistic"
|
269
272
|
)
|
270
|
-
sampled_values = self.map_rasters(
|
273
|
+
sampled_values = self.map_rasters(raster_data=tif_processors, stat=stat)
|
271
274
|
|
272
275
|
column_name = (
|
273
276
|
output_column
|
@@ -488,57 +491,97 @@ class GeometryBasedZonalViewGenerator(ZonalViewGenerator[T]):
|
|
488
491
|
self,
|
489
492
|
country: Union[str, List[str]],
|
490
493
|
resolution=1000,
|
491
|
-
predicate: Literal[
|
494
|
+
predicate: Literal[
|
495
|
+
"centroid_within", "intersects", "fractional"
|
496
|
+
] = "intersects",
|
492
497
|
output_column: str = "population",
|
493
498
|
**kwargs,
|
494
499
|
):
|
495
|
-
|
496
|
-
|
500
|
+
|
501
|
+
# Ensure country is always a list for consistent handling
|
502
|
+
countries_list = [country] if isinstance(country, str) else country
|
497
503
|
|
498
504
|
handler = WPPopulationHandler(
|
499
|
-
|
505
|
+
resolution=resolution,
|
506
|
+
data_store=self.data_store,
|
507
|
+
**kwargs,
|
500
508
|
)
|
501
509
|
|
510
|
+
# Restrict to single country for age_structures project
|
511
|
+
if handler.config.project == "age_structures" and len(countries_list) > 1:
|
512
|
+
raise ValueError(
|
513
|
+
"For 'age_structures' project, only a single country can be processed at a time."
|
514
|
+
)
|
515
|
+
|
502
516
|
self.logger.info(
|
503
517
|
f"Mapping WorldPop Population data (year: {handler.config.year}, resolution: {handler.config.resolution}m)"
|
504
518
|
)
|
505
519
|
|
506
|
-
if predicate == "fractional":
|
507
|
-
|
508
|
-
|
509
|
-
|
520
|
+
if predicate == "fractional" and resolution == 100:
|
521
|
+
self.logger.warning(
|
522
|
+
"Fractional aggregations only supported for datasets with 1000m resolution. Using `intersects` as predicate"
|
523
|
+
)
|
524
|
+
predicate = "intersects"
|
525
|
+
|
526
|
+
if predicate == "centroid_within":
|
527
|
+
if handler.config.project == "age_structures":
|
528
|
+
# Load individual tif processors for the single country
|
529
|
+
all_tif_processors = handler.load_data(
|
530
|
+
countries_list[0],
|
531
|
+
ensure_available=self.config.ensure_available,
|
532
|
+
**kwargs,
|
510
533
|
)
|
511
|
-
|
534
|
+
|
535
|
+
# Sum results from each tif_processor separately
|
536
|
+
all_results_by_zone = {
|
537
|
+
zone_id: 0 for zone_id in self.get_zone_identifiers()
|
538
|
+
}
|
539
|
+
self.logger.info(
|
540
|
+
f"Sampling individual age_structures rasters using 'sum' statistic and summing per zone."
|
541
|
+
)
|
542
|
+
for tif_processor in all_tif_processors:
|
543
|
+
single_raster_result = self.map_rasters(
|
544
|
+
raster_data=tif_processor, stat="sum"
|
545
|
+
)
|
546
|
+
for zone_id, value in single_raster_result.items():
|
547
|
+
all_results_by_zone[zone_id] += value
|
548
|
+
result = all_results_by_zone
|
512
549
|
else:
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
550
|
+
# Existing behavior for non-age_structures projects or if merging is fine
|
551
|
+
tif_processors = []
|
552
|
+
for c in countries_list:
|
553
|
+
tif_processors.extend(
|
554
|
+
handler.load_data(
|
555
|
+
c,
|
556
|
+
ensure_available=self.config.ensure_available,
|
557
|
+
**kwargs,
|
517
558
|
)
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
)
|
522
|
-
|
523
|
-
result = self.map_polygons(
|
524
|
-
gdf_pop,
|
525
|
-
value_columns="pixel_value",
|
526
|
-
aggregation="sum",
|
527
|
-
predicate=predicate,
|
559
|
+
)
|
560
|
+
self.logger.info(
|
561
|
+
f"Sampling WorldPop Population data using 'sum' statistic"
|
528
562
|
)
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
563
|
+
result = self.map_rasters(raster_data=tif_processors, stat="sum")
|
564
|
+
else:
|
565
|
+
gdf_pop = pd.concat(
|
566
|
+
[
|
567
|
+
handler.load_into_geodataframe(
|
568
|
+
c,
|
569
|
+
ensure_available=self.config.ensure_available,
|
570
|
+
**kwargs,
|
571
|
+
)
|
572
|
+
for c in countries_list
|
573
|
+
],
|
574
|
+
ignore_index=True,
|
537
575
|
)
|
538
576
|
|
539
|
-
|
540
|
-
|
577
|
+
self.logger.info(f"Aggregating WorldPop Population data to the zones.")
|
578
|
+
result = self.map_polygons(
|
579
|
+
gdf_pop,
|
580
|
+
value_columns="pixel_value",
|
581
|
+
aggregation="sum",
|
582
|
+
predicate=predicate,
|
583
|
+
)
|
541
584
|
|
542
|
-
self.add_variable_to_view(
|
585
|
+
self.add_variable_to_view(result, output_column)
|
543
586
|
|
544
587
|
return self.view
|
gigaspatial/handlers/base.py
CHANGED
@@ -328,21 +328,33 @@ class BaseHandlerReader(ABC):
|
|
328
328
|
)
|
329
329
|
|
330
330
|
def _load_raster_data(
|
331
|
-
self,
|
332
|
-
|
331
|
+
self,
|
332
|
+
raster_paths: List[Union[str, Path]],
|
333
|
+
merge_rasters: bool = False,
|
334
|
+
**kwargs,
|
335
|
+
) -> Union[List[TifProcessor], TifProcessor]:
|
333
336
|
"""
|
334
337
|
Load raster data from file paths.
|
335
338
|
|
336
339
|
Args:
|
337
340
|
raster_paths (List[Union[str, Path]]): List of file paths to raster files.
|
341
|
+
merge_rasters (bool): If True, all rasters will be merged into a single TifProcessor.
|
342
|
+
Defaults to False.
|
338
343
|
|
339
344
|
Returns:
|
340
|
-
List[TifProcessor]: List of TifProcessor objects
|
345
|
+
Union[List[TifProcessor], TifProcessor]: List of TifProcessor objects or a single
|
346
|
+
TifProcessor if merge_rasters is True.
|
341
347
|
"""
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
348
|
+
if merge_rasters and len(raster_paths) > 1:
|
349
|
+
self.logger.info(
|
350
|
+
f"Merging {len(raster_paths)} rasters into a single TifProcessor."
|
351
|
+
)
|
352
|
+
return TifProcessor(raster_paths, self.data_store, **kwargs)
|
353
|
+
else:
|
354
|
+
return [
|
355
|
+
TifProcessor(data_path, self.data_store, **kwargs)
|
356
|
+
for data_path in raster_paths
|
357
|
+
]
|
346
358
|
|
347
359
|
def _load_tabular_data(
|
348
360
|
self, file_paths: List[Union[str, Path]], read_function: Callable = read_dataset
|
@@ -619,7 +631,9 @@ class BaseHandler(ABC):
|
|
619
631
|
# Download logic
|
620
632
|
if data_units is not None:
|
621
633
|
# Map data_units to their paths and select only those that are missing
|
622
|
-
unit_to_path = dict(
|
634
|
+
unit_to_path = dict(
|
635
|
+
zip(data_paths, data_units)
|
636
|
+
) # units might be dicts, cannot be used as key
|
623
637
|
if force_download:
|
624
638
|
# Download all units if force_download
|
625
639
|
self.downloader.download_data_units(data_units, **kwargs)
|
gigaspatial/handlers/ghsl.py
CHANGED
@@ -610,19 +610,27 @@ class GHSLDataReader(BaseHandlerReader):
|
|
610
610
|
super().__init__(config=config, data_store=data_store, logger=logger)
|
611
611
|
|
612
612
|
def load_from_paths(
|
613
|
-
self,
|
614
|
-
|
613
|
+
self,
|
614
|
+
source_data_path: List[Union[str, Path]],
|
615
|
+
merge_rasters: bool = False,
|
616
|
+
**kwargs,
|
617
|
+
) -> Union[List[TifProcessor], TifProcessor]:
|
615
618
|
"""
|
616
619
|
Load TifProcessors from GHSL dataset.
|
617
620
|
Args:
|
618
621
|
source_data_path: List of file paths to load
|
622
|
+
merge_rasters: If True, all rasters will be merged into a single TifProcessor.
|
623
|
+
Defaults to False.
|
619
624
|
Returns:
|
620
|
-
List[TifProcessor]: List of TifProcessor objects for accessing the raster data
|
625
|
+
Union[List[TifProcessor], TifProcessor]: List of TifProcessor objects for accessing the raster data or a single
|
626
|
+
TifProcessor if merge_rasters is True.
|
621
627
|
"""
|
622
|
-
return self._load_raster_data(
|
628
|
+
return self._load_raster_data(
|
629
|
+
raster_paths=source_data_path, merge_rasters=merge_rasters
|
630
|
+
)
|
623
631
|
|
624
|
-
def load(self, source, **kwargs):
|
625
|
-
return super().load(source=source, file_ext=".tif")
|
632
|
+
def load(self, source, merge_rasters: bool = False, **kwargs):
|
633
|
+
return super().load(source=source, file_ext=".tif", merge_rasters=merge_rasters)
|
626
634
|
|
627
635
|
|
628
636
|
class GHSLDataHandler(BaseHandler):
|
@@ -763,6 +771,7 @@ class GHSLDataHandler(BaseHandler):
|
|
763
771
|
List[Union[str, Path]], # list of paths
|
764
772
|
],
|
765
773
|
ensure_available: bool = True,
|
774
|
+
merge_rasters: bool = False,
|
766
775
|
**kwargs,
|
767
776
|
):
|
768
777
|
return super().load_data(
|
@@ -771,6 +780,7 @@ class GHSLDataHandler(BaseHandler):
|
|
771
780
|
file_ext=".tif",
|
772
781
|
extract=True,
|
773
782
|
file_pattern=r".*\.tif$",
|
783
|
+
merge_rasters=merge_rasters,
|
774
784
|
**kwargs,
|
775
785
|
)
|
776
786
|
|
@@ -801,8 +811,10 @@ class GHSLDataHandler(BaseHandler):
|
|
801
811
|
tif_processors = self.load_data(
|
802
812
|
source=source, ensure_available=ensure_available, **kwargs
|
803
813
|
)
|
814
|
+
if isinstance(tif_processors, TifProcessor):
|
815
|
+
return tif_processors.to_dataframe(**kwargs)
|
804
816
|
return pd.concat(
|
805
|
-
[tp.to_dataframe() for tp in tif_processors], ignore_index=True
|
817
|
+
[tp.to_dataframe(**kwargs) for tp in tif_processors], ignore_index=True
|
806
818
|
)
|
807
819
|
|
808
820
|
def load_into_geodataframe(
|
@@ -832,8 +844,10 @@ class GHSLDataHandler(BaseHandler):
|
|
832
844
|
tif_processors = self.load_data(
|
833
845
|
source=source, ensure_available=ensure_available, **kwargs
|
834
846
|
)
|
847
|
+
if isinstance(tif_processors, TifProcessor):
|
848
|
+
return tif_processors.to_geodataframe(**kwargs)
|
835
849
|
return pd.concat(
|
836
|
-
[tp.to_geodataframe() for tp in tif_processors], ignore_index=True
|
850
|
+
[tp.to_geodataframe(**kwargs) for tp in tif_processors], ignore_index=True
|
837
851
|
)
|
838
852
|
|
839
853
|
def get_available_data_info(
|
gigaspatial/handlers/giga.py
CHANGED
@@ -8,6 +8,7 @@ from shapely.geometry import Point
|
|
8
8
|
import pycountry
|
9
9
|
from typing import Optional, Union
|
10
10
|
import logging
|
11
|
+
import geopandas as gpd
|
11
12
|
|
12
13
|
from gigaspatial.config import config as global_config
|
13
14
|
|
@@ -40,11 +41,14 @@ class GigaSchoolLocationFetcher:
|
|
40
41
|
if self.logger is None:
|
41
42
|
self.logger = global_config.get_logger(self.__class__.__name__)
|
42
43
|
|
43
|
-
def fetch_locations(
|
44
|
+
def fetch_locations(
|
45
|
+
self, process_geospatial: bool = False, **kwargs
|
46
|
+
) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
|
44
47
|
"""
|
45
48
|
Fetch and process school locations.
|
46
49
|
|
47
50
|
Args:
|
51
|
+
process_geospatial (bool): Whether to process geospatial data and return a GeoDataFrame. Defaults to False.
|
48
52
|
**kwargs: Additional parameters for customization
|
49
53
|
- page_size: Override default page size
|
50
54
|
- sleep_time: Override default sleep time between requests
|
@@ -122,11 +126,12 @@ class GigaSchoolLocationFetcher:
|
|
122
126
|
|
123
127
|
df = pd.DataFrame(all_data)
|
124
128
|
|
125
|
-
|
129
|
+
if process_geospatial:
|
130
|
+
df = self._process_geospatial_data(df)
|
126
131
|
|
127
132
|
return df
|
128
133
|
|
129
|
-
def _process_geospatial_data(self, df: pd.DataFrame) ->
|
134
|
+
def _process_geospatial_data(self, df: pd.DataFrame) -> gpd.GeoDataFrame:
|
130
135
|
"""
|
131
136
|
Process and enhance the DataFrame with geospatial information.
|
132
137
|
|
@@ -144,7 +149,7 @@ class GigaSchoolLocationFetcher:
|
|
144
149
|
)
|
145
150
|
self.logger.info(f"Created geometry for all {len(df)} records")
|
146
151
|
|
147
|
-
return df
|
152
|
+
return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
|
148
153
|
|
149
154
|
|
150
155
|
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|