giga-spatial 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. giga_spatial-0.6.0.dist-info/METADATA +141 -0
  2. giga_spatial-0.6.0.dist-info/RECORD +47 -0
  3. giga_spatial-0.6.0.dist-info/WHEEL +5 -0
  4. giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
  5. giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
  6. gigaspatial/__init__.py +1 -0
  7. gigaspatial/config.py +226 -0
  8. gigaspatial/core/__init__.py +0 -0
  9. gigaspatial/core/io/__init__.py +5 -0
  10. gigaspatial/core/io/adls_data_store.py +325 -0
  11. gigaspatial/core/io/data_api.py +113 -0
  12. gigaspatial/core/io/data_store.py +147 -0
  13. gigaspatial/core/io/local_data_store.py +92 -0
  14. gigaspatial/core/io/readers.py +265 -0
  15. gigaspatial/core/io/writers.py +128 -0
  16. gigaspatial/core/schemas/__init__.py +0 -0
  17. gigaspatial/core/schemas/entity.py +244 -0
  18. gigaspatial/generators/__init__.py +2 -0
  19. gigaspatial/generators/poi.py +636 -0
  20. gigaspatial/generators/zonal/__init__.py +3 -0
  21. gigaspatial/generators/zonal/base.py +370 -0
  22. gigaspatial/generators/zonal/geometry.py +439 -0
  23. gigaspatial/generators/zonal/mercator.py +78 -0
  24. gigaspatial/grid/__init__.py +1 -0
  25. gigaspatial/grid/mercator_tiles.py +286 -0
  26. gigaspatial/handlers/__init__.py +40 -0
  27. gigaspatial/handlers/base.py +761 -0
  28. gigaspatial/handlers/boundaries.py +305 -0
  29. gigaspatial/handlers/ghsl.py +772 -0
  30. gigaspatial/handlers/giga.py +145 -0
  31. gigaspatial/handlers/google_open_buildings.py +472 -0
  32. gigaspatial/handlers/hdx.py +241 -0
  33. gigaspatial/handlers/mapbox_image.py +208 -0
  34. gigaspatial/handlers/maxar_image.py +291 -0
  35. gigaspatial/handlers/microsoft_global_buildings.py +548 -0
  36. gigaspatial/handlers/ookla_speedtest.py +199 -0
  37. gigaspatial/handlers/opencellid.py +290 -0
  38. gigaspatial/handlers/osm.py +356 -0
  39. gigaspatial/handlers/overture.py +126 -0
  40. gigaspatial/handlers/rwi.py +157 -0
  41. gigaspatial/handlers/unicef_georepo.py +806 -0
  42. gigaspatial/handlers/worldpop.py +266 -0
  43. gigaspatial/processing/__init__.py +4 -0
  44. gigaspatial/processing/geo.py +1054 -0
  45. gigaspatial/processing/sat_images.py +39 -0
  46. gigaspatial/processing/tif_processor.py +477 -0
  47. gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,286 @@
1
+ import pandas as pd
2
+ import geopandas as gpd
3
+ import mercantile
4
+ from shapely.geometry import box
5
+ from shapely.geometry.base import BaseGeometry
6
+ from shapely.strtree import STRtree
7
+ from shapely import MultiPolygon, Polygon, Point
8
+ import json
9
+ from pathlib import Path
10
+ from pydantic import BaseModel, Field, PrivateAttr
11
+ from typing import List, Union, Iterable, Optional, Tuple, ClassVar
12
+ import pycountry
13
+
14
+ from gigaspatial.core.io.data_store import DataStore
15
+ from gigaspatial.core.io.local_data_store import LocalDataStore
16
+ from gigaspatial.config import config
17
+
18
+
19
class MercatorTiles(BaseModel):
    """A collection of Web Mercator tiles at one zoom level, stored as quadkeys.

    Attributes are pydantic fields:
        zoom_level: Tile zoom level, validated to the range 0-20.
        quadkeys: Quadkey strings of the tiles in the collection.
        data_store: Store used by ``save()``; excluded from serialization.
    """

    zoom_level: int = Field(..., ge=0, le=20)
    quadkeys: List[str] = Field(default_factory=list)
    data_store: DataStore = Field(default_factory=LocalDataStore, exclude=True)
    logger: ClassVar = config.get_logger("MercatorTiles")

    class Config:
        arbitrary_types_allowed = True

    @classmethod
    def from_quadkeys(cls, quadkeys: List[str]):
        """Create MercatorTiles from list of quadkeys.

        Duplicates are removed while keeping first-seen order. The zoom level
        is taken from the length of the first quadkey; an empty input yields
        an empty collection at zoom level 0.
        """
        # dict.fromkeys de-duplicates deterministically (a set would shuffle
        # the order) and also materializes one-shot iterables exactly once.
        unique_quadkeys = list(dict.fromkeys(quadkeys or []))
        if not unique_quadkeys:
            cls.logger.warning("No quadkeys provided to from_quadkeys.")
            return cls(zoom_level=0, quadkeys=[])
        return cls(zoom_level=len(unique_quadkeys[0]), quadkeys=unique_quadkeys)

    @classmethod
    def from_bounds(
        cls, xmin: float, ymin: float, xmax: float, ymax: float, zoom_level: int
    ):
        """Create MercatorTiles from boundary coordinates.

        The bounds are forwarded to ``mercantile.tiles`` in the order
        (xmin, ymin, xmax, ymax).
        """
        cls.logger.info(
            f"Creating MercatorTiles from bounds: ({xmin}, {ymin}, {xmax}, {ymax}) at zoom level: {zoom_level}"
        )
        return cls(
            zoom_level=zoom_level,
            quadkeys=[
                mercantile.quadkey(tile)
                for tile in mercantile.tiles(xmin, ymin, xmax, ymax, zoom_level)
            ],
        )

    @classmethod
    def from_spatial(
        cls,
        source: Union[
            BaseGeometry,
            gpd.GeoDataFrame,
            List[Union[Point, Tuple[float, float]]],  # points
        ],
        zoom_level: int,
        predicate: str = "intersects",
        **kwargs,
    ):
        """Create MercatorTiles from a geometry, a GeoDataFrame or points.

        Args:
            source: A shapely geometry, a GeoDataFrame (reprojected to
                EPSG:4326 if needed and dissolved into one geometry), or an
                iterable of shapely Points / two-element coordinate pairs.
            zoom_level: Zoom level of the generated tiles.
            predicate: Spatial predicate used when ``source`` is a geometry.
            **kwargs: Extra fields forwarded to the constructor.

        Raises:
            ValueError: If ``source`` is none of the supported kinds.
        """
        cls.logger.info(
            f"Creating MercatorTiles from spatial source (type: {type(source)}) at zoom level: {zoom_level} with predicate: {predicate}"
        )
        if isinstance(source, gpd.GeoDataFrame):
            if source.crs != "EPSG:4326":
                source = source.to_crs("EPSG:4326")
            source = source.geometry.unary_union

        if isinstance(source, BaseGeometry):
            return cls.from_geometry(
                geometry=source, zoom_level=zoom_level, predicate=predicate, **kwargs
            )

        def _is_point_like(pt) -> bool:
            # Check Point first: shapely Points do not reliably support len().
            return isinstance(pt, Point) or (
                hasattr(pt, "__len__") and len(pt) == 2
            )

        if isinstance(source, Iterable):
            # Materialize once so a one-shot iterator is not consumed by the
            # validation pass before from_points sees it.
            points = list(source)
            if all(_is_point_like(pt) for pt in points):
                return cls.from_points(
                    points=points, zoom_level=zoom_level, **kwargs
                )

        raise ValueError(
            f"Unsupported source type for from_spatial: {type(source)}. "
            "Expected a shapely geometry, a GeoDataFrame, or an iterable of "
            "Points / coordinate pairs."
        )

    @classmethod
    def from_geometry(
        cls,
        geometry: BaseGeometry,
        zoom_level: int,
        predicate: str = "intersects",
        **kwargs,
    ):
        """Create MercatorTiles from a shapely geometry.

        Candidate tiles are generated from the geometry's bounding box and
        then filtered with ``STRtree.query`` using ``predicate``.

        Args:
            geometry: Geometry to cover with tiles.
            zoom_level: Zoom level of the generated tiles.
            predicate: Spatial predicate passed to ``STRtree.query``.
            **kwargs: Extra fields forwarded to the constructor.
        """
        if geometry.is_empty:
            # An empty geometry has no usable bounds to enumerate tiles from.
            cls.logger.warning(
                "Empty geometry provided. Returning empty MercatorTiles."
            )
            return cls(zoom_level=zoom_level, quadkeys=[], **kwargs)

        cls.logger.info(
            f"Creating MercatorTiles from geometry (bounds: {geometry.bounds}) at zoom level: {zoom_level} with predicate: {predicate}"
        )
        tiles = list(mercantile.tiles(*geometry.bounds, zoom_level))

        if not tiles:
            cls.logger.warning(
                "No boxes generated from geometry bounds. Returning empty MercatorTiles."
            )
            return cls(zoom_level=zoom_level, quadkeys=[], **kwargs)

        quadkeys = [mercantile.quadkey(t) for t in tiles]
        boxes = [box(*mercantile.bounds(t)) for t in tiles]

        # The tree is built over the tile boxes, so the returned indices map
        # straight back into the parallel `quadkeys` list.
        result_indices = STRtree(boxes).query(geometry, predicate=predicate)
        filtered_quadkeys = [quadkeys[i] for i in result_indices]
        cls.logger.info(
            f"Filtered down to {len(filtered_quadkeys)} quadkeys using spatial predicate."
        )
        return cls(zoom_level=zoom_level, quadkeys=filtered_quadkeys, **kwargs)

    @classmethod
    def from_points(
        cls, points: List[Union[Point, Tuple[float, float]]], zoom_level: int, **kwargs
    ) -> "MercatorTiles":
        """Create MercatorTiles from a list of points or lat-lon pairs.

        Shapely Points are read as (x, y). Plain pairs are read as
        (lat, lon): element 1 is passed to ``mercantile.tile`` as the
        longitude and element 0 as the latitude. Duplicate tiles are dropped.
        """
        cls.logger.info(
            f"Creating MercatorTiles from {len(points)} points at zoom level: {zoom_level}"
        )
        quadkeys = {
            (
                mercantile.quadkey(mercantile.tile(p.x, p.y, zoom_level))
                if isinstance(p, Point)
                else mercantile.quadkey(mercantile.tile(p[1], p[0], zoom_level))
            )
            for p in points
        }
        cls.logger.info(f"Generated {len(quadkeys)} unique quadkeys from points.")
        return cls(zoom_level=zoom_level, quadkeys=list(quadkeys), **kwargs)

    @classmethod
    def from_json(
        cls, data_store: DataStore, file: Union[str, Path], **kwargs
    ) -> "MercatorTiles":
        """Load MercatorTiles from a JSON file.

        The file may hold either a bare list of quadkeys (the zoom level is
        then taken from the length of the first one, or 0 if the list is
        empty) or a mapping of constructor fields. ``kwargs`` override values
        read from the file. The returned instance uses ``data_store``.
        """
        cls.logger.info(
            f"Loading MercatorTiles from JSON file: {file} using data store: {type(data_store).__name__}"
        )
        with data_store.open(str(file), "r") as f:
            data = json.load(f)

        if isinstance(data, list):  # If file contains only quadkeys
            data = {
                "zoom_level": len(data[0]) if data else 0,
                "quadkeys": data,
                **kwargs,
            }
        else:
            data.update(kwargs)

        instance = cls(**data)
        instance.data_store = data_store
        cls.logger.info(
            f"Successfully loaded {len(instance.quadkeys)} quadkeys from JSON file."
        )
        return instance

    def filter_quadkeys(self, quadkeys: Iterable[str]) -> "MercatorTiles":
        """Filter quadkeys by a given set of quadkeys.

        Returns a new MercatorTiles holding the quadkeys present in both this
        instance and ``quadkeys``, de-duplicated and in this instance's
        order, at the same zoom level and with the same data store.
        """
        # Materialize once: the argument may be a one-shot iterator, and
        # taking its length first would otherwise leave nothing to filter on.
        incoming = list(quadkeys)
        original_count = len(self.quadkeys)

        self.logger.info(
            f"Filtering {original_count} quadkeys with an incoming set of {len(incoming)} quadkeys."
        )
        wanted = set(incoming)
        filtered_quadkeys = [q for q in dict.fromkeys(self.quadkeys) if q in wanted]
        self.logger.info(f"Resulting in {len(filtered_quadkeys)} filtered quadkeys.")
        return MercatorTiles(
            zoom_level=self.zoom_level,
            quadkeys=filtered_quadkeys,
            data_store=self.data_store,
        )

    def to_dataframe(self) -> pd.DataFrame:
        """Convert to pandas DataFrame with quadkey and centroid coordinates.

        The centroid is the midpoint of each tile's geographic bounds.
        Columns are ``quadkey``, ``latitude`` and ``longitude``.
        """
        self.logger.info(
            f"Converting {len(self.quadkeys)} quadkeys to pandas DataFrame."
        )
        if not self.quadkeys:
            self.logger.warning(
                "No quadkeys to convert to DataFrame. Returning empty DataFrame."
            )
            return pd.DataFrame(columns=["quadkey", "latitude", "longitude"])

        latitudes = []
        longitudes = []
        for quadkey in self.quadkeys:
            bounds = mercantile.bounds(mercantile.quadkey_to_tile(quadkey))
            latitudes.append((bounds.south + bounds.north) / 2)
            longitudes.append((bounds.west + bounds.east) / 2)

        frame = pd.DataFrame(
            {
                "quadkey": self.quadkeys,
                "latitude": latitudes,
                "longitude": longitudes,
            }
        )
        self.logger.info("Successfully converted to DataFrame.")
        return frame

    def to_geoms(self) -> List[Polygon]:
        """Return one shapely box per quadkey, built from the tile bounds."""
        self.logger.info(
            f"Converting {len(self.quadkeys)} quadkeys to shapely box geometries."
        )
        return [
            box(*mercantile.bounds(mercantile.quadkey_to_tile(q)))
            for q in self.quadkeys
        ]

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert to GeoPandas GeoDataFrame."""
        return gpd.GeoDataFrame(
            {"quadkey": self.quadkeys, "geometry": self.to_geoms()}, crs="EPSG:4326"
        )

    def save(self, file: Union[str, Path], format: str = "json") -> None:
        """Save MercatorTiles to file in specified format.

        Args:
            file: Destination path inside ``self.data_store``.
            format: One of ``"json"`` (list of quadkeys), ``"geojson"`` or
                ``"parquet"`` (both written from the GeoDataFrame form).

        Raises:
            ValueError: If ``format`` is not supported.
        """
        # Validate before opening, so an unsupported format cannot create or
        # truncate the destination file.
        if format not in ("parquet", "geojson", "json"):
            raise ValueError(f"Unsupported format: {format}")

        mode = "wb" if format == "parquet" else "w"
        with self.data_store.open(str(file), mode) as f:
            if format == "parquet":
                self.to_geodataframe().to_parquet(f, index=False)
            elif format == "geojson":
                f.write(self.to_geodataframe().to_json(drop_id=True))
            else:
                json.dump(self.quadkeys, f)

    def __len__(self) -> int:
        """Return the number of quadkeys in the collection."""
        return len(self.quadkeys)
236
+
237
+
238
class CountryMercatorTiles(MercatorTiles):
    """Mercator tile collection built for a single country.

    Direct construction is disabled: the constructor always raises
    ``TypeError``. Use the ``create()`` classmethod to obtain an instance.
    """

    country: str = Field(..., exclude=True)

    def __init__(self, *args, **kwargs):
        # create() bypasses this initializer and calls the parent one, so
        # this is reached only when the class is instantiated directly.
        message = (
            "CountryMercatorTiles cannot be instantiated directly. "
            "Use CountryMercatorTiles.create() instead."
        )
        raise TypeError(message)

    @classmethod
    def create(
        cls,
        country: str,
        zoom_level: int,
        predicate: str = "intersects",
        data_store: Optional[DataStore] = None,
        country_geom_path: Optional[Union[str, Path]] = None,
    ):
        """Create CountryMercatorTiles for a specific country.

        Args:
            country: Country identifier; resolved with
                ``pycountry.countries.lookup`` and stored as its ``alpha_3``
                code.
            zoom_level: Zoom level of the generated tiles.
            predicate: Spatial predicate forwarded to
                ``MercatorTiles.from_geometry``.
            data_store: Store kept on the instance (a ``LocalDataStore`` when
                omitted) and passed as given to ``AdminBoundaries.create``.
            country_geom_path: Passed to ``AdminBoundaries.create`` as
                ``path``.

        Returns:
            An instance whose quadkeys cover the geometry of the first
            boundary returned by ``AdminBoundaries.create``.
        """
        from gigaspatial.handlers.boundaries import AdminBoundaries

        # Allocate without running this class's blocking __init__, then
        # initialize through the parent initializer.
        instance = super().__new__(cls)
        init_fields = {
            "zoom_level": zoom_level,
            "quadkeys": [],
            "data_store": data_store or LocalDataStore(),
            "country": pycountry.countries.lookup(country).alpha_3,
        }
        super(CountryMercatorTiles, instance).__init__(**init_fields)

        admin_boundaries = AdminBoundaries.create(
            country_code=country,
            data_store=data_store,
            path=country_geom_path,
        )
        country_geom = admin_boundaries.boundaries[0].geometry

        country_tiles = MercatorTiles.from_geometry(
            country_geom, zoom_level, predicate
        )
        instance.quadkeys = country_tiles.quadkeys
        return instance
@@ -0,0 +1,40 @@
1
+ from gigaspatial.handlers.boundaries import AdminBoundaries
2
+ from gigaspatial.handlers.ghsl import (
3
+ GHSLDataConfig,
4
+ GHSLDataDownloader,
5
+ GHSLDataReader,
6
+ GHSLDataHandler,
7
+ )
8
+ from gigaspatial.handlers.google_open_buildings import (
9
+ GoogleOpenBuildingsConfig,
10
+ GoogleOpenBuildingsDownloader,
11
+ GoogleOpenBuildingsReader,
12
+ GoogleOpenBuildingsHandler,
13
+ )
14
+ from gigaspatial.handlers.microsoft_global_buildings import (
15
+ MSBuildingsConfig,
16
+ MSBuildingsDownloader,
17
+ MSBuildingsReader,
18
+ MSBuildingsHandler,
19
+ )
20
+ from gigaspatial.handlers.osm import OSMLocationFetcher
21
+ from gigaspatial.handlers.overture import OvertureAmenityFetcher
22
+ from gigaspatial.handlers.mapbox_image import MapboxImageDownloader
23
+ from gigaspatial.handlers.maxar_image import MaxarConfig, MaxarImageDownloader
24
+ from gigaspatial.handlers.worldpop import WorldPopConfig, WorldPopDownloader
25
+ from gigaspatial.handlers.ookla_speedtest import (
26
+ OoklaSpeedtestTileConfig,
27
+ OoklaSpeedtestConfig,
28
+ )
29
+ from gigaspatial.handlers.opencellid import (
30
+ OpenCellIDConfig,
31
+ OpenCellIDDownloader,
32
+ OpenCellIDReader,
33
+ )
34
+ from gigaspatial.handlers.hdx import HDXConfig, HDXDownloader, HDXReader
35
+ from gigaspatial.handlers.rwi import RWIConfig, RelativeWealthIndexDownloader
36
+ from gigaspatial.handlers.unicef_georepo import (
37
+ GeoRepoClient,
38
+ get_country_boundaries_by_iso3,
39
+ )
40
+ from gigaspatial.handlers.giga import GigaSchoolLocationFetcher