giga-spatial 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. giga_spatial-0.6.0.dist-info/METADATA +141 -0
  2. giga_spatial-0.6.0.dist-info/RECORD +47 -0
  3. giga_spatial-0.6.0.dist-info/WHEEL +5 -0
  4. giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
  5. giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
  6. gigaspatial/__init__.py +1 -0
  7. gigaspatial/config.py +226 -0
  8. gigaspatial/core/__init__.py +0 -0
  9. gigaspatial/core/io/__init__.py +5 -0
  10. gigaspatial/core/io/adls_data_store.py +325 -0
  11. gigaspatial/core/io/data_api.py +113 -0
  12. gigaspatial/core/io/data_store.py +147 -0
  13. gigaspatial/core/io/local_data_store.py +92 -0
  14. gigaspatial/core/io/readers.py +265 -0
  15. gigaspatial/core/io/writers.py +128 -0
  16. gigaspatial/core/schemas/__init__.py +0 -0
  17. gigaspatial/core/schemas/entity.py +244 -0
  18. gigaspatial/generators/__init__.py +2 -0
  19. gigaspatial/generators/poi.py +636 -0
  20. gigaspatial/generators/zonal/__init__.py +3 -0
  21. gigaspatial/generators/zonal/base.py +370 -0
  22. gigaspatial/generators/zonal/geometry.py +439 -0
  23. gigaspatial/generators/zonal/mercator.py +78 -0
  24. gigaspatial/grid/__init__.py +1 -0
  25. gigaspatial/grid/mercator_tiles.py +286 -0
  26. gigaspatial/handlers/__init__.py +40 -0
  27. gigaspatial/handlers/base.py +761 -0
  28. gigaspatial/handlers/boundaries.py +305 -0
  29. gigaspatial/handlers/ghsl.py +772 -0
  30. gigaspatial/handlers/giga.py +145 -0
  31. gigaspatial/handlers/google_open_buildings.py +472 -0
  32. gigaspatial/handlers/hdx.py +241 -0
  33. gigaspatial/handlers/mapbox_image.py +208 -0
  34. gigaspatial/handlers/maxar_image.py +291 -0
  35. gigaspatial/handlers/microsoft_global_buildings.py +548 -0
  36. gigaspatial/handlers/ookla_speedtest.py +199 -0
  37. gigaspatial/handlers/opencellid.py +290 -0
  38. gigaspatial/handlers/osm.py +356 -0
  39. gigaspatial/handlers/overture.py +126 -0
  40. gigaspatial/handlers/rwi.py +157 -0
  41. gigaspatial/handlers/unicef_georepo.py +806 -0
  42. gigaspatial/handlers/worldpop.py +266 -0
  43. gigaspatial/processing/__init__.py +4 -0
  44. gigaspatial/processing/geo.py +1054 -0
  45. gigaspatial/processing/sat_images.py +39 -0
  46. gigaspatial/processing/tif_processor.py +477 -0
  47. gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,305 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional, ClassVar, Union, Dict, List
3
+ import geopandas as gpd
4
+ from pathlib import Path
5
+ from urllib.error import HTTPError
6
+ from shapely.geometry import Polygon, MultiPolygon, shape
7
+ import pycountry
8
+
9
+ from gigaspatial.core.io.data_store import DataStore
10
+ from gigaspatial.core.io.readers import read_dataset
11
+ from gigaspatial.config import config
12
+
13
+
14
class AdminBoundary(BaseModel):
    """A single administrative unit: identifier, names and boundary geometry.

    ``id``, ``name`` and ``geometry`` are required; the remaining fields are
    optional and default to ``None``.
    """

    # Shapely geometry classes are not types pydantic knows how to validate
    # natively, so arbitrary types must be allowed for the `geometry` field.
    # Declared through `model_config` (pydantic v2 style, matching the
    # `model_dump()` usage elsewhere in this module) instead of the deprecated
    # nested `class Config`.
    model_config = {"arbitrary_types_allowed": True}

    id: str = Field(..., description="Unique identifier for the administrative unit")
    name: str = Field(..., description="Primary local name")
    geometry: Union[Polygon, MultiPolygon] = Field(
        ..., description="Geometry of the administrative boundary"
    )

    name_en: Optional[str] = Field(
        None, description="English name if different from local name"
    )
    parent_id: Optional[str] = Field(
        None, description="ID of parent administrative unit"
    )
    # Exactly three characters when provided (min_length == max_length == 3).
    country_code: Optional[str] = Field(
        None, min_length=3, max_length=3, description="ISO 3166-1 alpha-3 country code"
    )
36
+
37
+
38
class AdminBoundaries(BaseModel):
    """Collection of :class:`AdminBoundary` objects sharing one admin level.

    Instances are normally built through one of the alternate constructors
    (``from_gadm``, ``from_data_store``, ``from_georepo``) or through the
    ``create`` factory, which picks a source based on its arguments.
    """

    boundaries: List[AdminBoundary] = Field(default_factory=list)
    level: int = Field(
        ...,
        ge=0,
        le=4,
        description="Administrative level (e.g., 0=country, 1=state, etc.)",
    )

    logger: ClassVar = config.get_logger("AdminBoundaries")

    # Per-source mapping of schema field name -> source column name.
    # "{level}" / "{parent_level}" are placeholders filled in by _map_fields.
    _schema_config: ClassVar[Dict[str, Dict[str, str]]] = {
        "gadm": {
            "country_code": "GID_0",
            "id": "GID_{level}",
            "name": "NAME_{level}",
            "parent_id": "GID_{parent_level}",
        },
        "internal": {
            "id": "admin{level}_id_giga",
            "name": "name",
            "name_en": "name_en",
            "country_code": "iso_3166_1_alpha_3",
        },
    }

    @classmethod
    def get_schema_config(cls) -> Dict[str, Dict[str, str]]:
        """Return field mappings for different data sources"""
        return cls._schema_config

    @classmethod
    def from_gadm(
        cls, country_code: str, admin_level: int = 0, **kwargs
    ) -> "AdminBoundaries":
        """Load and create an instance from the GADM 4.1 GeoJSON download.

        Args:
            country_code: Code interpolated into the GADM file name.
            admin_level: Administrative level to download.
            **kwargs: Accepted for signature compatibility; not used here.

        Returns:
            The populated instance, or an empty instance (see
            ``_create_empty_instance``) when loading raises ``ValueError``,
            ``HTTPError`` or ``FileNotFoundError``.
        """
        url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_{country_code}_{admin_level}.json"
        cls.logger.info(
            f"Loading GADM data for country: {country_code}, admin level: {admin_level} from URL: {url}"
        )
        try:
            gdf = gpd.read_file(url)
            gdf = cls._map_fields(gdf, "gadm", admin_level)

            # At levels 0 and 1 two schema entries resolve to the same GID_0
            # column and the later entry wins in _map_fields, so country_code
            # has to be re-derived from the column that survived the rename.
            if admin_level == 0:
                gdf["country_code"] = gdf["id"]
                gdf["name"] = gdf["COUNTRY"]
            elif admin_level == 1:
                gdf["country_code"] = gdf["parent_id"]

            boundaries = [
                AdminBoundary(**row_dict) for row_dict in gdf.to_dict("records")
            ]
            cls.logger.info(f"Created {len(boundaries)} AdminBoundary objects.")
            # NOTE(review): this model declares no `country_code` field, so the
            # keyword is presumably dropped by pydantic - confirm.
            return cls(
                boundaries=boundaries, level=admin_level, country_code=country_code
            )

        except (ValueError, HTTPError, FileNotFoundError) as e:
            cls.logger.warning(
                f"Error loading GADM data for {country_code} at admin level {admin_level}: {str(e)}"
            )
            return cls._create_empty_instance(country_code, admin_level, "gadm")

    @classmethod
    def from_data_store(
        cls,
        data_store: DataStore,
        path: Union[str, "Path"],
        admin_level: int = 0,
        **kwargs,
    ) -> "AdminBoundaries":
        """Load and create an instance from a dataset held in a data store.

        Args:
            data_store: Store handed to ``read_dataset``.
            path: Location of the dataset inside the store.
            admin_level: Administrative level the dataset represents.
            **kwargs: Forwarded to ``read_dataset``.

        Returns:
            The populated instance, or an empty instance when the dataset is
            empty or reading/mapping raises ``FileNotFoundError``/``KeyError``.
        """
        cls.logger.info(
            f"Loading data from data store at path: {path}, admin level: {admin_level}"
        )
        try:
            gdf = read_dataset(data_store, str(path), **kwargs)

            if gdf.empty:
                cls.logger.warning(f"No data found at {path}.")
                return cls._create_empty_instance(None, admin_level, "internal")

            gdf = cls._map_fields(gdf, "internal", admin_level)

            if admin_level == 0:
                gdf["id"] = gdf["country_code"]
            else:
                # Parent id is the unit id with its last three characters removed.
                gdf["parent_id"] = gdf["id"].apply(lambda x: x[:-3])

            boundaries = [
                AdminBoundary(**row_dict) for row_dict in gdf.to_dict("records")
            ]
            cls.logger.info(f"Created {len(boundaries)} AdminBoundary objects.")
            return cls(boundaries=boundaries, level=admin_level)

        except (FileNotFoundError, KeyError) as e:
            cls.logger.warning(
                f"No data found at {path} for admin level {admin_level}: {str(e)}"
            )
            return cls._create_empty_instance(None, admin_level, "internal")

    @classmethod
    def from_georepo(
        cls,
        country_code: Optional[str] = None,
        admin_level: int = 0,
        **kwargs,
    ) -> "AdminBoundaries":
        """
        Load and create instance from GeoRepo (UNICEF) API.

        Args:
            country_code: Code passed to ``get_country_boundaries_by_iso3``
                and stored on every created boundary.
            admin_level: Administrative level (0=country, 1=state, etc.)
            kwargs: Extra arguments (ignored)

        Returns:
            AdminBoundaries instance
        """
        cls.logger.info(
            f"Loading data from UNICEF GeoRepo for country: {country_code}, admin level: {admin_level}"
        )
        # Imported lazily, as in the rest of this module's GeoRepo usage.
        from gigaspatial.handlers.unicef_georepo import get_country_boundaries_by_iso3

        # Fetch boundaries from GeoRepo
        geojson = get_country_boundaries_by_iso3(country_code, admin_level=admin_level)

        features = geojson.get("features", [])
        boundaries = []
        parent_level = admin_level - 1

        for feat in features:
            props = feat.get("properties", {})
            geometry = feat.get("geometry")
            shapely_geom = shape(geometry) if geometry else None
            # For admin_level 0, no parent_id
            parent_id = None
            if admin_level > 0:
                parent_id = props.get(f"adm{parent_level}_ucode")

            boundaries.append(
                AdminBoundary(
                    id=props.get("ucode"),
                    name=props.get("name"),
                    name_en=props.get("name_en"),
                    geometry=shapely_geom,
                    parent_id=parent_id,
                    country_code=country_code,
                )
            )

        cls.logger.info(
            f"Created {len(boundaries)} AdminBoundary objects from GeoRepo data."
        )

        # Try to infer country_code from first boundary if not set
        # NOTE(review): only the first boundary is patched - confirm intent.
        if boundaries and not boundaries[0].country_code:
            boundaries[0].country_code = boundaries[0].id[:3]

        return cls(boundaries=boundaries, level=admin_level)

    @classmethod
    def create(
        cls,
        country_code: Optional[str] = None,
        admin_level: int = 0,
        data_store: Optional[DataStore] = None,
        path: Optional[Union[str, "Path"]] = None,
        **kwargs,
    ) -> "AdminBoundaries":
        """Factory: build an instance from a data store, GeoRepo or GADM.

        Source selection:
            * ``data_store`` given: read from it, using ``path`` or, when
              ``path`` is omitted, the path returned by
              ``config.get_admin_path`` for the country and level.
            * otherwise, ``country_code`` given: use GeoRepo when
              ``GeoRepoClient.check_connection()`` succeeds, else GADM.

        Args:
            country_code: Any value ``pycountry.countries.lookup`` accepts;
                it is normalised to its alpha-3 code.
            admin_level: Administrative level to load.
            data_store: Optional store to read from.
            path: Optional dataset path inside ``data_store``.
            **kwargs: Forwarded to ``from_data_store`` / ``from_gadm``.

        Raises:
            ValueError: If neither a country code nor a (data_store, path)
                pair is available to locate the data.
            LookupError: Propagated from pycountry for an unknown country.
        """
        cls.logger.info(
            f"Creating AdminBoundaries instance. Country: {country_code}, admin level: {admin_level}, data_store provided: {data_store is not None}, path provided: {path is not None}"
        )
        # Only resolve the country when one was supplied, so the
        # (data_store, path) call style works without a country code.
        iso3_code = (
            pycountry.countries.lookup(country_code).alpha_3
            if country_code is not None
            else None
        )

        if data_store is not None:
            if path is None:
                if iso3_code is None:
                    raise ValueError(
                        "If data_store is provided, path or country_code must also be specified."
                    )
                path = config.get_admin_path(
                    country_code=iso3_code,
                    admin_level=admin_level,
                )
            return cls.from_data_store(data_store, path, admin_level, **kwargs)

        if iso3_code is None:
            raise ValueError(
                "Either country_code or (data_store, path) must be provided."
            )

        from gigaspatial.handlers.unicef_georepo import GeoRepoClient

        client = GeoRepoClient()

        if client.check_connection():
            cls.logger.info("GeoRepo connection successful.")
            return cls.from_georepo(
                iso3_code,
                admin_level=admin_level,
            )

        cls.logger.warning("GeoRepo connection check failed. Falling back to GADM.")
        return cls.from_gadm(iso3_code, admin_level, **kwargs)

    @classmethod
    def _create_empty_instance(
        cls, country_code: Optional[str], admin_level: int, source_type: str
    ) -> "AdminBoundaries":
        """Create an empty instance that remembers which columns it should have.

        The column list is stored on the instance as ``_empty_schema`` so that
        ``to_geodataframe`` can build an empty frame with those columns.
        """
        instance = cls(boundaries=[], level=admin_level, country_code=country_code)

        schema_fields = set(cls.get_schema_config()[source_type].keys())
        schema_fields.update(["geometry", "country_code", "id", "name", "name_en"])
        if admin_level > 0:
            schema_fields.add("parent_id")

        # Sorted so the column order is the same on every run; iterating the
        # set directly would make it depend on string hashing.
        instance._empty_schema = sorted(schema_fields)
        return instance

    @classmethod
    def _map_fields(
        cls,
        gdf: gpd.GeoDataFrame,
        source: str,
        current_level: int,
    ) -> gpd.GeoDataFrame:
        """Rename source columns of ``gdf`` to schema field names.

        Args:
            gdf: Frame whose columns follow the naming of ``source``.
            source: Key into the schema config (unknown keys map nothing).
            current_level: Level substituted for ``{level}``; ``{parent_level}``
                receives ``current_level - 1``.

        Returns:
            The frame returned by ``gdf.rename`` with the resolved mapping.
        """
        schema_map = cls.get_schema_config().get(source, {})
        parent_level = current_level - 1

        # str.format ignores unused keyword arguments, so one call resolves
        # whichever placeholder a template contains. When two schema fields
        # resolve to the same source column, the later entry wins.
        field_mapping = {
            template.format(level=current_level, parent_level=parent_level): field
            for field, template in schema_map.items()
        }

        return gdf.rename(columns=field_mapping)

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert the AdminBoundaries to a GeoDataFrame.

        Returns:
            A frame (CRS 4326) with one row per boundary, or an empty frame
            carrying only the expected columns when there are no boundaries.
        """
        if not self.boundaries:
            if hasattr(self, "_empty_schema"):
                columns = self._empty_schema
            else:
                columns = ["id", "name", "country_code", "geometry"]
                if self.level > 0:
                    columns.append("parent_id")

            return gpd.GeoDataFrame(columns=columns, geometry="geometry", crs=4326)

        return gpd.GeoDataFrame(
            [boundary.model_dump() for boundary in self.boundaries],
            geometry="geometry",
            crs=4326,
        )