giga-spatial 0.6.0 (giga_spatial-0.6.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
gigaspatial/core/schemas/entity.py
@@ -0,0 +1,244 @@
+from typing import List, Type, TypeVar, Generic, Union, Optional
+from pathlib import Path
+from pydantic import BaseModel, Field, ValidationError, PrivateAttr
+import pandas as pd
+import numpy as np
+import geopandas as gpd
+from shapely.geometry import Point, Polygon
+from scipy.spatial import cKDTree
+
+from gigaspatial.core.io.data_store import DataStore
+from gigaspatial.core.io.local_data_store import LocalDataStore
+from gigaspatial.core.io.readers import read_dataset
+from gigaspatial.core.io.writers import write_dataset
+
+
+# Base class for all Giga entities
+class BaseGigaEntity(BaseModel):
+    """Base class for all Giga entities with common fields."""
+
+    source: Optional[str] = Field(None, max_length=100, description="Source reference")
+    source_detail: Optional[str] = None
+
+    @property
+    def id(self) -> str:
+        """Abstract property that must be implemented by subclasses."""
+        raise NotImplementedError("Subclasses must implement id property")
+
+
+class GigaEntity(BaseGigaEntity):
+    """Entity with location data."""
+
+    latitude: float = Field(
+        ..., ge=-90, le=90, description="Latitude coordinate of the entity"
+    )
+    longitude: float = Field(
+        ..., ge=-180, le=180, description="Longitude coordinate of the entity"
+    )
+    admin1: Optional[str] = Field(
+        "Unknown", max_length=100, description="Primary administrative division"
+    )
+    admin1_id_giga: Optional[str] = Field(
+        None,
+        max_length=50,
+        description="Unique identifier for the primary administrative division",
+    )
+    admin2: Optional[str] = Field(
+        "Unknown", max_length=100, description="Secondary administrative division"
+    )
+    admin2_id_giga: Optional[str] = Field(
+        None,
+        max_length=50,
+        description="Unique identifier for the secondary administrative division",
+    )
+
+
+class GigaEntityNoLocation(BaseGigaEntity):
+    """Entity without location data."""
+
+    pass
+
+
+# Define a generic type bound to GigaEntity
+E = TypeVar("E", bound=BaseGigaEntity)
+
+
+class EntityTable(BaseModel, Generic[E]):
+    entities: List[E] = Field(default_factory=list)
+    _cached_kdtree: Optional[cKDTree] = PrivateAttr(
+        default=None
+    )  # Internal cache for the KDTree
+
+    @classmethod
+    def from_file(
+        cls: Type["EntityTable"],
+        file_path: Union[str, Path],
+        entity_class: Type[E],
+        data_store: Optional[DataStore] = None,
+        **kwargs,
+    ) -> "EntityTable":
+        """
+        Create an EntityTable instance from a file.
+
+        Args:
+            file_path: Path to the dataset file
+            entity_class: The entity class for validation
+
+        Returns:
+            EntityTable instance
+
+        Raises:
+            ValidationError: If any row fails validation
+            FileNotFoundError: If the file doesn't exist
+        """
+        data_store = data_store or LocalDataStore()
+        file_path = Path(file_path)
+        if not file_path.exists():  # note: checks the local filesystem only
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        df = read_dataset(data_store, file_path, **kwargs)
+        try:
+            entities = [entity_class(**row) for row in df.to_dict(orient="records")]
+            return cls(entities=entities)
+        except ValidationError as e:
+            raise ValueError(f"Validation error in input data: {e}") from e
+        except Exception as e:
+            raise ValueError(f"Error reading or processing the file: {e}") from e
+
+    def _check_has_location(self, method_name: str) -> bool:
+        """Helper method to check if entities have location data."""
+        if not self.entities:
+            return False
+        if not isinstance(self.entities[0], GigaEntity):
+            raise ValueError(
+                f"Cannot perform {method_name}: entities of type {type(self.entities[0]).__name__} "
+                "do not have location data (latitude/longitude)"
+            )
+        return True
+
+    def to_dataframe(self) -> pd.DataFrame:
+        """Convert the entity table to a pandas DataFrame."""
+        return pd.DataFrame([e.model_dump() for e in self.entities])
+
+    def to_geodataframe(self) -> gpd.GeoDataFrame:
+        """Convert the entity table to a GeoDataFrame."""
+        if not self._check_has_location("to_geodataframe"):
+            raise ValueError("Cannot create GeoDataFrame: no entities available")
+        df = self.to_dataframe()
+        return gpd.GeoDataFrame(
+            df,
+            geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
+            crs="EPSG:4326",
+        )
+
+    def to_coordinate_vector(self) -> np.ndarray:
+        """Transforms the entity table into a numpy vector of coordinates"""
+        if not self.entities:
+            return np.zeros((0, 2))
+
+        if not self._check_has_location("to_coordinate_vector"):
+            return np.zeros((0, 2))
+
+        return np.array([[e.latitude, e.longitude] for e in self.entities])
+
+    def get_lat_array(self) -> np.ndarray:
+        """Get an array of latitude values."""
+        if not self._check_has_location("get_lat_array"):
+            return np.array([])
+        return np.array([e.latitude for e in self.entities])
+
+    def get_lon_array(self) -> np.ndarray:
+        """Get an array of longitude values."""
+        if not self._check_has_location("get_lon_array"):
+            return np.array([])
+        return np.array([e.longitude for e in self.entities])
+
+    def filter_by_admin1(self, admin1_id_giga: str) -> "EntityTable[E]":
+        """Filter entities by primary administrative division."""
+        return self.__class__(
+            entities=[e for e in self.entities if e.admin1_id_giga == admin1_id_giga]
+        )
+
+    def filter_by_admin2(self, admin2_id_giga: str) -> "EntityTable[E]":
+        """Filter entities by secondary administrative division."""
+        return self.__class__(
+            entities=[e for e in self.entities if e.admin2_id_giga == admin2_id_giga]
+        )
+
+    def filter_by_polygon(self, polygon: Polygon) -> "EntityTable[E]":
+        """Filter entities within a polygon"""
+        if not self._check_has_location("filter_by_polygon"):
+            return self.__class__(entities=[])
+
+        filtered = [
+            e for e in self.entities if polygon.contains(Point(e.longitude, e.latitude))
+        ]
+        return self.__class__(entities=filtered)
+
+    def filter_by_bounds(
+        self, min_lat: float, max_lat: float, min_lon: float, max_lon: float
+    ) -> "EntityTable[E]":
+        """Filter entities whose coordinates fall within the given bounds."""
+        if not self._check_has_location("filter_by_bounds"):
+            return self.__class__(entities=[])
+
+        filtered = [
+            e
+            for e in self.entities
+            if min_lat <= e.latitude <= max_lat and min_lon <= e.longitude <= max_lon
+        ]
+        return self.__class__(entities=filtered)
+
+    def get_nearest_neighbors(
+        self, lat: float, lon: float, k: int = 5
+    ) -> "EntityTable[E]":
+        """Find k nearest neighbors to a point using a cached KDTree."""
+        if not self._check_has_location("get_nearest_neighbors"):
+            return self.__class__(entities=[])
+
+        if not self._cached_kdtree:
+            self._build_kdtree()  # Build the KDTree if not already cached
+
+        if not self._cached_kdtree:  # If still None after building
+            return self.__class__(entities=[])
+
+        # Distances are Euclidean in (lat, lon) degree space, not great-circle.
+        _, indices = self._cached_kdtree.query([[lat, lon]], k=k)
+        indices = np.atleast_1d(indices[0])  # query squeezes the last axis when k == 1
+        return self.__class__(entities=[self.entities[i] for i in indices])
+
+    def _build_kdtree(self):
+        """Builds and caches the KDTree."""
+        if not self._check_has_location("_build_kdtree"):
+            self._cached_kdtree = None
+            return
+        coords = self.to_coordinate_vector()
+        if len(coords):  # an explicit length check; ndarray truthiness is ambiguous
+            self._cached_kdtree = cKDTree(coords)
+
+    def clear_cache(self):
+        """Clears the KDTree cache."""
+        self._cached_kdtree = None
+
+    def to_file(
+        self,
+        file_path: Union[str, Path],
+        data_store: Optional[DataStore] = None,
+        **kwargs,
+    ) -> None:
+        """
+        Save the entity data to a file.
+
+        Args:
+            file_path: Path to save the file
+        """
+        if not self.entities:
+            raise ValueError("Cannot write to a file: no entities available.")
+
+        data_store = data_store or LocalDataStore()
+
+        write_dataset(self.to_dataframe(), data_store, file_path, **kwargs)
+
+    def __len__(self) -> int:
+        return len(self.entities)
+
+    def __iter__(self):
+        return iter(self.entities)
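For orientation, a minimal usage sketch of the EntityTable API added in this hunk. The School subclass, its school_id field, and the sample coordinates are illustrative inventions, not part of the package; the imports and method calls follow the definitions above.

from gigaspatial.core.schemas.entity import EntityTable, GigaEntity

class School(GigaEntity):
    """Hypothetical entity type; not shipped with giga-spatial."""
    school_id: str

    @property
    def id(self) -> str:  # satisfies BaseGigaEntity's abstract id property
        return self.school_id

table = EntityTable[School](
    entities=[
        School(school_id="sch-001", latitude=9.05, longitude=7.49),
        School(school_id="sch-002", latitude=9.10, longitude=7.40),
    ]
)

# Spatial queries; the first nearest-neighbor call builds and caches the cKDTree.
nearest = table.get_nearest_neighbors(lat=9.06, lon=7.48, k=1)
boxed = table.filter_by_bounds(min_lat=9.0, max_lat=9.2, min_lon=7.3, max_lon=7.5)
gdf = table.to_geodataframe()  # point geometries in EPSG:4326

# The cache is not invalidated when entities are mutated; clear it manually.
table.entities.append(School(school_id="sch-003", latitude=9.00, longitude=7.45))
table.clear_cache()

Note that nearest-neighbor distances are computed in raw latitude/longitude degree space, so results are approximate and degrade near the poles and across the antimeridian; callers who need metric accuracy should project coordinates before querying.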