giga-spatial 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. giga_spatial-0.6.0.dist-info/METADATA +141 -0
  2. giga_spatial-0.6.0.dist-info/RECORD +47 -0
  3. giga_spatial-0.6.0.dist-info/WHEEL +5 -0
  4. giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
  5. giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
  6. gigaspatial/__init__.py +1 -0
  7. gigaspatial/config.py +226 -0
  8. gigaspatial/core/__init__.py +0 -0
  9. gigaspatial/core/io/__init__.py +5 -0
  10. gigaspatial/core/io/adls_data_store.py +325 -0
  11. gigaspatial/core/io/data_api.py +113 -0
  12. gigaspatial/core/io/data_store.py +147 -0
  13. gigaspatial/core/io/local_data_store.py +92 -0
  14. gigaspatial/core/io/readers.py +265 -0
  15. gigaspatial/core/io/writers.py +128 -0
  16. gigaspatial/core/schemas/__init__.py +0 -0
  17. gigaspatial/core/schemas/entity.py +244 -0
  18. gigaspatial/generators/__init__.py +2 -0
  19. gigaspatial/generators/poi.py +636 -0
  20. gigaspatial/generators/zonal/__init__.py +3 -0
  21. gigaspatial/generators/zonal/base.py +370 -0
  22. gigaspatial/generators/zonal/geometry.py +439 -0
  23. gigaspatial/generators/zonal/mercator.py +78 -0
  24. gigaspatial/grid/__init__.py +1 -0
  25. gigaspatial/grid/mercator_tiles.py +286 -0
  26. gigaspatial/handlers/__init__.py +40 -0
  27. gigaspatial/handlers/base.py +761 -0
  28. gigaspatial/handlers/boundaries.py +305 -0
  29. gigaspatial/handlers/ghsl.py +772 -0
  30. gigaspatial/handlers/giga.py +145 -0
  31. gigaspatial/handlers/google_open_buildings.py +472 -0
  32. gigaspatial/handlers/hdx.py +241 -0
  33. gigaspatial/handlers/mapbox_image.py +208 -0
  34. gigaspatial/handlers/maxar_image.py +291 -0
  35. gigaspatial/handlers/microsoft_global_buildings.py +548 -0
  36. gigaspatial/handlers/ookla_speedtest.py +199 -0
  37. gigaspatial/handlers/opencellid.py +290 -0
  38. gigaspatial/handlers/osm.py +356 -0
  39. gigaspatial/handlers/overture.py +126 -0
  40. gigaspatial/handlers/rwi.py +157 -0
  41. gigaspatial/handlers/unicef_georepo.py +806 -0
  42. gigaspatial/handlers/worldpop.py +266 -0
  43. gigaspatial/processing/__init__.py +4 -0
  44. gigaspatial/processing/geo.py +1054 -0
  45. gigaspatial/processing/sat_images.py +39 -0
  46. gigaspatial/processing/tif_processor.py +477 -0
  47. gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,244 @@
1
+ from typing import List, Type, TypeVar, Generic, Union, Optional
2
+ from pathlib import Path
3
+ from pydantic import BaseModel, Field, ValidationError, PrivateAttr
4
+ import pandas as pd
5
+ import numpy as np
6
+ import geopandas as gpd
7
+ from shapely.geometry import Point, Polygon
8
+ from scipy.spatial import cKDTree
9
+
10
+ from gigaspatial.core.io.data_store import DataStore
11
+ from gigaspatial.core.io.local_data_store import LocalDataStore
12
+ from gigaspatial.core.io.readers import read_dataset
13
+ from gigaspatial.core.io.writers import write_dataset
14
+
15
+
16
+ # Base class for all Giga entities
17
class BaseGigaEntity(BaseModel):
    """Root model shared by every Giga entity.

    Holds the optional provenance fields common to all entities. The ``id``
    property is a placeholder here: the base implementation always raises,
    so concrete subclasses are expected to override it.
    """

    # Short reference to where the record came from (at most 100 characters).
    source: Optional[str] = Field(
        default=None, max_length=100, description="Source reference"
    )
    # Free-form extra detail about the source; no length limit is declared.
    source_detail: Optional[str] = Field(default=None)

    @property
    def id(self) -> str:
        """Return the entity identifier.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "Subclasses must implement id property"
        raise NotImplementedError(message)
27
+
28
+
29
class GigaEntity(BaseGigaEntity):
    """Giga entity positioned by a latitude/longitude pair.

    Both coordinates are required and range-checked by the field
    constraints. The administrative-division fields are optional: the
    division names default to ``"Unknown"`` and their ids default to ``None``.
    """

    # Required coordinates, constrained to [-90, 90] and [-180, 180].
    latitude: float = Field(
        default=...,
        ge=-90,
        le=90,
        description="Latitude coordinate of the entity",
    )
    longitude: float = Field(
        default=...,
        ge=-180,
        le=180,
        description="Longitude coordinate of the entity",
    )

    # Primary administrative division: display name and identifier.
    admin1: Optional[str] = Field(
        default="Unknown",
        max_length=100,
        description="Primary administrative division",
    )
    admin1_id_giga: Optional[str] = Field(
        default=None,
        max_length=50,
        description="Unique identifier for the primary administrative division",
    )

    # Secondary administrative division: display name and identifier.
    admin2: Optional[str] = Field(
        default="Unknown",
        max_length=100,
        description="Secondary administrative division",
    )
    admin2_id_giga: Optional[str] = Field(
        default=None,
        max_length=50,
        description="Unique identifier for the secondary administrative division",
    )
54
+
55
+
56
class GigaEntityNoLocation(BaseGigaEntity):
    """Giga entity that declares no latitude/longitude fields.

    Adds nothing to ``BaseGigaEntity``; it exists as a distinct type for
    entities that carry no location data.
    """
60
+
61
+
62
+ # Define a generic type bound to BaseGigaEntity
63
+ E = TypeVar("E", bound=BaseGigaEntity)
64
+
65
+
66
class EntityTable(BaseModel, Generic[E]):
    """Validated, in-memory collection of Giga entities.

    Wraps a list of entities and offers conversions (DataFrame,
    GeoDataFrame, NumPy arrays), attribute and spatial filters, a
    nearest-neighbour lookup backed by a cached KD-tree, and file I/O
    through a ``DataStore``.

    Location-dependent methods inspect only the FIRST entity to decide
    whether the table holds located entities (``GigaEntity``); a table is
    therefore assumed to be homogeneous.
    """

    entities: List[E] = Field(default_factory=list)
    # Lazily built spatial index over [latitude, longitude] rows. It is
    # rebuilt automatically when the number of entities changes; call
    # ``clear_cache()`` after replacing entities in place.
    _cached_kdtree: Optional[cKDTree] = PrivateAttr(default=None)

    @classmethod
    def from_file(
        cls: Type["EntityTable"],
        file_path: Union[str, Path],
        entity_class: Type[E],
        data_store: Optional[DataStore] = None,
        **kwargs,
    ) -> "EntityTable":
        """
        Create an EntityTable instance from a file.

        Args:
            file_path: Path to the dataset file
            entity_class: The entity class each row is validated against
            data_store: Store used to read the file; a ``LocalDataStore``
                is created when omitted
            **kwargs: Forwarded unchanged to ``read_dataset``

        Returns:
            EntityTable instance

        Raises:
            FileNotFoundError: If the file doesn't exist
            ValueError: If any row fails validation or the rows cannot be
                turned into entities (the original error is chained)
        """
        if data_store is None:
            data_store = LocalDataStore()
        file_path = Path(file_path)
        # NOTE(review): this existence check looks at the local filesystem
        # even when a non-local data_store is supplied -- confirm whether the
        # store's own existence check should be used instead.
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        df = read_dataset(data_store, file_path, **kwargs)
        try:
            entities = [entity_class(**row) for row in df.to_dict(orient="records")]
            return cls(entities=entities)
        except ValidationError as e:
            # Keep ValueError for callers, but preserve the original traceback.
            raise ValueError(f"Validation error in input data: {e}") from e
        except Exception as e:
            raise ValueError(f"Error reading or processing the file: {e}") from e

    def _check_has_location(self, method_name: str) -> bool:
        """Tell whether location-based operations can run on this table.

        Args:
            method_name: Name of the calling method, used in the error text

        Returns:
            False when the table is empty, True when the first entity is a
            ``GigaEntity``.

        Raises:
            ValueError: If the first entity is not a ``GigaEntity``
        """
        if not self.entities:
            return False
        first = self.entities[0]
        if not isinstance(first, GigaEntity):
            raise ValueError(
                f"Cannot perform {method_name}: entities of type {type(first).__name__} "
                "do not have location data (latitude/longitude)"
            )
        return True

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the entity table to a pandas DataFrame (one row per entity)."""
        return pd.DataFrame([e.model_dump() for e in self.entities])

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Convert the entity table to a point GeoDataFrame in EPSG:4326.

        Raises:
            ValueError: If the table is empty or its entities have no
                location data
        """
        if not self._check_has_location("to_geodataframe"):
            raise ValueError("Cannot create GeoDataFrame: no entities available")
        df = self.to_dataframe()
        return gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
            crs="EPSG:4326",
        )

    def to_coordinate_vector(self) -> np.ndarray:
        """Return an ``(n, 2)`` array of ``[latitude, longitude]`` rows.

        An empty table yields an array of shape ``(0, 2)``.

        Raises:
            ValueError: If the entities have no location data
        """
        # _check_has_location already returns False for an empty table.
        if not self._check_has_location("to_coordinate_vector"):
            return np.zeros((0, 2))
        return np.array([[e.latitude, e.longitude] for e in self.entities])

    def get_lat_array(self) -> np.ndarray:
        """Get an array of latitude values (empty for an empty table)."""
        if not self._check_has_location("get_lat_array"):
            return np.array([])
        return np.array([e.latitude for e in self.entities])

    def get_lon_array(self) -> np.ndarray:
        """Get an array of longitude values (empty for an empty table)."""
        if not self._check_has_location("get_lon_array"):
            return np.array([])
        return np.array([e.longitude for e in self.entities])

    def filter_by_admin1(self, admin1_id_giga: str) -> "EntityTable[E]":
        """Filter entities by primary administrative division id."""
        return self.__class__(
            entities=[e for e in self.entities if e.admin1_id_giga == admin1_id_giga]
        )

    def filter_by_admin2(self, admin2_id_giga: str) -> "EntityTable[E]":
        """Filter entities by secondary administrative division id."""
        return self.__class__(
            entities=[e for e in self.entities if e.admin2_id_giga == admin2_id_giga]
        )

    def filter_by_polygon(self, polygon: Polygon) -> "EntityTable[E]":
        """Filter entities whose (longitude, latitude) point the polygon contains."""
        if not self._check_has_location("filter_by_polygon"):
            return self.__class__(entities=[])

        filtered = [
            e for e in self.entities if polygon.contains(Point(e.longitude, e.latitude))
        ]
        return self.__class__(entities=filtered)

    def filter_by_bounds(
        self, min_lat: float, max_lat: float, min_lon: float, max_lon: float
    ) -> "EntityTable[E]":
        """Filter entities whose coordinates fall within the given bounds (inclusive)."""
        if not self._check_has_location("filter_by_bounds"):
            return self.__class__(entities=[])

        filtered = [
            e
            for e in self.entities
            if min_lat <= e.latitude <= max_lat and min_lon <= e.longitude <= max_lon
        ]
        return self.__class__(entities=filtered)

    def get_nearest_neighbors(
        self, lat: float, lon: float, k: int = 5
    ) -> "EntityTable[E]":
        """Find the k nearest neighbors to a point using a cached KDTree.

        Distances are the tree's default metric applied to raw
        ``[latitude, longitude]`` values, i.e. measured in degrees rather
        than along the Earth's surface.

        Args:
            lat: Latitude of the query point
            lon: Longitude of the query point
            k: Number of neighbors wanted; capped at the table size

        Returns:
            A new table with up to ``k`` entities, nearest first; empty
            when the table is empty or ``k`` is less than 1.

        Raises:
            ValueError: If the entities have no location data
        """
        if not self._check_has_location("get_nearest_neighbors"):
            return self.__class__(entities=[])

        # Asking for more neighbours than exist makes cKDTree pad the result
        # with the out-of-range index n, so cap k at the table size.
        k = min(k, len(self.entities))
        if k < 1:
            return self.__class__(entities=[])

        tree = self._cached_kdtree
        # Rebuild when nothing is cached or the table has changed size.
        if tree is None or tree.n != len(self.entities):
            self._build_kdtree()
            tree = self._cached_kdtree
        if tree is None:  # Still nothing to query after building
            return self.__class__(entities=[])

        _, indices = tree.query([lat, lon], k=k)
        # query() returns a bare integer for k == 1; normalise to 1-D.
        return self.__class__(
            entities=[self.entities[int(i)] for i in np.atleast_1d(indices)]
        )

    def _build_kdtree(self):
        """Build the KDTree over the entity coordinates and cache it.

        The cache is set to ``None`` when there are no coordinates to index.
        """
        if not self._check_has_location("_build_kdtree"):
            self._cached_kdtree = None
            return
        coords = self.to_coordinate_vector()
        # Test the row count explicitly: the truth value of a NumPy array
        # with more than one element is ambiguous and raises ValueError.
        self._cached_kdtree = cKDTree(coords) if len(coords) else None

    def clear_cache(self):
        """Clear the KDTree cache so the next lookup rebuilds it."""
        self._cached_kdtree = None

    def to_file(
        self,
        file_path: Union[str, Path],
        data_store: Optional[DataStore] = None,
        **kwargs,
    ) -> None:
        """
        Save the entity data to a file.

        Args:
            file_path: Path to save the file
            data_store: Store used to write the file; a ``LocalDataStore``
                is created when omitted
            **kwargs: Forwarded unchanged to ``write_dataset``

        Raises:
            ValueError: If the table has no entities
        """
        if not self.entities:
            raise ValueError("Cannot write to a file: no entities available.")

        if data_store is None:
            data_store = LocalDataStore()

        write_dataset(self.to_dataframe(), data_store, file_path, **kwargs)

    def __len__(self) -> int:
        """Return the number of entities in the table."""
        return len(self.entities)

    def __iter__(self):
        """Iterate over the entities themselves (not over model fields)."""
        return iter(self.entities)
@@ -0,0 +1,2 @@
1
+ from gigaspatial.generators.poi import PoiViewGenerator, PoiViewGeneratorConfig
2
+ from gigaspatial.generators.zonal import GeometryBasedZonalViewGenerator