giga-spatial 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- giga_spatial-0.6.0.dist-info/METADATA +141 -0
- giga_spatial-0.6.0.dist-info/RECORD +47 -0
- giga_spatial-0.6.0.dist-info/WHEEL +5 -0
- giga_spatial-0.6.0.dist-info/licenses/LICENSE +661 -0
- giga_spatial-0.6.0.dist-info/top_level.txt +1 -0
- gigaspatial/__init__.py +1 -0
- gigaspatial/config.py +226 -0
- gigaspatial/core/__init__.py +0 -0
- gigaspatial/core/io/__init__.py +5 -0
- gigaspatial/core/io/adls_data_store.py +325 -0
- gigaspatial/core/io/data_api.py +113 -0
- gigaspatial/core/io/data_store.py +147 -0
- gigaspatial/core/io/local_data_store.py +92 -0
- gigaspatial/core/io/readers.py +265 -0
- gigaspatial/core/io/writers.py +128 -0
- gigaspatial/core/schemas/__init__.py +0 -0
- gigaspatial/core/schemas/entity.py +244 -0
- gigaspatial/generators/__init__.py +2 -0
- gigaspatial/generators/poi.py +636 -0
- gigaspatial/generators/zonal/__init__.py +3 -0
- gigaspatial/generators/zonal/base.py +370 -0
- gigaspatial/generators/zonal/geometry.py +439 -0
- gigaspatial/generators/zonal/mercator.py +78 -0
- gigaspatial/grid/__init__.py +1 -0
- gigaspatial/grid/mercator_tiles.py +286 -0
- gigaspatial/handlers/__init__.py +40 -0
- gigaspatial/handlers/base.py +761 -0
- gigaspatial/handlers/boundaries.py +305 -0
- gigaspatial/handlers/ghsl.py +772 -0
- gigaspatial/handlers/giga.py +145 -0
- gigaspatial/handlers/google_open_buildings.py +472 -0
- gigaspatial/handlers/hdx.py +241 -0
- gigaspatial/handlers/mapbox_image.py +208 -0
- gigaspatial/handlers/maxar_image.py +291 -0
- gigaspatial/handlers/microsoft_global_buildings.py +548 -0
- gigaspatial/handlers/ookla_speedtest.py +199 -0
- gigaspatial/handlers/opencellid.py +290 -0
- gigaspatial/handlers/osm.py +356 -0
- gigaspatial/handlers/overture.py +126 -0
- gigaspatial/handlers/rwi.py +157 -0
- gigaspatial/handlers/unicef_georepo.py +806 -0
- gigaspatial/handlers/worldpop.py +266 -0
- gigaspatial/processing/__init__.py +4 -0
- gigaspatial/processing/geo.py +1054 -0
- gigaspatial/processing/sat_images.py +39 -0
- gigaspatial/processing/tif_processor.py +477 -0
- gigaspatial/processing/utils.py +49 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
import geopandas as gpd
|
4
|
+
from shapely import wkt
|
5
|
+
from datetime import datetime
|
6
|
+
import json
|
7
|
+
import requests
|
8
|
+
from pathlib import Path
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
10
|
+
from typing import List, Literal, Optional
|
11
|
+
|
12
|
+
from gigaspatial.grid.mercator_tiles import CountryMercatorTiles
|
13
|
+
from gigaspatial.core.io.readers import read_dataset
|
14
|
+
from gigaspatial.core.io.data_store import DataStore
|
15
|
+
from gigaspatial.core.io.local_data_store import LocalDataStore
|
16
|
+
from gigaspatial.config import config
|
17
|
+
|
18
|
+
|
19
|
+
class OoklaSpeedtestTileConfig(BaseModel):
    """Configuration for a single quarterly Ookla Speedtest tile dataset.

    Identifies one parquet file in the Ookla open-data S3 bucket by
    service type, year and quarter, and knows how to download it into
    the configured data store and read it back.
    """

    service_type: Literal["fixed", "mobile"]
    year: int
    quarter: int
    # Storage backend and target directory; excluded from serialization
    # since they describe infrastructure, not the dataset itself.
    data_store: DataStore = Field(default_factory=LocalDataStore, exclude=True)
    base_path: Path = Field(
        default=config.get_path("ookla_speedtest", "bronze"), exclude=True
    )

    # Pydantic v2 configuration style, consistent with OoklaSpeedtestTile in
    # this module (the legacy `class Config` form is deprecated in v2).
    # DataStore is not a pydantic-aware type, so arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @property
    def quarter_start(self) -> datetime:
        """First day of the configured quarter.

        Raises:
            ValueError: If ``quarter`` is outside [1, 4].
        """
        if not 1 <= self.quarter <= 4:
            raise ValueError("Quarter must be within [1, 2, 3, 4]")

        month = [1, 4, 7, 10]
        return datetime(self.year, month[self.quarter - 1], 1)

    @property
    def tile_name(self) -> str:
        """File name of the tile parquet, as published by Ookla."""
        return f"{self.quarter_start:%Y-%m-%d}_performance_{self.service_type}_tiles.parquet"

    @property
    def tile_url(self) -> str:
        """Public S3 URL of the tile parquet file."""
        base_url = "https://ookla-open-data.s3.amazonaws.com/parquet/performance"
        qs_dt = self.quarter_start
        # The URL's trailing file name is exactly `tile_name`; reuse it so
        # the two properties can never drift apart.
        return f"{base_url}/type={self.service_type}/year={qs_dt:%Y}/quarter={self.quarter}/{self.tile_name}"

    def download_tile(self) -> None:
        """Download the tile into the data store if it is not already there.

        Raises:
            requests.HTTPError: If the HTTP request fails.
        """
        path = str(self.base_path / self.tile_name)
        if not self.data_store.file_exists(path):
            response = requests.get(self.tile_url)
            response.raise_for_status()
            self.data_store.write_file(path, response.content)

    def read_tile(self):
        """Read the tile dataset, downloading it first if necessary."""
        path = str(self.base_path / self.tile_name)

        if self.data_store.file_exists(path):
            df = read_dataset(self.data_store, path)
            return df
        else:
            # Not cached yet: fetch it, then re-enter the happy path above.
            self.download_tile()
            df = self.read_tile()
            return df
|
66
|
+
|
67
|
+
|
68
|
+
class OoklaSpeedtestConfig(BaseModel):
    """Collection of Ookla Speedtest tile configurations."""

    tiles: List[OoklaSpeedtestTileConfig] = Field(default_factory=list)

    @classmethod
    def from_available_ookla_tiles(
        cls, data_store: DataStore = None, base_path: Path = None
    ):
        """Build a config covering every quarter published by Ookla.

        Enumerates all (service_type, year, quarter) combinations from the
        first data year (2019) up to the most recent *completed* quarter.

        Args:
            data_store: Storage backend shared by all tiles; defaults to a
                local data store.
            base_path: Directory the tiles live under; defaults to the
                configured bronze path.

        Returns:
            OoklaSpeedtestConfig: With one tile config per combination.
        """
        data_store = data_store or LocalDataStore()
        base_path = base_path or config.get_path("ookla_speedtest", "bronze")

        # first data year
        start_year = 2019
        # Take the date once so year/month cannot straddle a boundary
        # between two calls.
        today = datetime.today()
        max_year = today.year
        # Number of completed quarters so far this year. Integer division
        # keeps this an int (np.floor would yield a float).
        max_quarter = (today.month - 1) // 3
        if max_quarter == 0:
            # No completed quarter yet this year: latest data is Q4 last year.
            max_year -= 1
            max_quarter = 4

        ookla_tiles = []
        for year in range(start_year, max_year + 1):
            for quarter in range(1, 5):
                if year == max_year and quarter > max_quarter:
                    continue
                # `service_type` (not `type`) avoids shadowing the builtin.
                for service_type in ["fixed", "mobile"]:
                    ookla_tiles.append(
                        OoklaSpeedtestTileConfig(
                            service_type=service_type,
                            year=year,
                            quarter=quarter,
                            data_store=data_store,
                            base_path=base_path,
                        )
                    )
        return cls(tiles=ookla_tiles)
|
103
|
+
|
104
|
+
|
105
|
+
class OoklaSpeedtestTile(BaseModel):
    """One record from an Ookla Speedtest tile dataset.

    Field names mirror the columns of the Ookla open-data parquet files.
    Metric units are as implied by the column names (kbps / ms) — per
    Ookla's published schema; confirm against the upstream docs.
    """

    quadkey: str  # Mercator quadkey identifying the tile
    tile: str  # tile polygon as WKT (parsed by CountryOoklaTiles.to_geodataframe)
    avg_d_kbps: float  # avg download speed, kbps (per column name)
    avg_u_kbps: float  # avg upload speed, kbps (per column name)
    avg_lat_ms: float  # avg latency, ms (per column name)
    avg_lat_down_ms: Optional[float] = None  # absent in older datasets
    avg_lat_up_ms: Optional[float] = None  # absent in older datasets
    tests: int
    devices: int

    # Tolerate any extra columns the source parquet may carry.
    model_config = ConfigDict(extra="allow")
|
117
|
+
|
118
|
+
|
119
|
+
class CountryOoklaTiles(BaseModel):
    """Ookla Speedtest tiles clipped to a single country.

    Built by intersecting a country's zoom-16 Mercator quadkeys with the
    quadkeys present in one quarterly Ookla tile dataset.
    """

    country: str
    service_type: str
    year: int
    quarter: int
    quadkeys: List[OoklaSpeedtestTile]

    @staticmethod
    def from_country(country, ookla_tile_config: OoklaSpeedtestTileConfig):
        """Create country-level tiles for one quarterly dataset.

        Args:
            country: Country identifier accepted by CountryMercatorTiles.
            ookla_tile_config: The quarterly tile dataset to clip.

        Returns:
            CountryOoklaTiles: ``quadkeys`` is empty when the country has no
            tiles in the dataset.
        """
        # load country zoom level 16 quadkeys
        country_tiles = CountryMercatorTiles.create(country, 16)

        # read ookla tiles for the config (downloads on first access)
        ookla_tiles = ookla_tile_config.read_tile()

        # filter country tiles by ookla tile quadkeys
        country_ookla_tiles = country_tiles.filter_quadkeys(ookla_tiles.quadkey)

        # Single construction path: only the quadkey payload differs between
        # the matched and unmatched cases (the original duplicated the whole
        # constructor call in both branches).
        quadkeys = []
        if len(country_ookla_tiles):
            df_quadkeys = country_ookla_tiles.to_dataframe().merge(
                ookla_tiles, on="quadkey", how="left"
            )
            quadkeys = [
                OoklaSpeedtestTile(**tile_dict)
                for tile_dict in df_quadkeys.to_dict("records")
            ]
        return CountryOoklaTiles(
            country=country,
            service_type=ookla_tile_config.service_type,
            year=ookla_tile_config.year,
            quarter=ookla_tile_config.quarter,
            quadkeys=quadkeys,
        )

    def to_dataframe(self) -> pd.DataFrame:
        """Return the tiles as a DataFrame (schema-only frame when empty)."""
        if len(self):
            return pd.DataFrame([q.model_dump() for q in self.quadkeys])
        else:
            return pd.DataFrame(
                columns=[
                    "quadkey",
                    "tile",
                    "avg_d_kbps",
                    "avg_u_kbps",
                    "avg_lat_ms",
                    "avg_lat_down_ms",
                    "avg_lat_up_ms",
                    "tests",
                    "devices",
                ]
            )

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Return the tiles as a GeoDataFrame with the WKT `tile` column
        parsed into the active geometry (EPSG:4326)."""
        if len(self):
            df = self.to_dataframe()
            df["geometry"] = df.tile.apply(wkt.loads)
            df.drop(columns="tile", inplace=True)
            return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
        else:
            # NOTE(review): unlike the populated branch, this empty frame has
            # no active geometry column or CRS set — confirm callers tolerate
            # that before relying on `.crs` / `.geometry` of an empty result.
            return gpd.GeoDataFrame(
                columns=[
                    "quadkey",
                    "avg_d_kbps",
                    "avg_u_kbps",
                    "avg_lat_ms",
                    "avg_lat_down_ms",
                    "avg_lat_up_ms",
                    "tests",
                    "devices",
                    "geometry",
                ]
            )

    def __len__(self) -> int:
        """Number of quadkey tiles held."""
        return len(self.quadkeys)
|
@@ -0,0 +1,290 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
import geopandas as gpd
|
4
|
+
import requests
|
5
|
+
import logging
|
6
|
+
import gzip
|
7
|
+
import os
|
8
|
+
import tempfile
|
9
|
+
from datetime import datetime
|
10
|
+
from typing import List, Optional, Union
|
11
|
+
from pathlib import Path
|
12
|
+
from bs4 import BeautifulSoup
|
13
|
+
import pycountry
|
14
|
+
from pydantic import BaseModel, Field, HttpUrl, field_validator
|
15
|
+
|
16
|
+
from gigaspatial.core.io.data_store import DataStore
|
17
|
+
from gigaspatial.core.io.local_data_store import LocalDataStore
|
18
|
+
from gigaspatial.core.io.readers import read_dataset
|
19
|
+
from gigaspatial.config import config as global_config
|
20
|
+
|
21
|
+
|
22
|
+
class OpenCellIDConfig(BaseModel):
    """Configuration for OpenCellID data access"""

    # Base URLs
    BASE_URL: HttpUrl = Field(default="https://opencellid.org/")
    DOWNLOAD_URL: HttpUrl = Field(default="https://opencellid.org/downloads.php?token=")

    # User configuration
    country: str = Field(...)  # normalized to ISO alpha-3 by the validator below
    api_token: str = Field(
        default=global_config.OPENCELLID_ACCESS_TOKEN,
        description="OpenCellID API Access Token",
    )
    base_path: Path = Field(default=global_config.get_path("opencellid", "bronze"))
    created_newer: int = Field(
        default=2003, description="Filter out cell towers added before this year"
    )
    # NOTE(review): this default is evaluated once at import time, not per
    # instance — a long-running process keeps the year it started in.
    created_before: int = Field(
        default=datetime.now().year,
        description="Filter out cell towers added after this year",
    )
    drop_duplicates: bool = Field(
        default=True,
        description="Drop cells that are in the exact same location and radio technology",
    )

    @field_validator("country")
    def validate_country(cls, value: str) -> str:
        """Normalize any recognizable country name or code to ISO alpha-3.

        Raises:
            ValueError: If pycountry cannot resolve the value.
        """
        try:
            return pycountry.countries.lookup(value).alpha_3
        except LookupError:
            raise ValueError(f"Invalid country code provided: {value}")

    @property
    def output_file_path(self) -> Path:
        """Path to save the downloaded OpenCellID data"""
        return self.base_path / f"opencellid_{self.country.lower()}.csv.gz"

    def __repr__(self) -> str:
        # Compact multi-line summary of the user-facing settings only
        # (URLs and the API token are deliberately omitted).
        return (
            f"OpenCellIDConfig(\n"
            f"  country='{self.country}'\n"
            f"  created_newer={self.created_newer}\n"
            f"  created_before={self.created_before}\n"
            f"  drop_duplicates={self.drop_duplicates}\n"
            f")"
        )
|
69
|
+
|
70
|
+
|
71
|
+
class OpenCellIDDownloader:
    """Downloader for OpenCellID data.

    Scrapes the per-country download links from the OpenCellID site, fetches
    the gzipped CSV exports, applies year/duplicate filters from the config,
    and writes a single combined gzip CSV into the data store.
    """

    def __init__(
        self,
        config: Union[OpenCellIDConfig, dict],
        data_store: Optional[DataStore] = None,
        logger: Optional[logging.Logger] = None,
    ):
        # Accept a plain dict for convenience; coerce it into the model so
        # validation (e.g. country normalization) always runs.
        if isinstance(config, dict):
            self.config = OpenCellIDConfig(**config)
        else:
            self.config = config

        self.data_store = data_store or LocalDataStore()
        self.logger = logger or global_config.get_logger(self.__class__.__name__)

    @classmethod
    def from_country(
        cls,
        country: str,
        api_token: str = global_config.OPENCELLID_ACCESS_TOKEN,
        **kwargs,
    ):
        """Create a downloader for a specific country"""
        config = OpenCellIDConfig(country=country, api_token=api_token, **kwargs)
        return cls(config=config)

    def get_download_links(self) -> List[str]:
        """Get download links for the country from OpenCellID website.

        Scrapes the ``regions`` table on the token-authenticated downloads
        page and returns the "Files (grouped by MCC)" hrefs for the
        configured country.

        Raises:
            ValueError: If the table is missing or the country has no entry.
        """
        url = f"{self.config.DOWNLOAD_URL}{self.config.api_token}"
        # The site's table uses alpha-2 codes, while config stores alpha-3.
        country_alpha2 = pycountry.countries.get(
            alpha_3=self.config.country.upper()
        ).alpha_2

        try:
            # Find table with cell tower data links
            self.logger.info(f"Fetching download links for {self.config.country}")
            html_content = requests.get(url).text
            soup = BeautifulSoup(html_content, "lxml")
            table = soup.find("table", {"id": "regions"})

            if not table:
                raise ValueError(
                    "Could not find cell tower data table on OpenCellID website"
                )

            # Parse table headers
            t_headers = []
            for th in table.find_all("th"):
                t_headers.append(th.text.replace("\n", " ").strip())

            # Parse table data
            table_data = []
            for tr in table.tbody.find_all("tr"):
                t_row = {}

                for td, th in zip(tr.find_all("td"), t_headers):
                    if "Files" in th:
                        # Link columns: collect every anchor href in the cell.
                        t_row[th] = []
                        for a in td.find_all("a"):
                            t_row[th].append(a.get("href"))
                    else:
                        t_row[th] = td.text.replace("\n", "").strip()

                table_data.append(t_row)

            cell_dict = pd.DataFrame(table_data)

            # Get links for the country code
            if country_alpha2 not in cell_dict["Country Code"].values:
                raise ValueError(
                    f"Country code {country_alpha2} not found in OpenCellID database"
                )
            else:
                links = cell_dict[cell_dict["Country Code"] == country_alpha2][
                    "Files (grouped by MCC)"
                ].values[0]

            return links

        except Exception as e:
            # Log at this boundary, then re-raise unchanged for the caller.
            self.logger.error(f"Error fetching download links: {str(e)}")
            raise

    def download_and_process(self) -> str:
        """Download and process OpenCellID data for the configured country.

        Downloads every per-MCC file, concatenates them, converts the unix
        `created`/`updated` columns to datetimes, applies the configured
        year filter and optional de-duplication, and writes the result to
        ``config.output_file_path`` via the data store.

        Returns:
            str: The output path written to.

        Raises:
            RuntimeError: On rate limiting, token rejection, or unreadable
                downloads.
            ValueError: If no rows survive (or no data was found).
        """

        try:
            links = self.get_download_links()
            self.logger.info(f"Found {len(links)} data files for {self.config.country}")

            dfs = []

            for link in links:
                self.logger.info(f"Downloading data from {link}")
                response = requests.get(link, stream=True)
                response.raise_for_status()

                # Use a temporary file for download
                with tempfile.NamedTemporaryFile(delete=False, suffix=".gz") as tmpfile:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            tmpfile.write(chunk)
                    temp_file = tmpfile.name

                try:
                    # Read the downloaded gzipped CSV data
                    with gzip.open(temp_file, "rt") as feed_data:
                        dfs.append(
                            pd.read_csv(
                                feed_data,
                                names=[
                                    "radio",
                                    "mcc",
                                    "net",
                                    "area",
                                    "cell",
                                    "unit",
                                    "lon",
                                    "lat",
                                    "range",
                                    "samples",
                                    "changeable",
                                    "created",
                                    "updated",
                                    "average_signal",
                                ],
                            )
                        )
                except IOError as e:
                    # A non-gzip body usually means the API returned an error
                    # string instead of data; inspect its first line to tell
                    # rate limiting / bad token apart from other failures.
                    with open(temp_file, "r") as error_file:
                        contents = error_file.readline()

                    if "RATE_LIMITED" in contents:
                        raise RuntimeError(
                            "API rate limit exceeded. You're rate-limited!"
                        )
                    elif "INVALID_TOKEN" in contents:
                        raise RuntimeError("API token rejected by OpenCellID!")
                    else:
                        raise RuntimeError(
                            f"Error processing downloaded data: {str(e)}"
                        )
                finally:
                    # Clean up temporary file
                    if os.path.exists(temp_file):
                        os.remove(temp_file)

            df_cell = pd.concat(dfs, ignore_index=True)

            # Process the data
            if not df_cell.empty:
                # Convert timestamps to datetime
                df_cell["created"] = pd.to_datetime(
                    df_cell["created"], unit="s", origin="unix"
                )
                df_cell["updated"] = pd.to_datetime(
                    df_cell["updated"], unit="s", origin="unix"
                )

                # Filter by year.
                # NOTE(review): the upper bound is exclusive (`<`), so with the
                # default created_before = current year, towers added this year
                # are dropped — confirm that is the intended semantics.
                df_cell = df_cell[
                    (df_cell.created.dt.year >= self.config.created_newer)
                    & (df_cell.created.dt.year < self.config.created_before)
                ]

                # Drop duplicates if configured
                if self.config.drop_duplicates:
                    # Keep the first row per (radio, lon, lat) triple.
                    df_cell = (
                        df_cell.groupby(["radio", "lon", "lat"]).first().reset_index()
                    )

                # Save processed data using data_store
                output_path = str(self.config.output_file_path)
                self.logger.info(f"Saving processed data to {output_path}")
                with self.data_store.open(output_path, "wb") as f:
                    df_cell.to_csv(f, compression="gzip", index=False)

                return output_path
            else:
                raise ValueError(f"No data found for {self.config.country}")

        except Exception as e:
            # Log at this boundary, then re-raise unchanged for the caller.
            self.logger.error(f"Error downloading and processing data: {str(e)}")
            raise
|
257
|
+
|
258
|
+
|
259
|
+
class OpenCellIDReader:
    """Reads previously downloaded OpenCellID cell-tower data."""

    def __init__(
        self,
        country: str,
        data_store: Optional[DataStore] = None,
        base_path: Optional[Path] = None,
    ):
        # Normalize the country to its ISO alpha-3 code up front so the
        # file-name convention below is deterministic.
        self.country = pycountry.countries.lookup(country).alpha_3
        self.data_store = data_store or LocalDataStore()
        self.base_path = base_path or global_config.get_path("opencellid", "bronze")

    def read_data(self) -> pd.DataFrame:
        """Load the country's cell-tower table from the data store.

        Raises:
            FileNotFoundError: If the data was never downloaded.
        """
        file_path = str(self.base_path / f"opencellid_{self.country.lower()}.csv.gz")

        # Happy path first; a missing file is a usage error, reported below.
        if self.data_store.file_exists(file_path):
            return read_dataset(self.data_store, file_path)

        raise FileNotFoundError(
            f"OpenCellID data for {self.country} not found at {file_path}. "
            "Download the data first using OpenCellIDDownloader."
        )

    def to_geodataframe(self) -> gpd.GeoDataFrame:
        """Return the cell-tower table with lon/lat promoted to point geometry."""
        towers = self.read_data()
        points = gpd.points_from_xy(towers.lon, towers.lat)
        return gpd.GeoDataFrame(towers, geometry=points, crs="EPSG:4326")
|