giga-spatial 0.6.8__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.8.dist-info → giga_spatial-0.7.0.dist-info}/METADATA +30 -4
- {giga_spatial-0.6.8.dist-info → giga_spatial-0.7.0.dist-info}/RECORD +14 -13
- gigaspatial/__init__.py +1 -1
- gigaspatial/core/io/adls_data_store.py +104 -11
- gigaspatial/core/io/local_data_store.py +8 -0
- gigaspatial/generators/zonal/geometry.py +12 -5
- gigaspatial/grid/h3.py +417 -0
- gigaspatial/grid/mercator_tiles.py +1 -1
- gigaspatial/handlers/base.py +2 -2
- gigaspatial/processing/geo.py +10 -5
- gigaspatial/processing/tif_processor.py +380 -224
- {giga_spatial-0.6.8.dist-info → giga_spatial-0.7.0.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.8.dist-info → giga_spatial-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.8.dist-info → giga_spatial-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import pandas as pd
|
3
3
|
import geopandas as gpd
|
4
|
-
|
4
|
+
import networkx as nx
|
5
|
+
import scipy.sparse as sp
|
6
|
+
from typing import List, Optional, Tuple, Union, Literal, Callable, Dict, Any
|
5
7
|
from pydantic import ConfigDict
|
6
8
|
from pydantic.dataclasses import dataclass
|
7
9
|
from contextlib import contextmanager
|
@@ -15,6 +17,7 @@ from functools import partial
|
|
15
17
|
import multiprocessing
|
16
18
|
from tqdm import tqdm
|
17
19
|
import tempfile
|
20
|
+
import shutil
|
18
21
|
import os
|
19
22
|
|
20
23
|
from gigaspatial.core.io.data_store import DataStore
|
@@ -35,26 +38,40 @@ class TifProcessor:
|
|
35
38
|
merge_method: Literal["first", "last", "min", "max", "mean"] = "first"
|
36
39
|
target_crs: Optional[str] = None # For reprojection if needed
|
37
40
|
resampling_method: Resampling = Resampling.nearest
|
41
|
+
reprojection_resolution: Optional[Tuple[float, float]] = None
|
38
42
|
|
39
43
|
def __post_init__(self):
|
40
44
|
"""Validate inputs, merge rasters if needed, and set up logging."""
|
41
45
|
self.data_store = self.data_store or LocalDataStore()
|
42
46
|
self.logger = config.get_logger(self.__class__.__name__)
|
43
47
|
self._cache = {}
|
48
|
+
self._temp_dir = tempfile.mkdtemp()
|
44
49
|
self._merged_file_path = None
|
45
|
-
self.
|
50
|
+
self._reprojected_file_path = None
|
46
51
|
|
47
52
|
# Handle multiple dataset paths
|
48
53
|
if isinstance(self.dataset_path, list):
|
49
|
-
self.
|
50
|
-
|
51
|
-
|
52
|
-
|
54
|
+
if len(self.dataset_path) > 1:
|
55
|
+
self.dataset_paths = [Path(p) for p in self.dataset_path]
|
56
|
+
self._validate_multiple_datasets()
|
57
|
+
self._merge_rasters()
|
58
|
+
self.dataset_path = self._merged_file_path
|
53
59
|
else:
|
54
60
|
self.dataset_paths = [Path(self.dataset_path)]
|
55
|
-
if not self.data_store.file_exists(self.dataset_path):
|
61
|
+
if not self.data_store.file_exists(str(self.dataset_path)):
|
56
62
|
raise FileNotFoundError(f"Dataset not found at {self.dataset_path}")
|
57
63
|
|
64
|
+
# Reproject single raster during initialization if target_crs is set
|
65
|
+
if self.target_crs:
|
66
|
+
self.logger.info(f"Reprojecting single raster to {self.target_crs}...")
|
67
|
+
with self.data_store.open(str(self.dataset_path), "rb") as f:
|
68
|
+
with rasterio.MemoryFile(f.read()) as memfile:
|
69
|
+
with memfile.open() as src:
|
70
|
+
self._reprojected_file_path = self._reproject_to_temp_file(
|
71
|
+
src, self.target_crs
|
72
|
+
)
|
73
|
+
self.dataset_path = self._reprojected_file_path
|
74
|
+
|
58
75
|
self._load_metadata()
|
59
76
|
|
60
77
|
# Validate mode and band count
|
@@ -67,18 +84,124 @@ class TifProcessor:
|
|
67
84
|
if self.mode == "multi" and self.count < 2:
|
68
85
|
raise ValueError("Multi mode requires a TIF file with 2 or more bands")
|
69
86
|
|
87
|
+
@contextmanager
def open_dataset(self):
    """Yield an open rasterio dataset for this processor.

    The merged temporary raster is preferred, then the reprojected temporary
    raster. When neither is set, the bytes are read through
    ``self.data_store`` and opened from an in-memory file.
    """
    # Both temporary rasters are plain local files opened the same way.
    local_raster = self._merged_file_path or self._reprojected_file_path
    if local_raster:
        with rasterio.open(local_raster) as dataset:
            yield dataset
        return

    with self.data_store.open(str(self.dataset_path), "rb") as stream:
        with rasterio.MemoryFile(stream.read()) as memory_file, memory_file.open() as dataset:
            yield dataset
|
101
|
+
|
102
|
+
def reproject_to(
    self,
    target_crs: str,
    output_path: Optional[Union[str, Path]] = None,
    resampling_method: Optional["Resampling"] = None,
    resolution: Optional[Tuple[float, float]] = None,
):
    """
    Reprojects the current raster to a new CRS and optionally saves it.

    Args:
        target_crs: The CRS to reproject to (e.g., "EPSG:4326").
        output_path: The path to save the reprojected raster. If None,
            it is saved to a temporary file inside ``self._temp_dir``.
        resampling_method: The resampling method to use. Defaults to
            ``self.resampling_method`` when None.
        resolution: The target resolution (pixel size) in the new CRS.
            Defaults to ``self.reprojection_resolution`` when not given.

    Returns:
        The path of the resulting raster. When the raster is already in
        ``target_crs``, this is ``Path(output_path)`` if a copy was requested
        and ``self.dataset_path`` otherwise; in all other cases it is the
        ``Path`` of the newly written file.
    """
    self.logger.info(f"Reprojecting raster to {target_crs}...")

    # Compare with None instead of relying on truthiness: Resampling is an
    # IntEnum whose ``nearest`` member equals 0, so ``x or default`` would
    # silently discard an explicit ``Resampling.nearest``.
    if resampling_method is None:
        resampling_method = self.resampling_method
    resolution = resolution or self.reprojection_resolution

    with self.open_dataset() as src:
        if src.crs.to_string() == target_crs:
            self.logger.info(
                "Raster is already in the target CRS. No reprojection needed."
            )
            # If output_path is specified, copy the file and report the copy.
            if output_path:
                self.data_store.copy_file(str(self.dataset_path), output_path)
                return Path(output_path)
            return self.dataset_path

        dst_path = output_path or os.path.join(
            self._temp_dir, f"reprojected_single_{os.urandom(8).hex()}.tif"
        )
        profile = self._get_reprojection_profile(src, target_crs, resolution)

        with rasterio.open(dst_path, "w", **profile) as dst:
            for band_idx in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, band_idx),
                    destination=rasterio.band(dst, band_idx),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=dst.transform,
                    dst_crs=dst.crs,
                    resampling=resampling_method,
                    num_threads=multiprocessing.cpu_count(),
                )

    self.logger.info(f"Reprojection complete. Output saved to {dst_path}")
    return Path(dst_path)
|
158
|
+
|
159
|
+
def get_raster_info(self) -> Dict[str, Any]:
    """Collect the raster's descriptive attributes into a single dictionary.

    Returns:
        A dict with the keys ``count``, ``width``, ``height``, ``crs``,
        ``bounds``, ``transform``, ``dtypes``, ``nodata``, ``mode``,
        ``is_merged`` and ``source_count``. Each value is read from the
        attribute of the same name, except ``dtypes``, which comes from
        ``self.dtype``.
    """
    # (key in the returned dict, attribute read from this object)
    exported = (
        ("count", "count"),
        ("width", "width"),
        ("height", "height"),
        ("crs", "crs"),
        ("bounds", "bounds"),
        ("transform", "transform"),
        ("dtypes", "dtype"),
        ("nodata", "nodata"),
        ("mode", "mode"),
        ("is_merged", "is_merged"),
        ("source_count", "source_count"),
    )
    return {key: getattr(self, attribute) for key, attribute in exported}
|
174
|
+
|
175
|
+
def _reproject_to_temp_file(
    self, src: rasterio.DatasetReader, target_crs: str
) -> str:
    """Reproject every band of ``src`` into a new file under ``self._temp_dir``.

    The output profile comes from ``self._get_reprojection_profile`` using
    ``self.reprojection_resolution``; bands are resampled with
    ``self.resampling_method``.

    Args:
        src: Open source dataset to reproject.
        target_crs: CRS of the output raster.

    Returns:
        Path of the written temporary file.
    """
    # Random suffix keeps concurrent reprojections from colliding.
    temp_name = f"reprojected_temp_{os.urandom(8).hex()}.tif"
    dst_path = os.path.join(self._temp_dir, temp_name)
    profile = self._get_reprojection_profile(
        src, target_crs, self.reprojection_resolution
    )

    with rasterio.open(dst_path, "w", **profile) as dst:
        # These settings are identical for every band.
        warp_options = dict(
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=dst.transform,
            dst_crs=dst.crs,
            resampling=self.resampling_method,
        )
        for band_number in range(1, src.count + 1):
            reproject(
                source=rasterio.band(src, band_number),
                destination=rasterio.band(dst, band_number),
                **warp_options,
            )
    return dst_path
|
198
|
+
|
70
199
|
def _validate_multiple_datasets(self):
|
71
200
|
"""Validate that all datasets exist and have compatible properties."""
|
72
201
|
if len(self.dataset_paths) < 2:
|
73
202
|
raise ValueError("Multiple dataset paths required for merging")
|
74
203
|
|
75
|
-
|
76
|
-
for path in self.dataset_paths:
|
77
|
-
if not self.data_store.file_exists(path):
|
78
|
-
raise FileNotFoundError(f"Dataset not found at {path}")
|
79
|
-
|
80
|
-
# Load first dataset to get reference properties
|
81
|
-
with self.data_store.open(self.dataset_paths[0], "rb") as f:
|
204
|
+
with self.data_store.open(str(self.dataset_paths[0]), "rb") as f:
|
82
205
|
with rasterio.MemoryFile(f.read()) as memfile:
|
83
206
|
with memfile.open() as ref_src:
|
84
207
|
ref_count = ref_src.count
|
@@ -87,9 +210,8 @@ class TifProcessor:
|
|
87
210
|
ref_transform = ref_src.transform
|
88
211
|
ref_nodata = ref_src.nodata
|
89
212
|
|
90
|
-
# Validate all other datasets against reference
|
91
213
|
for i, path in enumerate(self.dataset_paths[1:], 1):
|
92
|
-
with self.data_store.open(path, "rb") as f:
|
214
|
+
with self.data_store.open(str(path), "rb") as f:
|
93
215
|
with rasterio.MemoryFile(f.read()) as memfile:
|
94
216
|
with memfile.open() as src:
|
95
217
|
if src.count != ref_count:
|
@@ -100,9 +222,10 @@ class TifProcessor:
|
|
100
222
|
raise ValueError(
|
101
223
|
f"Dataset {i} has dtype {src.dtypes[0]}, expected {ref_dtype}"
|
102
224
|
)
|
103
|
-
if self.target_crs
|
104
|
-
|
105
|
-
f"Dataset {i} has CRS {src.crs}, expected {ref_crs}.
|
225
|
+
if not self.target_crs and src.crs != ref_crs:
|
226
|
+
self.logger.warning(
|
227
|
+
f"Dataset {i} has CRS {src.crs}, expected {ref_crs}. "
|
228
|
+
"Consider setting target_crs parameter for reprojection before merging."
|
106
229
|
)
|
107
230
|
if self.target_crs is None and not self._transforms_compatible(
|
108
231
|
src.transform, ref_transform
|
@@ -115,6 +238,46 @@ class TifProcessor:
|
|
115
238
|
f"Dataset {i} has different nodata value: {src.nodata} vs {ref_nodata}"
|
116
239
|
)
|
117
240
|
|
241
|
+
def _get_reprojection_profile(
    self,
    src: rasterio.DatasetReader,
    target_crs: str,
    resolution: Optional[Tuple[float, float]],
    compression: str = "lzw",
):
    """Build the write profile for a reprojected copy of ``src``.

    Args:
        src: Open source dataset.
        target_crs: CRS of the reprojected raster.
        resolution: Target pixel size; when falsy, ``resolution`` is not
            passed to ``calculate_default_transform``.
        compression: Value stored under the profile's ``compress`` key.

    Returns:
        A copy of ``src.profile`` with ``crs``, ``transform``, ``width``,
        ``height`` and ``compress`` replaced.
    """
    transform_options = {}
    if resolution:
        source_resolution = (abs(src.transform.a), abs(src.transform.e))
        self.logger.info(
            f"Using target resolution: {resolution}. Source resolution: {source_resolution}."
        )
        # Only forward the resolution when one was actually requested.
        transform_options["resolution"] = resolution

    dst_transform, width, height = calculate_default_transform(
        src.crs,
        target_crs,
        src.width,
        src.height,
        *src.bounds,
        **transform_options,
    )

    profile = src.profile.copy()
    profile.update(
        crs=target_crs,
        transform=dst_transform,
        width=width,
        height=height,
        compress=compression,  # compress the output to save space
    )
    return profile
|
280
|
+
|
118
281
|
def _transforms_compatible(self, transform1, transform2, tolerance=1e-6):
|
119
282
|
"""Check if two transforms have compatible pixel sizes."""
|
120
283
|
return (
|
@@ -126,151 +289,77 @@ class TifProcessor:
|
|
126
289
|
"""Merge multiple rasters into a single raster."""
|
127
290
|
self.logger.info(f"Merging {len(self.dataset_paths)} rasters...")
|
128
291
|
|
129
|
-
# Create temporary directory for merged file
|
130
|
-
self._temp_dir = tempfile.mkdtemp()
|
131
|
-
merged_filename = "merged_raster.tif"
|
132
|
-
self._merged_file_path = os.path.join(self._temp_dir, merged_filename)
|
133
|
-
|
134
292
|
# Open all datasets and handle reprojection if needed
|
135
|
-
|
136
|
-
|
137
|
-
|
293
|
+
datasets_to_merge = []
|
294
|
+
temp_reprojected_files = []
|
138
295
|
try:
|
139
296
|
for path in self.dataset_paths:
|
140
|
-
with self.data_store.open(path, "rb") as f:
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
297
|
+
with self.data_store.open(str(path), "rb") as f:
|
298
|
+
with rasterio.MemoryFile(f.read()) as memfile:
|
299
|
+
with memfile.open() as src:
|
300
|
+
if self.target_crs and src.crs != self.target_crs:
|
301
|
+
self.logger.info(
|
302
|
+
f"Reprojecting {path.name} to {self.target_crs} before merging."
|
303
|
+
)
|
304
|
+
reprojected_path = self._reproject_to_temp_file(
|
305
|
+
src, self.target_crs
|
306
|
+
)
|
307
|
+
temp_reprojected_files.append(reprojected_path)
|
308
|
+
datasets_to_merge.append(
|
309
|
+
rasterio.open(reprojected_path)
|
310
|
+
)
|
311
|
+
else:
|
312
|
+
temp_path = os.path.join(
|
313
|
+
self._temp_dir,
|
314
|
+
f"temp_{path.stem}_{os.urandom(4).hex()}.tif",
|
315
|
+
)
|
316
|
+
temp_reprojected_files.append(temp_path)
|
317
|
+
|
318
|
+
profile = src.profile
|
319
|
+
with rasterio.open(temp_path, "w", **profile) as dst:
|
320
|
+
dst.write(src.read())
|
321
|
+
datasets_to_merge.append(rasterio.open(temp_path))
|
322
|
+
|
323
|
+
self._merged_file_path = os.path.join(self._temp_dir, "merged_raster.tif")
|
154
324
|
|
155
325
|
if self.merge_method == "mean":
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
# Use first source as reference for metadata
|
160
|
-
ref_src = src_files[0]
|
161
|
-
profile = ref_src.profile.copy()
|
162
|
-
profile.update(
|
163
|
-
{
|
164
|
-
"height": merged_array.shape[-2],
|
165
|
-
"width": merged_array.shape[-1],
|
166
|
-
"transform": merged_transform,
|
167
|
-
}
|
326
|
+
merged_array, merged_transform = self._merge_with_mean(
|
327
|
+
datasets_to_merge
|
168
328
|
)
|
169
|
-
|
170
|
-
# Write merged raster
|
171
|
-
with rasterio.open(self._merged_file_path, "w", **profile) as dst:
|
172
|
-
dst.write(merged_array)
|
173
|
-
|
174
329
|
else:
|
175
|
-
# Use rasterio's merge function
|
176
330
|
merged_array, merged_transform = merge(
|
177
|
-
|
331
|
+
datasets_to_merge,
|
178
332
|
method=self.merge_method,
|
179
333
|
resampling=self.resampling_method,
|
180
334
|
)
|
181
335
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
profile.update(
|
186
|
-
{
|
187
|
-
"height": merged_array.shape[-2],
|
188
|
-
"width": merged_array.shape[-1],
|
189
|
-
"transform": merged_transform,
|
190
|
-
}
|
191
|
-
)
|
192
|
-
|
193
|
-
if self.target_crs:
|
194
|
-
profile["crs"] = self.target_crs
|
195
|
-
|
196
|
-
# Write merged raster
|
197
|
-
with rasterio.open(self._merged_file_path, "w", **profile) as dst:
|
198
|
-
dst.write(merged_array)
|
199
|
-
|
200
|
-
finally:
|
201
|
-
# Clean up source files
|
202
|
-
for src in src_files:
|
203
|
-
temp_path = src.name
|
204
|
-
src.close()
|
205
|
-
try:
|
206
|
-
os.unlink(temp_path)
|
207
|
-
except:
|
208
|
-
pass
|
209
|
-
|
210
|
-
# Clean up reprojected files
|
211
|
-
for src in reprojected_files:
|
212
|
-
if src not in src_files: # Don't double-close
|
213
|
-
temp_path = src.name
|
214
|
-
src.close()
|
215
|
-
try:
|
216
|
-
os.unlink(temp_path)
|
217
|
-
except:
|
218
|
-
pass
|
219
|
-
|
220
|
-
self.logger.info("Raster merging completed!")
|
221
|
-
|
222
|
-
def _reproject_rasters(self, src_files, target_crs):
|
223
|
-
"""Reproject all rasters to a common CRS before merging."""
|
224
|
-
reprojected_files = []
|
225
|
-
|
226
|
-
for i, src in enumerate(src_files):
|
227
|
-
if src.crs.to_string() == target_crs:
|
228
|
-
# No reprojection needed
|
229
|
-
reprojected_files.append(src)
|
230
|
-
continue
|
231
|
-
|
232
|
-
# Calculate transform and dimensions for reprojection
|
233
|
-
transform, width, height = calculate_default_transform(
|
234
|
-
src.crs,
|
235
|
-
target_crs,
|
236
|
-
src.width,
|
237
|
-
src.height,
|
238
|
-
*src.bounds,
|
239
|
-
resolution=self.resolution if hasattr(self, "resolution") else None,
|
240
|
-
)
|
241
|
-
|
242
|
-
# Create temporary file for reprojected raster
|
243
|
-
temp_file = tempfile.NamedTemporaryFile(suffix=".tif", delete=False)
|
244
|
-
temp_file.close()
|
245
|
-
|
246
|
-
# Set up profile for reprojected raster
|
247
|
-
profile = src.profile.copy()
|
336
|
+
# Get profile from the first file in the list (all should be compatible now)
|
337
|
+
ref_src = datasets_to_merge[0]
|
338
|
+
profile = ref_src.profile.copy()
|
248
339
|
profile.update(
|
249
340
|
{
|
250
|
-
"
|
251
|
-
"
|
252
|
-
"
|
253
|
-
"
|
341
|
+
"height": merged_array.shape[-2],
|
342
|
+
"width": merged_array.shape[-1],
|
343
|
+
"transform": merged_transform,
|
344
|
+
"crs": self.target_crs if self.target_crs else ref_src.crs,
|
254
345
|
}
|
255
346
|
)
|
256
347
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
src_transform=src.transform,
|
264
|
-
src_crs=src.crs,
|
265
|
-
dst_transform=transform,
|
266
|
-
dst_crs=target_crs,
|
267
|
-
resampling=self.resampling_method,
|
268
|
-
)
|
348
|
+
with rasterio.open(self._merged_file_path, "w", **profile) as dst:
|
349
|
+
dst.write(merged_array)
|
350
|
+
finally:
|
351
|
+
for dataset in datasets_to_merge:
|
352
|
+
if hasattr(dataset, "close"):
|
353
|
+
dataset.close()
|
269
354
|
|
270
|
-
#
|
271
|
-
|
355
|
+
# Clean up temporary files immediately
|
356
|
+
for temp_file in temp_reprojected_files:
|
357
|
+
try:
|
358
|
+
os.remove(temp_file)
|
359
|
+
except OSError:
|
360
|
+
pass
|
272
361
|
|
273
|
-
|
362
|
+
self.logger.info("Raster merging completed!")
|
274
363
|
|
275
364
|
def _merge_with_mean(self, src_files):
|
276
365
|
"""Merge rasters using mean aggregation."""
|
@@ -295,6 +384,12 @@ class TifProcessor:
|
|
295
384
|
bounds.left, bounds.bottom, bounds.right, bounds.top, width, height
|
296
385
|
)
|
297
386
|
|
387
|
+
estimated_memory = height * width * src_files[0].count * 8 # float64
|
388
|
+
if estimated_memory > 1e9: # 1GB threshold
|
389
|
+
self.logger.warning(
|
390
|
+
f"Large memory usage expected: {estimated_memory/1e9:.1f}GB"
|
391
|
+
)
|
392
|
+
|
298
393
|
# Initialize arrays for sum and count
|
299
394
|
sum_array = np.zeros((src_files[0].count, height, width), dtype=np.float64)
|
300
395
|
count_array = np.zeros((height, width), dtype=np.int32)
|
@@ -336,33 +431,9 @@ class TifProcessor:
|
|
336
431
|
|
337
432
|
return mean_array.astype(src_files[0].dtypes[0]), merged_transform
|
338
433
|
|
339
|
-
def __del__(self):
|
340
|
-
"""Cleanup temporary files."""
|
341
|
-
if self._temp_dir and os.path.exists(self._temp_dir):
|
342
|
-
try:
|
343
|
-
import shutil
|
344
|
-
|
345
|
-
shutil.rmtree(self._temp_dir)
|
346
|
-
except:
|
347
|
-
pass
|
348
|
-
|
349
|
-
@contextmanager
|
350
|
-
def open_dataset(self):
|
351
|
-
"""Context manager for accessing the dataset"""
|
352
|
-
if self._merged_file_path:
|
353
|
-
# Open merged file directly
|
354
|
-
with rasterio.open(self._merged_file_path) as src:
|
355
|
-
yield src
|
356
|
-
else:
|
357
|
-
# Original single file logic
|
358
|
-
with self.data_store.open(self.dataset_path, "rb") as f:
|
359
|
-
with rasterio.MemoryFile(f.read()) as memfile:
|
360
|
-
with memfile.open() as src:
|
361
|
-
yield src
|
362
|
-
|
363
434
|
def _load_metadata(self):
|
364
435
|
"""Load metadata from the TIF file if not already cached"""
|
365
|
-
|
436
|
+
try:
|
366
437
|
with self.open_dataset() as src:
|
367
438
|
self._cache["transform"] = src.transform
|
368
439
|
self._cache["crs"] = src.crs.to_string()
|
@@ -375,6 +446,10 @@ class TifProcessor:
|
|
375
446
|
self._cache["nodata"] = src.nodata
|
376
447
|
self._cache["count"] = src.count
|
377
448
|
self._cache["dtype"] = src.dtypes[0]
|
449
|
+
except (rasterio.errors.RasterioIOError, FileNotFoundError) as e:
|
450
|
+
raise FileNotFoundError(f"Could not read raster metadata: {e}")
|
451
|
+
except Exception as e:
|
452
|
+
raise RuntimeError(f"Unexpected error loading metadata: {e}")
|
378
453
|
|
379
454
|
@property
|
380
455
|
def is_merged(self) -> bool:
|
@@ -386,7 +461,6 @@ class TifProcessor:
|
|
386
461
|
"""Get the number of source rasters."""
|
387
462
|
return len(self.dataset_paths)
|
388
463
|
|
389
|
-
# All other methods remain the same...
|
390
464
|
@property
|
391
465
|
def transform(self):
|
392
466
|
"""Get the transform from the TIF file"""
|
@@ -428,39 +502,17 @@ class TifProcessor:
|
|
428
502
|
return self._cache["nodata"]
|
429
503
|
|
430
504
|
@property
|
431
|
-
def
|
432
|
-
"""Get the data from the TIF file"""
|
433
|
-
self.
|
434
|
-
"The `tabular` property is deprecated, use `to_dataframe` instead"
|
435
|
-
)
|
436
|
-
if not hasattr(self, "_tabular"):
|
437
|
-
try:
|
438
|
-
if self.mode == "single":
|
439
|
-
self._tabular = self._to_band_dataframe(
|
440
|
-
drop_nodata=True, drop_values=[]
|
441
|
-
)
|
442
|
-
elif self.mode == "rgb":
|
443
|
-
self._tabular = self._to_rgb_dataframe(drop_nodata=True)
|
444
|
-
elif self.mode == "rgba":
|
445
|
-
self._tabular = self._to_rgba_dataframe(drop_transparent=True)
|
446
|
-
elif self.mode == "multi":
|
447
|
-
self._tabular = self._to_multi_band_dataframe(
|
448
|
-
drop_nodata=True,
|
449
|
-
drop_values=[],
|
450
|
-
band_names=None, # Use default band naming
|
451
|
-
)
|
452
|
-
else:
|
453
|
-
raise ValueError(
|
454
|
-
f"Invalid mode: {self.mode}. Must be one of: single, rgb, rgba, multi"
|
455
|
-
)
|
456
|
-
except Exception as e:
|
457
|
-
raise ValueError(
|
458
|
-
f"Failed to process TIF file in mode '{self.mode}'. "
|
459
|
-
f"Please ensure the file is valid and matches the selected mode. "
|
460
|
-
f"Original error: {str(e)}"
|
461
|
-
)
|
505
|
+
def dtype(self):
    """Return the ``"dtype"`` entry of the metadata cache, or ``[]`` if absent."""
    try:
        return self._cache["dtype"]
    except KeyError:
        return []
|
462
508
|
|
463
|
-
|
509
|
+
@property
def width(self):
    """Return the ``"width"`` entry of the metadata cache."""
    metadata = self._cache
    return metadata["width"]
|
512
|
+
|
513
|
+
@property
def height(self):
    """Return the ``"height"`` entry of the metadata cache."""
    metadata = self._cache
    return metadata["height"]
|
464
516
|
|
465
517
|
def to_dataframe(self, drop_nodata=True, **kwargs) -> pd.DataFrame:
|
466
518
|
try:
|
@@ -504,30 +556,115 @@ class TifProcessor:
|
|
504
556
|
|
505
557
|
return gdf
|
506
558
|
|
507
|
-
def
|
559
|
+
def to_graph(
    self,
    connectivity: Literal[4, 8] = 4,
    band: Optional[int] = None,
    include_coordinates: bool = False,
    graph_type: Literal["networkx", "sparse"] = "networkx",
    chunk_size: Optional[int] = None,
) -> Union["nx.Graph", "sp.csr_matrix"]:
    """
    Convert raster to graph based on pixel adjacency.

    Every valid (non-nodata) pixel of the selected band becomes a node. Node
    IDs are assigned sequentially in the order ``np.nonzero`` reports the
    valid pixels (row by row). Two valid pixels are linked when they are
    neighbours under the chosen connectivity, and each edge is weighted with
    the pixel value of its higher-ID endpoint.

    Args:
        connectivity: 4 for the von Neumann neighbourhood, 8 for the Moore
            neighbourhood.
        band: 1-based band number; the first band is used when None.
        include_coordinates: When True, networkx nodes also carry the ``x``
            and ``y`` values returned by ``src.xy(row, col)``.
        graph_type: ``"networkx"`` returns an ``nx.Graph`` whose nodes have
            a ``value`` attribute; ``"sparse"`` returns a symmetric CSR
            adjacency matrix of shape ``(n_valid, n_valid)``.
        chunk_size: Reserved; any value other than None is rejected.

    Returns:
        The graph in the representation selected by ``graph_type``.

    Raises:
        NotImplementedError: If ``chunk_size`` is given.
        ValueError: If ``connectivity`` or ``graph_type`` is not one of the
            supported values, or if ``band`` is outside the raster's bands.
    """
    if chunk_size is not None:
        raise NotImplementedError("Chunked processing is not yet implemented.")
    if connectivity not in (4, 8):
        raise ValueError(f"connectivity must be 4 or 8, got {connectivity!r}")
    if graph_type not in ("networkx", "sparse"):
        raise ValueError(
            f"graph_type must be 'networkx' or 'sparse', got {graph_type!r}"
        )

    with self.open_dataset() as src:
        band_idx = band - 1 if band is not None else 0
        if band_idx < 0 or band_idx >= src.count:
            raise ValueError(
                f"Band {band} not available. Raster has {src.count} bands"
            )

        data = src.read(band_idx + 1)
        nodata = src.nodata if src.nodata is not None else self.nodata
        if nodata is None:
            valid_mask = np.ones(data.shape, dtype=bool)
        elif np.isnan(nodata):
            # NaN never compares equal to itself, so ``data != nan`` would
            # mark every pixel (including the NaN ones) as valid.
            valid_mask = ~np.isnan(data)
        else:
            valid_mask = data != nodata

        height, width = data.shape

        # Sequential node IDs for the valid pixels; -1 marks invalid pixels.
        valid_rows, valid_cols = np.nonzero(valid_mask)
        num_valid_pixels = valid_rows.size
        node_ids = np.arange(num_valid_pixels, dtype=np.int64)
        node_map = np.full(data.shape, -1, dtype=np.int64)
        node_map[valid_rows, valid_cols] = node_ids
        values = data[valid_rows, valid_cols].astype(float)

        if connectivity == 4:
            # von Neumann neighborhood (4-connectivity)
            offsets = [(-1, 0), (1, 0), (0, -1), (0, 1)]
        else:
            # Moore neighborhood (8-connectivity)
            offsets = [
                (-1, -1),
                (-1, 0),
                (-1, 1),
                (0, -1),
                (0, 1),
                (1, -1),
                (1, 0),
                (1, 1),
            ]

        # Build the edges one offset at a time over all valid pixels at once.
        source_parts = []
        target_parts = []
        for dy, dx in offsets:
            neighbor_rows = valid_rows + dy
            neighbor_cols = valid_cols + dx
            inside = (
                (neighbor_rows >= 0)
                & (neighbor_rows < height)
                & (neighbor_cols >= 0)
                & (neighbor_cols < width)
            )
            neighbor_ids = np.full(num_valid_pixels, -1, dtype=np.int64)
            neighbor_ids[inside] = node_map[
                neighbor_rows[inside], neighbor_cols[inside]
            ]
            # Keeping only "lower ID -> higher ID" adds each edge once and
            # also drops the -1 entries (out of bounds or nodata).
            keep = neighbor_ids > node_ids
            source_parts.append(node_ids[keep])
            target_parts.append(neighbor_ids[keep])

        edge_sources = np.concatenate(source_parts)
        edge_targets = np.concatenate(target_parts)
        # A stable sort by source node keeps the offset order within each
        # node, i.e. the same edge order a per-pixel loop would produce.
        order = np.argsort(edge_sources, kind="stable")
        edge_sources = edge_sources[order]
        edge_targets = edge_targets[order]
        edge_weights = values[edge_targets]

        if graph_type == "networkx":
            nodes_to_add = []
            pixels = zip(valid_rows.tolist(), valid_cols.tolist(), values.tolist())
            for node_id, (row, col, value) in enumerate(pixels):
                node_attrs = {"value": value}
                if include_coordinates:
                    # Must run while the dataset is still open.
                    x, y = src.xy(row, col)
                    node_attrs["x"] = x
                    node_attrs["y"] = y
                nodes_to_add.append((node_id, node_attrs))

    if graph_type == "networkx":
        G = nx.Graph()
        G.add_nodes_from(nodes_to_add)
        G.add_weighted_edges_from(
            zip(edge_sources.tolist(), edge_targets.tolist(), edge_weights.tolist())
        )
        return G

    # Sparse matrix: mirror every edge so the adjacency matrix is symmetric.
    row_indices = np.concatenate([edge_sources, edge_targets])
    col_indices = np.concatenate([edge_targets, edge_sources])
    weights = np.concatenate([edge_weights, edge_weights])
    return sp.coo_matrix(
        (weights, (row_indices, col_indices)),
        shape=(num_valid_pixels, num_valid_pixels),
    ).tocsr()
|
531
668
|
|
532
669
|
def sample_by_coordinates(
|
533
670
|
self, coordinate_list: List[Tuple[float, float]], **kwargs
|
@@ -701,10 +838,10 @@ class TifProcessor:
|
|
701
838
|
Opens the raster dataset and stores it in a process-local variable.
|
702
839
|
This function runs once per worker, not for every task.
|
703
840
|
"""
|
704
|
-
global src_handle
|
705
|
-
with self.data_store.open(self.dataset_path, "rb") as f:
|
706
|
-
|
707
|
-
|
841
|
+
global src_handle, memfile_handle
|
842
|
+
with self.data_store.open(str(self.dataset_path), "rb") as f:
|
843
|
+
memfile_handle = rasterio.MemoryFile(f.read())
|
844
|
+
src_handle = memfile_handle.open()
|
708
845
|
|
709
846
|
def _process_single_polygon(self, polygon, stat_func):
|
710
847
|
"""
|
@@ -991,6 +1128,25 @@ class TifProcessor:
|
|
991
1128
|
|
992
1129
|
return self._cache["pixel_coords"]
|
993
1130
|
|
1131
|
+
def __enter__(self):
    """Enter the ``with`` block, handing back this same processor."""
    processor = self
    return processor
|
1133
|
+
|
1134
|
+
def __del__(self):
    """Remove the temporary directory on finalization, ignoring removal errors."""
    # ``_temp_dir`` is missing if construction failed before it was assigned.
    if not hasattr(self, "_temp_dir"):
        return
    if os.path.exists(self._temp_dir):
        shutil.rmtree(self._temp_dir, ignore_errors=True)
|
1138
|
+
|
1139
|
+
def cleanup(self):
    """Delete the temporary directory explicitly and log that it was removed.

    Unlike ``__del__``, removal errors are not suppressed here.
    """
    if not hasattr(self, "_temp_dir"):
        return
    if not os.path.exists(self._temp_dir):
        return
    shutil.rmtree(self._temp_dir)
    self.logger.info("Cleaned up temporary files")
|
1144
|
+
|
1145
|
+
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the ``with`` block: run ``cleanup`` and let any exception propagate."""
    self.cleanup()
    suppress_exception = False
    return suppress_exception
|
1149
|
+
|
994
1150
|
|
995
1151
|
def sample_multiple_tifs_by_coordinates(
|
996
1152
|
tif_processors: List[TifProcessor], coordinate_list: List[Tuple[float, float]]
|