geomind-ai 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ """
2
+ Geocoding tools for converting place names to coordinates.
3
+
4
+ Uses OpenStreetMap's Nominatim service via geopy.
5
+ """
6
+
7
+ from typing import Optional
8
+ from geopy.geocoders import Nominatim
9
+ from geopy.extra.rate_limiter import RateLimiter
10
+
11
+ from ..config import GEOCODER_USER_AGENT, DEFAULT_BUFFER_KM
12
+
13
+
14
def _rate_limited_geocoder():
    """Return a shared, rate-limited Nominatim ``geocode`` callable.

    The limiter is built once and cached on this function object so that
    the one-second minimum delay applies across successive lookups rather
    than being reset by a brand-new limiter on every call.
    """
    limiter = getattr(_rate_limited_geocoder, "_limiter", None)
    if limiter is None:
        geolocator = Nominatim(user_agent=GEOCODER_USER_AGENT, timeout=10)
        limiter = RateLimiter(geolocator.geocode, min_delay_seconds=1)
        _rate_limited_geocoder._limiter = limiter
    return limiter


def geocode_location(place_name: str) -> dict:
    """
    Convert a place name to geographic coordinates.

    Args:
        place_name: Name of the location (e.g., "New York", "Paris, France")

    Returns:
        Dictionary with ``success``, ``latitude``, ``longitude`` and
        ``address``. On failure ``success`` is False, an ``error`` message
        is included and the three location fields are None.

    Example:
        >>> geocode_location("Central Park, New York")
        {'success': True, 'latitude': 40.7828, 'longitude': -73.9653, 'address': '...'}
    """

    def _failure(message: str) -> dict:
        return {
            "success": False,
            "error": message,
            "latitude": None,
            "longitude": None,
            "address": None,
        }

    # Reject blank input locally instead of spending a rate-limited request.
    if not isinstance(place_name, str) or not place_name.strip():
        return _failure("Place name must be a non-empty string")

    try:
        location = _rate_limited_geocoder()(place_name)
    except Exception as exc:
        # Report lookup failures through the same result-dict convention
        # the other tools in this package use, rather than raising.
        return _failure(f"Geocoding failed for {place_name}: {exc}")

    if location is None:
        return _failure(f"Could not find location: {place_name}")

    return {
        "success": True,
        "latitude": location.latitude,
        "longitude": location.longitude,
        "address": location.address,
    }
48
+
49
+
50
def get_bbox_from_location(place_name: str, buffer_km: Optional[float] = None) -> dict:
    """
    Convert a place name to a bounding box suitable for STAC queries.

    Creates an approximately square bounding box centered on the location
    with the specified buffer distance. The box is clamped to valid
    longitude/latitude ranges, so near the poles or the antimeridian it is
    truncated at the boundary instead of extending past it.

    Args:
        place_name: Name of the location (e.g., "San Francisco")
        buffer_km: Buffer distance in kilometers; when None the configured
            DEFAULT_BUFFER_KM is used. Must be non-negative.

    Returns:
        Dictionary with bbox [min_lon, min_lat, max_lon, max_lat], the
        center point, the resolved address and the buffer used. On failure
        ``success`` is False, ``error`` describes the problem and ``bbox``
        is None.

    Example:
        >>> get_bbox_from_location("London", buffer_km=5)
        {'success': True, 'bbox': [-0.17, 51.46, -0.08, 51.55], 'center': {...}, ...}
    """
    import math

    if buffer_km is None:
        buffer_km = DEFAULT_BUFFER_KM

    # `not >= 0` also rejects NaN; a negative buffer would invert the box.
    if not buffer_km >= 0:
        return {
            "success": False,
            "error": f"buffer_km must be non-negative, got {buffer_km}",
            "bbox": None,
        }

    # Get coordinates
    location_result = geocode_location(place_name)

    if not location_result["success"]:
        return {
            "success": False,
            "error": location_result["error"],
            "bbox": None,
        }

    lat = location_result["latitude"]
    lon = location_result["longitude"]

    # Approximate degree offsets:
    #   1 degree latitude  ~= 111 km
    #   1 degree longitude ~= 111 * cos(latitude) km
    km_per_degree = 111.0
    lat_offset = buffer_km / km_per_degree

    lon_km_per_degree = km_per_degree * max(math.cos(math.radians(lat)), 0.0)
    if lon_km_per_degree * 180.0 <= buffer_km:
        # Close to a pole a degree of longitude shrinks toward zero length;
        # the buffer then spans every longitude, so cap the offset instead
        # of dividing by a vanishing number.
        lon_offset = 180.0
    else:
        lon_offset = buffer_km / lon_km_per_degree

    bbox = [
        max(lon - lon_offset, -180.0),  # min_lon (west)
        max(lat - lat_offset, -90.0),  # min_lat (south)
        min(lon + lon_offset, 180.0),  # max_lon (east)
        min(lat + lat_offset, 90.0),  # max_lat (north)
    ]

    return {
        "success": True,
        "bbox": bbox,
        "center": {
            "latitude": lat,
            "longitude": lon,
        },
        "address": location_result["address"],
        "buffer_km": buffer_km,
    }
@@ -0,0 +1,349 @@
1
+ """
2
+ Image processing tools for Sentinel-2 data.
3
+
4
+ Handles loading Zarr data, applying corrections, and creating visualizations.
5
+ """
6
+
7
+ from typing import Optional, List
8
+ from pathlib import Path
9
+ import numpy as np
10
+
11
+ from ..config import (
12
+ REFLECTANCE_SCALE,
13
+ REFLECTANCE_OFFSET,
14
+ OUTPUT_DIR,
15
+ )
16
+
17
+
18
def _apply_scale_offset(
    data: np.ndarray,
    scale: float = REFLECTANCE_SCALE,
    offset: float = REFLECTANCE_OFFSET,
    nodata: int = 0,
) -> np.ndarray:
    """
    Convert raw digital numbers (DN) to surface reflectance.

    Computes ``reflectance = DN * scale + offset`` in float32 and marks
    every pixel whose raw value equals ``nodata`` as NaN.

    Args:
        data: Raw digital number values
        scale: Multiplicative factor (defaults to REFLECTANCE_SCALE)
        offset: Additive offset (defaults to REFLECTANCE_OFFSET)
        nodata: Raw value that marks missing pixels (default: 0)

    Returns:
        New array of reflectance values with NaN at nodata pixels
    """
    # Identify nodata on the raw values, before the transform moves them.
    nodata_pixels = np.equal(data, nodata)

    reflectance = offset + scale * data.astype(np.float32)
    reflectance[nodata_pixels] = np.nan
    return reflectance
48
+
49
+
50
+ def _normalize_for_display(
51
+ data: np.ndarray,
52
+ percentile_low: float = 2,
53
+ percentile_high: float = 98,
54
+ ) -> np.ndarray:
55
+ """
56
+ Normalize data to 0-1 range for display using percentile stretch.
57
+
58
+ Args:
59
+ data: Input array
60
+ percentile_low: Lower percentile for clipping
61
+ percentile_high: Upper percentile for clipping
62
+
63
+ Returns:
64
+ Normalized array in 0-1 range
65
+ """
66
+ # Get valid (non-NaN) values
67
+ valid = data[~np.isnan(data)]
68
+
69
+ if len(valid) == 0:
70
+ return np.zeros_like(data)
71
+
72
+ # Calculate percentiles
73
+ low = np.percentile(valid, percentile_low)
74
+ high = np.percentile(valid, percentile_high)
75
+
76
+ # Normalize
77
+ if high > low:
78
+ result = (data - low) / (high - low)
79
+ else:
80
+ result = np.zeros_like(data)
81
+
82
+ # Clip to 0-1
83
+ result = np.clip(result, 0, 1)
84
+
85
+ # Set NaN to 0 for display
86
+ result = np.nan_to_num(result, nan=0)
87
+
88
+ return result
89
+
90
+
91
def create_rgb_composite(
    zarr_url: str,
    output_path: Optional[str] = None,
    subset_size: Optional[int] = 1000,
) -> dict:
    """
    Create an RGB composite image from Sentinel-2 10m bands.

    Uses B04 (Red), B03 (Green), B02 (Blue) bands.

    Args:
        zarr_url: URL to the SR_10m Zarr asset
        output_path: Optional path to save the image; when omitted a file
            name is generated under OUTPUT_DIR
        subset_size: Maximum side length, in pixels, of the centered window
            that is read (for faster processing). A falsy value reads the
            full image.

    Returns:
        Dictionary with ``success``, the saved ``output_path``, the
        ``image_size`` (rows, cols) and ``bands_used``; on failure,
        ``success`` is False and ``error`` holds the exception message.
    """
    try:
        import matplotlib.pyplot as plt
        import zarr

        # Open the Zarr store
        # The SR_10m asset contains b02, b03, b04, b08
        store = zarr.open(zarr_url, mode="r")

        # Note: Band names are lowercase in the Zarr structure
        height, width = store["b04"].shape

        # Work out the centered window per axis. Each axis is limited
        # independently so a band that is large in only one dimension is
        # still cropped correctly and never gets a negative start index.
        if subset_size and subset_size > 0:
            crop_h = min(subset_size, height)
            crop_w = min(subset_size, width)
            top = (height - crop_h) // 2
            left = (width - crop_w) // 2
            window = (slice(top, top + crop_h), slice(left, left + crop_w))
        else:
            window = (slice(None), slice(None))

        # Slice the Zarr arrays directly so only the requested window is
        # read, then convert to reflectance and stretch for display.
        channels = []
        for band in ("b04", "b03", "b02"):
            raw = np.asarray(store[band][window])
            channels.append(_normalize_for_display(_apply_scale_offset(raw)))

        # Stack into RGB
        rgb = np.dstack(channels)

        # Generate output path
        if output_path is None:
            output_path = (
                Path(OUTPUT_DIR) / f"rgb_composite_{np.random.randint(10000)}.png"
            )
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Create figure; always close it, even when rendering/saving fails.
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            ax.imshow(rgb)
            ax.set_title("Sentinel-2 RGB Composite (B4/B3/B2)")
            ax.axis("off")
            fig.savefig(output_path, dpi=150, bbox_inches="tight", pad_inches=0.1)
        finally:
            plt.close(fig)

        return {
            "success": True,
            "output_path": str(output_path),
            "image_size": rgb.shape[:2],
            "bands_used": ["B04 (Red)", "B03 (Green)", "B02 (Blue)"],
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
178
+
179
+
180
def calculate_ndvi(
    zarr_url: str,
    output_path: Optional[str] = None,
    subset_size: Optional[int] = 1000,
) -> dict:
    """
    Calculate NDVI (Normalized Difference Vegetation Index) from Sentinel-2 data.

    NDVI = (NIR - Red) / (NIR + Red)
    Uses B08 (NIR) and B04 (Red) bands.

    Args:
        zarr_url: URL to the SR_10m Zarr asset
        output_path: Optional path to save the NDVI image; when omitted a
            file name is generated under OUTPUT_DIR
        subset_size: Maximum side length, in pixels, of the centered window
            that is read. A falsy value reads the full image.

    Returns:
        Dictionary with ``success``, the saved ``output_path``, NDVI
        ``statistics`` (min/max/mean/std, or None when no pixel is valid)
        and a textual ``interpretation`` of the mean; on failure,
        ``success`` is False and ``error`` holds the exception message.
    """
    try:
        import zarr
        import matplotlib.pyplot as plt
        from matplotlib.colors import LinearSegmentedColormap

        # Open the Zarr store
        store = zarr.open(zarr_url, mode="r")

        nir_src = store["b08"]  # NIR
        red_src = store["b04"]  # Red
        height, width = nir_src.shape

        # Centered window, limited independently per axis so a band that is
        # large in only one dimension never gets a negative start index.
        if subset_size and subset_size > 0:
            crop_h = min(subset_size, height)
            crop_w = min(subset_size, width)
            top = (height - crop_h) // 2
            left = (width - crop_w) // 2
            window = (slice(top, top + crop_h), slice(left, left + crop_w))
        else:
            window = (slice(None), slice(None))

        # Slice the Zarr arrays directly so only the window is read.
        nir = _apply_scale_offset(np.asarray(nir_src[window]))
        red = _apply_scale_offset(np.asarray(red_src[window]))

        # Calculate NDVI, turning zero denominators into NaN so they do not
        # produce infinities.
        denominator = nir + red
        denominator[denominator == 0] = np.nan

        ndvi = (nir - red) / denominator

        # NDVI statistics over valid pixels only
        valid_ndvi = ndvi[~np.isnan(ndvi)]
        if valid_ndvi.size > 0:
            stats = {
                "min": float(np.min(valid_ndvi)),
                "max": float(np.max(valid_ndvi)),
                "mean": float(np.mean(valid_ndvi)),
                "std": float(np.std(valid_ndvi)),
            }
        else:
            stats = {"min": None, "max": None, "mean": None, "std": None}

        # Generate output path
        if output_path is None:
            output_path = Path(OUTPUT_DIR) / f"ndvi_{np.random.randint(10000)}.png"
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Create NDVI colormap (brown -> yellow -> green)
        colors = ["#8B4513", "#D2691E", "#FFD700", "#ADFF2F", "#228B22", "#006400"]
        ndvi_cmap = LinearSegmentedColormap.from_list("ndvi", colors)

        # Create figure; always close it, even when rendering/saving fails.
        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            im = ax.imshow(ndvi, cmap=ndvi_cmap, vmin=-1, vmax=1)
            ax.set_title("NDVI - Normalized Difference Vegetation Index")
            ax.axis("off")

            # Add colorbar
            cbar = fig.colorbar(im, ax=ax, shrink=0.8)
            cbar.set_label("NDVI")

            fig.savefig(output_path, dpi=150, bbox_inches="tight", pad_inches=0.1)
        finally:
            plt.close(fig)

        # Compare against None explicitly: a mean of exactly 0.0 is a valid
        # value and must still be interpreted.
        mean_ndvi = stats["mean"]
        interpretation = _interpret_ndvi(mean_ndvi) if mean_ndvi is not None else None

        return {
            "success": True,
            "output_path": str(output_path),
            "statistics": stats,
            "interpretation": interpretation,
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
275
+
276
+
277
+ def _interpret_ndvi(mean_ndvi: float) -> str:
278
+ """Provide interpretation of mean NDVI value."""
279
+ if mean_ndvi < 0:
280
+ return "Water or bare surfaces dominant"
281
+ elif mean_ndvi < 0.1:
282
+ return "Bare soil or built-up areas"
283
+ elif mean_ndvi < 0.2:
284
+ return "Sparse vegetation or stressed plants"
285
+ elif mean_ndvi < 0.4:
286
+ return "Moderate vegetation"
287
+ elif mean_ndvi < 0.6:
288
+ return "Dense vegetation"
289
+ else:
290
+ return "Very dense/healthy vegetation"
291
+
292
+
293
def get_band_statistics(
    zarr_url: str,
    bands: Optional[List[str]] = None,
) -> dict:
    """
    Summarize reflectance values for bands of a Sentinel-2 Zarr asset.

    Args:
        zarr_url: URL to the Zarr asset (e.g., SR_10m)
        bands: Band names to summarize; when None, every key in the store
            that starts with "b" is used

    Returns:
        Dictionary with ``success`` and ``band_statistics``, mapping each
        band to its min/max/mean/std and shape, or to an ``error`` entry
        when the band is missing or has no valid pixels. If anything
        raises, ``success`` is False and ``error`` holds the message.
    """
    try:
        import zarr

        group = zarr.open(zarr_url, mode="r")

        requested = bands
        if requested is None:
            requested = [name for name in group.keys() if name.startswith("b")]

        per_band = {}
        for name in requested:
            if name not in group:
                per_band[name] = {"error": "Band not found"}
                continue

            # Convert to reflectance; nodata pixels come back as NaN.
            reflectance = _apply_scale_offset(np.array(group[name]))
            samples = reflectance[~np.isnan(reflectance)]

            if len(samples) == 0:
                per_band[name] = {"error": "No valid data"}
                continue

            per_band[name] = {
                "min": float(np.min(samples)),
                "max": float(np.max(samples)),
                "mean": float(np.mean(samples)),
                "std": float(np.std(samples)),
                "shape": reflectance.shape,
            }

        return {"success": True, "band_statistics": per_band}

    except Exception as exc:
        return {"success": False, "error": str(exc)}
@@ -0,0 +1,231 @@
1
+ """
2
+ STAC API search tools for querying Sentinel-2 imagery.
3
+
4
+ Uses the EOPF STAC API at https://stac.core.eopf.eodc.eu
5
+ """
6
+
7
+ from typing import Optional, List
8
+ from datetime import datetime, timedelta
9
+ from pystac_client import Client
10
+
11
+ from ..config import (
12
+ STAC_API_URL,
13
+ STAC_COLLECTION,
14
+ DEFAULT_MAX_CLOUD_COVER,
15
+ DEFAULT_MAX_ITEMS,
16
+ )
17
+
18
+
19
def _get_stac_client() -> Client:
    """Open and return a STAC client for the configured STAC_API_URL."""
    client = Client.open(STAC_API_URL)
    return client
22
+
23
+
24
def _format_item(item) -> dict:
    """Reduce a STAC item to a plain dictionary of its key fields.

    Only a fixed set of asset keys is kept; every other asset on the item
    is left out of the result.
    """
    properties = item.properties
    exposed_assets = ("SR_10m", "SR_20m", "SR_60m", "TCI_10m", "product")

    assets = {}
    for name, asset in item.assets.items():
        if name not in exposed_assets:
            continue
        assets[name] = {
            "title": asset.title,
            "href": asset.href,
            "type": asset.media_type,
        }

    summary = {
        "id": item.id,
        "datetime": properties.get("datetime"),
        "cloud_cover": properties.get("eo:cloud_cover"),
        "platform": properties.get("platform"),
        "bbox": item.bbox,
        "geometry": item.geometry,
        "assets": assets,
        "stac_url": f"{STAC_API_URL}/collections/{STAC_COLLECTION}/items/{item.id}",
    }
    return summary
46
+
47
+
48
def search_imagery(
    bbox: Optional[List[float]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    max_cloud_cover: Optional[float] = None,
    max_items: Optional[int] = None,
) -> dict:
    """
    Search for Sentinel-2 L2A imagery in the EOPF STAC catalog.

    Args:
        bbox: Bounding box [min_lon, min_lat, max_lon, max_lat]
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format
        max_cloud_cover: Maximum cloud cover percentage (0-100); defaults
            to DEFAULT_MAX_CLOUD_COVER
        max_items: Maximum number of items to request from the API (applied
            before the cloud-cover filter); defaults to DEFAULT_MAX_ITEMS

    Returns:
        Dictionary with ``success``, ``total_found`` (items returned by the
        API), ``filtered_count`` (items passing the cloud-cover filter),
        the formatted ``items`` sorted newest first, and the
        ``search_params`` used. On failure ``success`` is False, ``error``
        holds the exception message and ``items`` is empty.

    Example:
        >>> search_imagery(
        ...     bbox=[-74.0, 40.7, -73.9, 40.8],
        ...     start_date="2024-12-01",
        ...     end_date="2024-12-20",
        ...     max_cloud_cover=20
        ... )
    """
    if max_cloud_cover is None:
        max_cloud_cover = DEFAULT_MAX_CLOUD_COVER
    if max_items is None:
        max_items = DEFAULT_MAX_ITEMS

    # Build datetime string; an open end is filled with a fixed early start
    # date or with today's date.
    datetime_str = None
    if start_date or end_date:
        start = start_date or "2015-01-01"
        end = end_date or datetime.now().strftime("%Y-%m-%d")
        datetime_str = f"{start}/{end}"

    def _cloud_cover(item):
        # A property that is absent *or* explicitly null counts as fully
        # cloudy, so a null value cannot raise in the comparison below.
        value = item.properties.get("eo:cloud_cover")
        return 100 if value is None else value

    try:
        client = _get_stac_client()

        # Build search parameters
        search_params = {
            "collections": [STAC_COLLECTION],
            "max_items": max_items,
        }

        if bbox:
            search_params["bbox"] = bbox

        if datetime_str:
            search_params["datetime"] = datetime_str

        # Execute search
        search = client.search(**search_params)
        items = list(search.items())

        # Filter by cloud cover (post-filter since API may not support query param)
        filtered_items = [
            item for item in items if _cloud_cover(item) <= max_cloud_cover
        ]

        # Sort by date (newest first); a null datetime sorts as the empty
        # string instead of breaking the comparison.
        filtered_items.sort(
            key=lambda x: x.properties.get("datetime") or "", reverse=True
        )

        # Format results
        formatted_items = [_format_item(item) for item in filtered_items]

        return {
            "success": True,
            "total_found": len(items),
            "filtered_count": len(filtered_items),
            "items": formatted_items,
            "search_params": {
                "bbox": bbox,
                "datetime": datetime_str,
                "max_cloud_cover": max_cloud_cover,
            },
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "items": [],
        }
140
+
141
+
142
def get_item_details(item_id: str) -> dict:
    """
    Get detailed information about a specific STAC item.

    Args:
        item_id: The STAC item ID (e.g., "S2B_MSIL2A_20251218T110359_...")

    Returns:
        Dictionary with ``success`` and the item's JSON document under
        ``item``; on failure ``success`` is False and ``error`` describes
        the problem.
    """
    # A blank ID would otherwise resolve to the collection's item listing
    # rather than a single item.
    if not isinstance(item_id, str) or not item_id.strip():
        return {
            "success": False,
            "error": "item_id must be a non-empty string",
        }

    try:
        import requests
        from urllib.parse import quote

        # Percent-encode the ID so characters such as "/" or "?" cannot
        # change which URL is requested.
        safe_id = quote(item_id, safe="")
        item_url = f"{STAC_API_URL}/collections/{STAC_COLLECTION}/items/{safe_id}"

        # Without a timeout a stalled server would block this call forever.
        response = requests.get(item_url, timeout=30)
        response.raise_for_status()
        item_data = response.json()

        return {
            "success": True,
            "item": item_data,
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
172
+
173
+
174
def list_recent_imagery(
    location_name: Optional[str] = None,
    days: int = 7,
    max_cloud_cover: Optional[float] = None,
    max_items: Optional[int] = None,
) -> dict:
    """
    List recent Sentinel-2 imagery, optionally for a specific location.

    This is a convenience function that combines geocoding and search.

    Args:
        location_name: Optional place name to search around
        days: Number of days to look back (default: 7); must be
            non-negative
        max_cloud_cover: Maximum cloud cover percentage
        max_items: Maximum items to return

    Returns:
        The ``search_imagery`` result, with a ``location`` entry (name,
        center, address) added when a location was given. On invalid input
        or geocoding failure ``success`` is False, ``error`` describes the
        problem and ``items`` is empty.
    """
    # A negative look-back would produce a range whose start is after its end.
    if days < 0:
        return {
            "success": False,
            "error": f"days must be non-negative, got {days}",
            "items": [],
        }

    # Calculate date range
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # Get bbox if location provided
    bbox = None
    location_info = None

    if location_name:
        # Imported only when needed so location-free listings do not
        # require the geocoding module.
        from .geocoding import get_bbox_from_location

        bbox_result = get_bbox_from_location(location_name)
        if not bbox_result["success"]:
            # Include an empty "items" list so failures have the same shape
            # as search_imagery failures.
            return {
                "success": False,
                "error": f"Could not geocode location: {location_name}",
                "items": [],
            }

        bbox = bbox_result["bbox"]
        location_info = {
            "name": location_name,
            "center": bbox_result["center"],
            "address": bbox_result["address"],
        }

    # Search for imagery
    result = search_imagery(
        bbox=bbox,
        start_date=start_date.strftime("%Y-%m-%d"),
        end_date=end_date.strftime("%Y-%m-%d"),
        max_cloud_cover=max_cloud_cover,
        max_items=max_items,
    )

    if location_info:
        result["location"] = location_info

    return result