satdatakit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Rafael Cañete Vazquez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: satdatakit
3
+ Version: 0.1.0
4
+ Summary: Unified satellite data analysis toolkit
5
+ Author-email: Rafael Cañete Vazquez <rafael@satdatakit.dev>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/rafaelcanete/satdatakit
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: numpy>=1.23.0
12
+ Requires-Dist: xarray>=2023.1.0
13
+ Requires-Dist: rioxarray>=0.14.0
14
+ Requires-Dist: rasterio>=1.3.0
15
+ Requires-Dist: netCDF4>=1.6.0
16
+ Requires-Dist: h5py>=3.7.0
17
+ Requires-Dist: pandas>=1.5.0
18
+ Requires-Dist: pyproj>=3.4.0
19
+ Requires-Dist: shapely>=2.0.0
20
+ Requires-Dist: geopandas>=0.12.0
21
+ Requires-Dist: pillow>=9.0.0
22
+ Requires-Dist: python-dateutil>=2.8.0
23
+ Requires-Dist: typing-extensions>=4.0.0
24
+ Provides-Extra: dask
25
+ Requires-Dist: dask[complete]>=2024.1.0; extra == "dask"
26
+ Requires-Dist: distributed>=2024.1.0; extra == "dask"
27
+ Provides-Extra: stac
28
+ Requires-Dist: pystac>=1.9.0; extra == "stac"
29
+ Requires-Dist: pystac-client>=0.7.0; extra == "stac"
30
+ Requires-Dist: stackstac>=0.5.0; extra == "stac"
31
+ Provides-Extra: zarr
32
+ Requires-Dist: zarr>=2.16.0; extra == "zarr"
33
+ Requires-Dist: fsspec>=2024.1.0; extra == "zarr"
34
+ Provides-Extra: cloud
35
+ Requires-Dist: satdatakit[stac,zarr]; extra == "cloud"
36
+ Requires-Dist: s3fs>=2024.1.0; extra == "cloud"
37
+ Provides-Extra: full
38
+ Requires-Dist: satdatakit[cloud,dask]; extra == "full"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
42
+ Requires-Dist: black>=23.0.0; extra == "dev"
43
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
44
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
45
+ Dynamic: license-file
46
+
47
+ <p align="center">
48
+ <img src="satdatakit_banner.png" alt="SatDataKit Banner" width="100%">
49
+ </p>
50
+
51
+ <h1 align="center">SatDataKit</h1>
52
+
53
+ <p align="center">
54
+ <strong>Unified satellite data analysis toolkit — one API for all Earth Observation formats.</strong>
55
+ </p>
56
+
57
+ <p align="center">
58
+ <a href="https://github.com/raicanvag/satdatakit/blob/main/LICENSE">
59
+ <img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License: MIT">
60
+ </a>
61
+ <img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
62
+ <img src="https://img.shields.io/badge/EO-GeoTIFF%20%7C%20NetCDF%20%7C%20HDF%20%7C%20SAFE-orange.svg" alt="Formats">
63
+ </p>
64
+
65
+ ---
66
+
67
+ ## What is SatDataKit?
68
+
69
+ SatDataKit solves a real problem in the Earth Observation community: **every satellite data format has its own API**, forcing scientists to learn GDAL, NetCDF4, h5py, rasterio, and Sentinel-specific tools just to read a single image.
70
+
71
+ SatDataKit **unifies all of that into one clean API**:
72
+
73
+ | Format | Library needed without SatDataKit | With SatDataKit |
74
+ |---|---|---|
75
+ | GeoTIFF | `rasterio` + coordinate handling | `read("file.tif")` |
76
+ | NetCDF | `xarray` + `netCDF4` + CF conventions | `read("file.nc")` |
77
+ | HDF5 | `h5py` + dataset discovery logic | `read("file.h5")` |
78
+ | Sentinel SAFE | `zipfile` + XML parsing + JP2 reader | `read("file.SAFE")` |
79
+
80
+ **Built on the same stack NASA uses** (xarray, rioxarray, rasterio, netCDF4, h5py) but with a unified abstraction layer that eliminates boilerplate.
81
+
82
+ ---
83
+
84
+ ## Quick Start
85
+
86
+ ```python
87
+ from satdatakit import read, compute_index, Pipeline
88
+
89
+ # Read any format
90
+ ds = read("sentinel2.tif") # GeoTIFF, NetCDF, HDF, SAFE
91
+
92
+ # Compute indices
93
+ ds = compute_index(ds, "NDVI")
94
+
95
+ # Pipeline
96
+ result = (
97
+ Pipeline()
98
+ .read("data.tif")
99
+ .reproject("EPSG:4326")
100
+ .resample(30)
101
+ .compute_index("NDVI")
102
+ .to_geotiff("output.tif")
103
+ )
+ ```
104
+
105
+ ---
106
+ ## Installation
107
+
108
+ ```bash
+ # Docker (recommended)
109
+ docker-compose up --build satdatakit
110
+
111
+ # Or Conda
112
+ conda env create -f environment.yml
113
+ conda activate satdatakit
114
+ pip install -e ".[dev]"
+ ```
115
+
116
+ ## Optional Extensions
117
+
118
+ SatDataKit core supports GeoTIFF, NetCDF, HDF5, and SAFE out of the box.
119
+
120
+ For large-scale processing, install optional extensions:
121
+
122
+ ```bash
123
+ # Parallel processing with Dask
124
+ pip install satdatakit[dask]
125
+
126
+ # Cloud catalogs (STAC) + Zarr format
127
+ pip install satdatakit[cloud]
128
+
129
+ # Everything (production servers)
130
+ pip install satdatakit[full]
+ ```
131
+
132
+ | Extension | Command | Use Case |
133
+ | --------- | ------------------------------- | ---------------------------------------- |
134
+ | **Dask** | `pip install satdatakit[dask]` | 10+ files, lazy chunks, parallel compute |
135
+ | **STAC** | `pip install satdatakit[stac]` | Search cloud catalogs (AWS, Copernicus) |
136
+ | **Zarr** | `pip install satdatakit[zarr]` | Cloud-native format, chunked storage |
137
+ | **Cloud** | `pip install satdatakit[cloud]` | STAC + Zarr + S3 access |
138
+ | **Full** | `pip install satdatakit[full]` | All extensions (servers, production) |
139
+
140
+
141
+ ### Dask Example
142
+
143
+ ```python
+ from satdatakit.extensions.dask_ext import enable_dask, read_dask
144
+
145
+ enable_dask()
146
+
147
+ # Lazy load with chunks
148
+ ds = read_dask(["file1.tif", "file2.tif"], chunks={"x": 1024})
149
+
150
+ # Compute when ready
151
+ ds = ds.compute()
+ ```
152
+
153
+
154
+ ## Features
155
+
156
+ - **Unified API**: One `read()` for GeoTIFF, NetCDF, HDF, SAFE
157
+ - **Spectral Indices**: NDVI, NDWI, EVI, SAVI, and more
158
+ - **Pipeline API**: Fluent, chainable operations
159
+ - **Time Series**: Stack multiple scenes automatically
160
+
161
+
162
+ ## License
163
+
164
+ MIT License — see LICENSE for details.
165
+ Author: Rafael Cañete Vazquez
@@ -0,0 +1,119 @@
1
+ <p align="center">
2
+ <img src="satdatakit_banner.png" alt="SatDataKit Banner" width="100%">
3
+ </p>
4
+
5
+ <h1 align="center">SatDataKit</h1>
6
+
7
+ <p align="center">
8
+ <strong>Unified satellite data analysis toolkit — one API for all Earth Observation formats.</strong>
9
+ </p>
10
+
11
+ <p align="center">
12
+ <a href="https://github.com/raicanvag/satdatakit/blob/main/LICENSE">
13
+ <img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License: MIT">
14
+ </a>
15
+ <img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
16
+ <img src="https://img.shields.io/badge/EO-GeoTIFF%20%7C%20NetCDF%20%7C%20HDF%20%7C%20SAFE-orange.svg" alt="Formats">
17
+ </p>
18
+
19
+ ---
20
+
21
+ ## What is SatDataKit?
22
+
23
+ SatDataKit solves a real problem in the Earth Observation community: **every satellite data format has its own API**, forcing scientists to learn GDAL, NetCDF4, h5py, rasterio, and Sentinel-specific tools just to read a single image.
24
+
25
+ SatDataKit **unifies all of that into one clean API**:
26
+
27
+ | Format | Library needed without SatDataKit | With SatDataKit |
28
+ |---|---|---|
29
+ | GeoTIFF | `rasterio` + coordinate handling | `read("file.tif")` |
30
+ | NetCDF | `xarray` + `netCDF4` + CF conventions | `read("file.nc")` |
31
+ | HDF5 | `h5py` + dataset discovery logic | `read("file.h5")` |
32
+ | Sentinel SAFE | `zipfile` + XML parsing + JP2 reader | `read("file.SAFE")` |
33
+
34
+ **Built on the same stack NASA uses** (xarray, rioxarray, rasterio, netCDF4, h5py) but with a unified abstraction layer that eliminates boilerplate.
35
+
36
+ ---
37
+
38
+ ## Quick Start
39
+
40
+ ```python
41
+ from satdatakit import read, compute_index, Pipeline
42
+
43
+ # Read any format
44
+ ds = read("sentinel2.tif") # GeoTIFF, NetCDF, HDF, SAFE
45
+
46
+ # Compute indices
47
+ ds = compute_index(ds, "NDVI")
48
+
49
+ # Pipeline
50
+ result = (
51
+ Pipeline()
52
+ .read("data.tif")
53
+ .reproject("EPSG:4326")
54
+ .resample(30)
55
+ .compute_index("NDVI")
56
+ .to_geotiff("output.tif")
57
+ )
+ ```
58
+
59
+ ---
60
+ ## Installation
61
+
62
+ ```bash
+ # Docker (recommended)
63
+ docker-compose up --build satdatakit
64
+
65
+ # Or Conda
66
+ conda env create -f environment.yml
67
+ conda activate satdatakit
68
+ pip install -e ".[dev]"
+ ```
69
+
70
+ ## Optional Extensions
71
+
72
+ SatDataKit core supports GeoTIFF, NetCDF, HDF5, and SAFE out of the box.
73
+
74
+ For large-scale processing, install optional extensions:
75
+
76
+ ```bash
77
+ # Parallel processing with Dask
78
+ pip install satdatakit[dask]
79
+
80
+ # Cloud catalogs (STAC) + Zarr format
81
+ pip install satdatakit[cloud]
82
+
83
+ # Everything (production servers)
84
+ pip install satdatakit[full]
+ ```
85
+
86
+ | Extension | Command | Use Case |
87
+ | --------- | ------------------------------- | ---------------------------------------- |
88
+ | **Dask** | `pip install satdatakit[dask]` | 10+ files, lazy chunks, parallel compute |
89
+ | **STAC** | `pip install satdatakit[stac]` | Search cloud catalogs (AWS, Copernicus) |
90
+ | **Zarr** | `pip install satdatakit[zarr]` | Cloud-native format, chunked storage |
91
+ | **Cloud** | `pip install satdatakit[cloud]` | STAC + Zarr + S3 access |
92
+ | **Full** | `pip install satdatakit[full]` | All extensions (servers, production) |
93
+
94
+
95
+ ### Dask Example
96
+
97
+ ```python
+ from satdatakit.extensions.dask_ext import enable_dask, read_dask
98
+
99
+ enable_dask()
100
+
101
+ # Lazy load with chunks
102
+ ds = read_dask(["file1.tif", "file2.tif"], chunks={"x": 1024})
103
+
104
+ # Compute when ready
105
+ ds = ds.compute()
+ ```
106
+
107
+
108
+ ## Features
109
+
110
+ - **Unified API**: One `read()` for GeoTIFF, NetCDF, HDF, SAFE
111
+ - **Spectral Indices**: NDVI, NDWI, EVI, SAVI, and more
112
+ - **Pipeline API**: Fluent, chainable operations
113
+ - **Time Series**: Stack multiple scenes automatically
114
+
115
+
116
+ ## License
117
+
118
+ MIT License — see LICENSE for details.
119
+ Author: Rafael Cañete Vazquez
@@ -0,0 +1,68 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "satdatakit"
7
+ version = "0.1.0"
8
+ description = "Unified satellite data analysis toolkit"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ authors = [{name = "Rafael Cañete Vazquez", email = "rafael@satdatakit.dev"}]
12
+ requires-python = ">=3.9"
13
+ dependencies = [
14
+ "numpy>=1.23.0", "xarray>=2023.1.0", "rioxarray>=0.14.0",
15
+ "rasterio>=1.3.0", "netCDF4>=1.6.0", "h5py>=3.7.0",
16
+ "pandas>=1.5.0", "pyproj>=3.4.0", "shapely>=2.0.0",
17
+ "geopandas>=0.12.0", "pillow>=9.0.0",
18
+ "python-dateutil>=2.8.0", "typing-extensions>=4.0.0"
19
+ ]
20
+
21
+ [project.optional-dependencies]
22
+ dask = [
23
+ "dask[complete]>=2024.1.0",
24
+ "distributed>=2024.1.0",
25
+ ]
26
+ stac = [
27
+ "pystac>=1.9.0",
28
+ "pystac-client>=0.7.0",
29
+ "stackstac>=0.5.0",
30
+ ]
31
+ zarr = [
32
+ "zarr>=2.16.0",
33
+ "fsspec>=2024.1.0",
34
+ ]
35
+ cloud = [
36
+ "satdatakit[stac,zarr]",
37
+ "s3fs>=2024.1.0",
38
+ ]
39
+ full = [
40
+ "satdatakit[dask,cloud]",
41
+ ]
42
+
43
+ dev = ["pytest>=7.0.0",
44
+ "pytest-cov>=4.0.0",
45
+ "black>=23.0.0",
46
+ "ruff>=0.1.0",
47
+ "mypy>=1.0.0"
48
+ ]
49
+
50
+ [project.urls]
51
+ Homepage = "https://github.com/rafaelcanete/satdatakit"
52
+
53
+ [tool.setuptools.packages.find]
54
+ where = ["src"]
55
+
56
+ [tool.black]
57
+ line-length = 100
58
+
59
+ [tool.ruff]
60
+ line-length = 100
61
+
62
+ [tool.mypy]
63
+ python_version = "3.9"
64
+ disallow_untyped_defs = true
65
+
66
+ [tool.pytest.ini_options]
67
+ testpaths = ["tests"]
68
+ python_files = "test_*.py"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ """SatDataKit - Unified satellite data analysis toolkit.
2
+
3
+ Author: Rafael Cañete Vazquez
4
+ License: MIT
5
+ """
6
+ __version__ = "0.1.0"
7
+ __author__ = "Rafael Cañete Vazquez"
8
+
9
+ from satdatakit.core import SatelliteDataset
10
+ from satdatakit.io import read, read_collection
11
+ from satdatakit.indices import compute_index
12
+ from satdatakit.pipeline import Pipeline
13
+
14
+ __all__ = ["SatelliteDataset", "read", "read_collection", "compute_index", "Pipeline"]
@@ -0,0 +1,233 @@
1
+ """Core data model: SatelliteDataset.
2
+
3
+ Author: Rafael Cañete Vazquez
4
+ License: MIT
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import warnings
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Tuple, Union
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ import xarray as xr
17
+ from shapely.geometry import box
18
+
19
+
20
@dataclass
class SatelliteDataset:
    """Universal container for Earth Observation data.

    Wraps an ``xarray.DataArray`` that must expose ``band``, ``y`` and ``x``
    dimensions, together with one name per band and descriptive metadata.
    Every transforming method returns a new ``SatelliteDataset`` and leaves
    the instance it was called on untouched.
    """

    # Pixel values; the presence of "band", "y" and "x" dims is checked on init.
    data: xr.DataArray
    # One name per entry along the "band" dimension (regenerated on a count mismatch).
    bands: List[str]
    # CRS string handed to rioxarray when the array itself carries no CRS.
    crs: Optional[str] = None
    resolution: Optional[Tuple[float, float]] = None
    # Refreshed from ``rio.bounds()`` by reproject() and clip().
    bounds: Optional[Tuple[float, float, float, float]] = None
    # NOTE: this field shadows the imported ``datetime`` class inside the class
    # body; the annotation stays valid only because annotations are lazy here.
    datetime: Optional[Union[datetime, List[datetime]]] = None
    sensor: Optional[str] = None
    platform: Optional[str] = None
    cloud_cover: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    source_format: Optional[str] = None
    source_path: Optional[Path] = None

    def __post_init__(self) -> None:
        """Validate the array layout, then normalise the band names."""
        self._validate_data()
        self._normalize_bands()

    def _validate_data(self) -> None:
        """Check required dimensions and reconcile the band-name count.

        Raises:
            ValueError: If the ``band``, ``y`` or ``x`` dimension is missing.
        """
        dims = list(self.data.dims)
        if "band" not in dims:
            raise ValueError(f"DataArray must have 'band' dimension. Got: {dims}")
        if "y" not in dims or "x" not in dims:
            raise ValueError(f"DataArray must have 'y' and 'x' dimensions. Got: {dims}")
        n_bands = self.data.sizes["band"]
        if n_bands != len(self.bands):
            # A mismatch is not fatal: warn and fall back to generated names.
            warnings.warn(f"Band count mismatch: {n_bands} vs {len(self.bands)}", UserWarning)
            self.bands = [f"band_{i}" for i in range(n_bands)]

    def _normalize_bands(self) -> None:
        """Coerce band names to ``str`` and disambiguate duplicates.

        A name that was already seen gets its position appended
        (``"red", "red"`` becomes ``"red", "red_1"``).
        """
        self.bands = [str(b) for b in self.bands]
        seen = set()
        for i, name in enumerate(self.bands):
            if name in seen:
                self.bands[i] = f"{name}_{i}"
            seen.add(self.bands[i])

    def _derive(self, data: xr.DataArray, **changes: Any) -> "SatelliteDataset":
        """Build a new dataset around *data*, inheriting every field not in *changes*."""
        inherited: Dict[str, Any] = {
            "bands": list(self.bands),
            "crs": self.crs,
            "resolution": self.resolution,
            "bounds": self.bounds,
            "datetime": self.datetime,
            "sensor": self.sensor,
            "platform": self.platform,
            "cloud_cover": self.cloud_cover,
            # Shallow copy so the derived dataset's metadata can be edited independently.
            "metadata": self.metadata.copy(),
            "source_format": self.source_format,
            "source_path": self.source_path,
        }
        inherited.update(changes)
        return SatelliteDataset(data=data, **inherited)

    @property
    def shape(self) -> Tuple[int, ...]:
        """Return the size of every dimension, in the array's dimension order."""
        return tuple(self.data.sizes[d] for d in self.data.dims)

    @property
    def n_bands(self) -> int:
        """Return the length of the ``band`` dimension."""
        return self.data.sizes["band"]

    @property
    def width(self) -> int:
        """Return the length of the ``x`` dimension."""
        return self.data.sizes["x"]

    @property
    def height(self) -> int:
        """Return the length of the ``y`` dimension."""
        return self.data.sizes["y"]

    @property
    def dtype(self) -> np.dtype:
        """Return data type."""
        return self.data.dtype

    def __getitem__(self, key: Union[str, int]) -> xr.DataArray:
        """Return a single band selected by name or by integer position.

        Raises:
            KeyError: If *key* is a string that is not a known band name.
            TypeError: If *key* is neither ``str`` nor ``int``.
        """
        if isinstance(key, str):
            if key not in self.bands:
                raise KeyError(f"Band '{key}' not found. Available: {self.bands}")
            idx = self.bands.index(key)
        elif isinstance(key, int):
            idx = key
        else:
            raise TypeError(f"Key must be str or int, got {type(key)}")
        return self.data.isel(band=idx)

    def get_bands(self, names: List[str]) -> "SatelliteDataset":
        """Return a dataset holding only the requested bands, in the order given.

        Names that are not present are skipped without an error.
        """
        indices = [self.bands.index(n) for n in names if n in self.bands]
        return self._derive(
            self.data.isel(band=indices), bands=[self.bands[i] for i in indices]
        )

    def to_numpy(self) -> np.ndarray:
        """Return the underlying values as a NumPy array."""
        return self.data.values

    def to_xarray(self) -> xr.DataArray:
        """Return the wrapped ``DataArray`` itself (not a copy)."""
        return self.data

    def to_dataset(self) -> xr.Dataset:
        """Return an ``xarray.Dataset`` with one variable per band.

        The CRS, when set, is stored in the ``crs`` attribute of the dataset.
        """
        # errors="ignore": an array built without a "band" coordinate has no
        # scalar coordinate left to drop after isel(), and that is not an error.
        variables = {
            b: self.data.isel(band=i).drop_vars("band", errors="ignore")
            for i, b in enumerate(self.bands)
        }
        ds = xr.Dataset(variables)
        if self.crs:
            ds.attrs["crs"] = self.crs
        return ds

    def add_band(self, name: str, data: Union[np.ndarray, xr.DataArray]) -> "SatelliteDataset":
        """Return a dataset with *data* appended as a new band called *name*.

        A NumPy array is interpreted as having ``("y", "x")`` dimensions.

        Raises:
            ValueError: If a band called *name* already exists.
        """
        if name in self.bands:
            raise ValueError(f"Band '{name}' already exists.")
        if isinstance(data, np.ndarray):
            data = xr.DataArray(data, dims=["y", "x"])
        # Only add the band axis when the input does not already carry one.
        if "band" not in data.dims:
            data = data.expand_dims(band=[name])
        return self._derive(
            xr.concat([self.data, data], dim="band"), bands=self.bands + [name]
        )

    def remove_band(self, name: str) -> "SatelliteDataset":
        """Return a dataset without the band called *name*.

        Raises:
            KeyError: If no band called *name* exists.
        """
        if name not in self.bands:
            raise KeyError(f"Band '{name}' not found.")
        idx = self.bands.index(name)
        return self._derive(
            self.data.drop_isel(band=idx), bands=[b for b in self.bands if b != name]
        )

    def rename_bands(self, mapping: Dict[str, str]) -> "SatelliteDataset":
        """Return a dataset whose bands are renamed per *mapping* (old -> new).

        Bands absent from *mapping* keep their name; the ``band`` coordinate
        of the copied array is rewritten to match.
        """
        new_bands = [mapping.get(b, b) for b in self.bands]
        new_data = self.data.copy().assign_coords(band=new_bands)
        return self._derive(new_data, bands=new_bands)

    def reproject(self, dst_crs: Union[str, int], **kwargs: Any) -> "SatelliteDataset":
        """Return a dataset reprojected to *dst_crs* via ``rio.reproject``.

        Args:
            dst_crs: Target CRS; an ``int`` is treated as an EPSG code.
            **kwargs: Forwarded unchanged to ``DataArray.rio.reproject``.

        Raises:
            ValueError: If this dataset has no CRS set.
        """
        import rioxarray  # noqa: F401  -- imported only to register the ``.rio`` accessor

        if self.crs is None:
            raise ValueError("Source CRS is not set. Cannot reproject.")
        # Work on a local so that tagging the CRS does not modify this instance.
        data = self.data
        if data.rio.crs is None:
            data = data.rio.write_crs(self.crs)
        reprojected = data.rio.reproject(dst_crs, **kwargs)
        # Keep the stored CRS a usable identifier: str(4326) == "4326" would be
        # handed back to rioxarray by a later reproject()/resample() call.
        new_crs = f"EPSG:{dst_crs}" if isinstance(dst_crs, int) else str(dst_crs)
        # Reprojecting changes the pixel size, so the old value must not be kept.
        # NOTE(review): stored as positive magnitudes -- confirm this matches the
        # convention used by the readers that populate ``resolution``.
        x_res, y_res = reprojected.rio.resolution()
        return self._derive(
            reprojected,
            crs=new_crs,
            resolution=(abs(x_res), abs(y_res)),
            bounds=reprojected.rio.bounds(),
        )

    def resample(
        self, resolution: Union[float, Tuple[float, float]], **kwargs: Any
    ) -> "SatelliteDataset":
        """Return a dataset resampled to *resolution* in its current CRS.

        Implemented as a reprojection onto the same CRS with a new resolution.

        Raises:
            ValueError: If this dataset has no CRS set.
        """
        if self.crs is None:
            raise ValueError("CRS must be set to resample.")
        return self.reproject(dst_crs=self.crs, resolution=resolution, **kwargs)

    def clip(
        self, geometry: Any, crs: Any = None, drop: bool = True, **kwargs: Any
    ) -> "SatelliteDataset":
        """Return a dataset clipped to *geometry* via ``rio.clip``.

        Args:
            geometry: A single geometry; it is wrapped in a list for ``rio.clip``.
            crs: CRS of *geometry*, forwarded to ``rio.clip``.
            drop: Forwarded to ``rio.clip``.
            **kwargs: Forwarded to ``rio.clip``. ``all_touched`` defaults to
                ``True`` but may be overridden here.
        """
        import rioxarray  # noqa: F401  -- imported only to register the ``.rio`` accessor

        # Work on a local so that tagging the CRS does not modify this instance.
        data = self.data
        if data.rio.crs is None and self.crs is not None:
            data = data.rio.write_crs(self.crs)
        # setdefault keeps the historical default without clashing with a
        # caller-supplied all_touched (which used to raise TypeError).
        kwargs.setdefault("all_touched", True)
        clipped = data.rio.clip([geometry], crs=crs, drop=drop, **kwargs)
        return self._derive(clipped, bounds=clipped.rio.bounds())

    def mask(self, mask_array: np.ndarray, fill_value: float = np.nan) -> "SatelliteDataset":
        """Return a dataset where cells with a falsy mask are set to *fill_value*.

        *mask_array* is compared against the last two axes of the data, so the
        array is assumed to be laid out with ``y`` and ``x`` as trailing dims.

        Raises:
            ValueError: If the mask shape differs from the trailing two axes.
        """
        if mask_array.shape != self.data.shape[-2:]:
            raise ValueError(
                f"Mask shape {mask_array.shape} does not match data spatial shape "
                f"{self.data.shape[-2:]}"
            )
        return self._derive(self.data.where(mask_array, fill_value))

    def to_geotiff(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Write the data to *path* with ``rio.to_raster``, creating parent folders.

        Extra keyword arguments are forwarded to ``rio.to_raster``.
        """
        import rioxarray  # noqa: F401  -- imported only to register the ``.rio`` accessor

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        data = self.data
        if data.rio.crs is None and self.crs is not None:
            data = data.rio.write_crs(self.crs)
        data.rio.to_raster(path, **kwargs)

    def to_netcdf(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Write the per-band dataset to *path* as NetCDF, creating parent folders."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        self.to_dataset().to_netcdf(path, **kwargs)

    def __repr__(self) -> str:
        return f"SatelliteDataset(shape={self.shape}, bands={self.bands}, crs={self.crs!r})"

    def info(self) -> str:
        """Return a multi-line, human-readable summary of the dataset."""
        lines = [
            "=" * 50,
            "SatelliteDataset Information",
            "=" * 50,
            f"Shape: {self.shape}",
            f"Bands: {self.n_bands} ({self.bands})",
            f"Width: {self.width} px",
            f"Height: {self.height} px",
            f"CRS: {self.crs}",
            f"Resolution: {self.resolution}",
            f"Bounds: {self.bounds}",
            f"Sensor: {self.sensor}",
            f"Platform: {self.platform}",
            f"Datetime: {self.datetime}",
            f"Cloud cover: {self.cloud_cover}%",
            f"Dtype: {self.dtype}",
            "=" * 50,
        ]
        return "\n".join(lines)
@@ -0,0 +1,26 @@
1
+ """SatDataKit extensions — optional add-ons for scalability.
2
+
3
+ Extensions load on demand and do not modify core code.
4
+ """
5
+
6
+ __version__ = "0.1.0"
7
+
8
+
9
def list_extensions() -> list[str]:
    """Return the names accepted by :func:`enable`.

    A fresh list is built on every call, so callers may mutate the result.
    """
    return ["dask", "stac", "zarr"]
12
+
13
+
14
def enable(extension: str) -> None:
    """Activate an extension by name.

    The extension's module is imported only when it is requested, so its
    optional dependencies are not needed until then.

    Args:
        extension: One of ``"dask"``, ``"stac"`` or ``"zarr"``.

    Raises:
        ValueError: If *extension* is not a known extension name.
    """
    if extension == "dask":
        from .dask_ext import enable_dask as activate
    elif extension == "stac":
        from .stac_ext import enable_stac as activate
    elif extension == "zarr":
        from .zarr_ext import enable_zarr as activate
    else:
        raise ValueError(f"Unknown extension: {extension}. Available: {list_extensions()}")
    activate()