satdatakit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- satdatakit-0.1.0/LICENSE +21 -0
- satdatakit-0.1.0/PKG-INFO +165 -0
- satdatakit-0.1.0/README.md +119 -0
- satdatakit-0.1.0/pyproject.toml +68 -0
- satdatakit-0.1.0/setup.cfg +4 -0
- satdatakit-0.1.0/src/satdatakit/__init__.py +14 -0
- satdatakit-0.1.0/src/satdatakit/core.py +233 -0
- satdatakit-0.1.0/src/satdatakit/extensions/__init__.py +26 -0
- satdatakit-0.1.0/src/satdatakit/extensions/dask_ext.py +157 -0
- satdatakit-0.1.0/src/satdatakit/extensions/stac_ext.py +74 -0
- satdatakit-0.1.0/src/satdatakit/extensions/zarr_ext.py +89 -0
- satdatakit-0.1.0/src/satdatakit/indices.py +100 -0
- satdatakit-0.1.0/src/satdatakit/io.py +87 -0
- satdatakit-0.1.0/src/satdatakit/pipeline.py +93 -0
- satdatakit-0.1.0/src/satdatakit/readers/__init__.py +18 -0
- satdatakit-0.1.0/src/satdatakit/readers/geotiff.py +82 -0
- satdatakit-0.1.0/src/satdatakit/readers/hdf.py +171 -0
- satdatakit-0.1.0/src/satdatakit/readers/netcdf.py +116 -0
- satdatakit-0.1.0/src/satdatakit/readers/safe.py +122 -0
- satdatakit-0.1.0/src/satdatakit.egg-info/PKG-INFO +165 -0
- satdatakit-0.1.0/src/satdatakit.egg-info/SOURCES.txt +26 -0
- satdatakit-0.1.0/src/satdatakit.egg-info/dependency_links.txt +1 -0
- satdatakit-0.1.0/src/satdatakit.egg-info/requires.txt +40 -0
- satdatakit-0.1.0/src/satdatakit.egg-info/top_level.txt +1 -0
- satdatakit-0.1.0/tests/test_core.py +43 -0
- satdatakit-0.1.0/tests/test_indices.py +34 -0
- satdatakit-0.1.0/tests/test_io.py +42 -0
- satdatakit-0.1.0/tests/test_pipeline.py +33 -0
satdatakit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Rafael Cañete Vazquez
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: satdatakit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Unified satellite data analysis toolkit
|
|
5
|
+
Author-email: Rafael Cañete Vazquez <rafael@satdatakit.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rafaelcanete/satdatakit
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: numpy>=1.23.0
|
|
12
|
+
Requires-Dist: xarray>=2023.1.0
|
|
13
|
+
Requires-Dist: rioxarray>=0.14.0
|
|
14
|
+
Requires-Dist: rasterio>=1.3.0
|
|
15
|
+
Requires-Dist: netCDF4>=1.6.0
|
|
16
|
+
Requires-Dist: h5py>=3.7.0
|
|
17
|
+
Requires-Dist: pandas>=1.5.0
|
|
18
|
+
Requires-Dist: pyproj>=3.4.0
|
|
19
|
+
Requires-Dist: shapely>=2.0.0
|
|
20
|
+
Requires-Dist: geopandas>=0.12.0
|
|
21
|
+
Requires-Dist: pillow>=9.0.0
|
|
22
|
+
Requires-Dist: python-dateutil>=2.8.0
|
|
23
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
24
|
+
Provides-Extra: dask
|
|
25
|
+
Requires-Dist: dask[complete]>=2024.1.0; extra == "dask"
|
|
26
|
+
Requires-Dist: distributed>=2024.1.0; extra == "dask"
|
|
27
|
+
Provides-Extra: stac
|
|
28
|
+
Requires-Dist: pystac>=1.9.0; extra == "stac"
|
|
29
|
+
Requires-Dist: pystac-client>=0.7.0; extra == "stac"
|
|
30
|
+
Requires-Dist: stackstac>=0.5.0; extra == "stac"
|
|
31
|
+
Provides-Extra: zarr
|
|
32
|
+
Requires-Dist: zarr>=2.16.0; extra == "zarr"
|
|
33
|
+
Requires-Dist: fsspec>=2024.1.0; extra == "zarr"
|
|
34
|
+
Provides-Extra: cloud
|
|
35
|
+
Requires-Dist: satdatakit[stac,zarr]; extra == "cloud"
|
|
36
|
+
Requires-Dist: s3fs>=2024.1.0; extra == "cloud"
|
|
37
|
+
Provides-Extra: full
|
|
38
|
+
Requires-Dist: satdatakit[cloud,dask]; extra == "full"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
44
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
<p align="center">
|
|
48
|
+
<img src="satdatakit_banner.png" alt="SatDataKit Banner" width="100%">
|
|
49
|
+
</p>
|
|
50
|
+
|
|
51
|
+
<h1 align="center">SatDataKit</h1>
|
|
52
|
+
|
|
53
|
+
<p align="center">
|
|
54
|
+
<strong>Unified satellite data analysis toolkit — one API for all Earth Observation formats.</strong>
|
|
55
|
+
</p>
|
|
56
|
+
|
|
57
|
+
<p align="center">
|
|
58
|
+
<a href="https://github.com/raicanvag/satdatakit/blob/main/LICENSE">
|
|
59
|
+
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License: MIT">
|
|
60
|
+
</a>
|
|
61
|
+
<img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
|
|
62
|
+
<img src="https://img.shields.io/badge/EO-GeoTIFF%20%7C%20NetCDF%20%7C%20HDF%20%7C%20SAFE-orange.svg" alt="Formats">
|
|
63
|
+
</p>
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## What is SatDataKit?
|
|
68
|
+
|
|
69
|
+
SatDataKit solves a real problem in the Earth Observation community: **every satellite data format has its own API**, forcing scientists to learn GDAL, NetCDF4, h5py, rasterio, and Sentinel-specific tools just to read a single image.
|
|
70
|
+
|
|
71
|
+
SatDataKit **unifies all of that into one clean API**:
|
|
72
|
+
|
|
73
|
+
| Format | Library needed without SatDataKit | With SatDataKit |
|
|
74
|
+
|---|---|---|
|
|
75
|
+
| GeoTIFF | `rasterio` + coordinate handling | `read("file.tif")` |
|
|
76
|
+
| NetCDF | `xarray` + `netCDF4` + CF conventions | `read("file.nc")` |
|
|
77
|
+
| HDF5 | `h5py` + dataset discovery logic | `read("file.h5")` |
|
|
78
|
+
| Sentinel SAFE | `zipfile` + XML parsing + JP2 reader | `read("file.SAFE")` |
|
|
79
|
+
|
|
80
|
+
**Built on the same stack NASA uses** (xarray, rioxarray, rasterio, netCDF4, h5py) but with a unified abstraction layer that eliminates boilerplate.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from satdatakit import read, compute_index, Pipeline
|
|
88
|
+
|
|
89
|
+
# Read any format
|
|
90
|
+
ds = read("sentinel2.tif") # GeoTIFF, NetCDF, HDF, SAFE
|
|
91
|
+
|
|
92
|
+
# Compute indices
|
|
93
|
+
ds = compute_index(ds, "NDVI")
|
|
94
|
+
|
|
95
|
+
# Pipeline
|
|
96
|
+
result = (
|
|
97
|
+
Pipeline()
|
|
98
|
+
.read("data.tif")
|
|
99
|
+
.reproject("EPSG:4326")
|
|
100
|
+
.resample(30)
|
|
101
|
+
.compute_index("NDVI")
|
|
102
|
+
.to_geotiff("output.tif")
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
## Installation
|
|
107
|
+
|
|
108
|
+
# Docker (recommended)
|
|
109
|
+
docker-compose up --build satdatakit
|
|
110
|
+
|
|
111
|
+
# Or Conda
|
|
112
|
+
conda env create -f environment.yml
|
|
113
|
+
conda activate satdatakit
|
|
114
|
+
pip install -e ".[dev]"
|
|
115
|
+
|
|
116
|
+
## Optional Extensions
|
|
117
|
+
|
|
118
|
+
SatDataKit core supports GeoTIFF, NetCDF, HDF5, and SAFE out of the box.
|
|
119
|
+
|
|
120
|
+
For large-scale processing, install optional extensions:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# Parallel processing with Dask
|
|
124
|
+
pip install satdatakit[dask]
|
|
125
|
+
|
|
126
|
+
# Cloud catalogs (STAC) + Zarr format
|
|
127
|
+
pip install satdatakit[cloud]
|
|
128
|
+
|
|
129
|
+
# Everything (production servers)
|
|
130
|
+
pip install satdatakit[full]
|
|
131
|
+
|
|
132
|
+
| Extension | Command | Use Case |
|
|
133
|
+
| --------- | ------------------------------- | ---------------------------------------- |
|
|
134
|
+
| **Dask** | `pip install satdatakit[dask]` | 10+ files, lazy chunks, parallel compute |
|
|
135
|
+
| **STAC** | `pip install satdatakit[stac]` | Search cloud catalogs (AWS, Copernicus) |
|
|
136
|
+
| **Zarr** | `pip install satdatakit[zarr]` | Cloud-native format, chunked storage |
|
|
137
|
+
| **Cloud** | `pip install satdatakit[cloud]` | STAC + Zarr + S3 access |
|
|
138
|
+
| **Full** | `pip install satdatakit[full]` | All extensions (servers, production) |
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
### Dask Example
|
|
142
|
+
|
|
143
|
+
from satdatakit.extensions.dask_ext import enable_dask, read_dask
|
|
144
|
+
|
|
145
|
+
enable_dask()
|
|
146
|
+
|
|
147
|
+
# Lazy load with chunks
|
|
148
|
+
ds = read_dask(["file1.tif", "file2.tif"], chunks={"x": 1024})
|
|
149
|
+
|
|
150
|
+
# Compute when ready
|
|
151
|
+
ds = ds.compute()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
## Features
|
|
155
|
+
|
|
156
|
+
Unified API: One read() for GeoTIFF, NetCDF, HDF, SAFE
|
|
157
|
+
Spectral Indices: NDVI, NDWI, EVI, SAVI, and more
|
|
158
|
+
Pipeline API: Fluent, chainable operations
|
|
159
|
+
Time Series: Stack multiple scenes automatically
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT License — see LICENSE for details.
|
|
165
|
+
Author: Rafael Cañete Vazquez
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="satdatakit_banner.png" alt="SatDataKit Banner" width="100%">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">SatDataKit</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>Unified satellite data analysis toolkit — one API for all Earth Observation formats.</strong>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://github.com/raicanvag/satdatakit/blob/main/LICENSE">
|
|
13
|
+
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License: MIT">
|
|
14
|
+
</a>
|
|
15
|
+
<img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
|
|
16
|
+
<img src="https://img.shields.io/badge/EO-GeoTIFF%20%7C%20NetCDF%20%7C%20HDF%20%7C%20SAFE-orange.svg" alt="Formats">
|
|
17
|
+
</p>
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## What is SatDataKit?
|
|
22
|
+
|
|
23
|
+
SatDataKit solves a real problem in the Earth Observation community: **every satellite data format has its own API**, forcing scientists to learn GDAL, NetCDF4, h5py, rasterio, and Sentinel-specific tools just to read a single image.
|
|
24
|
+
|
|
25
|
+
SatDataKit **unifies all of that into one clean API**:
|
|
26
|
+
|
|
27
|
+
| Format | Library needed without SatDataKit | With SatDataKit |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| GeoTIFF | `rasterio` + coordinate handling | `read("file.tif")` |
|
|
30
|
+
| NetCDF | `xarray` + `netCDF4` + CF conventions | `read("file.nc")` |
|
|
31
|
+
| HDF5 | `h5py` + dataset discovery logic | `read("file.h5")` |
|
|
32
|
+
| Sentinel SAFE | `zipfile` + XML parsing + JP2 reader | `read("file.SAFE")` |
|
|
33
|
+
|
|
34
|
+
**Built on the same stack NASA uses** (xarray, rioxarray, rasterio, netCDF4, h5py) but with a unified abstraction layer that eliminates boilerplate.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Quick Start
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from satdatakit import read, compute_index, Pipeline
|
|
42
|
+
|
|
43
|
+
# Read any format
|
|
44
|
+
ds = read("sentinel2.tif") # GeoTIFF, NetCDF, HDF, SAFE
|
|
45
|
+
|
|
46
|
+
# Compute indices
|
|
47
|
+
ds = compute_index(ds, "NDVI")
|
|
48
|
+
|
|
49
|
+
# Pipeline
|
|
50
|
+
result = (
|
|
51
|
+
Pipeline()
|
|
52
|
+
.read("data.tif")
|
|
53
|
+
.reproject("EPSG:4326")
|
|
54
|
+
.resample(30)
|
|
55
|
+
.compute_index("NDVI")
|
|
56
|
+
.to_geotiff("output.tif")
|
|
57
|
+
)
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
# Docker (recommended)
docker-compose up --build satdatakit

# Or Conda
conda env create -f environment.yml
conda activate satdatakit
pip install -e ".[dev]"
```
|
|
69
|
+
|
|
70
|
+
## Optional Extensions
|
|
71
|
+
|
|
72
|
+
SatDataKit core supports GeoTIFF, NetCDF, HDF5, and SAFE out of the box.
|
|
73
|
+
|
|
74
|
+
For large-scale processing, install optional extensions:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Parallel processing with Dask
|
|
78
|
+
pip install satdatakit[dask]
|
|
79
|
+
|
|
80
|
+
# Cloud catalogs (STAC) + Zarr format
|
|
81
|
+
pip install satdatakit[cloud]
|
|
82
|
+
|
|
83
|
+
# Everything (production servers)
|
|
84
|
+
pip install satdatakit[full]
```
|
|
85
|
+
|
|
86
|
+
| Extension | Command | Use Case |
|
|
87
|
+
| --------- | ------------------------------- | ---------------------------------------- |
|
|
88
|
+
| **Dask** | `pip install satdatakit[dask]` | 10+ files, lazy chunks, parallel compute |
|
|
89
|
+
| **STAC** | `pip install satdatakit[stac]` | Search cloud catalogs (AWS, Copernicus) |
|
|
90
|
+
| **Zarr** | `pip install satdatakit[zarr]` | Cloud-native format, chunked storage |
|
|
91
|
+
| **Cloud** | `pip install satdatakit[cloud]` | STAC + Zarr + S3 access |
|
|
92
|
+
| **Full** | `pip install satdatakit[full]` | All extensions (servers, production) |
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
### Dask Example
|
|
96
|
+
|
|
97
|
+
```python
from satdatakit.extensions.dask_ext import enable_dask, read_dask

enable_dask()

# Lazy load with chunks
ds = read_dask(["file1.tif", "file2.tif"], chunks={"x": 1024})

# Compute when ready
ds = ds.compute()
```
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
## Features
|
|
109
|
+
|
|
110
|
+
- **Unified API**: One `read()` for GeoTIFF, NetCDF, HDF, SAFE
- **Spectral Indices**: NDVI, NDWI, EVI, SAVI, and more
- **Pipeline API**: Fluent, chainable operations
- **Time Series**: Stack multiple scenes automatically
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## License
|
|
117
|
+
|
|
118
|
+
MIT License — see LICENSE for details.
|
|
119
|
+
Author: Rafael Cañete Vazquez
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "satdatakit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Unified satellite data analysis toolkit"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
authors = [{name = "Rafael Cañete Vazquez", email = "rafael@satdatakit.dev"}]
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"numpy>=1.23.0", "xarray>=2023.1.0", "rioxarray>=0.14.0",
|
|
15
|
+
"rasterio>=1.3.0", "netCDF4>=1.6.0", "h5py>=3.7.0",
|
|
16
|
+
"pandas>=1.5.0", "pyproj>=3.4.0", "shapely>=2.0.0",
|
|
17
|
+
"geopandas>=0.12.0", "pillow>=9.0.0",
|
|
18
|
+
"python-dateutil>=2.8.0", "typing-extensions>=4.0.0"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dask = [
|
|
23
|
+
"dask[complete]>=2024.1.0",
|
|
24
|
+
"distributed>=2024.1.0",
|
|
25
|
+
]
|
|
26
|
+
stac = [
|
|
27
|
+
"pystac>=1.9.0",
|
|
28
|
+
"pystac-client>=0.7.0",
|
|
29
|
+
"stackstac>=0.5.0",
|
|
30
|
+
]
|
|
31
|
+
zarr = [
|
|
32
|
+
"zarr>=2.16.0",
|
|
33
|
+
"fsspec>=2024.1.0",
|
|
34
|
+
]
|
|
35
|
+
cloud = [
|
|
36
|
+
"satdatakit[stac,zarr]",
|
|
37
|
+
"s3fs>=2024.1.0",
|
|
38
|
+
]
|
|
39
|
+
full = [
|
|
40
|
+
"satdatakit[dask,cloud]",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
dev = ["pytest>=7.0.0",
|
|
44
|
+
"pytest-cov>=4.0.0",
|
|
45
|
+
"black>=23.0.0",
|
|
46
|
+
"ruff>=0.1.0",
|
|
47
|
+
"mypy>=1.0.0"
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
[project.urls]
|
|
51
|
+
Homepage = "https://github.com/rafaelcanete/satdatakit"
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.packages.find]
|
|
54
|
+
where = ["src"]
|
|
55
|
+
|
|
56
|
+
[tool.black]
|
|
57
|
+
line-length = 100
|
|
58
|
+
|
|
59
|
+
[tool.ruff]
|
|
60
|
+
line-length = 100
|
|
61
|
+
|
|
62
|
+
[tool.mypy]
|
|
63
|
+
python_version = "3.9"
|
|
64
|
+
disallow_untyped_defs = true
|
|
65
|
+
|
|
66
|
+
[tool.pytest.ini_options]
|
|
67
|
+
testpaths = ["tests"]
|
|
68
|
+
python_files = "test_*.py"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""SatDataKit - Unified satellite data analysis toolkit.
|
|
2
|
+
|
|
3
|
+
Author: Rafael Cañete Vazquez
|
|
4
|
+
License: MIT
|
|
5
|
+
"""
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
__author__ = "Rafael Cañete Vazquez"
|
|
8
|
+
|
|
9
|
+
from satdatakit.core import SatelliteDataset
|
|
10
|
+
from satdatakit.io import read, read_collection
|
|
11
|
+
from satdatakit.indices import compute_index
|
|
12
|
+
from satdatakit.pipeline import Pipeline
|
|
13
|
+
|
|
14
|
+
__all__ = ["SatelliteDataset", "read", "read_collection", "compute_index", "Pipeline"]
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Core data model: SatelliteDataset.
|
|
2
|
+
|
|
3
|
+
Author: Rafael Cañete Vazquez
|
|
4
|
+
License: MIT
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import warnings
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import xarray as xr
|
|
17
|
+
from shapely.geometry import box
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class SatelliteDataset:
    """Universal container for Earth Observation data.

    Bundles an ``xarray.DataArray`` that carries ``band``, ``y`` and ``x``
    dimensions with the list of band names and scene-level metadata.
    Construction validates the dimensions and normalises the band names.
    Every transforming method (``get_bands``, ``add_band``, ``reproject``,
    ``clip``, ``mask`` ...) returns a new ``SatelliteDataset``; the instance
    it is called on is not modified.
    """

    # Pixel values; must expose "band", "y" and "x" dimensions (any order).
    data: xr.DataArray
    # One name per entry along the "band" dimension, in the same order.
    bands: List[str]
    # Coordinate reference system, e.g. "EPSG:4326"; None when unknown.
    crs: Optional[str] = None
    # Pixel size as (x, y); units are presumably those of ``crs`` - TODO confirm with readers.
    resolution: Optional[Tuple[float, float]] = None
    # Spatial extent; reproject/clip fill this from rioxarray's ``rio.bounds()``.
    bounds: Optional[Tuple[float, float, float, float]] = None
    # Acquisition time, or a list of times.
    datetime: Optional[Union[datetime, List[datetime]]] = None
    sensor: Optional[str] = None
    platform: Optional[str] = None
    # Rendered with a trailing "%" by ``info()``.
    cloud_cover: Optional[float] = None
    # Free-form extra metadata; shallow-copied into every derived dataset.
    metadata: Dict[str, Any] = field(default_factory=dict)
    source_format: Optional[str] = None
    source_path: Optional[Path] = None

    def __post_init__(self) -> None:
        """Validate the array layout, then normalise the band names."""
        self._validate_data()
        self._normalize_bands()

    def _validate_data(self) -> None:
        """Check required dimensions and reconcile the band-name count.

        Raises:
            ValueError: if ``data`` lacks a ``band``, ``y`` or ``x`` dimension.

        When the number of names differs from the size of the ``band``
        dimension a ``UserWarning`` is issued and the names are replaced by
        ``band_0``, ``band_1``, ...
        """
        dims = list(self.data.dims)
        if "band" not in dims:
            raise ValueError(f"DataArray must have 'band' dimension. Got: {dims}")
        if "y" not in dims or "x" not in dims:
            raise ValueError(f"DataArray must have 'y' and 'x' dimensions. Got: {dims}")
        n_bands = self.data.sizes["band"]
        if n_bands != len(self.bands):
            warnings.warn(f"Band count mismatch: {n_bands} vs {len(self.bands)}", UserWarning)
            self.bands = [f"band_{i}" for i in range(n_bands)]

    def _normalize_bands(self) -> None:
        """Coerce band names to ``str`` and make every name unique.

        A repeated name gets its position appended (``red`` -> ``red_2``).
        If that candidate is itself already taken, the numeric suffix is
        increased until a free name is found, so the result never contains
        duplicates.
        """
        names = [str(b) for b in self.bands]
        seen = set()
        for i, name in enumerate(names):
            candidate = name
            suffix = i
            while candidate in seen:
                candidate = f"{name}_{suffix}"
                suffix += 1
            names[i] = candidate
            seen.add(candidate)
        self.bands = names

    def _derive(self, **overrides: Any) -> "SatelliteDataset":
        """Build a new dataset from this one's fields, replacing those in *overrides*.

        The band list and the metadata dict are copied so the new object does
        not share those containers with ``self``.
        """
        params: Dict[str, Any] = {
            "data": self.data,
            "bands": list(self.bands),
            "crs": self.crs,
            "resolution": self.resolution,
            "bounds": self.bounds,
            "datetime": self.datetime,
            "sensor": self.sensor,
            "platform": self.platform,
            "cloud_cover": self.cloud_cover,
            "metadata": self.metadata.copy(),
            "source_format": self.source_format,
            "source_path": self.source_path,
        }
        params.update(overrides)
        return SatelliteDataset(**params)

    @property
    def shape(self) -> Tuple[int, ...]:
        """Size of every dimension, in the order of ``data.dims``."""
        return tuple(self.data.sizes[d] for d in self.data.dims)

    @property
    def n_bands(self) -> int:
        """Size of the ``band`` dimension."""
        return self.data.sizes["band"]

    @property
    def width(self) -> int:
        """Size of the ``x`` dimension."""
        return self.data.sizes["x"]

    @property
    def height(self) -> int:
        """Size of the ``y`` dimension."""
        return self.data.sizes["y"]

    @property
    def dtype(self) -> np.dtype:
        """Return data type."""
        return self.data.dtype

    def __getitem__(self, key: Union[str, int]) -> xr.DataArray:
        """Return a single band selected by name or by position.

        Raises:
            KeyError: if *key* is a string that is not a known band name.
            TypeError: if *key* is neither a string nor an integer.
        """
        if isinstance(key, str):
            if key not in self.bands:
                raise KeyError(f"Band '{key}' not found. Available: {self.bands}")
            idx = self.bands.index(key)
        elif isinstance(key, (int, np.integer)):
            # NumPy integers are accepted as well as plain ints.
            idx = key
        else:
            raise TypeError(f"Key must be str or int, got {type(key)}")
        return self.data.isel(band=idx)

    def get_bands(self, names: List[str]) -> "SatelliteDataset":
        """Return a dataset holding only the requested bands, in the requested order.

        Names that are not present are skipped rather than raising.
        """
        indices = [self.bands.index(n) for n in names if n in self.bands]
        return self._derive(
            data=self.data.isel(band=indices),
            bands=[self.bands[i] for i in indices],
        )

    def to_numpy(self) -> np.ndarray:
        """Return the pixel values as a NumPy array."""
        return self.data.values

    def to_xarray(self) -> xr.DataArray:
        """Return the underlying ``DataArray`` (not a copy)."""
        return self.data

    def to_dataset(self) -> xr.Dataset:
        """Return an ``xarray.Dataset`` with one variable per band.

        The CRS, when set, is stored in ``attrs["crs"]``.
        """
        # errors="ignore": a "band" dimension without a coordinate leaves nothing
        # to drop after isel(), and the default errors="raise" would fail there.
        variables = {
            name: self.data.isel(band=i).drop_vars("band", errors="ignore")
            for i, name in enumerate(self.bands)
        }
        ds = xr.Dataset(variables)
        if self.crs:
            ds.attrs["crs"] = self.crs
        return ds

    def add_band(self, name: str, data: Union[np.ndarray, xr.DataArray]) -> "SatelliteDataset":
        """Return a dataset with *data* appended as a new band called *name*.

        A NumPy array is interpreted as having dimensions ``(y, x)``.

        Raises:
            ValueError: if a band called *name* already exists.
        """
        if name in self.bands:
            raise ValueError(f"Band '{name}' already exists.")
        if isinstance(data, np.ndarray):
            data = xr.DataArray(data, dims=["y", "x"])
        data = data.expand_dims(band=[name])
        return self._derive(
            data=xr.concat([self.data, data], dim="band"),
            bands=self.bands + [name],
        )

    def remove_band(self, name: str) -> "SatelliteDataset":
        """Return a dataset without the band called *name*.

        Raises:
            KeyError: if no band has that name.
        """
        if name not in self.bands:
            raise KeyError(f"Band '{name}' not found.")
        idx = self.bands.index(name)
        return self._derive(
            data=self.data.drop_isel(band=idx),
            bands=[b for b in self.bands if b != name],
        )

    def rename_bands(self, mapping: Dict[str, str]) -> "SatelliteDataset":
        """Return a copy whose bands are renamed according to *mapping*.

        Bands missing from *mapping* keep their name. The ``band`` coordinate
        of the copied array is set to the new names.
        """
        new_bands = [mapping.get(b, b) for b in self.bands]
        new_data = self.data.copy().assign_coords(band=new_bands)
        return self._derive(data=new_data, bands=new_bands)

    def reproject(self, dst_crs: Union[str, int], **kwargs: Any) -> "SatelliteDataset":
        """Return the dataset reprojected to *dst_crs*.

        Extra keyword arguments are forwarded to ``DataArray.rio.reproject``.
        Bounds and resolution of the result are read back from the
        reprojected array.

        Raises:
            ValueError: if this dataset has no CRS.
        """
        import rioxarray  # noqa: F401  (imported for the ``.rio`` accessor)

        if self.crs is None:
            raise ValueError("Source CRS is not set. Cannot reproject.")
        # Attach the CRS to a local reference instead of rebinding self.data,
        # so calling this method does not modify the source dataset.
        source = self.data
        if source.rio.crs is None:
            source = source.rio.write_crs(self.crs)
        reprojected = source.rio.reproject(dst_crs, **kwargs)
        # A bare integer is an EPSG code; str(4326) alone is not a CRS string.
        crs_label = f"EPSG:{dst_crs}" if isinstance(dst_crs, int) else str(dst_crs)
        # NOTE(review): stored as positive pixel sizes; rio.resolution() usually
        # reports a negative y step - confirm this matches what the readers store.
        x_res, y_res = reprojected.rio.resolution()
        return self._derive(
            data=reprojected,
            crs=crs_label,
            resolution=(abs(x_res), abs(y_res)),
            bounds=reprojected.rio.bounds(),
        )

    def resample(
        self, resolution: Union[float, Tuple[float, float]], **kwargs: Any
    ) -> "SatelliteDataset":
        """Return the dataset resampled to *resolution* in its current CRS.

        Implemented as a reprojection onto the same CRS.

        Raises:
            ValueError: if this dataset has no CRS.
        """
        if self.crs is None:
            raise ValueError("CRS must be set to resample.")
        return self.reproject(dst_crs=self.crs, resolution=resolution, **kwargs)

    def clip(
        self, geometry: Any, crs: Any = None, drop: bool = True, **kwargs: Any
    ) -> "SatelliteDataset":
        """Return the dataset clipped to *geometry*.

        *geometry*, *crs*, *drop* and any extra keyword arguments are passed
        to ``DataArray.rio.clip``. ``all_touched`` defaults to ``True`` but
        can be overridden by the caller.
        """
        import rioxarray  # noqa: F401  (imported for the ``.rio`` accessor)

        source = self.data
        if source.rio.crs is None and self.crs is not None:
            source = source.rio.write_crs(self.crs)
        # setdefault avoids a duplicate-keyword TypeError when the caller
        # passes all_touched explicitly.
        kwargs.setdefault("all_touched", True)
        clipped = source.rio.clip([geometry], crs=crs, drop=drop, **kwargs)
        return self._derive(data=clipped, bounds=clipped.rio.bounds())

    def mask(self, mask_array: np.ndarray, fill_value: float = np.nan) -> "SatelliteDataset":
        """Return the dataset with pixels replaced by *fill_value* where the mask is false.

        Args:
            mask_array: array of shape ``(height, width)``; truthy entries are kept.
            fill_value: value written where the mask is falsy.

        Raises:
            ValueError: if the mask shape differs from ``(height, width)``.
        """
        spatial_shape = (self.data.sizes["y"], self.data.sizes["x"])
        if mask_array.shape != spatial_shape:
            raise ValueError(
                f"Mask shape {mask_array.shape} does not match data spatial shape {spatial_shape}"
            )
        # Label the mask's axes so it is applied along y/x whatever the
        # dimension order of the data is.
        keep = xr.DataArray(mask_array, dims=("y", "x"))
        return self._derive(data=self.data.where(keep, fill_value))

    def to_geotiff(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Write the data to *path* via ``rio.to_raster``, creating parent directories."""
        import rioxarray  # noqa: F401  (imported for the ``.rio`` accessor)

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        data = self.data
        if data.rio.crs is None and self.crs is not None:
            data = data.rio.write_crs(self.crs)
        data.rio.to_raster(path, **kwargs)

    def to_netcdf(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Write ``to_dataset()`` to *path* as NetCDF, creating parent directories."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        self.to_dataset().to_netcdf(path, **kwargs)

    def __repr__(self) -> str:
        return f"SatelliteDataset(shape={self.shape}, bands={self.bands}, crs={self.crs!r})"

    def info(self) -> str:
        """Return a multi-line, human-readable summary of the dataset."""
        # Only append the percent sign when a cloud-cover value is known.
        cloud_cover = "None" if self.cloud_cover is None else f"{self.cloud_cover}%"
        rule = "=" * 50
        lines = [
            rule,
            "SatelliteDataset Information",
            rule,
            f"Shape: {self.shape}",
            f"Bands: {self.n_bands} ({self.bands})",
            f"Width: {self.width} px",
            f"Height: {self.height} px",
            f"CRS: {self.crs}",
            f"Resolution: {self.resolution}",
            f"Bounds: {self.bounds}",
            f"Sensor: {self.sensor}",
            f"Platform: {self.platform}",
            f"Datetime: {self.datetime}",
            f"Cloud cover: {cloud_cover}",
            f"Dtype: {self.dtype}",
            rule,
        ]
        return "\n".join(lines)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""SatDataKit extensions — optional add-ons for scalability.
|
|
2
|
+
|
|
3
|
+
Extensions load on demand and do not modify core code.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def list_extensions() -> list[str]:
    """Return the names of the available extensions.

    A new list is returned on every call, so callers may modify it freely.
    """
    return ["dask", "stac", "zarr"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def enable(extension: str) -> None:
    """Activate an extension by name.

    The name is matched case-insensitively and surrounding whitespace is
    ignored. The extension module is imported only when it is requested.

    Raises:
        ValueError: if *extension* is not one of ``list_extensions()``.
    """
    # Non-string input is left as-is so it falls through to the ValueError below.
    key = extension.strip().lower() if isinstance(extension, str) else extension
    if key == "dask":
        from .dask_ext import enable_dask

        enable_dask()
    elif key == "stac":
        from .stac_ext import enable_stac

        enable_stac()
    elif key == "zarr":
        from .zarr_ext import enable_zarr

        enable_zarr()
    else:
        raise ValueError(f"Unknown extension: {extension}. Available: {list_extensions()}")
|