sentle 2024.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Clemens Mosig
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.1
2
+ Name: sentle
3
+ Version: 2024.5.2
4
+ Summary: Sentinel-2 scalable downloader.
5
+ Home-page: UNKNOWN
6
+ Author: Clemens Mosig
7
+ Author-email: clemens.mosig@uni-leipzig.de
8
+ License: UNKNOWN
9
+ Platform: UNKNOWN
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Education
12
+ Classifier: Programming Language :: Python :: 2
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Operating System :: MacOS :: MacOS X
15
+ Classifier: Operating System :: Microsoft :: Windows
16
+ License-File: LICENSE.md
17
+
18
+ Download Sentinel-2 data cubes of any scale (larger-than-memory) on any machine with integrated cloud detection, snow masking, harmonization, merging, and temporal composites.
19
+
@@ -0,0 +1,125 @@
1
+ <p align="center">
2
+ <a href="https://github.com/cmosig/sentle/"><img src="https://github.com/cmosig/sentle/raw/main/docs/logo.png" alt="sentle"></a>
3
+ </p>
4
+
5
+ <p align="center">
6
+ <a href="https://opensource.org/licenses/MIT" target="_blank">
7
+ <img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
8
+ </a>
9
+ <a href="https://peps.python.org/pep-0008/" target="_blank">
10
+ <img src="https://img.shields.io/badge/code_style-pep8-blue" alt="Code style: pep8">
11
+ </a>
12
+ </p>
13
+ <p align="center">
14
+ <em>Download Sentinel-1 & Sentinel-2 data cubes of huge-scale (larger-than-memory) on any machine with integrated cloud
15
+ detection, snow masking, harmonization, merging, and temporal composites.</em>
16
+ </p>
17
+
18
+ ---
19
+
20
+ ## Important Note
21
+
22
+ 1) The model for cloud detection will be made available within the next couple of weeks.
23
+ 2) **This package is in early alpha stage. There will be bugs!** If you encounter any error, warning, memory issue, etc. please open a GitHub issue with the code to reproduce.
24
+
25
+ ## Installing
26
+
27
+ ```
28
+ pip install sentle
29
+ ```
30
+ or
31
+ ```
32
+ git clone git@github.com:cmosig/sentle.git
33
+ cd sentle
34
+ pip install -e .
35
+ ```
36
+
37
+ ## Guide
38
+
39
+ **(1) Setup**
40
+
41
+ There is only one important function: `process`. Here, you specify all parameters and the function returns a lazy [dask](https://www.dask.org/) array with the shape `(#timesteps, #bands, #pixelsy, #pixelsx)`.
42
+
43
+ ```
44
+ from sentle import sentle
45
+ from rasterio.crs import CRS
46
+
47
+ da = sentle.process(
48
+ target_crs=CRS.from_string("EPSG:32633"),
49
+ bound_left=176000,
50
+ bound_bottom=5660000,
51
+ bound_right=216000,
52
+ bound_top=5700000,
53
+ datetime="2022-06-17/2023-06-17",
54
+ target_resolution=10,
55
+ S2_mask_snow=True,
56
+ S2_cloud_classification=True,
57
+ S2_cloud_classification_device="cuda",
58
+ S1_assets=["vv", "vh"],
59
+ S2_apply_snow_mask=True,
60
+ S2_apply_cloud_mask=True,
61
+ time_composite_freq="7d",
62
+ num_workers=7,
63
+ )
64
+ ```
65
+ This code downloads data for a 40km by 40km area with one year of both Sentinel-1 and Sentinel-2. Clouds and snow are detected and replaced with NaNs. Data is also averaged every 7 days. A lazy dask array is returned:
66
+
67
+ <p align="center">
68
+ <img src="https://github.com/cmosig/sentle/assets/32590522/f487bba1-3c10-42a2-9b10-356ab2b44825" width="600">
69
+ </p>
70
+
71
+ Explanation:
72
+ - `target_crs`: Specifies the target CRS that all data will be reprojected to.
73
+ - `target_resolution`: Determines the resolution that all data is reprojected to in the `target_crs`.
74
+ - `bound_*`: Bounds in `target_crs` of the area you want to download. Undefined behavior if the difference between opposite bounds is not divisible by `target_resolution`.
75
+ - `datetime`: Time range that will be downloaded.
76
+ - `S2_mask_snow`: Whether to compute snow mask for Sentinel-2 data.
77
+ - `S2_cloud_classification`: Whether to compute a cloud classification layer for Sentinel-2 data.
78
+ - `S2_cloud_classification_device`: Where to run cloud classification. If you have an Nvidia GPU then pass `cuda` otherwise `cpu`(default).
79
+ - `S2_apply_*`: Whether to apply the respective mask, i.e., replace values by NaN.
80
+ - `S1_assets`: Which Sentinel-1 assets to download. Disable Sentinel-1 by setting this to `None`.
81
+ - `time_composite_freq`: Rounding interval across which data is averaged. Uses `pandas.Timestamp.round(time_composite_freq)`. Cloud/snow masks are dropped after masking because they cannot be aggregated.
82
+ - `num_workers`: Number of cores to use. Plan about 4 GiB of memory usage per worker.
83
+
84
+ **(2) Compute**
85
+
86
+ You either run `.compute()` on the returned dask array or pass the object to
87
+ `sentle.save_as_zarr(da, path="...")`, which sets up zarr storage and saves each chunk to disk as
88
+ soon as it's ready. The latter enables an area and temporal range to be
89
+ computed that is much larger than the RAM on your machine.
90
+
91
+ **(3) Visualize**
92
+
93
+ Load the data with xarray and visualize using for example the awesome [lexcube](https://github.com/msoechting/lexcube) package. Here, band B02 is visualized from the above example. One is able to spot the cloud gaps and the spotty coverage during winter.
94
+
95
+ <p align="center">
96
+ <img src="https://github.com/cmosig/sentle/assets/32590522/33b7f6a0-532e-453b-80db-748d99e753a2">
97
+ </p>
98
+
99
+ ## Questions you may have
100
+
101
+ #### Where can I watch the progress of the download?
102
+ Upon initialization, `sentle` prints a link to a [dask dashboard](https://docs.dask.org/en/latest/dashboard.html). Check the bottom right pane in the Status tab for a progress bar.
103
+ A variety of other stats are also visible there. If you are working on a remote machine you may need to use [port forwarding](https://help.ubuntu.com/community/SSH/OpenSSH/PortForwarding) to access the remote dashboard.
104
+ ![image](https://github.com/cmosig/sentle/assets/32590522/c20516b5-7a9e-4e99-953a-9c8325edea7b)
105
+
106
+
107
+ #### How do I scale this program?
108
+ Increase the number of workers using the `num_workers` parameter of the `process` function. With the default spatial chunk size of 4000, specified by `processing_spatial_chunk_size`, you should plan for about 4 GiB of memory per worker. At the moment (this will change), each worker also initiates its own model on the GPU, meaning more workers will also use more GPU VRAM.
109
+
110
+ #### My dask graph is too big, what do I do?
111
+ Increase the `processing_spatial_chunk_size` from `4000` to something higher in the `process` function. This will increase spatial chunk sizes, but will also increase worker memory requirements.
112
+
113
+ ## Contributing
114
+
115
+ Please submit issues or pull requests if you feel like something is missing or
116
+ needs to be fixed.
117
+
118
+ ## License
119
+
120
+ This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details.
121
+
122
+ ## Acknowledgments
123
+
124
+ Thank you to [David Montero](https://github.com/davemlz) for all the
125
+ discussions and his awesome packages which inspired this.
File without changes
@@ -0,0 +1,54 @@
1
+ import os
2
+
3
+ import numpy as np
4
+ import pkg_resources
5
+ import torch
6
+ import xarray as xr
7
+
8
+
9
def load_cloudsen_model(device: str = "cpu"):
    """Load the bundled TorchScript cloud model and place it on ``device``.

    The model file is expected at ``data/cloudmodel.pt`` inside the
    directory that contains the installed ``sentle`` package's ``sentle.py``.

    Args:
        device: Torch device string the model should live on
            (defaults to ``"cpu"``).

    Returns:
        The loaded TorchScript module, switched to eval mode and moved to
        ``device``.
    """
    pkg_path = os.path.dirname(
        pkg_resources.resource_filename("sentle", "sentle.py"))
    model_path = os.path.join(pkg_path, "data", "cloudmodel.pt")

    # Without map_location, torch.jit.load tries to restore every tensor on
    # the device it was saved from, which raises on machines that lack that
    # device (e.g. a CUDA-saved model on a CPU-only host). Remapping at load
    # time makes device="cpu" work regardless of how the file was exported.
    cloudsen_model = torch.jit.load(model_path, map_location=device)
    cloudsen_model.eval()
    cloudsen_model.to(device)

    return cloudsen_model
18
+
19
+
20
# Band name used for the Sentinel-2 cloud classification layer.
S2_cloud_mask_band = "S2_cloud_classification"

# Band names for the per-class cloud probabilities.
# NOTE(review): presumably listed in the channel order the cloud model
# emits -- confirm against the model before reordering.
S2_cloud_prob_bands = [
    "S2_clear_sky_probability",
    "S2_thick_cloud_probability",
    "S2_thin_cloud_probability",
    "S2_shadow_probability",
]
25
+
26
+
27
def compute_cloud_mask(array: np.ndarray, model: torch.jit.ScriptModule,
                       S2_cloud_classification_device: str) -> np.ndarray:
    """Run the cloud model on one Sentinel-2 subtile.

    Args:
        array: Input of shape ``(12, 732, 732)``. Values are divided by
            10000 before inference (presumably reflectance scaled by 10000,
            so the model input ends up in ``[0, 1]`` -- TODO confirm).
        model: Callable model; it receives a float32 tensor of shape
            ``(1, 12, 736, 736)`` on the requested device.
        S2_cloud_classification_device: Torch device string the input tensor
            is moved to before inference.

    Returns:
        The model output for the single batch item as a NumPy array with the
        spatial padding removed again, i.e. ``(channels, 732, 732)`` when the
        model preserves the spatial size.

    Raises:
        AssertionError: If ``array`` does not have shape ``(12, 732, 732)``.
    """
    assert array.shape == (
        12, 732,
        732), "only supporting shape (12, 732, 732) for cloud masking for now"

    # 732 is not divisible by 16, which the model needs; 2 pixels of edge
    # padding on every side give 736 = 16 * 46.
    pad = 2
    array = np.pad(array, [(0, 0), (pad, pad), (pad, pad)], "edge")

    # The model expects a leading batch dimension.
    array = np.expand_dims(array, axis=0)

    # Integer rasters (e.g. uint16) are not reliably supported by
    # torch.from_numpy / tensor division across torch versions. The model
    # input is float32 anyway, so cast in NumPy first; for integer dtypes
    # torch already handled this yields the same float32 values.
    if np.issubdtype(array.dtype, np.integer):
        array = array.astype(np.float32)

    # Scale by 1/10000 and move to the inference device.
    tensor = torch.from_numpy(array) / 10000
    tensor = tensor.to(S2_cloud_classification_device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        cloud_probabilities = model(tensor.type(torch.float32)).cpu().numpy()

    # Drop the batch dimension and strip the padding added above.
    return cloud_probabilities[0, :, pad:-pad, pad:-pad]
@@ -0,0 +1,20 @@
1
# Edge length of one Sentinel-2 subtile; the same 732 appears in the
# (12, 732, 732) shape that the cloud masking code requires.
S2_subtile_size = 732

# Raw Sentinel-2 band names (12 entries).
S2_RAW_BANDS = [
    "B01",
    "B02",
    "B03",
    "B04",
    "B05",
    "B06",
    "B07",
    "B08",
    "B8A",
    "B09",
    "B11",
    "B12",
]

# Resolution of each raw band, keyed by band name in the same order as
# S2_RAW_BANDS (presumably metres per pixel -- TODO confirm).
S2_RAW_BAND_RESOLUTION = {
    "B01": 60,
    "B02": 10,
    "B03": 10,
    "B04": 10,
    "B05": 20,
    "B06": 20,
    "B07": 20,
    "B08": 10,
    "B8A": 20,
    "B09": 60,
    "B11": 20,
    "B12": 20,
}