nrt-data 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nrt/data/__init__.py +92 -23
- nrt/data/simulate.py +288 -14
- nrt_data-0.0.2.dist-info/METADATA +72 -0
- nrt_data-0.0.2.dist-info/RECORD +9 -0
- {nrt_data-0.0.1.dist-info → nrt_data-0.0.2.dist-info}/WHEEL +1 -1
- nrt_data-0.0.2.dist-info/licenses/NOTICE +427 -0
- nrt_data-0.0.1.dist-info/METADATA +0 -356
- nrt_data-0.0.1.dist-info/RECORD +0 -8
- {nrt_data-0.0.1.dist-info → nrt_data-0.0.2.dist-info/licenses}/LICENSE +0 -0
- {nrt_data-0.0.1.dist-info → nrt_data-0.0.2.dist-info}/top_level.txt +0 -0
nrt/data/__init__.py
CHANGED
|
@@ -1,22 +1,26 @@
|
|
|
1
|
-
# Copyright (C) 2024 European Union
|
|
2
|
-
#
|
|
3
|
-
#
|
|
4
|
-
# the
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
1
|
+
# Copyright (C) 2024 European Union
|
|
2
|
+
#
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify it under
|
|
4
|
+
# the terms of the EUROPEAN UNION PUBLIC LICENCE v. 1.2 as published by
|
|
5
|
+
# the European Union.
|
|
6
|
+
#
|
|
7
|
+
# This program is distributed in the hope that it will be useful, but
|
|
8
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
9
|
+
# or FITNESS FOR A PARTICULAR PURPOSE.
|
|
10
|
+
# See the EUROPEAN UNION PUBLIC LICENCE v. 1.2 for further details.
|
|
11
|
+
#
|
|
12
|
+
# You should have received a copy of the EUROPEAN UNION PUBLIC LICENCE v. 1.2.
|
|
13
|
+
# along with this program.
|
|
14
|
+
# If not, see <https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 >
|
|
15
|
+
|
|
16
|
+
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
|
15
17
|
|
|
16
18
|
import os
|
|
17
19
|
import json
|
|
18
20
|
import warnings
|
|
21
|
+
import sqlite3
|
|
19
22
|
|
|
23
|
+
import pandas as pd
|
|
20
24
|
import xarray as xr
|
|
21
25
|
import rasterio
|
|
22
26
|
import fiona
|
|
@@ -31,14 +35,15 @@ from .simulate import make_cube as _make_cube
|
|
|
31
35
|
DATA_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
32
36
|
|
|
33
37
|
GOODBOY = pooch.create(
|
|
34
|
-
path=pooch.os_cache("nrt-
|
|
38
|
+
path=pooch.os_cache("nrt-data"),
|
|
35
39
|
base_url="https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/FOREST/NRT/NRT-DATA/VER1-0/",
|
|
36
40
|
registry={
|
|
37
|
-
"sentinel2_cube_subset_romania_10m.nc":
|
|
38
|
-
"sentinel2_cube_subset_romania_20m.nc":
|
|
39
|
-
"tree_cover_density_2018_romania.tif":
|
|
40
|
-
"germany_stratification.tif":
|
|
41
|
-
"germany_sample_points.fgb":
|
|
41
|
+
"sentinel2_cube_subset_romania_10m.nc": "sha256:f88716ff11353fa46990c59c57f31b4c9a8dfd4a950a845e073e41f6beb0ac07",
|
|
42
|
+
"sentinel2_cube_subset_romania_20m.nc": "sha256:5e6858fc0e31555e8de44bac57b989bb9a3c10117f4fddead943eb45b8c1be17",
|
|
43
|
+
"tree_cover_density_2018_romania.tif": "sha256:0d6a445b112de8ba44729df26c063650198e6214bc9d9349c3b4c86ee43894bb",
|
|
44
|
+
"germany_stratification.tif": "sha256:149d0c36b9f5d933ca12c4ea976f866e74c205ab708ac3bc4dd7062c74c4968c",
|
|
45
|
+
"germany_sample_points.fgb": "sha256:068cbda19fcfbd2dd1cf9a1d18f032685c971d6d22cb7bef1415109030753ace",
|
|
46
|
+
"germany_temporal_segments.sqlite": "sha256:248fc9ffd020764b4a5a1ece40976dc5f0622c68de6e9ae3005ad192d0233a14"
|
|
42
47
|
}
|
|
43
48
|
)
|
|
44
49
|
|
|
@@ -53,8 +58,8 @@ def _load(f, return_meta=False, **kwargs):
|
|
|
53
58
|
**kwargs: Keyword arguments for xarray when loading nc files
|
|
54
59
|
|
|
55
60
|
Returns:
|
|
56
|
-
Dataset, array, dictionary, or tuple depending on
|
|
57
|
-
type and options.
|
|
61
|
+
Dataset, array, dictionary, sqlite3 connection, or tuple depending on
|
|
62
|
+
the file type and options.
|
|
58
63
|
"""
|
|
59
64
|
file_path = GOODBOY.fetch(f)
|
|
60
65
|
if f.endswith('.nc'):
|
|
@@ -73,6 +78,8 @@ def _load(f, return_meta=False, **kwargs):
|
|
|
73
78
|
meta = src.meta
|
|
74
79
|
return data, meta
|
|
75
80
|
return data
|
|
81
|
+
elif f.endswith('.sqlite'):
|
|
82
|
+
return sqlite3.connect(file_path)
|
|
76
83
|
|
|
77
84
|
|
|
78
85
|
def romania_10m(**kwargs):
|
|
@@ -124,7 +131,12 @@ def germany_zarr(**kwargs):
|
|
|
124
131
|
also automatically converted to ``np.nan``
|
|
125
132
|
|
|
126
133
|
Args:
|
|
127
|
-
**kwargs: Additional keyword arguments passed to ``xarray.open_zarr()
|
|
134
|
+
**kwargs: Additional keyword arguments passed to ``xarray.open_zarr()``.
|
|
135
|
+
A common option, useful when working on a corporate network and
|
|
136
|
+
accessing the internet via a proxy is
|
|
137
|
+
``storage_options={"client_kwargs": {"trust_env": True}}``. This
|
|
138
|
+
ensures that the proxy settings are automatically picked up from the
|
|
139
|
+
environment variables.
|
|
128
140
|
|
|
129
141
|
Examples:
|
|
130
142
|
>>> import sys
|
|
@@ -232,6 +244,63 @@ def germany_sample_points(return_meta=False):
|
|
|
232
244
|
return _load('germany_sample_points.fgb', return_meta=return_meta)
|
|
233
245
|
|
|
234
246
|
|
|
247
|
+
def germany_temporal_segments():
    """Visually interpreted temporal segments for 300 sample locations in Germany.

    This function loads temporal segmentation data, which has been visually
    interpreted using the ``SegmentLabellingInterface`` of the ``nrt-data`` package.
    The data corresponds to the sample locations from ``nrt.data.germany_sample_points()``
    and can be joined with it using the ``fid`` or ``feature_id`` keys.

    Each segment is labeled with one of three possible categories:

    - Non-treed
    - Stable tree cover
    - Dieback

    A common disturbance trajectory in this region, which has been heavily
    affected by bark beetle activity, follows the pattern "Stable tree cover",
    "Dieback", and then "Non-treed." For some sample locations, no label could be
    confidently assigned, and these are represented with a single segment labeled
    ``None``.

    Additional information about the dataset:

    - Temporal segmentation is valid for the period between 2019-01-01 and 2021-12-31.
    - Each segment has a ``begin`` and ``end`` time represented as days since epoch.
    - The segmentation data may contain errors for ambiguous samples, particularly
      near edges, in mixed or sparse forests, or for shrub-like vegetation easily
      mistaken for trees.
    - Temporal precision may vary, especially in cases where gradual processes like
      canopy dieback are difficult to date accurately.

    Examples:
        >>> from nrt import data
        >>> data.germany_temporal_segments()
              id  begin    end              label  feature_id
        0      1  17916  18981          Non-treed           0
        1      2  17916  18981          Non-treed           1
        2      3  17916  18981          Non-treed           2
        3      4  17916  18981          Non-treed           3
        4      5  17916  18981  Stable tree cover           4
        ..   ...    ...    ...                ...         ...
        413  414  17916  18981          Non-treed         295
        414  415  17916  18981  Stable tree cover         296
        415  416  17916  18981          Non-treed         297
        416  417  17916  18981          Non-treed         298
        417  418  17916  18981  Stable tree cover         299

        [418 rows x 5 columns]

    Returns:
        pandas.DataFrame: A data-frame containing 418 rows and 5 columns, with each
        row representing a temporal segment for a sample location.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from contextlib import closing

    # closing() releases the sqlite3 connection even when the query raises
    # (e.g. missing ``segments`` table); previously it was only closed on success.
    with closing(_load('germany_temporal_segments.sqlite')) as con:
        return pd.read_sql('SELECT * FROM segments;', con=con)
|
|
302
|
+
|
|
303
|
+
|
|
235
304
|
def romania_forest_cover_percentage(return_meta=False):
|
|
236
305
|
"""Subset of Copernicus HR layer tree cover percentage - 20 m - Romania
|
|
237
306
|
|
nrt/data/simulate.py
CHANGED
|
@@ -1,20 +1,24 @@
|
|
|
1
|
-
# Copyright (C) 2024 European Union
|
|
2
|
-
#
|
|
3
|
-
#
|
|
4
|
-
# the
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
1
|
+
# Copyright (C) 2024 European Union
|
|
2
|
+
#
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify it under
|
|
4
|
+
# the terms of the EUROPEAN UNION PUBLIC LICENCE v. 1.2 as published by
|
|
5
|
+
# the European Union.
|
|
6
|
+
#
|
|
7
|
+
# This program is distributed in the hope that it will be useful, but
|
|
8
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
9
|
+
# or FITNESS FOR A PARTICULAR PURPOSE.
|
|
10
|
+
# See the EUROPEAN UNION PUBLIC LICENCE v. 1.2 for further details.
|
|
11
|
+
#
|
|
12
|
+
# You should have received a copy of the EUROPEAN UNION PUBLIC LICENCE v. 1.2.
|
|
13
|
+
# along with this program.
|
|
14
|
+
# If not, see <https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 >
|
|
15
|
+
|
|
16
|
+
import datetime
|
|
17
|
+
from typing import Tuple, Optional
|
|
15
18
|
|
|
16
19
|
import numpy as np
|
|
17
20
|
import xarray as xr
|
|
21
|
+
from scipy.ndimage import gaussian_filter
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
def make_ts(dates, break_idx=-1, intercept=0.7, amplitude=0.15, magnitude=0.25,
|
|
@@ -268,6 +272,276 @@ def make_cube(dates, params_ds, outlier_value=0.1, name='ndvi'):
|
|
|
268
272
|
name=name)
|
|
269
273
|
return xr_cube
|
|
270
274
|
|
|
275
|
+
|
|
276
|
+
def make_landscape(
    shape: Tuple[int, int] = (5000, 5000),
    year: int = 2020,
    forest_pct: float = 0.70,
    loss_pct: float = 0.03,
    forest_compactness: float = 60.0,
    disturbance_clustering: float = 30.0,
    disturbance_roughness: float = 3.0,
    disturbance_roughness_share: float = 0.1,
    seed: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """Generates a synthetic landscape with spatio-temporally correlated forest loss.

    Uses Gaussian filtering of white noise to create spatially autocorrelated
    structures for both forest cover and disturbance patches.
    Temporal dates are assigned using a gradient from patch center (early) to
    edge (late).

    Args:
        shape (tuple): Dimensions of the output array (rows, cols). Defaults to
            (5000, 5000).
        year (int): The year to simulate. Dates will be returned as days since
            1970-01-01, starting from Jan 1st of this year. Defaults to 2020.
        forest_pct (float): Target percentage of forest cover (0.0 to 1.0).
            Defaults to 0.70.
        loss_pct (float): Target percentage of forest loss relative to the total
            landscape area. Loss is only placed inside forest, so the effective
            loss is capped at the forest area when ``loss_pct`` exceeds it.
            Defaults to 0.03.
        forest_compactness (float): Sigma for forest smoothing. Higher values create
            larger, smoother contiguous forest blocks. Defaults to 60.0.
        disturbance_clustering (float): Sigma for disturbance structure. Higher values
            create larger, fewer disturbance patches. Defaults to 30.0.
        disturbance_roughness (float): Sigma for disturbance texture. Lower values
            create jagged, scattered pixel edges. Defaults to 3.0.
        disturbance_roughness_share (float): Weight (0.0 to 1.0) of roughness vs
            clustering. Higher values add more salt-and-pepper noise.
            Defaults to 0.1.
        seed (int): Random seed for reproducibility. When given, it is passed to
            ``np.random.seed`` and therefore resets NumPy's global random state.
            Defaults to None.

    Examples:
        >>> import numpy as np
        >>> from nrt.data import simulate
        >>> from matplotlib import pyplot as plt

        >>> mask, disturbance = simulate.make_landscape(shape=(2000,2000),
        ...                                             forest_pct=0.60,
        ...                                             loss_pct=0.02,
        ...                                             seed=42)

        >>> # For mask plot make green forests and magenta disturbances
        >>> # For disturbance plot use a colormap that goes from jan 2020 to dec 2020 (in days since 1970)
        >>> fig, ax = plt.subplots(1, 2, figsize=(16, 8))
        >>> # 1. Land Cover Map
        >>> cmap_lc = plt.cm.colors.ListedColormap(['#eecfa1', '#228b22', '#ff00ff'])
        >>> ax[0].imshow(mask, cmap=cmap_lc, interpolation='none')
        >>> ax[0].set_title("Mask")
        >>> ax[0].axis('off')
        >>> # 2. Date of Disturbance Map
        >>> masked_disturbance = np.ma.masked_where(disturbance == 0, disturbance)
        >>> im = ax[1].imshow(masked_disturbance, cmap='jet_r', interpolation='none')
        >>> ax[1].set_title("Disturbance date (Temporally Correlated)")
        >>> ax[1].axis('off')
        >>> plt.colorbar(im, ax=ax[1], label="Day since 1970", fraction=0.046, pad=0.04)
        >>> plt.tight_layout()
        >>> plt.show()

    Returns:
        A tuple (land_cover, loss_dates) where:

        * **land_cover** (np.ndarray): int8 array where 0=Non-Forest,
          1=Forest, 2=Loss.
        * **loss_dates** (np.ndarray): int32 array representing the date of
          loss as days since 1970-01-01. Pixels with no loss are 0.
    """
    if seed is not None:
        np.random.seed(seed)

    # 1. Generate Forest Cover
    # High sigma creates large contiguous 'continents'
    noise_forest = np.random.rand(*shape)
    smooth_forest = gaussian_filter(noise_forest, sigma=forest_compactness)
    forest_threshold = np.percentile(smooth_forest, (1 - forest_pct) * 100)
    is_forest = smooth_forest >= forest_threshold

    # 2. Generate Disturbance Potential
    # Mix large blobs (structure) with fine grain noise (roughness/texture)
    noise_struct = gaussian_filter(np.random.rand(*shape), sigma=disturbance_clustering)
    noise_text = gaussian_filter(np.random.rand(*shape), sigma=disturbance_roughness)

    # Weighted composition
    w_struct = 1.0 - disturbance_roughness_share
    loss_potential = (w_struct * noise_struct) + (disturbance_roughness_share * noise_text)

    # 3. Apply Loss Threshold (Strictly inside Forest)
    valid_potential = loss_potential[is_forest]
    # Number of loss pixels requested, expressed against the whole landscape
    target_px = shape[0] * shape[1] * loss_pct

    # Safety check for 0 loss
    if target_px > 0 and len(valid_potential) > 0:
        # Share of the *forest* that must be lost. Capped at 1.0: requesting
        # more loss than there is forest would otherwise hand np.percentile a
        # negative q and raise ValueError.
        relative_pct = min(target_px / len(valid_potential), 1.0)
        loss_cutoff = np.percentile(valid_potential, (1 - relative_pct) * 100)
        is_loss = (loss_potential >= loss_cutoff) & is_forest
    else:
        is_loss = np.zeros(shape, dtype=bool)

    # 4. Temporal Attribution (Gradient Method)
    # Use int32 for absolute dates (days since 1970 can exceed int16 limit)
    loss_dates = np.zeros(shape, dtype=np.int32)

    if np.any(is_loss):
        # Calculate start offset (Days from 1970-01-01 to Year-01-01)
        start_date = datetime.date(year, 1, 1)
        epoch = datetime.date(1970, 1, 1)
        start_offset = (start_date - epoch).days

        loss_values = loss_potential[is_loss]
        v_min, v_max = loss_values.min(), loss_values.max()

        # Normalize 0..1
        value_span = v_max - v_min
        if value_span > 0:
            norm_values = (loss_values - v_min) / value_span
        else:
            # All loss pixels share one potential (e.g. a single loss pixel):
            # there is no gradient, so avoid the 0/0 NaN and treat every pixel
            # as a patch centre (earliest date).
            norm_values = np.ones_like(loss_values)

        # Invert: Center (High Pot) -> Day 1, Edge (Low Pot) -> Day 365
        # We assume loss spans the full year (1-365)
        doy_values = 1 + ((1 - norm_values) * 364).astype(np.int32)

        # Add jitter
        jitter = np.random.randint(-3, 4, size=doy_values.shape)
        doy_values = np.clip(doy_values + jitter, 1, 365)

        # Convert to Days since 1970
        # date = start_offset + doy - 1
        loss_dates[is_loss] = start_offset + doy_values - 1

    # 5. Assemble Land Cover
    land_cover = np.zeros(shape, dtype=np.int8)
    land_cover[is_forest] = 1
    land_cover[is_loss] = 2  # Overwrite forest with loss

    return land_cover, loss_dates
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def make_prediction(
    ref_lc: np.ndarray,
    ref_dates: np.ndarray,
    omission_rate: float = 0.10,
    commission_rate: float = 0.01,
    lag_mean: float = 15.0,
    lag_std: float = 10.0,
    seed: Optional[int] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """Simulates a monitoring algorithm's output by degrading reference data.

    Adds omission errors, spatially correlated commission errors, and temporal
    detection lag. Detection lag is modeled using a Gamma distribution to
    simulate the 'long tail' of delayed detections often seen in satellite
    alerts.
    This function is ideally used in combination with
    ``nrt.data.simulate.make_landscape``.

    Args:
        ref_lc (np.ndarray): Reference land cover array (0=Non, 1=Forest, 2=Loss).
        ref_dates (np.ndarray): Reference dates of loss (days since 1970).
        omission_rate (float): Probability (0.0 to 1.0) of missing a true loss pixel.
            Defaults to 0.10.
        commission_rate (float): Share (0.0 to 1.0) of stable forest pixels
            (class 1) falsely flagged as loss. Non-forest pixels (class 0) are
            never flagged. A value of 0 produces no commission errors.
            Defaults to 0.01.
        lag_mean (float): Mean delay in days between event and detection.
            Defaults to 15.0.
        lag_std (float): Standard deviation of the delay in days. When either
            ``lag_std`` or ``lag_mean`` is 0 every detection receives the same
            lag of ``round(lag_mean)`` days. Defaults to 10.0.
        seed (int): Random seed for reproducibility. When given, it is passed to
            ``np.random.seed`` and therefore resets NumPy's global random state.
            Defaults to None.

    Examples:
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from nrt.data import simulate

        >>> # 1. Generate Reference and Prediction
        >>> mask, dates = simulate.make_landscape(shape=(1000, 1000), seed=42)
        >>> pred_mask, pred_dates = simulate.make_prediction(mask, dates, seed=42)

        >>> # 2. Compute Accuracy
        >>> # Find ALL spatial matches (Intersection of Reference and Prediction)
        >>> spatial_match = (mask == 2) & (pred_mask == 1)
        >>>
        >>> # Calculate lags for all matches
        >>> all_lags = pred_dates[spatial_match].astype(float) - dates[spatial_match].astype(float)
        >>>
        >>> # Apply Temporal Rules: Valid if lag is within [-5, +30] days
        >>> valid_window = (all_lags >= -5) & (all_lags <= 30)
        >>> tp_count = np.count_nonzero(valid_window)
        >>>
        >>> ua = tp_count / np.count_nonzero(pred_mask == 1)
        >>> pa = tp_count / np.count_nonzero(mask == 2)
        >>> print(f"UA: {ua:.1%}, PA: {pa:.1%}")
        UA: 74.0%, PA: 83.4%

        >>> # 3. Visualize Lag Distribution (All Matches)
        >>> plt.figure(figsize=(7, 4))
        >>> # Plot all matches in grey (background)
        >>> plt.hist(all_lags, bins=range(-20, 60), color='lightgrey', label='Rejected Matches')
        >>> # Overlay True Positives in teal
        >>> plt.hist(all_lags[valid_window], bins=range(-20, 60), color='teal', label='True Positives')
        >>>
        >>> # Add Tolerance Lines
        >>> plt.axvline(x=-5, color='red', linestyle='--', linewidth=1, label='Tolerance (-5, +30)')
        >>> plt.axvline(x=30, color='red', linestyle='--', linewidth=1)
        >>>
        >>> plt.xlabel('Detection Lag (days)')
        >>> plt.ylabel('Pixel Count')
        >>> plt.title(f'Temporal Accuracy Analysis (N={len(all_lags)})')
        >>> plt.legend()
        >>> plt.show()

    Returns:
        A tuple (pred_lc, pred_dates) where:

        * **pred_lc** (np.ndarray): int8 array where 0=Stable, 1=Loss.
        * **pred_dates** (np.ndarray): int32 array representing detection
          dates as days since 1970-01-01.
    """
    if seed is not None:
        np.random.seed(seed)
    shape = ref_lc.shape

    # --- 1. Handle True Positives (TP) ---
    # TP must be Class 2 (Loss) AND have a valid date (>0)
    true_loss_mask = (ref_lc == 2) & (ref_dates > 0)
    omission_mask = np.random.random(shape) < omission_rate
    detected_tp_mask = true_loss_mask & (~omission_mask)

    # --- 2. Handle False Positives (FP) ---
    # RESTRICTION: Commission errors only allowed in STABLE FOREST (Class 1)
    # Class 0 (Non-Forest) is excluded.
    stable_forest_mask = (ref_lc == 1)
    # The noise field is always drawn so the random stream (and hence seeded
    # output of the later steps) does not depend on commission_rate.
    noise_fp = gaussian_filter(np.random.rand(*shape), sigma=10)
    valid_fp_noise = noise_fp[stable_forest_mask]

    # commission_rate == 0 must be guarded explicitly: the 100th percentile is
    # the maximum, and ``>=`` would still flag that pixel.
    if commission_rate > 0 and len(valid_fp_noise) > 0:
        # Rate applies to the Stable Forest area
        fp_cutoff = np.percentile(valid_fp_noise, (1 - commission_rate) * 100)
        detected_fp_mask = (noise_fp >= fp_cutoff) & stable_forest_mask
    else:
        detected_fp_mask = np.zeros(shape, dtype=bool)

    # --- 3. Assemble Output ---
    pred_lc = np.zeros(shape, dtype=np.int8)
    pred_lc[detected_tp_mask | detected_fp_mask] = 1
    pred_dates = np.zeros(shape, dtype=np.int32)

    # A. Dates for TPs (Reference + Lag)
    if np.any(detected_tp_mask):
        real_dates = ref_dates[detected_tp_mask]
        n_detected = np.count_nonzero(detected_tp_mask)
        if lag_mean == 0 or lag_std == 0:
            # Degenerate Gamma (zero mean or zero spread): the moment-matching
            # formulas below would divide by zero, so use a constant lag.
            lags = np.full(n_detected, int(np.round(lag_mean)), dtype=np.int32)
        else:
            # Moment matching: mean = k * theta, variance = k * theta ** 2
            theta = (lag_std ** 2) / lag_mean
            k = lag_mean / theta
            lags = np.random.gamma(k, theta, size=n_detected)
            lags = np.round(lags).astype(np.int32)
        pred_dates[detected_tp_mask] = real_dates + lags

    # B. Dates for FPs (Random within context)
    if np.any(detected_fp_mask):
        # Infer context year from reference data
        if np.any(ref_dates > 0):
            valid_ref_dates = ref_dates[ref_dates > 0]
            min_d, max_d = valid_ref_dates.min(), valid_ref_dates.max()
        else:
            min_d, max_d = 18262, 18627  # 2020 fallback

        fp_dates = np.random.randint(min_d, max_d + 1, size=np.count_nonzero(detected_fp_mask))
        pred_dates[detected_fp_mask] = fp_dates

    return pred_lc, pred_dates
|
|
543
|
+
|
|
544
|
+
|
|
271
545
|
if __name__ == "__main__":
|
|
272
546
|
import doctest
|
|
273
547
|
doctest.testmod()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nrt-data
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: nrt namespace package giving access to demo and test datasets, extracted from nrt core
|
|
5
|
+
Author: Jonas Viehweger, Kenji Ose
|
|
6
|
+
Author-email: Loic Dutrieux <loic.dutrieux@ec.europa.eu>
|
|
7
|
+
Maintainer-email: Loic Dutrieux <loic.dutrieux@ec.europa.eu>
|
|
8
|
+
License: EUPL-1.2
|
|
9
|
+
Project-URL: homepage, https://code.europa.eu/jrc-forest/nrt-data
|
|
10
|
+
Keywords: sentinel2,xarray,zarr,netcdf
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Description-Content-Type: text/x-rst
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
License-File: NOTICE
|
|
20
|
+
Requires-Dist: numpy
|
|
21
|
+
Requires-Dist: scipy
|
|
22
|
+
Requires-Dist: xarray
|
|
23
|
+
Requires-Dist: rasterio
|
|
24
|
+
Requires-Dist: fiona
|
|
25
|
+
Requires-Dist: netCDF4
|
|
26
|
+
Requires-Dist: zarr
|
|
27
|
+
Requires-Dist: aiohttp
|
|
28
|
+
Requires-Dist: fsspec
|
|
29
|
+
Requires-Dist: pooch
|
|
30
|
+
Provides-Extra: docs
|
|
31
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
32
|
+
Requires-Dist: sphinx_rtd_theme; extra == "docs"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
********
|
|
36
|
+
nrt-data
|
|
37
|
+
********
|
|
38
|
+
|
|
39
|
+
**nrt-data** is a companion package to `nrt <https://github.com/ec-jrc/nrt>`_, designed for near real-time monitoring of satellite image time series. It provides easy access to curated datasets for testing and demonstrating **nrt**'s capabilities.
|
|
40
|
+
|
|
41
|
+
As of ``nrt==0.2.1``, the ``data`` module has been separated from the core package to simplify maintenance and keep the core **nrt** lightweight. This package is distributed as a namespace package, ensuring backward compatibility with previous versions.
|
|
42
|
+
|
|
43
|
+
For more details, see the full documentation at `nrt-data.readthedocs.io <https://nrt-data.readthedocs.io/>`_.
|
|
44
|
+
|
|
45
|
+
Features
|
|
46
|
+
========
|
|
47
|
+
|
|
48
|
+
- Access to small-sized test data in NetCDF format and associated reference data in FlatGeobuf, managed via `Pooch <https://www.fatiando.org/pooch/latest/>`_.
|
|
49
|
+
- Streaming access to larger datasets stored as cloud-optimized Zarr stores.
|
|
50
|
+
- Synthetic data simulation functionalities.
|
|
51
|
+
|
|
52
|
+
Installation
|
|
53
|
+
============
|
|
54
|
+
|
|
55
|
+
To install **nrt-data**, run:
|
|
56
|
+
|
|
57
|
+
.. code-block:: bash
|
|
58
|
+
|
|
59
|
+
pip install nrt-data
|
|
60
|
+
|
|
61
|
+
Please note that **nrt-data** can be installed independently of **nrt** but is incompatible with ``nrt<=0.2.1``. To check your current **nrt** version, run:
|
|
62
|
+
|
|
63
|
+
.. code-block:: bash
|
|
64
|
+
|
|
65
|
+
pip freeze | grep nrt
|
|
66
|
+
|
|
67
|
+
If necessary, update **nrt** by running:
|
|
68
|
+
|
|
69
|
+
.. code-block:: bash
|
|
70
|
+
|
|
71
|
+
pip install -U nrt
|
|
72
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
nrt/data/__init__.py,sha256=CujJekZtW4J7foIy-L3SBdBHB9is-Latm7OzKsUT5mA,16022
|
|
2
|
+
nrt/data/mreCritValTable.json,sha256=PVtAXUsZCPlPho2WQzFfvnj6BCBj8Jx64dIpLgShvl4,26151
|
|
3
|
+
nrt/data/simulate.py,sha256=4w9KwKiBUQEDFM-b4_Xlcp2lnOwuRjlrgeb0yBDRDXQ,26422
|
|
4
|
+
nrt_data-0.0.2.dist-info/licenses/LICENSE,sha256=b8nnCcy_4Nd_v_okJ6mDKCvi64jkexzbSfIag7TR5mU,13827
|
|
5
|
+
nrt_data-0.0.2.dist-info/licenses/NOTICE,sha256=ct946s5jPzw3oyKEye9-wRyDoWjeuHpoPFzlVr5Qn8I,23032
|
|
6
|
+
nrt_data-0.0.2.dist-info/METADATA,sha256=QkbMPK90AmR1RNTuii0egdUMU39GrC1p_skiK8s9J2g,2507
|
|
7
|
+
nrt_data-0.0.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
8
|
+
nrt_data-0.0.2.dist-info/top_level.txt,sha256=MZT1YZNVoDJG3HQqwAmY5i310aWiOxB7GaPANWE8bfM,4
|
|
9
|
+
nrt_data-0.0.2.dist-info/RECORD,,
|