py4dgeo 0.7.0__cp313-cp313-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _py4dgeo.cpython-313-darwin.so +0 -0
- py4dgeo/.dylibs/libomp.dylib +0 -0
- py4dgeo/UpdateableZipFile.py +81 -0
- py4dgeo/__init__.py +32 -0
- py4dgeo/cloudcompare.py +32 -0
- py4dgeo/epoch.py +814 -0
- py4dgeo/fallback.py +159 -0
- py4dgeo/logger.py +77 -0
- py4dgeo/m3c2.py +244 -0
- py4dgeo/m3c2ep.py +855 -0
- py4dgeo/pbm3c2.py +3870 -0
- py4dgeo/py4dgeo_python.cpp +487 -0
- py4dgeo/registration.py +474 -0
- py4dgeo/segmentation.py +1280 -0
- py4dgeo/util.py +263 -0
- py4dgeo-0.7.0.dist-info/METADATA +200 -0
- py4dgeo-0.7.0.dist-info/RECORD +21 -0
- py4dgeo-0.7.0.dist-info/WHEEL +5 -0
- py4dgeo-0.7.0.dist-info/entry_points.txt +3 -0
- py4dgeo-0.7.0.dist-info/licenses/COPYING.md +17 -0
- py4dgeo-0.7.0.dist-info/licenses/LICENSE.md +5 -0
py4dgeo/segmentation.py
ADDED
@@ -0,0 +1,1280 @@
from py4dgeo.epoch import Epoch, as_epoch
from py4dgeo.logger import logger_context
from py4dgeo.util import Py4DGeoError, find_file
from py4dgeo.UpdateableZipFile import UpdateableZipFile

import datetime
import json
import logging
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import seaborn
import tempfile
import zipfile
import _py4dgeo


# Get the py4dgeo logger instance
logger = logging.getLogger("py4dgeo")


# This integer controls the versioning of the segmentation file format. Whenever the
# format is changed, this version should be increased, so that py4dgeo can warn
# about incompatibilities of py4dgeo with loaded data. This version is intentionally
# different from py4dgeo's version, because not all releases of py4dgeo necessarily
# change the segmentation file format and we want to be as compatible as possible.
PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION = 0


class SpatiotemporalAnalysis:
    def __init__(self, filename, compress=True, allow_pickle=True, force=False):
        """Construct a spatiotemporal segmentation object

        This is the basic data structure for the 4D objects-by-change algorithm
        and its derived variants. It manages storage of M3C2 distances and other
        intermediate results for a time series of epochs. The original point clouds
        themselves are not needed after the initial distance calculation, and additional
        epochs can be added to an existing analysis. The class uses a disk backend
        to store information and allows lazy loading of additional data, e.g.
        M3C2 uncertainty values, for postprocessing.

        :param filename:
            The filename used for this analysis. If it does not exist on the file
            system, a new analysis is created. Otherwise, the data is loaded from the existing file.
        :type filename: str
        :param compress:
            Whether to compress the stored data. This is a tradeoff decision between
            disk space and runtime. Especially appending new epochs to an existing
            analysis is an operation whose runtime can easily be dominated by
            decompression/compression of data.
        :type compress: bool
        :param allow_pickle:
            Whether py4dgeo is allowed to use the pickle module to store some data
            in the file representation of the analysis. If set to false, some data
            may not be stored and needs to be recomputed instead.
        :type allow_pickle: bool
        :param force:
            Force creation of a new analysis object, even if a file of this name
            already exists.
        """

        # Store the given parameters
        self.filename = find_file(filename, fatal=False)
        self.compress = compress
        self.allow_pickle = allow_pickle

        # Instantiate some properties used later on
        self._m3c2 = None

        # This is the cache for lazily loaded data
        self._corepoints = None
        self._distances = None
        self._smoothed_distances = None
        self._uncertainties = None
        self._reference_epoch = None

        # If the filename does not already exist, we create a new archive
        if force or not os.path.exists(self.filename):
            logger.info(f"Creating analysis file {self.filename}")
            with zipfile.ZipFile(self.filename, mode="w") as zf:
                # Write the segmentation file format version number
                zf.writestr(
                    "SEGMENTATION_FILE_FORMAT",
                    str(PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION),
                )

                # Write the compression algorithm used for all suboperations
                zf.writestr("USE_COMPRESSION", str(self.compress))

        # Assert that the segmentation file format is still valid
        with zipfile.ZipFile(self.filename, mode="r") as zf:
            # Read the segmentation file version number and compare to current
            version = int(zf.read("SEGMENTATION_FILE_FORMAT").decode())
            if version != PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION:
                raise Py4DGeoError("Segmentation file format is out of date!")

            # Read the compression algorithm
            self.compress = eval(zf.read("USE_COMPRESSION").decode())

    @property
    def reference_epoch(self):
        """Access the reference epoch of this analysis"""

        if self._reference_epoch is None:
            with zipfile.ZipFile(self.filename, mode="r") as zf:
                # Double check that the reference has already been set
                if "reference_epoch.zip" not in zf.namelist():
                    raise Py4DGeoError("Reference epoch for analysis not yet set")

                # Extract it from the archive
                with tempfile.TemporaryDirectory() as tmp_dir:
                    ref_epochfile = zf.extract("reference_epoch.zip", path=tmp_dir)
                    self._reference_epoch = Epoch.load(ref_epochfile)

        return self._reference_epoch

    @reference_epoch.setter
    def reference_epoch(self, epoch):
        """Set the reference epoch of this analysis (only possible once)"""
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            # If we already have a reference epoch, the user should start a
            # new analysis instead
            if "reference_epoch.zip" in zf.namelist():
                raise Py4DGeoError(
                    "Reference epoch cannot be changed - please start a new analysis"
                )

            # Ensure that we do have a timestamp on the epoch
            epoch = check_epoch_timestamp(epoch)

            # Ensure that the KDTree is built - no-op if triggered by the user
            epoch.build_kdtree()

            # Write the reference epoch into the archive
            with tempfile.TemporaryDirectory() as tmp_dir:
                epochfilename = os.path.join(tmp_dir, "reference_epoch.zip")
                epoch.save(epochfilename)
                zf.write(epochfilename, arcname="reference_epoch.zip")

        # Also cache it directly
        self._reference_epoch = epoch

    @reference_epoch.deleter
    def reference_epoch(self):
        self._reference_epoch = None

    @property
    def corepoints(self):
        """Access the corepoints of this analysis"""
        if self._corepoints is None:
            with zipfile.ZipFile(self.filename, mode="r") as zf:
                # Double check that the corepoints have already been set
                if "corepoints.zip" not in zf.namelist():
                    raise Py4DGeoError("Corepoints for analysis not yet set")

                # Extract it from the archive
                with tempfile.TemporaryDirectory() as tmp_dir:
                    cpfile = zf.extract("corepoints.zip", path=tmp_dir)
                    self._corepoints = Epoch.load(cpfile)

        return self._corepoints

    @corepoints.setter
    def corepoints(self, _corepoints):
        """Set the corepoints for this analysis (only possible once)"""
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            # If we already have corepoints in the archive, the user should start a
            # new analysis instead
            if "corepoints.zip" in zf.namelist():
                raise Py4DGeoError(
                    "Corepoints cannot be changed - please start a new analysis"
                )

            # Ensure that the corepoints are stored as an epoch and build its KDTree
            self._corepoints = as_epoch(_corepoints)
            self._corepoints.build_kdtree()

            # Write the corepoints into the archive
            with tempfile.TemporaryDirectory() as tmp_dir:
                cpfilename = os.path.join(tmp_dir, "corepoints.zip")
                self._corepoints.save(cpfilename)
                zf.write(cpfilename, arcname="corepoints.zip")

    @corepoints.deleter
    def corepoints(self):
        self._corepoints = None

    @property
    def m3c2(self):
        """Access the M3C2 algorithm of this analysis"""
        # If M3C2 has not been set, we use a default constructed one
        return self._m3c2

    @m3c2.setter
    def m3c2(self, _m3c2):
        """Set the M3C2 algorithm of this analysis"""
        self._m3c2 = _m3c2

    @property
    def timedeltas(self):
        """Access the sequence of time stamp deltas for the time series"""
        with zipfile.ZipFile(self.filename, mode="r") as zf:
            if "timestamps.json" not in zf.namelist():
                return []

            # Read timedeltas
            with tempfile.TemporaryDirectory() as tmp_dir:
                timestampsfile = zf.extract("timestamps.json", path=tmp_dir)
                with open(timestampsfile) as f:
                    timedeltas = json.load(f)

            # Convert the serialized deltas to datetime.timedelta
            return [datetime.timedelta(**data) for data in timedeltas]

    @timedeltas.setter
    def timedeltas(self, _timedeltas):
        """Set the timedeltas manually

        This is only possible exactly once and mutually exclusive with adding
        epochs via the :ref:`add_epochs` method.
        """
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            # If we already have timestamps in the archive, this is not possible
            if "timestamps.json" in zf.namelist():
                raise Py4DGeoError(
                    "Timestamps can only be set on freshly created analysis instances"
                )

            with tempfile.TemporaryDirectory() as tmp_dir:
                timestampsfile = os.path.join(tmp_dir, "timestamps.json")
                with open(timestampsfile, "w") as f:
                    json.dump(
                        [
                            {
                                "days": td.days,
                                "seconds": td.seconds,
                                "microseconds": td.microseconds,
                            }
                            for td in _timedeltas
                        ],
                        f,
                    )
                zf.write(timestampsfile, arcname="timestamps.json")

    @property
    def distances(self):
        """Access the M3C2 distances of this analysis"""

        if self._distances is None:
            with zipfile.ZipFile(self.filename, mode="r") as zf:
                filename = self._numpy_filename("distances")
                if filename not in zf.namelist():
                    self.distances = np.empty(
                        (self.corepoints.cloud.shape[0], 0), dtype=np.float64
                    )
                    return self._distances

                with tempfile.TemporaryDirectory() as tmp_dir:
                    distancefile = zf.extract(filename, path=tmp_dir)
                    read_func = (
                        (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
                    )
                    self._distances = read_func(distancefile)

        return self._distances

    @distances.setter
    def distances(self, _distances):
        """Set the distances manually

        This is only possible exactly once and mutually exclusive with adding
        epochs via the :ref:`add_epochs` method.
        """
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            filename = self._numpy_filename("distances")
            write_func = np.savez_compressed if self.compress else np.save

            # If we already have distances in the archive, this is not possible
            if filename in zf.namelist():
                raise Py4DGeoError(
                    "Distances can only be set on freshly created analysis instances, use add_epochs instead."
                )

            with tempfile.TemporaryDirectory() as tmp_dir:
                distancesfile = os.path.join(tmp_dir, filename)
                write_func(distancesfile, _distances)
                zf.write(distancesfile, arcname=filename)

        self._distances = _distances

    @distances.deleter
    def distances(self):
        self._distances = None

    @property
    def smoothed_distances(self):
        if self._smoothed_distances is None:
            with zipfile.ZipFile(self.filename, mode="r") as zf:
                filename = self._numpy_filename("smoothed_distances")
                if filename in zf.namelist():
                    with tempfile.TemporaryDirectory() as tmp_dir:
                        smoothedfile = zf.extract(filename, path=tmp_dir)
                        read_func = (
                            (lambda f: np.load(f)["arr_0"])
                            if self.compress
                            else np.load
                        )
                        self._smoothed_distances = read_func(smoothedfile)

        return self._smoothed_distances

    @smoothed_distances.setter
    def smoothed_distances(self, _smoothed_distances):
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            filename = self._numpy_filename("smoothed_distances")
            write_func = np.savez_compressed if self.compress else np.save

            with tempfile.TemporaryDirectory() as tmp_dir:
                smoothedfile = os.path.join(tmp_dir, filename)
                write_func(smoothedfile, _smoothed_distances)
                zf.write(smoothedfile, arcname=filename)

        self._smoothed_distances = _smoothed_distances

    @smoothed_distances.deleter
    def smoothed_distances(self):
        self._smoothed_distances = None

    @property
    def uncertainties(self):
        """Access the M3C2 uncertainties of this analysis"""

        if self._uncertainties is None:
            with zipfile.ZipFile(self.filename, mode="r") as zf:
                filename = self._numpy_filename("uncertainties")
                if filename not in zf.namelist():
                    self.uncertainties = np.empty(
                        (self.corepoints.cloud.shape[0], 0),
                        dtype=np.dtype(
                            [
                                ("lodetection", "<f8"),
                                ("spread1", "<f8"),
                                ("num_samples1", "<i8"),
                                ("spread2", "<f8"),
                                ("num_samples2", "<i8"),
                            ]
                        ),
                    )
                    return self._uncertainties

                with tempfile.TemporaryDirectory() as tmp_dir:
                    uncertaintyfile = zf.extract(filename, path=tmp_dir)
                    read_func = (
                        (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
                    )
                    self._uncertainties = read_func(uncertaintyfile)

        return self._uncertainties

    @uncertainties.setter
    def uncertainties(self, _uncertainties):
        """Set the uncertainties manually

        This is only possible exactly once and mutually exclusive with adding
        epochs via the :ref:`add_epochs` method.
        """
        with zipfile.ZipFile(self.filename, mode="a") as zf:
            filename = self._numpy_filename("uncertainties")
            write_func = np.savez_compressed if self.compress else np.save

            # If we already have uncertainties in the archive, this is not possible
            if filename in zf.namelist():
                raise Py4DGeoError(
                    "Uncertainties can only be set on freshly created analysis instances, use add_epochs instead."
                )

            with tempfile.TemporaryDirectory() as tmp_dir:
                uncertaintiesfile = os.path.join(tmp_dir, filename)
                write_func(uncertaintiesfile, _uncertainties)
                zf.write(uncertaintiesfile, arcname=filename)

        self._uncertainties = _uncertainties

    @uncertainties.deleter
    def uncertainties(self):
        self._uncertainties = None

    def add_epochs(self, *epochs):
        """Add a number of epochs to the existing analysis"""

        # Remove intermediate results from the archive
        self.invalidate_results()

        # Assert that all epochs have a timestamp
        for epoch in epochs:
            check_epoch_timestamp(epoch)

        # Lazily fetch required data
        reference_epoch = self.reference_epoch
        timedeltas = self.timedeltas

        # Collect the calculated results to only add them once to the archive
        new_distances = []
        new_uncertainties = []

        # Iterate over the given epochs
        for i, epoch in enumerate(sorted(epochs, key=lambda e: e.timestamp)):
            with logger_context(f"Adding epoch {i+1}/{len(epochs)} to analysis object"):
                # Prepare the M3C2 instance
                self.m3c2.corepoints = self.corepoints.cloud
                self.m3c2.epochs = (reference_epoch, epoch)

                # Calculate the M3C2 distances
                d, u = self.m3c2.calculate_distances(reference_epoch, epoch)
                new_distances.append(d)
                new_uncertainties.append(u)
                timedeltas.append(epoch.timestamp - reference_epoch.timestamp)

        # We do not need the reference_epoch at this point
        del self.reference_epoch

        # Prepare all archive data in a temporary directory
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Write a new timestamps file
            timestampsfile = os.path.join(tmp_dir, "timestamps.json")
            with open(timestampsfile, "w") as f:
                json.dump(
                    [
                        {
                            "days": td.days,
                            "seconds": td.seconds,
                            "microseconds": td.microseconds,
                        }
                        for td in timedeltas
                    ],
                    f,
                )

            # Depending on whether we compress, we use different numpy functionality
            write_func = np.savez_compressed if self.compress else np.save
            distance_filename = self._numpy_filename("distances")
            uncertainty_filename = self._numpy_filename("uncertainties")

            with logger_context("Rearranging space-time array in memory"):
                # Load the distance array and append new data
                distance_file = os.path.join(tmp_dir, distance_filename)
                write_func(
                    distance_file,
                    np.concatenate(
                        (self.distances, np.column_stack(tuple(new_distances))), axis=1
                    ),
                )

                # Load the uncertainty array and append new data
                uncertainty_file = os.path.join(tmp_dir, uncertainty_filename)
                write_func(
                    uncertainty_file,
                    np.concatenate(
                        (self.uncertainties, np.column_stack(tuple(new_uncertainties))),
                        axis=1,
                    ),
                )

            # Invalidate potential caches for distances/uncertainties
            self._distances = None
            self._uncertainties = None

            # Dump the updated files into the archive
            with logger_context("Updating disk-based analysis archive with new epochs"):
                with UpdateableZipFile(self.filename, mode="a") as zf:
                    if "timestamps.json" in zf.namelist():
                        zf.remove("timestamps.json")
                    zf.write(timestampsfile, arcname="timestamps.json")
                    if distance_filename in zf.namelist():
                        zf.remove(distance_filename)
                    zf.write(distance_file, arcname=distance_filename)
                    if uncertainty_filename in zf.namelist():
                        zf.remove(uncertainty_filename)
                    zf.write(uncertainty_file, arcname=uncertainty_filename)

        # (Potentially) remove caches
        del self.distances
        del self.uncertainties

    @property
    def seeds(self):
        """The list of seed candidates for this analysis"""

        with zipfile.ZipFile(self.filename, mode="r") as zf:
            if "seeds.pickle" not in zf.namelist():
                return None

            with tempfile.TemporaryDirectory() as tmp_dir:
                zf.extract("seeds.pickle", path=tmp_dir)
                with open(os.path.join(tmp_dir, "seeds.pickle"), "rb") as f:
                    return pickle.load(f)

    @seeds.setter
    def seeds(self, _seeds):
        # Assert that we received the correct type
        for seed in _seeds:
            if not isinstance(seed, RegionGrowingSeed):
                raise Py4DGeoError(
                    "Seeds are expected to inherit from RegionGrowingSeed"
                )

        if not self.allow_pickle:
            return

        with UpdateableZipFile(self.filename, mode="a") as zf:
            if "seeds.pickle" in zf.namelist():
                zf.remove("seeds.pickle")

            with tempfile.TemporaryDirectory() as tmp_dir:
                seedsfile = os.path.join(tmp_dir, "seeds.pickle")
                with open(seedsfile, "wb") as f:
                    pickle.dump(_seeds, f)

                zf.write(seedsfile, arcname="seeds.pickle")

    @property
    def objects(self):
        """The list of objects by change for this analysis"""

        with zipfile.ZipFile(self.filename, mode="r") as zf:
            if "objects.pickle" not in zf.namelist():
                return None

            with tempfile.TemporaryDirectory() as tmp_dir:
                zf.extract("objects.pickle", path=tmp_dir)
                with open(os.path.join(tmp_dir, "objects.pickle"), "rb") as f:
                    return pickle.load(f)

    @objects.setter
    def objects(self, _objects):
        # Assert that we received the correct type
        for seed in _objects:
            if not isinstance(seed, ObjectByChange):
                raise Py4DGeoError(
                    "Objects are expected to inherit from ObjectByChange"
                )

        if not self.allow_pickle:
            return

        with UpdateableZipFile(self.filename, mode="a") as zf:
            if "objects.pickle" in zf.namelist():
                zf.remove("objects.pickle")

            with tempfile.TemporaryDirectory() as tmp_dir:
                objectsfile = os.path.join(tmp_dir, "objects.pickle")
                with open(objectsfile, "wb") as f:
                    pickle.dump(_objects, f)

                zf.write(objectsfile, arcname="objects.pickle")

    def invalidate_results(self, seeds=True, objects=True, smoothed_distances=False):
        """Invalidate (and remove) calculated results

        This is automatically called when new epochs are added or when
        an algorithm sets the :code:`force` option.
        """

        logger.info(
            f"Removing intermediate results from the analysis file {self.filename}"
        )
        with UpdateableZipFile(self.filename, mode="a") as zf:
            if seeds and "seeds.pickle" in zf.namelist():
                zf.remove("seeds.pickle")

            if objects and "objects.pickle" in zf.namelist():
                zf.remove("objects.pickle")

            smoothed_file = self._numpy_filename("smoothed_distances")
            if smoothed_distances and smoothed_file in zf.namelist():
                zf.remove(smoothed_file)

    def _numpy_filename(self, name):
        extension = "npz" if self.compress else "npy"
        return f"{name}.{extension}"

    @property
    def distances_for_compute(self):
        """Retrieve the distance array used for computation

        This might be the raw data or smoothed data, based on whether
        a smoothing was provided by the user.
        """
        distances = self.smoothed_distances
        if distances is None:
            distances = self.distances
        return distances

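
# A minimal usage sketch for setting up a SpatiotemporalAnalysis (illustration
# only, not part of the package code): file names, the timestamp string and the
# M3C2 parameter values are hypothetical; the API follows this module and
# py4dgeo.m3c2 / py4dgeo.epoch.
#
#   import py4dgeo
#
#   analysis = py4dgeo.SpatiotemporalAnalysis("my_analysis.zip")
#   reference = py4dgeo.read_from_xyz("epoch_reference.xyz")
#   reference.timestamp = "January 1st 2017, 00:00"
#   analysis.reference_epoch = reference
#   # Corepoints may be any point cloud array; here a subsample of the reference
#   analysis.corepoints = reference.cloud[::100, :]
#   analysis.m3c2 = py4dgeo.M3C2(cyl_radii=(1.0,), normal_radii=(1.0,))
#   # Each added epoch needs a timestamp; distances/uncertainties are appended
#   # column-wise to the space-time arrays stored in the archive.
#   analysis.add_epochs(epoch1, epoch2)

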
class RegionGrowingAlgorithmBase:
    def __init__(
        self,
        neighborhood_radius=1.0,
        thresholds=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        min_segments=20,
        max_segments=None,
    ):
        """Construct a spatiotemporal segmentation algorithm.

        This class can be derived from to customize the algorithm behaviour.

        :param neighborhood_radius:
            The size of the neighborhood of a core point. All core points within
            this radius are considered adjacent and are therefore considered as
            candidates for inclusion in the region growing algorithm.
        :type neighborhood_radius: float
        :param thresholds:
            A list of thresholds to use as candidates in 4D-OBC's adaptive
            thresholding procedure.
        :type thresholds: list
        :param min_segments:
            The minimum number of core points in an object-by-change. Defaults to
            20.
        :type min_segments: int
        :param max_segments:
            The maximum number of core points in an object-by-change. This is mainly
            used to bound the runtime of expensive region growing. By default, no
            maximum is applied.
        :type max_segments: int
        """

        self.neighborhood_radius = neighborhood_radius
        self.thresholds = thresholds
        self.min_segments = min_segments
        self.max_segments = max_segments

        self._analysis = None

    def distance_measure(self):
        """Distance measure between two time series

        Expected to return a function that accepts two time series and returns
        the distance.
        """

        return _py4dgeo.normalized_dtw_distance

    def find_seedpoints(self):
        """Calculate seedpoints for the region growing algorithm"""

        raise NotImplementedError

    def seed_sorting_scorefunction(self):
        """A function that computes a score for a seed candidate

        This function is used to prioritize seed candidates.
        """

        # The base class does not perform sorting.
        return lambda seed: 0.0

    def filter_objects(self, obj):
        """A filter for objects produced by the region growing algorithm

        Objects are discarded if this method returns False.
        """

        # The base class does not perform filtering
        return True

    @property
    def analysis(self):
        """Access the analysis object that the algorithm operates on

        This is only available after :ref:`run` has been called.
        """
        if self._analysis is None:
            raise Py4DGeoError(
                "Analysis object is only available when the algorithm is run"
            )
        return self._analysis

    def run(self, analysis, force=False):
        """Calculate the segmentation

        :param analysis:
            The analysis object we are working with.
        :type analysis: py4dgeo.segmentation.SpatiotemporalAnalysis
        :param force:
            Force recalculation of results. If false, some intermediate results will be
            restored from the analysis object instead of being recalculated.
        """

        # Make the analysis object known to all members
        self._analysis = analysis

        # Enforce the removal of intermediate results
        if force:
            analysis.invalidate_results()

        # Return pre-calculated objects if they are available
        # precalculated = analysis.objects
        # if precalculated is not None:
        #     logger.info("Reusing objects by change stored in analysis object")
        #     return precalculated

        # Check if there are pre-calculated objects. If so, create the objects list
        # from these and continue growing objects, taking into consideration objects
        # that are already grown. If not, initiate a new empty objects list.
        precalculated = analysis.objects  # TODO: do not assign to new object
        if precalculated is not None:
            logger.info("Reusing objects by change stored in analysis object")
            objects = (
                precalculated.copy()
            )  # test if .copy() solves memory problem, or deepcopy?
        else:
            objects = (
                []
            )  # TODO: test initializing this in the analysis class, see if it crashes instantly

        # Get corepoints from M3C2 class and build a KDTree on them
        corepoints = as_epoch(analysis.corepoints)
        corepoints.build_kdtree()

        # Calculate the list of seed points and sort them
        seeds = analysis.seeds
        if seeds is None:
            with logger_context("Find seed candidates in time series"):
                seeds = self.find_seedpoints()

            # Sort the seed points
            with logger_context("Sort seed candidates by priority"):
                seeds = list(sorted(seeds, key=self.seed_sorting_scorefunction()))

            # Store the seeds
            analysis.seeds = seeds
        else:
            logger.info("Reusing seed candidates stored in analysis object")

        # Write the number of seeds to a separate text file if self.write_nr_seeds is True
        if self.write_nr_seeds:
            with open("number_of_seeds.txt", "w") as f:
                f.write(str(len(seeds)))

        # Iterate over the seeds to maybe turn them into objects
        for i, seed in enumerate(
            seeds
        ):  # [self.resume_from_seed-1:]): # starting seed ranked at the `resume_from_seed` variable (representing 1 for index 0)
            # or to keep within the same index range when resuming from seed:
            if i < (
                self.resume_from_seed - 1
            ):  # resume from index 0 when `resume_from_seed` == 1
                continue
            if i >= (self.stop_at_seed - 1):  # stop at index 0 when `stop_at_seed` == 1
                break

            # Save objects to the analysis object when at index `intermediate_saving`
            if (
                (self.intermediate_saving)
                and ((i % self.intermediate_saving) == 0)
                and (i != 0)
            ):
                with logger_context(
                    f"Intermediate saving of first {len(objects)} objects, grown from first {i+1}/{len(seeds)} seeds"
                ):
                    analysis.objects = objects  # This assigns itself to itself

            # Check whether any already calculated object overlaps with this seed.
            found = False
            for obj in objects:
                if seed.index in obj.indices and (
                    obj.end_epoch > seed.start_epoch
                    and seed.end_epoch > obj.start_epoch
                ):
                    found = True
                    break

            # If we found an overlap, we skip this seed
            if found:
                continue

            # Apply a numeric default to the max_segments parameter
            max_segments = self.max_segments
            if max_segments is None:
                max_segments = corepoints.cloud.shape[0] + 1

            data = _py4dgeo.RegionGrowingAlgorithmData(
                analysis.distances_for_compute,
                corepoints,
                self.neighborhood_radius,
                seed._seed,
                self.thresholds,
                self.min_segments,
                max_segments,
            )

            # Perform the region growing
            with logger_context(
                f"Performing region growing on seed candidate {i+1}/{len(seeds)}"
            ):
                objdata = _py4dgeo.region_growing(data, self.distance_measure())

            # If the returned object has 0 indices, the min_segments threshold was violated
            if objdata.indices_distances:
                obj = ObjectByChange(
                    objdata, seed, analysis
                )  # TODO: check, does it copy the whole analysis object when initializing
                if self.filter_objects(obj):
                    objects.append(obj)

            # If the returned object is larger than max_segments we issue a warning
            if len(objdata.indices_distances) >= max_segments:
                logger.warning(
                    f"An object by change exceeded the given maximum size of {max_segments}"
                )

        # Store the results in the analysis object
        analysis.objects = objects

        # Potentially remove objects from memory
        del analysis.smoothed_distances
        del analysis.distances

        return objects

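
# Illustration of the intended extension points (a hedged sketch, not part of
# the package code): a subclass customizes seed detection, seed prioritization
# and object filtering by overriding the hooks defined above. The class name
# below is hypothetical; it derives from RegionGrowingAlgorithm so that the
# bookkeeping attributes used by run() (resume_from_seed, write_nr_seeds, ...)
# are available.
#
#   class MyAlgorithm(RegionGrowingAlgorithm):
#       def find_seedpoints(self):
#           # Return RegionGrowingSeed(corepoint_index, start_epoch, end_epoch)
#           return [RegionGrowingSeed(0, 0, 10)]
#
#       def seed_sorting_scorefunction(self):
#           # Seeds with lower scores are processed first
#           return lambda seed: seed.start_epoch
#
#       def filter_objects(self, obj):
#           # Keep only objects with enough core points
#           return len(obj.indices) >= self.min_segments

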
class RegionGrowingAlgorithm(RegionGrowingAlgorithmBase):
    def __init__(
        self,
        seed_subsampling=1,
        seed_candidates=None,
        window_width=24,
        window_min_size=12,
        window_jump=1,
        window_penalty=1.0,
        minperiod=24,
        height_threshold=0.0,
        use_unfinished=True,
        intermediate_saving=0,
        resume_from_seed=0,
        stop_at_seed=np.inf,
        write_nr_seeds=False,
        **kwargs,
    ):
        """Construct the 4D-OBC algorithm.

        :param seed_subsampling:
            A subsampling factor for the set of core points for the generation
            of segmentation seed candidates. This can be used to speed up
            the generation of seeds. The default of 1 does not perform any
            subsampling, a value of, e.g., 10 would only consider every 10th
            corepoint for adding seeds.
        :type seed_subsampling: int
        :param seed_candidates:
            A set of indices specifying which core points should be used for seed
            detection. This can be used to perform segmentation for selected locations.
            The default of None does not perform any selection and uses all corepoints.
            The subsampling parameter is applied additionally.
        :type seed_candidates: list
        :param window_width:
            The width of the sliding temporal window for change point detection. The sliding window
            moves along the signal and determines the discrepancy between the first and the second
            half of the window (i.e. subsequent time series segments within the window width). The
            default value is 24, corresponding to one day in case of hourly data.
        :type window_width: int
        :param window_min_size:
            The minimum temporal distance needed between two seed candidates for the
            second one to be considered. The default value is 12.
        :type window_min_size: int
        :param window_jump:
            The interval on which the sliding temporal window moves and checks for seed candidates.
            The default value is 1, corresponding to a check for every epoch in the time series.
        :type window_jump: int
        :param window_penalty:
            A complexity penalty that determines how strict the change point detection is.
            A higher penalty results in stricter change point detection (i.e., fewer points are detected), while a low
            value results in a large amount of detected change points. The default value is 1.0.
        :type window_penalty: float
        :param minperiod:
            The minimum period of a detected change to be considered as seed candidate for subsequent
            segmentation. The default is 24, corresponding to one day for hourly data.
        :type minperiod: int
        :param height_threshold:
            The height threshold represents the required magnitude of a detected change to be considered
            as seed candidate for subsequent segmentation. The magnitude of a detected change is derived
            as the unsigned difference between the magnitude (i.e. distance) at the start epoch and the peak magnitude.
            The default is 0.0, in which case all detected changes are used as seed candidates.
        :type height_threshold: float
        :param use_unfinished:
            If False, seed candidates that are not finished by the end of the time series are not considered in further
            analysis. The default is True, in which case unfinished seed candidates are regarded as seeds for region growing.
        :type use_unfinished: bool
        :param intermediate_saving:
            Parameter that determines after how many considered seeds the resulting list of 4D-OBCs is saved to the SpatiotemporalAnalysis object.
            This is to ensure that not all results are lost if the algorithm is terminated unexpectedly. If set to 0, no intermediate saving is done.
        :type intermediate_saving: int
        :param resume_from_seed:
            Parameter specifying from which seed index the region growing algorithm must resume. If zero, all seeds are considered, starting from the highest ranked seed.
            Default is 0.
        :type resume_from_seed: int
        :param stop_at_seed:
            Parameter specifying at which seed to stop region growing and terminate the run function.
            Default is np.inf, meaning all seeds are considered.
        :type stop_at_seed: int
        :param write_nr_seeds:
            If True, after seed detection, a text file is written in the working directory containing the total number of detected seeds.
            This can be used to split up the consecutive 4D-OBC segmentation into different subsets.
            Default is False, meaning no txt file is written.
        :type write_nr_seeds: bool
        """

        # Initialize base class
        super().__init__(**kwargs)

        # Store the given parameters
        self.seed_subsampling = seed_subsampling
        self.seed_candidates = seed_candidates
        self.window_width = window_width
        self.window_min_size = window_min_size
        self.window_jump = window_jump
        self.window_penalty = window_penalty
        self.minperiod = minperiod
        self.height_threshold = height_threshold
        self.use_unfinished = use_unfinished
        self.intermediate_saving = intermediate_saving
        self.resume_from_seed = resume_from_seed
        self.stop_at_seed = stop_at_seed
        self.write_nr_seeds = write_nr_seeds

    def find_seedpoints(self):
        """Calculate seedpoints for the region growing algorithm"""

        # These are some arguments used below that we might consider
        # exposing to the user in the future. For now, they are considered
        # internal, but they are still defined here for readability.
        window_costmodel = "l1"
        # window_min_size = 12
        # window_jump = 1
        # window_penalty = 1.0

        # The list of generated seeds
        seeds = []

        # The list of core point indices to check as seeds
        if self.seed_candidates is None:
            if self.seed_subsampling == 0:
                raise Py4DGeoError(
                    "Subsampling factor cannot be 0, use 1 or any integer larger than 1"
                )
            # Use all corepoints if no selection specified, considering subsampling
            seed_candidates_curr = range(
                0, self.analysis.distances_for_compute.shape[0], self.seed_subsampling
            )
        else:
            # Use the specified corepoint indices, but consider subsampling
            seed_candidates_curr = self.seed_candidates  # [::self.seed_subsampling]

        # Iterate over all time series to analyse their change points
        for i in seed_candidates_curr:
            # Extract the time series and interpolate its nan values
            timeseries = self.analysis.distances_for_compute[i, :]
            bad_indices = np.isnan(timeseries)
            num_nans = np.count_nonzero(bad_indices)

            # If there are too many NaNs, this timeseries does not make sense
            if num_nans > timeseries.shape[0] - 3:
                continue

            # If there are nan values, we try fixing things by interpolation
            if num_nans > 0:
                good_indices = np.logical_not(bad_indices)
                timeseries[bad_indices] = np.interp(
                    bad_indices.nonzero()[0],
                    good_indices.nonzero()[0],
                    timeseries[good_indices],
                )

            # Run detection of change points
            cpdata = _py4dgeo.ChangePointDetectionData(
                ts=timeseries,
                window_size=self.window_width,
                min_size=self.window_min_size,
                jump=self.window_jump,
                penalty=self.window_penalty,
            )
            changepoints = _py4dgeo.change_point_detection(cpdata)[:-1]

            # Shift the time series to positive values
            timeseries = timeseries + abs(np.nanmin(timeseries) + 0.1)
            # Create a flipped version for negative change volumes
            timeseries_flipped = timeseries * -1.0 + abs(np.nanmax(timeseries)) + 0.1

            # Create seeds for this timeseries
            corepoint_seeds = []
            for start_idx in changepoints:
                # Skip this changepoint if it was included into a previous seed
                if corepoint_seeds and start_idx <= corepoint_seeds[-1].end_epoch:
                    continue

                # Skip this changepoint if it is too close to the end
                if start_idx >= timeseries.shape[0] - self.minperiod:
                    break

                # Decide whether we need to use the flipped timeseries
                used_timeseries = timeseries
                if timeseries[start_idx] >= timeseries[start_idx + self.minperiod]:
                    used_timeseries = timeseries_flipped

                previous_volume = -999.9
                for target_idx in range(start_idx + 1, timeseries.shape[0]):
                    # Calculate the change volume
                    height = used_timeseries[start_idx]
                    volume = np.nansum(
                        used_timeseries[start_idx : target_idx + 1] - height
                    )

                    # Check whether the volume started decreasing
                    if previous_volume > volume:
                        # Only add a seed if it is longer than the minimum period and the height of the change form is larger than the threshold
                        if (target_idx - start_idx >= self.minperiod) and (
                            np.abs(
                                np.max(used_timeseries[start_idx : target_idx + 1])
                                - np.min(used_timeseries[start_idx : target_idx + 1])
                            )
                            >= self.height_threshold
                        ):
                            corepoint_seeds.append(
                                RegionGrowingSeed(i, start_idx, target_idx)
                            )
                        break
                    else:
                        previous_volume = volume

                    # This causes a seed to always be detected if the volume doesn't decrease before present
                    # Useful when used in an online setting, can be filtered before region growing
                    # Only if the last epoch is reached we use the segment as seed
                    if (target_idx == timeseries.shape[0] - 1) and self.use_unfinished:
                        # We reached the present and add a seed based on it
                        corepoint_seeds.append(
                            RegionGrowingSeed(i, start_idx, timeseries.shape[0] - 1)
                        )

            # Add all the seeds found for this corepoint to the full list
            seeds.extend(corepoint_seeds)

        return seeds

    def seed_sorting_scorefunction(self):
        """Neighborhood similarity sorting function"""

        # The 4D-OBC algorithm sorts by similarity in the neighborhood
        # of the seed.
        def neighborhood_similarity(seed):
            neighbors = self.analysis.corepoints.kdtree.radius_search(
                self.analysis.corepoints.cloud[seed.index, :], self.neighborhood_radius
            )
            # If no neighbors are found, make sure the algorithm continues its search but with a large dissimilarity
            if len(neighbors) < 2:
                return 9999999.0  # return very large number? or delete the seed point, but then also delete from the seeds list

            similarities = []
            for n in neighbors:
                data = _py4dgeo.TimeseriesDistanceFunctionData(
                    self.analysis.distances_for_compute[
                        seed.index, seed.start_epoch : seed.end_epoch + 1
                    ],
                    self.analysis.distances_for_compute[
                        n, seed.start_epoch : seed.end_epoch + 1
                    ],
                )
                similarities.append(self.distance_measure()(data))

            return sum(similarities, 0.0) / (len(neighbors) - 1)

        return neighborhood_similarity

    def filter_objects(self, obj):
        """A filter for objects produced by the region growing algorithm"""

        # Filter based on coefficient of variation
        distarray = np.fromiter(obj._data.indices_distances.values(), np.float64)

        # Check if mean is 0.0, if so, set to very small value to avoid division by 0
        mean_distarray = np.mean(distarray)
        if mean_distarray == 0.0:
            mean_distarray = 10**-10

        # Calculate coefficient of variation
        cv = np.std(distarray) / mean_distarray

        # TODO: Make this threshold configurable?
        return cv <= 0.8

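
# A hedged end-to-end sketch of running the 4D-OBC segmentation on an existing
# analysis (illustration only, not part of the package code): parameter values
# are hypothetical and `analysis` is a SpatiotemporalAnalysis that already
# contains distances for a time series of epochs.
#
#   algo = RegionGrowingAlgorithm(
#       neighborhood_radius=1.0,
#       min_segments=20,
#       thresholds=[0.2, 0.4, 0.6],
#       window_width=24,
#       minperiod=24,
#   )
#   objects = algo.run(analysis)
#   print(f"Found {len(objects)} objects by change")

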
class RegionGrowingSeed:
    def __init__(self, index, start_epoch, end_epoch):
        self._seed = _py4dgeo.RegionGrowingSeed(index, start_epoch, end_epoch)

    @property
    def index(self):
        return self._seed.index

    @property
    def start_epoch(self):
        return self._seed.start_epoch

    @property
    def end_epoch(self):
        return self._seed.end_epoch


class ObjectByChange:
    """Representation of a change object in the spatiotemporal domain"""

    def __init__(self, data, seed, analysis=None):
        self._data = data
        self._analysis = analysis
        self.seed = seed

    @property
    def indices(self):
        """The set of corepoint indices that compose the object by change"""
        return list(self._data.indices_distances.keys())

    def distance(self, index):
        return self._data.indices_distances[index]

    @property
    def start_epoch(self):
        """The index of the start epoch of the change object"""
        return self._data.start_epoch

    @property
    def end_epoch(self):
        """The index of the end epoch of the change object"""
        return self._data.end_epoch

    @property
    def threshold(self):
        """The distance threshold that produced this object"""
        return self._data.threshold

    def plot(self, filename=None):
        """Create an informative visualization of the Object By Change

        :param filename:
            The filename to use to store the plot. Can be omitted to only show
            the plot in a Jupyter notebook session.
        :type filename: str
        """

        # Extract DTW distances from this object
        indexarray = np.fromiter(self.indices, np.int32)
        distarray = np.fromiter((self.distance(i) for i in indexarray), np.float64)

        # Initialize the figure and all of its subfigures
        fig = plt.figure(figsize=plt.figaspect(0.3))
        tsax = fig.add_subplot(1, 3, 1)
        histax = fig.add_subplot(1, 3, 2)
        mapax = fig.add_subplot(1, 3, 3)

        # The first plot (tsax) prints all time series of chosen corepoints
        # and colors them according to distance.
        tsax.set_ylabel("Height change [m]")
        tsax.set_xlabel("Time [h]")

        # We pad the time series visualization with a number of data
        # points on both sides. TODO: Expose as argument to plot?
        timeseries_padding = 10
        start_epoch = max(self.start_epoch - timeseries_padding, 0)
        end_epoch = min(
            self.end_epoch + timeseries_padding,
            self._analysis.distances_for_compute.shape[1],
        )

        # We use the seed's timeseries to set good axis limits
        seed_ts = self._analysis.distances_for_compute[
            self.seed.index, start_epoch:end_epoch
        ]
        tsax.set_ylim(np.nanmin(seed_ts) * 0.5, np.nanmax(seed_ts) * 1.5)

        # Create a colormap with distance for this object
        cmap = matplotlib.colormaps.get_cmap("viridis")
        maxdist = np.nanmax(distarray)

        # Plot each time series individually
        for index in self.indices:
            tsax.plot(
                self._analysis.distances_for_compute[index, start_epoch:end_epoch],
                linewidth=0.7,
                alpha=0.3,
                color=cmap(self.distance(index) / maxdist),
            )

        # Plot the seed timeseries again, but with a thicker line
        tsax.plot(seed_ts, linewidth=2.0, zorder=10, color="blue")

        # Next, we add a histogram plot with the distance values (using seaborn)
        seaborn.histplot(distarray, ax=histax, kde=True, color="r")

        # Add labels to the histogram plot
        histax.set_title(f"Segment size: {distarray.shape[0]}")
        histax.set_xlabel("DTW distance")

        # Create a 2D view of the segment
        locations = self._analysis.corepoints.cloud[indexarray, 0:2]
        mapax.scatter(locations[:, 0], locations[:, 1], c=distarray)

        # Some global settings of the generated figure
        fig.tight_layout()

        # Maybe save to file
        if filename is not None:
            plt.savefig(filename)

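
# A hedged sketch of inspecting the results stored on an analysis (illustration
# only, not part of the package code): it assumes `analysis.objects` has been
# populated by a previous run, and the output file name is hypothetical.
#
#   for obj in analysis.objects:
#       print(obj.start_epoch, obj.end_epoch, len(obj.indices), obj.threshold)
#   analysis.objects[0].plot(filename="obc_0.png")

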
1209
|
+
|
|
1210
|
+
def check_epoch_timestamp(epoch):
|
|
1211
|
+
"""Validate an epoch to be used with SpatiotemporalSegmentation"""
|
|
1212
|
+
if epoch.timestamp is None:
|
|
1213
|
+
raise Py4DGeoError(
|
|
1214
|
+
"Epochs need to define a timestamp to be usable in SpatiotemporalSegmentation"
|
|
1215
|
+
)
|
|
1216
|
+
|
|
1217
|
+
return epoch
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
def regular_corepoint_grid(lowerleft, upperright, num_points, zval=0.0):
|
|
1221
|
+
"""A helper function to create a regularly spaced grid for the analysis
|
|
1222
|
+
|
|
1223
|
+
:param lowerleft:
|
|
1224
|
+
The lower left corner of the grid. Given as a 2D coordinate.
|
|
1225
|
+
:type lowerleft: np.ndarray
|
|
1226
|
+
:param upperright:
|
|
1227
|
+
The upper right corner of the grid. Given as a 2D coordinate.
|
|
1228
|
+
:type upperright: np.ndarray
|
|
1229
|
+
:param num_points:
|
|
1230
|
+
A tuple with two entries denoting the number of points to be used in
|
|
1231
|
+
x and y direction
|
|
1232
|
+
:type num_points: tuple
|
|
1233
|
+
:param zval:
|
|
1234
|
+
The value to fill for the z-direction.
|
|
1235
|
+
:type zval: double
|
|
1236
|
+
"""
|
|
1237
|
+
xspace = np.linspace(
|
|
1238
|
+
lowerleft[0], upperright[0], num=num_points[0], dtype=np.float64
|
|
1239
|
+
)
|
|
1240
|
+
yspace = np.linspace(
|
|
1241
|
+
lowerleft[1], upperright[1], num=num_points[1], dtype=np.float64
|
|
1242
|
+
)
|
|
1243
|
+
|
|
1244
|
+
grid = np.empty(shape=(num_points[0] * num_points[1], 3), dtype=np.float64)
|
|
1245
|
+
for i, x in enumerate(xspace):
|
|
1246
|
+
for j, y in enumerate(yspace):
|
|
1247
|
+
grid[i * num_points[0] + j, 0] = x
|
|
1248
|
+
grid[i * num_points[0] + j, 1] = y
|
|
1249
|
+
grid[i * num_points[0] + j, 2] = zval
|
|
1250
|
+
|
|
1251
|
+
return grid
|
|
1252
|
+
|
|
1253
|
+
|
|
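
# A small usage sketch for regular_corepoint_grid (illustration only, not part
# of the package code; the coordinate values are hypothetical):
#
#   corepoints = regular_corepoint_grid(
#       lowerleft=np.array([0.0, 0.0]),
#       upperright=np.array([10.0, 10.0]),
#       num_points=(11, 11),
#       zval=0.0,
#   )
#   analysis.corepoints = corepoints  # use the grid as the analysis corepoints

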
def temporal_averaging(distances, smoothing_window=24):
    """Smooth a space-time array of distance change using a sliding window approach

    :param distances:
        The raw data to smooth.
    :type distances: np.ndarray
    :param smoothing_window:
        The size of the sliding window used in smoothing the data. The
        default value is 24.
    :type smoothing_window: int
    """

    with logger_context("Smoothing temporal data"):
        smoothed = np.empty_like(distances)
        eps = smoothing_window // 2

        for i in range(distances.shape[1]):
            smoothed[:, i] = np.nanmedian(
                distances[
                    :,
                    max(0, i - eps) : min(distances.shape[1] - 1, i + eps),
                ],
                axis=1,
            )

    # Return the smoothed space-time array
    return smoothed
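

# A hedged usage sketch for temporal_averaging (illustration only, not part of
# the package code): the smoothed array can be assigned to the analysis, where
# distances_for_compute will then prefer it over the raw distances.
#
#   analysis.smoothed_distances = temporal_averaging(
#       analysis.distances, smoothing_window=24
#   )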