py4dgeo 0.6.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1202 @@
1
+ from py4dgeo.epoch import Epoch, as_epoch
2
+ from py4dgeo.logger import logger_context
3
+ from py4dgeo.util import Py4DGeoError, find_file
4
+ from py4dgeo.UpdateableZipFile import UpdateableZipFile
5
+
6
+ import datetime
7
+ import json
8
+ import logging
9
+ import matplotlib
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+ import os
13
+ import pickle
14
+ import seaborn
15
+ import tempfile
16
+ import zipfile
17
+
18
+ import _py4dgeo
19
+
20
+
21
+ # Get the py4dgeo logger instance
22
+ logger = logging.getLogger("py4dgeo")
23
+
24
+
25
+ # This integer controls the versioning of the segmentation file format. Whenever the
26
+ # format is changed, this version should be increased, so that py4dgeo can warn
27
+ # about incompatibilities of py4dgeo with loaded data. This version is intentionally
28
+ # different from py4dgeo's version, because not all releases of py4dgeo necessarily
29
+ # change the segmentation file format and we want to be as compatible as possible.
30
+ PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION = 0
31
+
32
+
33
+ class SpatiotemporalAnalysis:
34
+ def __init__(self, filename, compress=True, allow_pickle=True, force=False):
35
+ """Construct a spatiotemporal _segmentation object
36
+
37
+ This is the basic data structure for the 4D objects by change algorithm
38
+ and its derived variants. It manages storage of M3C2 distances and other
39
+ intermediate results for a time series of epochs. The original point clouds
40
+ themselves are not needed after initial distance calculation and additional
41
+ epochs can be added to an existing analysis. The class uses a disk backend
42
+ to store information and allows lazy loading of additional data like e.g.
43
+ M3C2 uncertainty values for postprocessing.
44
+
45
+ :param filename:
46
+ The filename used for this analysis. If it does not exist on the file
47
+ system, a new analysis is created. Otherwise, the data is loaded from the existing file.
48
+ :type filename: str
49
+ :param compress:
50
+ Whether to compress the stored data. This is a tradeoff decision between
51
+ disk space and runtime. Especially appending new epochs to an existing
52
+ analysis is an operation whose runtime can easily be dominated by
53
+ decompression/compression of data.
54
+ :type compress: bool
55
+ :param allow_pickle:
56
+ Whether py4dgeo is allowed to use the pickle module to store some data
57
+ in the file representation of the analysis. If set to false, some data
58
+ may not be stored and needs to be recomputed instead.
59
+ :type allow_pickle: bool
60
+ :param force:
61
+ Force creation of a new analysis object, even if a file of this name
62
+ already exists.
63
+ """
64
+
65
+ # Store the given parameters
66
+ self.filename = find_file(filename, fatal=False)
67
+ self.compress = compress
68
+ self.allow_pickle = allow_pickle
69
+
70
+ # Instantiate some properties used later on
71
+ self._m3c2 = None
72
+
73
+ # This is the cache for lazily loaded data
74
+ self._corepoints = None
75
+ self._distances = None
76
+ self._smoothed_distances = None
77
+ self._uncertainties = None
78
+ self._reference_epoch = None
79
+
80
+ # If the filename does not already exist, we create a new archive
81
+ if force or not os.path.exists(self.filename):
82
+ logger.info(f"Creating analysis file {self.filename}")
83
+ with zipfile.ZipFile(self.filename, mode="w") as zf:
84
+ # Write the segmentation file format version number
85
+ zf.writestr(
86
+ "SEGMENTATION_FILE_FORMAT",
87
+ str(PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION),
88
+ )
89
+
90
+ # Write the compression setting used for all suboperations
91
+ zf.writestr("USE_COMPRESSION", str(self.compress))
92
+
93
+ # Assert that the segmentation file format is still valid
94
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
95
+ # Read the segmentation file version number and compare to current
96
+ version = int(zf.read("SEGMENTATION_FILE_FORMAT").decode())
97
+ if version != PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION:
98
+ raise Py4DGeoError("_segmentation file format is out of date!")
99
+
100
+ # Read back the compression flag (stored as str(bool) above, so compare against "True" instead of using eval)
101
+ self.compress = zf.read("USE_COMPRESSION").decode() == "True"
102
+
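An illustrative usage sketch (editor's addition, not part of the packaged module) of how such an analysis archive might be created and later re-opened; the file name is a placeholder:

# Editor's sketch: creating and re-opening a disk-backed analysis archive.
from py4dgeo.segmentation import SpatiotemporalAnalysis

# Creates "my_analysis.zip" if it does not exist yet, otherwise loads it.
analysis = SpatiotemporalAnalysis("my_analysis.zip", compress=True)

# Re-opening the same file later restores the stored state from disk.
analysis = SpatiotemporalAnalysis("my_analysis.zip")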
103
+ @property
104
+ def reference_epoch(self):
105
+ """Access the reference epoch of this analysis"""
106
+
107
+ if self._reference_epoch is None:
108
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
109
+ # Double check that the reference has already been set
110
+ if "reference_epoch.zip" not in zf.namelist():
111
+ raise Py4DGeoError("Reference epoch for analysis not yet set")
112
+
113
+ # Extract it from the archive
114
+ with tempfile.TemporaryDirectory() as tmp_dir:
115
+ ref_epochfile = zf.extract("reference_epoch.zip", path=tmp_dir)
116
+ self._reference_epoch = Epoch.load(ref_epochfile)
117
+
118
+ return self._reference_epoch
119
+
120
+ @reference_epoch.setter
121
+ def reference_epoch(self, epoch):
122
+ """Set the reference epoch of this analysis (only possible once)"""
123
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
124
+ # If we already have a reference epoch, the user should start a
125
+ # new analysis instead
126
+ if "reference_epoch.zip" in zf.namelist():
127
+ raise Py4DGeoError(
128
+ "Reference epoch cannot be changed - please start a new analysis"
129
+ )
130
+
131
+ # Ensure that we do have a timestamp on the epoch
132
+ epoch = check_epoch_timestamp(epoch)
133
+
134
+ # Ensure that the KDTree is built - no-op if triggered by the user
135
+ epoch.build_kdtree()
136
+
137
+ # Write the reference epoch into the archive
138
+ with tempfile.TemporaryDirectory() as tmp_dir:
139
+ epochfilename = os.path.join(tmp_dir, "reference_epoch.zip")
140
+ epoch.save(epochfilename)
141
+ zf.write(epochfilename, arcname="reference_epoch.zip")
142
+
143
+ # Also cache it directly
144
+ self._reference_epoch = epoch
145
+
146
+ @reference_epoch.deleter
147
+ def reference_epoch(self):
148
+ self._reference_epoch = None
149
+
150
+ @property
151
+ def corepoints(self):
152
+ """Access the corepoints of this analysis"""
153
+ if self._corepoints is None:
154
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
155
+ # Double check that the reference has already been set
156
+ if "corepoints.zip" not in zf.namelist():
157
+ raise Py4DGeoError("Corepoints for analysis not yet set")
158
+
159
+ # Extract it from the archive
160
+ with tempfile.TemporaryDirectory() as tmp_dir:
161
+ cpfile = zf.extract("corepoints.zip", path=tmp_dir)
162
+ self._corepoints = Epoch.load(cpfile)
163
+
164
+ return self._corepoints
165
+
166
+ @corepoints.setter
167
+ def corepoints(self, _corepoints):
168
+ """Set the corepoints for this analysis (only possible once)"""
169
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
170
+ # If we already have corepoints in the archive, the user should start a
171
+ # new analysis instead
172
+ if "corepoints.zip" in zf.namelist():
173
+ raise Py4DGeoError(
174
+ "Corepoints cannot be changed - please start a new analysis"
175
+ )
176
+
177
+ # Ensure that the corepoints are stored as an epoch and build its KDTree
178
+ self._corepoints = as_epoch(_corepoints)
179
+ self._corepoints.build_kdtree()
180
+
181
+ # Write the corepoints into the archive
182
+ with tempfile.TemporaryDirectory() as tmp_dir:
183
+ cpfilename = os.path.join(tmp_dir, "corepoints.zip")
184
+ self._corepoints.save(cpfilename)
185
+ zf.write(cpfilename, arcname="corepoints.zip")
186
+
187
+ @corepoints.deleter
188
+ def corepoints(self):
189
+ self._corepoints = None
190
+
191
+ @property
192
+ def m3c2(self):
193
+ """Access the M3C2 algorithm of this analysis"""
194
+ # Note: this is None until the user assigns an M3C2 instance
195
+ return self._m3c2
196
+
197
+ @m3c2.setter
198
+ def m3c2(self, _m3c2):
199
+ """Set the M3C2 algorithm of this analysis"""
200
+ self._m3c2 = _m3c2
201
+
202
+ @property
203
+ def timedeltas(self):
204
+ """Access the sequence of time stamp deltas for the time series"""
205
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
206
+ if "timestamps.json" not in zf.namelist():
207
+ return []
208
+
209
+ # Read timedeltas
210
+ with tempfile.TemporaryDirectory() as tmp_dir:
211
+ timestampsfile = zf.extract("timestamps.json", path=tmp_dir)
212
+ with open(timestampsfile) as f:
213
+ timedeltas = json.load(f)
214
+
215
+ # Convert the serialized deltas to datetime.timedelta
216
+ return [datetime.timedelta(**data) for data in timedeltas]
217
+
218
+ @timedeltas.setter
219
+ def timedeltas(self, _timedeltas):
220
+ """Set the timedeltas manually
221
+
222
+ This is only possible exactly once and mutually exclusive with adding
223
+ epochs via the :ref:`add_epochs` method.
224
+ """
225
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
226
+ # If we already have timestamps in the archive, this is not possible
227
+ if "timestamps.json" in zf.namelist():
228
+ raise Py4DGeoError(
229
+ "Timestamps can only be set on freshly created analysis instances"
230
+ )
231
+
232
+ with tempfile.TemporaryDirectory() as tmp_dir:
233
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
234
+ with open(timestampsfile, "w") as f:
235
+ json.dump(
236
+ [
237
+ {
238
+ "days": td.days,
239
+ "seconds": td.seconds,
240
+ "microseconds": td.microseconds,
241
+ }
242
+ for td in _timedeltas
243
+ ],
244
+ f,
245
+ )
246
+ zf.write(timestampsfile, arcname="timestamps.json")
247
+
248
+ @property
249
+ def distances(self):
250
+ """Access the M3C2 distances of this analysis"""
251
+
252
+ if self._distances is None:
253
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
254
+ filename = self._numpy_filename("distances")
255
+ if filename not in zf.namelist():
256
+ self.distances = np.empty(
257
+ (self.corepoints.cloud.shape[0], 0), dtype=np.float64
258
+ )
259
+ return self._distances
260
+
261
+ with tempfile.TemporaryDirectory() as tmp_dir:
262
+ distancefile = zf.extract(filename, path=tmp_dir)
263
+ read_func = (
264
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
265
+ )
266
+ self._distances = read_func(distancefile)
267
+
268
+ return self._distances
269
+
270
+ @distances.setter
271
+ def distances(self, _distances):
272
+ """Set the distances manually
273
+
274
+ This is only possible exactly once and mutually exclusive with adding
275
+ epochs via the :ref:`add_epochs` method.
276
+ """
277
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
278
+ filename = self._numpy_filename("distances")
279
+ write_func = np.savez_compressed if self.compress else np.save
280
+
281
+ # If we already have distances in the archive, this is not possible
282
+ if filename in zf.namelist():
283
+ raise Py4DGeoError(
284
+ "Distances can only be set on freshly created analysis instances, use add_epochs instead."
285
+ )
286
+
287
+ with tempfile.TemporaryDirectory() as tmp_dir:
288
+ distancesfile = os.path.join(tmp_dir, filename)
289
+ write_func(distancesfile, _distances)
290
+ zf.write(distancesfile, arcname=filename)
291
+
292
+ self._distances = _distances
293
+
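Where distances were computed outside of py4dgeo, the timedeltas and distances setters above can populate a fresh analysis directly instead of calling add_epochs(). A hedged sketch with synthetic data, following the corepoints-by-epochs array convention used in this module:

# Editor's sketch (not part of the packaged module): filling a fresh
# analysis with externally computed distances.
import datetime
import numpy as np
from py4dgeo.segmentation import SpatiotemporalAnalysis

analysis = SpatiotemporalAnalysis("external_data.zip")
analysis.corepoints = np.random.rand(50, 3)

# One timedelta per epoch, measured relative to the reference epoch.
analysis.timedeltas = [datetime.timedelta(hours=h) for h in range(1, 11)]

# Space-time distance array of shape (num_corepoints, num_epochs).
analysis.distances = np.random.rand(50, 10)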
294
+ @distances.deleter
295
+ def distances(self):
296
+ self._distances = None
297
+
298
+ @property
299
+ def smoothed_distances(self):
300
+ if self._smoothed_distances is None:
301
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
302
+ filename = self._numpy_filename("smoothed_distances")
303
+ if filename in zf.namelist():
304
+ with tempfile.TemporaryDirectory() as tmp_dir:
305
+ smoothedfile = zf.extract(filename, path=tmp_dir)
306
+ read_func = (
307
+ (lambda f: np.load(f)["arr_0"])
308
+ if self.compress
309
+ else np.load
310
+ )
311
+ self._smoothed_distances = read_func(smoothedfile)
312
+
313
+ return self._smoothed_distances
314
+
315
+ @smoothed_distances.setter
316
+ def smoothed_distances(self, _smoothed_distances):
317
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
318
+ filename = self._numpy_filename("smoothed_distances")
319
+ write_func = np.savez_compressed if self.compress else np.save
320
+
321
+ with tempfile.TemporaryDirectory() as tmp_dir:
322
+ smoothedfile = os.path.join(tmp_dir, filename)
323
+ write_func(smoothedfile, _smoothed_distances)
324
+ zf.write(smoothedfile, arcname=filename)
325
+
326
+ self._smoothed_distances = _smoothed_distances
327
+
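The smoothed array stored by this setter is typically produced with the temporal_averaging() helper defined at the end of this module; a short editor's sketch:

# Editor's sketch (not part of the packaged module): storing a smoothed
# copy of the distance array for use by the region growing algorithm.
from py4dgeo.segmentation import SpatiotemporalAnalysis, temporal_averaging

analysis = SpatiotemporalAnalysis("my_analysis.zip")
analysis.smoothed_distances = temporal_averaging(
    analysis.distances, smoothing_window=24
)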
328
+ @smoothed_distances.deleter
329
+ def smoothed_distances(self):
330
+ self._smoothed_distances = None
331
+
332
+ @property
333
+ def uncertainties(self):
334
+ """Access the M3C2 uncertainties of this analysis"""
335
+
336
+ if self._uncertainties is None:
337
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
338
+ filename = self._numpy_filename("uncertainties")
339
+ if filename not in zf.namelist():
340
+ self.uncertainties = np.empty(
341
+ (self.corepoints.cloud.shape[0], 0),
342
+ dtype=np.dtype(
343
+ [
344
+ ("lodetection", "<f8"),
345
+ ("spread1", "<f8"),
346
+ ("num_samples1", "<i8"),
347
+ ("spread2", "<f8"),
348
+ ("num_samples2", "<i8"),
349
+ ]
350
+ ),
351
+ )
352
+ return self._uncertainties
353
+
354
+ with tempfile.TemporaryDirectory() as tmp_dir:
355
+ uncertaintyfile = zf.extract(filename, path=tmp_dir)
356
+ read_func = (
357
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
358
+ )
359
+ self._uncertainties = read_func(uncertaintyfile)
360
+
361
+ return self._uncertainties
362
+
363
+ @uncertainties.setter
364
+ def uncertainties(self, _uncertainties):
365
+ """Set the uncertainties manually
366
+
367
+ This is only possible exactly once and mutually exclusive with adding
368
+ epochs via the :ref:`add_epochs` method.
369
+ """
370
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
371
+ filename = self._numpy_filename("uncertainties")
372
+ write_func = np.savez_compressed if self.compress else np.save
373
+
374
+ # If we already have uncertainties in the archive, this is not possible
375
+ if filename in zf.namelist():
376
+ raise Py4DGeoError(
377
+ "Uncertainties can only be set on freshly created analysis instances, use add_epochs instead."
378
+ )
379
+
380
+ with tempfile.TemporaryDirectory() as tmp_dir:
381
+ uncertaintiesfile = os.path.join(tmp_dir, filename)
382
+ write_func(uncertaintiesfile, _uncertainties)
383
+ zf.write(uncertaintiesfile, arcname=filename)
384
+
385
+ self._uncertainties = _uncertainties
386
+
387
+ @uncertainties.deleter
388
+ def uncertainties(self):
389
+ self._uncertainties = None
390
+
391
+ def add_epochs(self, *epochs):
392
+ """Add a numbers of epochs to the existing analysis"""
393
+
394
+ # Remove intermediate results from the archive
395
+ self.invalidate_results()
396
+
397
+ # Assert that all epochs have a timestamp
398
+ for epoch in epochs:
399
+ check_epoch_timestamp(epoch)
400
+
401
+ # Lazily fetch required data
402
+ reference_epoch = self.reference_epoch
403
+ timedeltas = self.timedeltas
404
+
405
+ # Collect the calculated results to only add them once to the archive
406
+ new_distances = []
407
+ new_uncertainties = []
408
+
409
+ # Iterate over the given epochs
410
+ for i, epoch in enumerate(sorted(epochs, key=lambda e: e.timestamp)):
411
+ with logger_context(f"Adding epoch {i+1}/{len(epochs)} to analysis object"):
412
+ # Prepare the M3C2 instance
413
+ self.m3c2.corepoints = self.corepoints.cloud
414
+ self.m3c2.epochs = (reference_epoch, epoch)
415
+
416
+ # Calculate the M3C2 distances
417
+ d, u = self.m3c2.calculate_distances(reference_epoch, epoch)
418
+ new_distances.append(d)
419
+ new_uncertainties.append(u)
420
+ timedeltas.append(epoch.timestamp - reference_epoch.timestamp)
421
+
422
+ # We do not need the reference_epoch at this point
423
+ del self.reference_epoch
424
+
425
+ # Prepare all archive data in a temporary directory
426
+ with tempfile.TemporaryDirectory() as tmp_dir:
427
+ # Write a new timestamps file
428
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
429
+ with open(timestampsfile, "w") as f:
430
+ json.dump(
431
+ [
432
+ {
433
+ "days": td.days,
434
+ "seconds": td.seconds,
435
+ "microseconds": td.microseconds,
436
+ }
437
+ for td in timedeltas
438
+ ],
439
+ f,
440
+ )
441
+
442
+ # Depending on whether we compress, we use different numpy functionality
443
+ write_func = np.savez_compressed if self.compress else np.save
444
+ distance_filename = self._numpy_filename("distances")
445
+ uncertainty_filename = self._numpy_filename("uncertainties")
446
+
447
+ with logger_context("Rearranging space-time array in memory"):
448
+ # Load the distance array and append new data
449
+ distance_file = os.path.join(tmp_dir, distance_filename)
450
+ write_func(
451
+ distance_file,
452
+ np.concatenate(
453
+ (self.distances, np.column_stack(tuple(new_distances))), axis=1
454
+ ),
455
+ )
456
+
457
+ # Load the uncertainty array and append new data
458
+ uncertainty_file = os.path.join(tmp_dir, uncertainty_filename)
459
+ write_func(
460
+ uncertainty_file,
461
+ np.concatenate(
462
+ (self.uncertainties, np.column_stack(tuple(new_uncertainties))),
463
+ axis=1,
464
+ ),
465
+ )
466
+
467
+ # Invalidate potential caches for distances/uncertainties
468
+ self._distances = None
469
+ self._uncertainties = None
470
+
471
+ # Dump the updated files into the archive
472
+ with logger_context("Updating disk-based analysis archive with new epochs"):
473
+ with UpdateableZipFile(self.filename, mode="a") as zf:
474
+ if "timestamps.json" in zf.namelist():
475
+ zf.remove("timestamps.json")
476
+ zf.write(timestampsfile, arcname="timestamps.json")
477
+ if distance_filename in zf.namelist():
478
+ zf.remove(distance_filename)
479
+ zf.write(distance_file, arcname=distance_filename)
480
+ if uncertainty_filename in zf.namelist():
481
+ zf.remove(uncertainty_filename)
482
+ zf.write(uncertainty_file, arcname=uncertainty_filename)
483
+
484
+ # (Potentially) remove caches
485
+ del self.distances
486
+ del self.uncertainties
487
+
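A hedged end-to-end sketch of the workflow around add_epochs(); the point clouds are synthetic, and the M3C2 constructor arguments and the assignability of Epoch.timestamp are assumptions that may differ between py4dgeo versions:

# Editor's sketch (not part of the packaged module). M3C2 parameters are
# illustrative placeholders.
import datetime
import numpy as np
from py4dgeo.epoch import Epoch
from py4dgeo.m3c2 import M3C2
from py4dgeo.segmentation import SpatiotemporalAnalysis

analysis = SpatiotemporalAnalysis("my_analysis.zip")

# The reference epoch needs a timestamp (see check_epoch_timestamp below).
reference = Epoch(np.random.rand(1000, 3))
reference.timestamp = datetime.datetime(2022, 1, 1)  # assumed to be assignable
analysis.reference_epoch = reference

# Core points at which the change time series are evaluated.
analysis.corepoints = np.random.rand(100, 3)

# The M3C2 instance used for distance calculation; its corepoints and
# epochs are filled in by add_epochs() itself (see above).
analysis.m3c2 = M3C2(cyl_radii=(0.5,), normal_radii=(0.5, 1.0, 2.0))

# Append a later epoch; distances and uncertainties are computed against
# the stored reference epoch and written into the archive.
later = Epoch(np.random.rand(1000, 3))
later.timestamp = datetime.datetime(2022, 1, 2)
analysis.add_epochs(later)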
488
+ @property
489
+ def seeds(self):
490
+ """The list of seed candidates for this analysis"""
491
+
492
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
493
+ if "seeds.pickle" not in zf.namelist():
494
+ return None
495
+
496
+ with tempfile.TemporaryDirectory() as tmp_dir:
497
+ zf.extract("seeds.pickle", path=tmp_dir)
498
+ with open(os.path.join(tmp_dir, "seeds.pickle"), "rb") as f:
499
+ return pickle.load(f)
500
+
501
+ @seeds.setter
502
+ def seeds(self, _seeds):
503
+ # Assert that we received the correct type
504
+ for seed in _seeds:
505
+ if not isinstance(seed, RegionGrowingSeed):
506
+ raise Py4DGeoError(
507
+ "Seeds are expected to inherit from RegionGrowingSeed"
508
+ )
509
+
510
+ if not self.allow_pickle:
511
+ return
512
+
513
+ with UpdateableZipFile(self.filename, mode="a") as zf:
514
+ if "seeds.pickle" in zf.namelist():
515
+ zf.remove("seeds.pickle")
516
+
517
+ with tempfile.TemporaryDirectory() as tmp_dir:
518
+ seedsfile = os.path.join(tmp_dir, "seeds.pickle")
519
+ with open(seedsfile, "wb") as f:
520
+ pickle.dump(_seeds, f)
521
+
522
+ zf.write(seedsfile, arcname="seeds.pickle")
523
+
524
+ @property
525
+ def objects(self):
526
+ """The list of objects by change for this analysis"""
527
+
528
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
529
+ if "objects.pickle" not in zf.namelist():
530
+ return None
531
+
532
+ with tempfile.TemporaryDirectory() as tmp_dir:
533
+ zf.extract("objects.pickle", path=tmp_dir)
534
+ with open(os.path.join(tmp_dir, "objects.pickle"), "rb") as f:
535
+ return pickle.load(f)
536
+
537
+ @objects.setter
538
+ def objects(self, _objects):
539
+ # Assert that we received the correct type
540
+ for seed in _objects:
541
+ if not isinstance(seed, ObjectByChange):
542
+ raise Py4DGeoError(
543
+ "Objects are expected to inherit from ObjectByChange"
544
+ )
545
+
546
+ if not self.allow_pickle:
547
+ return
548
+
549
+ with UpdateableZipFile(self.filename, mode="a") as zf:
550
+ if "objects.pickle" in zf.namelist():
551
+ zf.remove("objects.pickle")
552
+
553
+ with tempfile.TemporaryDirectory() as tmp_dir:
554
+ objectsfile = os.path.join(tmp_dir, "objects.pickle")
555
+ with open(objectsfile, "wb") as f:
556
+ pickle.dump(_objects, f)
557
+
558
+ zf.write(objectsfile, arcname="objects.pickle")
559
+
560
+ def invalidate_results(self, seeds=True, objects=True, smoothed_distances=True):
561
+ """Invalidate (and remove) calculated results
562
+
563
+ This is automatically called when new epochs are added or when
564
+ an algorithm sets the :code:`force` option.
565
+ """
566
+
567
+ logger.info(
568
+ f"Removing intermediate results from the analysis file {self.filename}"
569
+ )
570
+ with UpdateableZipFile(self.filename, mode="a") as zf:
571
+ if seeds and "seeds.pickle" in zf.namelist():
572
+ zf.remove("seeds.pickle")
573
+
574
+ if objects and "objects.pickle" in zf.namelist():
575
+ zf.remove("objects.pickle")
576
+
577
+ smoothed_file = self._numpy_filename("smoothed_distances")
578
+ if smoothed_distances and smoothed_file in zf.namelist():
579
+ zf.remove(smoothed_file)
580
+
581
+ def _numpy_filename(self, name):
582
+ extension = "npz" if self.compress else "npy"
583
+ return f"{name}.{extension}"
584
+
585
+ @property
586
+ def distances_for_compute(self):
587
+ """Retrieve the distance array used for computation
588
+
589
+ This might be the raw data or smoothed data, based on whether
590
+ smoothed distances were provided by the user.
591
+ """
592
+ distances = self.smoothed_distances
593
+ if distances is None:
594
+ distances = self.distances
595
+ return distances
596
+
597
+
598
+ class RegionGrowingAlgorithmBase:
599
+ def __init__(
600
+ self,
601
+ neighborhood_radius=1.0,
602
+ thresholds=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
603
+ min_segments=20,
604
+ max_segments=None,
605
+ ):
606
+ """Construct a spatiotemporal _segmentation algorithm.
607
+
608
+ This class can be derived from to customize the algorithm behaviour.
609
+
610
+ :param neighborhood_radius:
611
+ The size of the neighborhood of a core point. All core points within
612
+ this radius are considered adjacent and are therefore treated as
613
+ candidates for inclusion in the region growing algorithm.
614
+ :type neighborhood_radius: float
615
+ :param thresholds:
616
+ A list of thresholds to use as candidates in 4D-OBC's adaptive
617
+ thresholding procedure.
618
+ :type thresholds: list
619
+ :param min_segments:
620
+ The minimum number of core points in an object-by-change. Defaults to
621
+ 20.
622
+ :type min_segments: int
623
+ :param max_segments:
624
+ The maximum number of core points in an object-by-change. This is mainly
625
+ used to bound the runtime of expensive region growing. By default, no
626
+ maximum is applied.
627
+ :type max_segments: int
628
+ """
629
+
630
+ self.neighborhood_radius = neighborhood_radius
631
+ self.thresholds = thresholds
632
+ self.min_segments = min_segments
633
+ self.max_segments = max_segments
634
+
635
+ self._analysis = None
636
+
637
+ def distance_measure(self):
638
+ """Distance measure between two time series
639
+
640
+ Expected to return a function that accepts two time series and returns
641
+ the distance.
642
+ """
643
+
644
+ return _py4dgeo.normalized_dtw_distance
645
+
646
+ def find_seedpoints(self):
647
+ """Calculate seedpoints for the region growing algorithm"""
648
+
649
+ raise NotImplementedError
650
+
651
+ def seed_sorting_scorefunction(self):
652
+ """A function that computes a score for a seed candidate
653
+
654
+ This function is used to prioritize seed candidates.
655
+ """
656
+
657
+ # The base class does not perform sorting.
658
+ return lambda seed: 0.0
659
+
660
+ def filter_objects(self, obj):
661
+ """A filter for objects produced by the region growing algorithm
662
+
663
+ Objects are discarded if this method returns False.
664
+ """
665
+
666
+ # The base class does not perform filtering
667
+ return True
668
+
669
+ @property
670
+ def analysis(self):
671
+ """Access the analysis object that the algorithm operates on
672
+
673
+ This is only available after :ref:`run` has been called.
674
+ """
675
+ if self._analysis is None:
676
+ raise Py4DGeoError(
677
+ "Analysis object is only available when the algorithm is run"
678
+ )
679
+ return self._analysis
680
+
681
+ def run(self, analysis, force=False):
682
+ """Calculate the _segmentation
683
+
684
+ :param analysis:
685
+ The analysis object we are working with.
686
+ :type analysis: py4dgeo.segmentation.SpatiotemporalAnalysis
687
+ :param force:
688
+ Force recalculation of results. If false, some intermediate results will be
689
+ restored from the analysis object instead of being recalculated.
690
+ """
691
+
692
+ # Make the analysis object known to all members
693
+ self._analysis = analysis
694
+
695
+ # Enforce the removal of intermediate results
696
+ if force:
697
+ analysis.invalidate_results()
698
+
699
+ # Return pre-calculated objects if they are available
700
+ precalculated = analysis.objects
701
+ if precalculated is not None:
702
+ logger.info("Reusing objects by change stored in analysis object")
703
+ return precalculated
704
+
705
+ # Get corepoints from the analysis object and build a KDTree on them
706
+ corepoints = as_epoch(analysis.corepoints)
707
+ corepoints.build_kdtree()
708
+
709
+ # Calculate the list of seed points and sort them
710
+ seeds = analysis.seeds
711
+ if seeds is None:
712
+ with logger_context("Find seed candidates in time series"):
713
+ seeds = self.find_seedpoints()
714
+
715
+ # Sort the seed points
716
+ with logger_context("Sort seed candidates by priority"):
717
+ seeds = list(sorted(seeds, key=self.seed_sorting_scorefunction()))
718
+
719
+ # Store the seeds
720
+ analysis.seeds = seeds
721
+ else:
722
+ logger.info("Reusing seed candidates stored in analysis object")
723
+
724
+ objects = []
725
+
726
+ # Iterate over the seeds to maybe turn them into objects
727
+ for i, seed in enumerate(seeds):
728
+ # Check all already calculated objects for overlap with this seed.
729
+ found = False
730
+ for obj in objects:
731
+ if seed.index in obj.indices and (
732
+ obj.end_epoch > seed.start_epoch
733
+ and seed.end_epoch > obj.start_epoch
734
+ ):
735
+ found = True
736
+ break
737
+
738
+ # If we found an overlap, we skip this seed
739
+ if found:
740
+ continue
741
+
742
+ # Apply a numeric default to the max_segments parameter
743
+ max_segments = self.max_segments
744
+ if max_segments is None:
745
+ max_segments = corepoints.cloud.shape[0] + 1
746
+
747
+ data = _py4dgeo.RegionGrowingAlgorithmData(
748
+ analysis.distances_for_compute,
749
+ corepoints,
750
+ self.neighborhood_radius,
751
+ seed._seed,
752
+ self.thresholds,
753
+ self.min_segments,
754
+ max_segments,
755
+ )
756
+
757
+ # Perform the region growing
758
+ with logger_context(
759
+ f"Performing region growing on seed candidate {i+1}/{len(seeds)}"
760
+ ):
761
+ objdata = _py4dgeo.region_growing(data, self.distance_measure())
762
+
763
+ # If the returned object has 0 indices, the min_segments threshold was violated
764
+ if objdata.indices_distances:
765
+ obj = ObjectByChange(objdata, seed, analysis)
766
+ if self.filter_objects(obj):
767
+ objects.append(obj)
768
+
769
+ # If the returned object is larger than max_segments we issue a warning
770
+ if len(objdata.indices_distances) >= max_segments:
771
+ logger.warning(
772
+ f"An object by change exceeded the given maximum size of {max_segments}"
773
+ )
774
+
775
+ # Store the results in the analysis object
776
+ analysis.objects = objects
777
+
778
+ # Potentially free the cached distance arrays from memory # TODO Why do we remove these?
779
+ del analysis.smoothed_distances
780
+ del analysis.distances
781
+
782
+ return objects
783
+
784
+
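As the base class docstring notes, the algorithm behaviour can be customized by deriving and overriding its hooks; a minimal hedged sketch that tightens the object filter of the 4D-OBC implementation defined below:

# Editor's sketch (not part of the packaged module): a hypothetical variant
# that additionally discards short-lived objects.
from py4dgeo.segmentation import RegionGrowingAlgorithm


class LongLivedObjectsAlgorithm(RegionGrowingAlgorithm):
    def filter_objects(self, obj):
        # Keep the default coefficient-of-variation filter ...
        if not super().filter_objects(obj):
            return False
        # ... and additionally require a minimum duration (placeholder value).
        return obj.end_epoch - obj.start_epoch >= 48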
785
+ class RegionGrowingAlgorithm(RegionGrowingAlgorithmBase):
786
+ def __init__(
787
+ self,
788
+ seed_subsampling=1,
789
+ seed_candidates=None,
790
+ window_width=24,
791
+ window_min_size=12,
792
+ window_jump=1,
793
+ window_penalty=1.0,
794
+ minperiod=24,
795
+ height_threshold=0.0,
796
+ **kwargs,
797
+ ):
798
+ """Construct the 4D-OBC algorithm.
799
+
800
+ :param seed_subsampling:
801
+ A subsampling factor for the set of core points for the generation
802
+ of segmentation seed candidates. This can be used to speed up
803
+ the generation of seeds. The default of 1 does not perform any
804
+ subsampling, a value of, e.g., 10 would only consider every 10th
805
+ corepoint for adding seeds.
806
+ :type seed_subsampling: int
807
+ :param seed_candidates:
808
+ A set of indices specifying which core points should be used for seed detection. This can be used to perform segmentation for selected locations. The default of None does not perform any selection and uses all corepoints. The subsampling parameter is applied additionally.
809
+ :type seed_candidates: list
810
+ :param window_width:
811
+ The width of the sliding temporal window for change point detection. The sliding window
812
+ moves along the signal and determines the discrepancy between the first and the second
813
+ half of the window (i.e. subsequent time series segments within the window width). The
814
+ default value is 24, corresponding to one day in case of hourly data.
815
+ :type window_width: int
816
+ :param window_min_size:
817
+ The minimum temporal distance needed between two seed candidates, for the second one to be considered.
818
+ The default value is 12.
819
+ :type window_min_size: int
820
+ :param window_jump:
821
+ The interval on which the sliding temporal window moves and checks for seed candidates.
822
+ The default value is 1, corresponding to a check for every epoch in the time series.
823
+ :type window_jump: int
824
+ :param window_penalty:
825
+ A complexity penalty that determines how strict the change point detection is.
826
+ A higher penalty results in stricter change point detection (i.e., fewer points are detected), while a low
827
+ value results in a large amount of detected change points. The default value is 1.0.
828
+ :type window_penalty: float
829
+ :param minperiod:
830
+ The minimum period of a detected change to be considered as seed candidate for subsequent
831
+ segmentation. The default is 24, corresponding to one day for hourly data.
832
+ :type minperiod: int
833
+ :param height_threshold:
834
+ The height threshold represents the required magnitude of a detected change to be considered
835
+ as seed candidate for subsequent segmentation. The magnitude of a detected change is derived
836
+ as unsigned difference between magnitude (i.e. distance) at start epoch and peak magnitude.
837
+ The default is 0.0, in which case all detected changes are used as seed candidates.
838
+ :type height_threshold: float
839
+
840
+ """
841
+
842
+ # Initialize base class
843
+ super().__init__(**kwargs)
844
+
845
+ # Store the given parameters
846
+ self.seed_subsampling = seed_subsampling
847
+ self.seed_candidates = seed_candidates
848
+ self.window_width = window_width
849
+ self.window_min_size = window_min_size
850
+ self.window_jump = window_jump
851
+ self.window_penalty = window_penalty
852
+ self.minperiod = minperiod
853
+ self.height_threshold = height_threshold
854
+
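A hedged sketch of configuring and running the algorithm on a previously populated analysis archive; all parameter values are placeholders:

# Editor's sketch (not part of the packaged module).
from py4dgeo.segmentation import RegionGrowingAlgorithm, SpatiotemporalAnalysis

analysis = SpatiotemporalAnalysis("my_analysis.zip")

algorithm = RegionGrowingAlgorithm(
    neighborhood_radius=1.0,  # spatial adjacency radius for core points
    seed_subsampling=10,      # only every 10th core point is tested for seeds
    window_width=24,          # one day of hourly epochs
    min_segments=20,
)

# Returns a list of ObjectByChange instances and caches them in the archive.
objects = algorithm.run(analysis)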
855
+ def find_seedpoints(self):
856
+ """Calculate seedpoints for the region growing algorithm"""
857
+
858
+ # These are some arguments used below that we might consider
859
+ # exposing to the user in the future. For now, they are considered
860
+ # internal, but they are still defined here for readability.
861
+ window_costmodel = "l1"
862
+ # window_min_size = 12
863
+ # window_jump = 1
864
+ # window_penalty = 1.0
865
+
866
+ # The list of generated seeds
867
+ seeds = []
868
+
869
+ # The list of core point indices to check as seeds
870
+ if self.seed_candidates is None:
871
+ if self.seed_subsampling == 0:
872
+ raise Py4DGeoError(
873
+ "Subsampling factor cannot be 0, use 1 or any integer larger than 1"
874
+ )
875
+ # Use all corepoints if no selection specified, considering subsampling
876
+ seed_candidates_curr = range(
877
+ 0, self.analysis.distances_for_compute.shape[0], self.seed_subsampling
878
+ )
879
+ else:
880
+ # Use the specified corepoint indices, but consider subsampling
881
+ seed_candidates_curr = self.seed_candidates # [::self.seed_subsampling]
882
+
883
+ # Iterate over all time series to analyse their change points
884
+ for i in seed_candidates_curr:
885
+ # Extract the time series and interpolate its nan values
886
+ timeseries = self.analysis.distances_for_compute[i, :]
887
+ bad_indices = np.isnan(timeseries)
888
+ num_nans = np.count_nonzero(bad_indices)
889
+
890
+ # If there are too many nan values, this time series does not make sense
891
+ if num_nans > timeseries.shape[0] - 3:
892
+ continue
893
+
894
+ # If there are nan values, we try fixing things by interpolation
895
+ if num_nans > 0:
896
+ good_indices = np.logical_not(bad_indices)
897
+ timeseries[bad_indices] = np.interp(
898
+ bad_indices.nonzero()[0],
899
+ good_indices.nonzero()[0],
900
+ timeseries[good_indices],
901
+ )
902
+
903
+ # Run detection of change points
904
+ cpdata = _py4dgeo.ChangePointDetectionData(
905
+ ts=timeseries,
906
+ window_size=self.window_width,
907
+ min_size=self.window_min_size,
908
+ jump=self.window_jump,
909
+ penalty=self.window_penalty,
910
+ )
911
+ changepoints = _py4dgeo.change_point_detection(cpdata)[:-1]
912
+
913
+ # Shift the time series to positive values
914
+ timeseries = timeseries + abs(np.nanmin(timeseries) + 0.1)
915
+ # create a flipped version for negative change volumes
916
+ timeseries_flipped = timeseries * -1.0 + abs(np.nanmax(timeseries)) + 0.1
917
+
918
+ # Create seeds for this timeseries
919
+ corepoint_seeds = []
920
+ for start_idx in changepoints:
921
+ # Skip this changepoint if it was included into a previous seed
922
+ if corepoint_seeds and start_idx <= corepoint_seeds[-1].end_epoch:
923
+ continue
924
+
925
+ # Skip this changepoint if it is too close to the end
926
+ if start_idx >= timeseries.shape[0] - self.minperiod:
927
+ break
928
+
929
+ # Decide whether we need to use the flipped time series
930
+ used_timeseries = timeseries
931
+ if timeseries[start_idx] >= timeseries[start_idx + self.minperiod]:
932
+ used_timeseries = timeseries_flipped
933
+
934
+ previous_volume = -999.9
935
+ for target_idx in range(start_idx + 1, timeseries.shape[0]):
936
+ # Calculate the change volume
937
+ height = used_timeseries[start_idx]
938
+ volume = np.nansum(
939
+ used_timeseries[start_idx : target_idx + 1] - height
940
+ )
941
+
942
+ # Check whether the volume started decreasing
943
+ if previous_volume > volume:
944
+ # Only add seed if larger than the minimum period
945
+ if target_idx - start_idx >= self.minperiod:
946
+ corepoint_seeds.append(
947
+ RegionGrowingSeed(i, start_idx, target_idx)
948
+ )
949
+ break
950
+ else:
951
+ previous_volume = volume
952
+
953
+ # This causes a seed to always be detected if the volume doesn't decrease before present
954
+ # Useful when used in an online setting, can be filtered before region growing
955
+ # Only if the last epoch is reached do we use the segment as a seed
956
+ if target_idx == timeseries.shape[0] - 1:
957
+ # We reached the present and add a seed based on it
958
+ corepoint_seeds.append(
959
+ RegionGrowingSeed(i, start_idx, timeseries.shape[0] - 1)
960
+ )
961
+
962
+ # Add all the seeds found for this corepoint to the full list
963
+ seeds.extend(corepoint_seeds)
964
+
965
+ return seeds
966
+
967
+ def seed_sorting_scorefunction(self):
968
+ """Neighborhood similarity sorting function"""
969
+
970
+ # The 4D-OBC algorithm sorts by similarity in the neighborhood
971
+ # of the seed.
972
+ def neighborhood_similarity(seed):
973
+ neighbors = self.analysis.corepoints.kdtree.radius_search(
974
+ self.analysis.corepoints.cloud[seed.index, :], self.neighborhood_radius
975
+ )
976
+ similarities = []
977
+ for n in neighbors:
978
+ data = _py4dgeo.TimeseriesDistanceFunctionData(
979
+ self.analysis.distances_for_compute[
980
+ seed.index, seed.start_epoch : seed.end_epoch + 1
981
+ ],
982
+ self.analysis.distances_for_compute[
983
+ n, seed.start_epoch : seed.end_epoch + 1
984
+ ],
985
+ )
986
+ similarities.append(self.distance_measure()(data))
987
+
988
+ return sum(similarities, 0.0) / (len(neighbors) - 1)
989
+
990
+ return neighborhood_similarity
991
+
992
+ def filter_objects(self, obj):
993
+ """A filter for objects produced by the region growing algorithm"""
994
+
995
+ # Filter based on coefficient of variation
996
+ distarray = np.fromiter(obj._data.indices_distances.values(), np.float64)
997
+
998
+ # Check if mean is 0.0, if so, set to very small value to avoid division by 0
999
+ mean_distarray = np.mean(distarray)
1000
+ if mean_distarray == 0.0:
1001
+ mean_distarray = 10**-10
1002
+
1003
+ # Calculate coefficient of variation
1004
+ cv = np.std(distarray) / mean_distarray
1005
+
1006
+ # TODO: Make this threshold configurable?
1007
+ return cv <= 0.8
1008
+
1009
+
1010
+ class RegionGrowingSeed:
1011
+ def __init__(self, index, start_epoch, end_epoch):
1012
+ self._seed = _py4dgeo.RegionGrowingSeed(index, start_epoch, end_epoch)
1013
+
1014
+ @property
1015
+ def index(self):
1016
+ return self._seed.index
1017
+
1018
+ @property
1019
+ def start_epoch(self):
1020
+ return self._seed.start_epoch
1021
+
1022
+ @property
1023
+ def end_epoch(self):
1024
+ return self._seed.end_epoch
1025
+
1026
+
1027
+ class ObjectByChange:
1028
+ """Representation a change object in the spatiotemporal domain"""
1029
+
1030
+ def __init__(self, data, seed, analysis=None):
1031
+ self._data = data
1032
+ self._analysis = analysis
1033
+ self.seed = seed
1034
+
1035
+ @property
1036
+ def indices(self):
1037
+ """The set of corepoint indices that compose the object by change"""
1038
+ return list(self._data.indices_distances.keys())
1039
+
1040
+ def distance(self, index):
1041
+ return self._data.indices_distances[index]
1042
+
1043
+ @property
1044
+ def start_epoch(self):
1045
+ """The index of the start epoch of the change object"""
1046
+ return self._data.start_epoch
1047
+
1048
+ @property
1049
+ def end_epoch(self):
1050
+ """The index of the end epoch of the change object"""
1051
+ return self._data.end_epoch
1052
+
1053
+ @property
1054
+ def threshold(self):
1055
+ """The distance threshold that produced this object"""
1056
+ return self._data.threshold
1057
+
1058
+ def plot(self, filename=None):
1059
+ """Create an informative visualization of the Object By Change
1060
+
1061
+ :param filename:
1062
+ The filename to use to store the plot. Can be omitted to only show
1063
+ plot in a Jupyter notebook session.
1064
+ :type filename: str
1065
+ """
1066
+
1067
+ # Extract DTW distances from this object
1068
+ indexarray = np.fromiter(self.indices, np.int32)
1069
+ distarray = np.fromiter((self.distance(i) for i in indexarray), np.float64)
1070
+
1071
+ # Initialize the figure and all of its subfigures
1072
+ fig = plt.figure(figsize=plt.figaspect(0.3))
1073
+ tsax = fig.add_subplot(1, 3, 1)
1074
+ histax = fig.add_subplot(1, 3, 2)
1075
+ mapax = fig.add_subplot(1, 3, 3)
1076
+
1077
+ # The first plot (tsax) shows all time series of chosen corepoints
1078
+ # and colors them according to distance.
1079
+ tsax.set_ylabel("Height change [m]")
1080
+ tsax.set_xlabel("Time [h]")
1081
+
1082
+ # We pad the time series visualization with a number of data
1083
+ # points on both sides. TODO: Expose as argument to plot?
1084
+ timeseries_padding = 10
1085
+ start_epoch = max(self.start_epoch - timeseries_padding, 0)
1086
+ end_epoch = min(
1087
+ self.end_epoch + timeseries_padding,
1088
+ self._analysis.distances_for_compute.shape[1],
1089
+ )
1090
+
1091
+ # We use the seed's timeseries to set good axis limits
1092
+ seed_ts = self._analysis.distances_for_compute[
1093
+ self.seed.index, start_epoch:end_epoch
1094
+ ]
1095
+ tsax.set_ylim(np.nanmin(seed_ts) * 0.5, np.nanmax(seed_ts) * 1.5)
1096
+
1097
+ # Create a colormap with distance for this object
1098
+ cmap = matplotlib.colormaps.get_cmap("viridis")
1099
+ maxdist = np.nanmax(distarray)
1100
+
1101
+ # Plot each time series individually
1102
+ for index in self.indices:
1103
+ tsax.plot(
1104
+ self._analysis.distances_for_compute[index, start_epoch:end_epoch],
1105
+ linewidth=0.7,
1106
+ alpha=0.3,
1107
+ color=cmap(self.distance(index) / maxdist),
1108
+ )
1109
+
1110
+ # Plot the seed timeseries again, but with a thicker line
1111
+ tsax.plot(seed_ts, linewidth=2.0, zorder=10, color="blue")
1112
+
1113
+ # Next, we add a histogram plot with the distance values (using seaborn)
1114
+ seaborn.histplot(distarray, ax=histax, kde=True, color="r")
1115
+
1116
+ # Add labels to the histogram plot
1117
+ histax.set_title(f"Segment size: {distarray.shape[0]}")
1118
+ histax.set_xlabel("DTW distance")
1119
+
1120
+ # Create a 2D view of the segment
1121
+ locations = self._analysis.corepoints.cloud[indexarray, 0:2]
1122
+ mapax.scatter(locations[:, 0], locations[:, 1], c=distarray)
1123
+
1124
+ # Some global settings of the generated figure
1125
+ fig.tight_layout()
1126
+
1127
+ # Maybe save to file
1128
+ if filename is not None:
1129
+ plt.savefig(filename)
1130
+
1131
+
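Continuing the run() sketch from above, the returned objects could be inspected and visualized like this (file names are placeholders):

# Editor's sketch (not part of the packaged module): inspecting objects
# returned by RegionGrowingAlgorithm.run().
for i, obj in enumerate(objects):
    print(
        f"Object {i}: {len(obj.indices)} core points, "
        f"epochs {obj.start_epoch}-{obj.end_epoch}, "
        f"threshold {obj.threshold}"
    )
    obj.plot(filename=f"object_{i}.png")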
1132
+ def check_epoch_timestamp(epoch):
1133
+ """Validate an epoch to be used with SpatiotemporalSegmentation"""
1134
+ if epoch.timestamp is None:
1135
+ raise Py4DGeoError(
1136
+ "Epochs need to define a timestamp to be usable in SpatiotemporalSegmentation"
1137
+ )
1138
+
1139
+ return epoch
1140
+
1141
+
1142
+ def regular_corepoint_grid(lowerleft, upperright, num_points, zval=0.0):
1143
+ """A helper function to create a regularly spaced grid for the analysis
1144
+
1145
+ :param lowerleft:
1146
+ The lower left corner of the grid. Given as a 2D coordinate.
1147
+ :type lowerleft: np.ndarray
1148
+ :param upperright:
1149
+ The upper right corner of the grid. Given as a 2D coordinate.
1150
+ :type upperright: np.ndarray
1151
+ :param num_points:
1152
+ A tuple with two entries denoting the number of points to be used in
1153
+ x and y direction
1154
+ :type num_points: tuple
1155
+ :param zval:
1156
+ The value to fill for the z-direction.
1157
+ :type zval: double
1158
+ """
1159
+ xspace = np.linspace(
1160
+ lowerleft[0], upperright[0], num=num_points[0], dtype=np.float64
1161
+ )
1162
+ yspace = np.linspace(
1163
+ lowerleft[1], upperright[1], num=num_points[1], dtype=np.float64
1164
+ )
1165
+
1166
+ grid = np.empty(shape=(num_points[0] * num_points[1], 3), dtype=np.float64)
1167
+ for i, x in enumerate(xspace):
1168
+ for j, y in enumerate(yspace):
1169
+ grid[i * num_points[1] + j, 0] = x
1170
+ grid[i * num_points[1] + j, 1] = y
1171
+ grid[i * num_points[1] + j, 2] = zval
1172
+
1173
+ return grid
1174
+
1175
+
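A short hedged sketch of using this helper to generate core points for an analysis; extents and resolution are placeholders:

# Editor's sketch (not part of the packaged module): a regular 20 x 10 grid
# of core points at z = 0 covering a 10 m x 5 m footprint.
import numpy as np
from py4dgeo.segmentation import regular_corepoint_grid

grid = regular_corepoint_grid(
    lowerleft=np.array([0.0, 0.0]),
    upperright=np.array([10.0, 5.0]),
    num_points=(20, 10),
)
assert grid.shape == (200, 3)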
1176
+ def temporal_averaging(distances, smoothing_window=24):
1177
+ """Smoothen a space-time array of distance change using a sliding window approach
1178
+
1179
+ :param distances:
1180
+ The raw data to smoothen.
1181
+ :type distances: np.ndarray
1182
+ :param smoothing_window:
1183
+ The size of the sliding window used in smoothing the data. The
1184
+ default value is 24.
1185
+ :type smoothing_window: int
1186
+ """
1187
+
1188
+ with logger_context("Smoothing temporal data"):
1189
+ smoothed = np.empty_like(distances)
1190
+ eps = smoothing_window // 2
1191
+
1192
+ for i in range(distances.shape[1]):
1193
+ smoothed[:, i] = np.nanmedian(
1194
+ distances[
1195
+ :,
1196
+ max(0, i - eps) : min(distances.shape[1] - 1, i + eps),
1197
+ ],
1198
+ axis=1,
1199
+ )
1200
+
1201
+ # Return the smoothed space-time array
1202
+ return smoothed