py4dgeo 0.7.0__cp313-cp313-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1280 @@
1
+ from py4dgeo.epoch import Epoch, as_epoch
2
+ from py4dgeo.logger import logger_context
3
+ from py4dgeo.util import Py4DGeoError, find_file
4
+ from py4dgeo.UpdateableZipFile import UpdateableZipFile
5
+
6
+ import datetime
7
+ import json
8
+ import logging
9
+ import matplotlib
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+ import os
13
+ import pickle
14
+ import seaborn
15
+ import tempfile
16
+ import zipfile
17
+ import _py4dgeo
18
+
19
+
20
+ # Get the py4dgeo logger instance
21
+ logger = logging.getLogger("py4dgeo")
22
+
23
+
24
+ # This integer controls the versioning of the segmentation file format. Whenever the
25
+ # format is changed, this version should be increased, so that py4dgeo can warn
26
+ # about incompatibilities of py4dgeo with loaded data. This version is intentionally
27
+ # different from py4dgeo's version, because not all releases of py4dgeo necessarily
28
+ # change the segmentation file format and we want to be as compatible as possible.
29
+ PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION = 0
30
+
31
+
32
+ class SpatiotemporalAnalysis:
33
+ def __init__(self, filename, compress=True, allow_pickle=True, force=False):
34
+ """Construct a spatiotemporal _segmentation object
35
+
36
+ This is the basic data structure for the 4D objects by change algorithm
37
+ and its derived variants. It manages storage of M3C2 distances and other
38
+ intermediate results for a time series of epochs. The original point clouds
39
+ themselves are not needed after initial distance calculation and additional
40
+ epochs can be added to an existing analysis. The class uses a disk backend
41
+ to store information and allows lazy loading of additional data like e.g.
42
+ M3C2 uncertainty values for postprocessing.
43
+
44
+ :param filename:
45
+ The filename used for this analysis. If it does not exist on the file
46
+             system, a new analysis is created. Otherwise, the data is loaded from the existing file.
47
+ :type filename: str
48
+ :param compress:
49
+ Whether to compress the stored data. This is a tradeoff decision between
50
+ disk space and runtime. Especially appending new epochs to an existing
51
+ analysis is an operation whose runtime can easily be dominated by
52
+ decompression/compression of data.
53
+ :type compress: bool
54
+ :param allow_pickle:
55
+ Whether py4dgeo is allowed to use the pickle module to store some data
56
+ in the file representation of the analysis. If set to false, some data
57
+ may not be stored and needs to be recomputed instead.
58
+ :type allow_pickle: bool
59
+ :param force:
60
+ Force creation of a new analysis object, even if a file of this name
61
+ already exists.
62
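+
+         Example: a minimal usage sketch (not part of the original documentation).
+         Here, :code:`my_reference_epoch` and :code:`my_epoch` stand for already
+         loaded epochs with timestamps, and :code:`my_m3c2` for a configured M3C2
+         instance::
+
+             analysis = SpatiotemporalAnalysis("analysis.zip")
+
+             # Each of these can be set exactly once on a freshly created analysis
+             analysis.reference_epoch = my_reference_epoch
+             analysis.corepoints = my_reference_epoch.cloud[::100, :]
+             analysis.m3c2 = my_m3c2
+
+             # One distance/uncertainty column is appended per added epoch
+             analysis.add_epochs(my_epoch)
+
+             # The space-time array of M3C2 distances (corepoints x epochs)
+             distances = analysis.distances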
+ """
63
+
64
+ # Store the given parameters
65
+ self.filename = find_file(filename, fatal=False)
66
+ self.compress = compress
67
+ self.allow_pickle = allow_pickle
68
+
69
+ # Instantiate some properties used later on
70
+ self._m3c2 = None
71
+
72
+ # This is the cache for lazily loaded data
73
+ self._corepoints = None
74
+ self._distances = None
75
+ self._smoothed_distances = None
76
+ self._uncertainties = None
77
+ self._reference_epoch = None
78
+
79
+ # If the filename does not already exist, we create a new archive
80
+ if force or not os.path.exists(self.filename):
81
+ logger.info(f"Creating analysis file {self.filename}")
82
+ with zipfile.ZipFile(self.filename, mode="w") as zf:
83
+                 # Write the segmentation file format version number
84
+ zf.writestr(
85
+ "SEGMENTATION_FILE_FORMAT",
86
+ str(PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION),
87
+ )
88
+
89
+ # Write the compression algorithm used for all suboperations
90
+ zf.writestr("USE_COMPRESSION", str(self.compress))
91
+
92
+         # Assert that the segmentation file format is still valid
93
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
94
+             # Read the segmentation file version number and compare to current
95
+ version = int(zf.read("SEGMENTATION_FILE_FORMAT").decode())
96
+ if version != PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION:
97
+                 raise Py4DGeoError("Segmentation file format is out of date!")
98
+
99
+ # Read the compression algorithm
100
+             self.compress = zf.read("USE_COMPRESSION").decode() == "True"  # stored as str(bool)
101
+
102
+ @property
103
+ def reference_epoch(self):
104
+ """Access the reference epoch of this analysis"""
105
+
106
+ if self._reference_epoch is None:
107
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
108
+ # Double check that the reference has already been set
109
+ if "reference_epoch.zip" not in zf.namelist():
110
+ raise Py4DGeoError("Reference epoch for analysis not yet set")
111
+
112
+ # Extract it from the archive
113
+ with tempfile.TemporaryDirectory() as tmp_dir:
114
+ ref_epochfile = zf.extract("reference_epoch.zip", path=tmp_dir)
115
+ self._reference_epoch = Epoch.load(ref_epochfile)
116
+
117
+ return self._reference_epoch
118
+
119
+ @reference_epoch.setter
120
+ def reference_epoch(self, epoch):
121
+ """Set the reference epoch of this analysis (only possible once)"""
122
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
123
+ # If we already have a reference epoch, the user should start a
124
+ # new analysis instead
125
+ if "reference_epoch.zip" in zf.namelist():
126
+ raise Py4DGeoError(
127
+ "Reference epoch cannot be changed - please start a new analysis"
128
+ )
129
+
130
+ # Ensure that we do have a timestamp on the epoch
131
+ epoch = check_epoch_timestamp(epoch)
132
+
133
+ # Ensure that the KDTree is built - no-op if triggered by the user
134
+ epoch.build_kdtree()
135
+
136
+ # Write the reference epoch into the archive
137
+ with tempfile.TemporaryDirectory() as tmp_dir:
138
+ epochfilename = os.path.join(tmp_dir, "reference_epoch.zip")
139
+ epoch.save(epochfilename)
140
+ zf.write(epochfilename, arcname="reference_epoch.zip")
141
+
142
+ # Also cache it directly
143
+ self._reference_epoch = epoch
144
+
145
+ @reference_epoch.deleter
146
+ def reference_epoch(self):
147
+ self._reference_epoch = None
148
+
149
+ @property
150
+ def corepoints(self):
151
+ """Access the corepoints of this analysis"""
152
+ if self._corepoints is None:
153
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
154
+ # Double check that the reference has already been set
155
+ if "corepoints.zip" not in zf.namelist():
156
+ raise Py4DGeoError("Corepoints for analysis not yet set")
157
+
158
+ # Extract it from the archive
159
+ with tempfile.TemporaryDirectory() as tmp_dir:
160
+ cpfile = zf.extract("corepoints.zip", path=tmp_dir)
161
+ self._corepoints = Epoch.load(cpfile)
162
+
163
+ return self._corepoints
164
+
165
+ @corepoints.setter
166
+ def corepoints(self, _corepoints):
167
+ """Set the corepoints for this analysis (only possible once)"""
168
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
169
+ # If we already have corepoints in the archive, the user should start a
170
+ # new analysis instead
171
+ if "corepoints.zip" in zf.namelist():
172
+ raise Py4DGeoError(
173
+ "Corepoints cannot be changed - please start a new analysis"
174
+ )
175
+
176
+ # Ensure that the corepoints are stored as an epoch and build its KDTree
177
+ self._corepoints = as_epoch(_corepoints)
178
+ self._corepoints.build_kdtree()
179
+
180
+ # Write the corepoints into the archive
181
+ with tempfile.TemporaryDirectory() as tmp_dir:
182
+ cpfilename = os.path.join(tmp_dir, "corepoints.zip")
183
+ self._corepoints.save(cpfilename)
184
+ zf.write(cpfilename, arcname="corepoints.zip")
185
+
186
+ @corepoints.deleter
187
+ def corepoints(self):
188
+ self._corepoints = None
189
+
190
+ @property
191
+ def m3c2(self):
192
+ """Access the M3C2 algorithm of this analysis"""
193
+         # May be None if no M3C2 instance has been set on this analysis yet
194
+ return self._m3c2
195
+
196
+ @m3c2.setter
197
+ def m3c2(self, _m3c2):
198
+ """Set the M3C2 algorithm of this analysis"""
199
+ self._m3c2 = _m3c2
200
+
201
+ @property
202
+ def timedeltas(self):
203
+ """Access the sequence of time stamp deltas for the time series"""
204
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
205
+ if "timestamps.json" not in zf.namelist():
206
+ return []
207
+
208
+ # Read timedeltas
209
+ with tempfile.TemporaryDirectory() as tmp_dir:
210
+ timestampsfile = zf.extract("timestamps.json", path=tmp_dir)
211
+ with open(timestampsfile) as f:
212
+ timedeltas = json.load(f)
213
+
214
+ # Convert the serialized deltas to datetime.timedelta
215
+ return [datetime.timedelta(**data) for data in timedeltas]
216
+
217
+ @timedeltas.setter
218
+ def timedeltas(self, _timedeltas):
219
+ """Set the timedeltas manually
220
+
221
+ This is only possible exactly once and mutually exclusive with adding
222
+ epochs via the :ref:`add_epochs` method.
223
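+
+         Example: an illustrative sketch of the manual path, where :code:`analysis`
+         is a freshly created analysis and :code:`dists` a precomputed space-time
+         array with one column per epoch::
+
+             import datetime
+
+             analysis.timedeltas = [
+                 datetime.timedelta(hours=h) for h in range(dists.shape[1])
+             ]
+             analysis.distances = dists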
+ """
224
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
225
+ # If we already have timestamps in the archive, this is not possible
226
+ if "timestamps.json" in zf.namelist():
227
+ raise Py4DGeoError(
228
+ "Timestamps can only be set on freshly created analysis instances"
229
+ )
230
+
231
+ with tempfile.TemporaryDirectory() as tmp_dir:
232
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
233
+ with open(timestampsfile, "w") as f:
234
+ json.dump(
235
+ [
236
+ {
237
+ "days": td.days,
238
+ "seconds": td.seconds,
239
+ "microseconds": td.microseconds,
240
+ }
241
+ for td in _timedeltas
242
+ ],
243
+ f,
244
+ )
245
+ zf.write(timestampsfile, arcname="timestamps.json")
246
+
247
+ @property
248
+ def distances(self):
249
+ """Access the M3C2 distances of this analysis"""
250
+
251
+ if self._distances is None:
252
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
253
+ filename = self._numpy_filename("distances")
254
+ if filename not in zf.namelist():
255
+ self.distances = np.empty(
256
+ (self.corepoints.cloud.shape[0], 0), dtype=np.float64
257
+ )
258
+ return self._distances
259
+
260
+ with tempfile.TemporaryDirectory() as tmp_dir:
261
+ distancefile = zf.extract(filename, path=tmp_dir)
262
+ read_func = (
263
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
264
+ )
265
+ self._distances = read_func(distancefile)
266
+
267
+ return self._distances
268
+
269
+ @distances.setter
270
+ def distances(self, _distances):
271
+ """Set the distances manually
272
+
273
+ This is only possible exactly once and mutually exclusive with adding
274
+ epochs via the :ref:`add_epochs` method.
275
+ """
276
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
277
+ filename = self._numpy_filename("distances")
278
+ write_func = np.savez_compressed if self.compress else np.save
279
+
280
+             # If we already have distances in the archive, this is not possible
281
+ if filename in zf.namelist():
282
+ raise Py4DGeoError(
283
+ "Distances can only be set on freshly created analysis instances, use add_epochs instead."
284
+ )
285
+
286
+ with tempfile.TemporaryDirectory() as tmp_dir:
287
+ distancesfile = os.path.join(tmp_dir, filename)
288
+ write_func(distancesfile, _distances)
289
+ zf.write(distancesfile, arcname=filename)
290
+
291
+ self._distances = _distances
292
+
293
+ @distances.deleter
294
+ def distances(self):
295
+ self._distances = None
296
+
297
+ @property
298
+ def smoothed_distances(self):
299
+ if self._smoothed_distances is None:
300
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
301
+ filename = self._numpy_filename("smoothed_distances")
302
+ if filename in zf.namelist():
303
+ with tempfile.TemporaryDirectory() as tmp_dir:
304
+ smoothedfile = zf.extract(filename, path=tmp_dir)
305
+ read_func = (
306
+ (lambda f: np.load(f)["arr_0"])
307
+ if self.compress
308
+ else np.load
309
+ )
310
+ self._smoothed_distances = read_func(smoothedfile)
311
+
312
+ return self._smoothed_distances
313
+
314
+ @smoothed_distances.setter
315
+ def smoothed_distances(self, _smoothed_distances):
316
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
317
+ filename = self._numpy_filename("smoothed_distances")
318
+ write_func = np.savez_compressed if self.compress else np.save
319
+
320
+ with tempfile.TemporaryDirectory() as tmp_dir:
321
+ smoothedfile = os.path.join(tmp_dir, filename)
322
+ write_func(smoothedfile, _smoothed_distances)
323
+ zf.write(smoothedfile, arcname=filename)
324
+
325
+ self._smoothed_distances = _smoothed_distances
326
+
327
+ @smoothed_distances.deleter
328
+ def smoothed_distances(self):
329
+ self._smoothed_distances = None
330
+
331
+ @property
332
+ def uncertainties(self):
333
+ """Access the M3C2 uncertainties of this analysis"""
334
+
335
+ if self._uncertainties is None:
336
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
337
+ filename = self._numpy_filename("uncertainties")
338
+ if filename not in zf.namelist():
339
+ self.uncertainties = np.empty(
340
+ (self.corepoints.cloud.shape[0], 0),
341
+ dtype=np.dtype(
342
+ [
343
+ ("lodetection", "<f8"),
344
+ ("spread1", "<f8"),
345
+ ("num_samples1", "<i8"),
346
+ ("spread2", "<f8"),
347
+ ("num_samples2", "<i8"),
348
+ ]
349
+ ),
350
+ )
351
+ return self._uncertainties
352
+
353
+ with tempfile.TemporaryDirectory() as tmp_dir:
354
+ uncertaintyfile = zf.extract(filename, path=tmp_dir)
355
+ read_func = (
356
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
357
+ )
358
+ self._uncertainties = read_func(uncertaintyfile)
359
+
360
+ return self._uncertainties
361
+
362
+ @uncertainties.setter
363
+ def uncertainties(self, _uncertainties):
364
+ """Set the uncertainties manually
365
+
366
+ This is only possible exactly once and mutually exclusive with adding
367
+ epochs via the :ref:`add_epochs` method.
368
+ """
369
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
370
+ filename = self._numpy_filename("uncertainties")
371
+ write_func = np.savez_compressed if self.compress else np.save
372
+
373
+             # If we already have uncertainties in the archive, this is not possible
374
+ if filename in zf.namelist():
375
+ raise Py4DGeoError(
376
+ "Uncertainties can only be set on freshly created analysis instances, use add_epochs instead."
377
+ )
378
+
379
+ with tempfile.TemporaryDirectory() as tmp_dir:
380
+ uncertaintiesfile = os.path.join(tmp_dir, filename)
381
+ write_func(uncertaintiesfile, _uncertainties)
382
+ zf.write(uncertaintiesfile, arcname=filename)
383
+
384
+ self._uncertainties = _uncertainties
385
+
386
+ @uncertainties.deleter
387
+ def uncertainties(self):
388
+ self._uncertainties = None
389
+
390
+ def add_epochs(self, *epochs):
391
+ """Add a numbers of epochs to the existing analysis"""
392
+
393
+ # Remove intermediate results from the archive
394
+ self.invalidate_results()
395
+
396
+ # Assert that all epochs have a timestamp
397
+ for epoch in epochs:
398
+ check_epoch_timestamp(epoch)
399
+
400
+ # Lazily fetch required data
401
+ reference_epoch = self.reference_epoch
402
+ timedeltas = self.timedeltas
403
+
404
+ # Collect the calculated results to only add them once to the archive
405
+ new_distances = []
406
+ new_uncertainties = []
407
+
408
+ # Iterate over the given epochs
409
+ for i, epoch in enumerate(sorted(epochs, key=lambda e: e.timestamp)):
410
+ with logger_context(f"Adding epoch {i+1}/{len(epochs)} to analysis object"):
411
+ # Prepare the M3C2 instance
412
+ self.m3c2.corepoints = self.corepoints.cloud
413
+ self.m3c2.epochs = (reference_epoch, epoch)
414
+
415
+ # Calculate the M3C2 distances
416
+ d, u = self.m3c2.calculate_distances(reference_epoch, epoch)
417
+ new_distances.append(d)
418
+ new_uncertainties.append(u)
419
+ timedeltas.append(epoch.timestamp - reference_epoch.timestamp)
420
+
421
+ # We do not need the reference_epoch at this point
422
+ del self.reference_epoch
423
+
424
+ # Prepare all archive data in a temporary directory
425
+ with tempfile.TemporaryDirectory() as tmp_dir:
426
+ # Write a new timestamps file
427
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
428
+ with open(timestampsfile, "w") as f:
429
+ json.dump(
430
+ [
431
+ {
432
+ "days": td.days,
433
+ "seconds": td.seconds,
434
+ "microseconds": td.microseconds,
435
+ }
436
+ for td in timedeltas
437
+ ],
438
+ f,
439
+ )
440
+
441
+ # Depending on whether we compress, we use different numpy functionality
442
+ write_func = np.savez_compressed if self.compress else np.save
443
+ distance_filename = self._numpy_filename("distances")
444
+ uncertainty_filename = self._numpy_filename("uncertainties")
445
+
446
+ with logger_context("Rearranging space-time array in memory"):
447
+ # Load the distance array and append new data
448
+ distance_file = os.path.join(tmp_dir, distance_filename)
449
+ write_func(
450
+ distance_file,
451
+ np.concatenate(
452
+ (self.distances, np.column_stack(tuple(new_distances))), axis=1
453
+ ),
454
+ )
455
+
456
+ # Load the uncertainty array and append new data
457
+ uncertainty_file = os.path.join(tmp_dir, uncertainty_filename)
458
+ write_func(
459
+ uncertainty_file,
460
+ np.concatenate(
461
+ (self.uncertainties, np.column_stack(tuple(new_uncertainties))),
462
+ axis=1,
463
+ ),
464
+ )
465
+
466
+ # Invalidate potential caches for distances/uncertainties
467
+ self._distances = None
468
+ self._uncertainties = None
469
+
470
+ # Dump the updated files into the archive
471
+ with logger_context("Updating disk-based analysis archive with new epochs"):
472
+ with UpdateableZipFile(self.filename, mode="a") as zf:
473
+ if "timestamps.json" in zf.namelist():
474
+ zf.remove("timestamps.json")
475
+ zf.write(timestampsfile, arcname="timestamps.json")
476
+ if distance_filename in zf.namelist():
477
+ zf.remove(distance_filename)
478
+ zf.write(distance_file, arcname=distance_filename)
479
+ if uncertainty_filename in zf.namelist():
480
+ zf.remove(uncertainty_filename)
481
+ zf.write(uncertainty_file, arcname=uncertainty_filename)
482
+
483
+ # (Potentially) remove caches
484
+ del self.distances
485
+ del self.uncertainties
486
+
487
+ @property
488
+ def seeds(self):
489
+ """The list of seed candidates for this analysis"""
490
+
491
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
492
+ if "seeds.pickle" not in zf.namelist():
493
+ return None
494
+
495
+ with tempfile.TemporaryDirectory() as tmp_dir:
496
+ zf.extract("seeds.pickle", path=tmp_dir)
497
+ with open(os.path.join(tmp_dir, "seeds.pickle"), "rb") as f:
498
+ return pickle.load(f)
499
+
500
+ @seeds.setter
501
+ def seeds(self, _seeds):
502
+ # Assert that we received the correct type
503
+ for seed in _seeds:
504
+ if not isinstance(seed, RegionGrowingSeed):
505
+ raise Py4DGeoError(
506
+ "Seeds are expected to inherit from RegionGrowingSeed"
507
+ )
508
+
509
+ if not self.allow_pickle:
510
+ return
511
+
512
+ with UpdateableZipFile(self.filename, mode="a") as zf:
513
+ if "seeds.pickle" in zf.namelist():
514
+ zf.remove("seeds.pickle")
515
+
516
+ with tempfile.TemporaryDirectory() as tmp_dir:
517
+ seedsfile = os.path.join(tmp_dir, "seeds.pickle")
518
+ with open(seedsfile, "wb") as f:
519
+ pickle.dump(_seeds, f)
520
+
521
+ zf.write(seedsfile, arcname="seeds.pickle")
522
+
523
+ @property
524
+ def objects(self):
525
+ """The list of objects by change for this analysis"""
526
+
527
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
528
+ if "objects.pickle" not in zf.namelist():
529
+ return None
530
+
531
+ with tempfile.TemporaryDirectory() as tmp_dir:
532
+ zf.extract("objects.pickle", path=tmp_dir)
533
+ with open(os.path.join(tmp_dir, "objects.pickle"), "rb") as f:
534
+ return pickle.load(f)
535
+
536
+ @objects.setter
537
+ def objects(self, _objects):
538
+ # Assert that we received the correct type
539
+ for seed in _objects:
540
+ if not isinstance(seed, ObjectByChange):
541
+ raise Py4DGeoError(
542
+ "Objects are expected to inherit from ObjectByChange"
543
+ )
544
+
545
+ if not self.allow_pickle:
546
+ return
547
+
548
+ with UpdateableZipFile(self.filename, mode="a") as zf:
549
+ if "objects.pickle" in zf.namelist():
550
+ zf.remove("objects.pickle")
551
+
552
+ with tempfile.TemporaryDirectory() as tmp_dir:
553
+ objectsfile = os.path.join(tmp_dir, "objects.pickle")
554
+ with open(objectsfile, "wb") as f:
555
+ pickle.dump(_objects, f)
556
+
557
+ zf.write(objectsfile, arcname="objects.pickle")
558
+
559
+ def invalidate_results(self, seeds=True, objects=True, smoothed_distances=False):
560
+ """Invalidate (and remove) calculated results
561
+
562
+ This is automatically called when new epochs are added or when
563
+ an algorithm sets the :code:`force` option.
564
+ """
565
+
566
+ logger.info(
567
+ f"Removing intermediate results from the analysis file {self.filename}"
568
+ )
569
+ with UpdateableZipFile(self.filename, mode="a") as zf:
570
+ if seeds and "seeds.pickle" in zf.namelist():
571
+ zf.remove("seeds.pickle")
572
+
573
+ if objects and "objects.pickle" in zf.namelist():
574
+ zf.remove("objects.pickle")
575
+
576
+ smoothed_file = self._numpy_filename("smoothed_distances")
577
+ if smoothed_distances and smoothed_file in zf.namelist():
578
+ zf.remove(smoothed_file)
579
+
580
+ def _numpy_filename(self, name):
581
+ extension = "npz" if self.compress else "npy"
582
+ return f"{name}.{extension}"
583
+
584
+ @property
585
+ def distances_for_compute(self):
586
+ """Retrieve the distance array used for computation
587
+
588
+         This might be the raw data or the smoothed data, depending on whether
+         smoothed distances were provided by the user.
590
+ """
591
+ distances = self.smoothed_distances
592
+ if distances is None:
593
+ distances = self.distances
594
+ return distances
595
+
596
+
597
+ class RegionGrowingAlgorithmBase:
598
+ def __init__(
599
+ self,
600
+ neighborhood_radius=1.0,
601
+ thresholds=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
602
+ min_segments=20,
603
+ max_segments=None,
604
+ ):
605
+ """Construct a spatiotemporal _segmentation algorithm.
606
+
607
+ This class can be derived from to customize the algorithm behaviour.
608
+
609
+ :param neighborhood_radius:
610
+ The size of the neighborhood of a core point. All core points within
611
+             this radius are considered adjacent and are therefore treated as
612
+ candidates for inclusion in the region growing algorithm.
613
+ :type neighborhood_radius: float
614
+ :param thresholds:
615
+ A list of thresholds to use as candidates in 4D-OBC's adaptive
616
+ thresholding procedure.
617
+ :type thresholds: list
618
+ :param min_segments:
619
+ The minimum number of core points in an object-by-change. Defaults to
620
+ 20.
621
+ :type min_segments: int
622
+ :param max_segments:
623
+ The maximum number of core points in an object-by-change. This is mainly
624
+ used to bound the runtime of expensive region growing. By default, no
625
+ maximum is applied.
626
+ :type max_segments: int
627
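+
+         Example: an illustrative sketch of customizing the behaviour by subclassing
+         the 4D-OBC variant :code:`RegionGrowingAlgorithm` defined below (the chosen
+         criteria are made up for demonstration)::
+
+             class MyAlgorithm(RegionGrowingAlgorithm):
+                 def seed_sorting_scorefunction(self):
+                     # Seeds with lower scores are grown first (sorted ascending)
+                     return lambda seed: seed.end_epoch - seed.start_epoch
+
+                 def filter_objects(self, obj):
+                     # Discard objects covering fewer than 50 core points
+                     return len(obj.indices) >= 50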
+ """
628
+
629
+ self.neighborhood_radius = neighborhood_radius
630
+ self.thresholds = thresholds
631
+ self.min_segments = min_segments
632
+ self.max_segments = max_segments
633
+
634
+ self._analysis = None
635
+
636
+ def distance_measure(self):
637
+ """Distance measure between two time series
638
+
639
+ Expected to return a function that accepts two time series and returns
640
+ the distance.
641
+ """
642
+
643
+ return _py4dgeo.normalized_dtw_distance
644
+
645
+ def find_seedpoints(self):
646
+ """Calculate seedpoints for the region growing algorithm"""
647
+
648
+ raise NotImplementedError
649
+
650
+ def seed_sorting_scorefunction(self):
651
+ """A function that computes a score for a seed candidate
652
+
653
+ This function is used to prioritize seed candidates.
654
+ """
655
+
656
+ # The base class does not perform sorting.
657
+ return lambda seed: 0.0
658
+
659
+ def filter_objects(self, obj):
660
+ """A filter for objects produced by the region growing algorithm
661
+
662
+ Objects are discarded if this method returns False.
663
+ """
664
+
665
+ # The base class does not perform filtering
666
+ return True
667
+
668
+ @property
669
+ def analysis(self):
670
+ """Access the analysis object that the algorithm operates on
671
+
672
+ This is only available after :ref:`run` has been called.
673
+ """
674
+ if self._analysis is None:
675
+ raise Py4DGeoError(
676
+ "Analysis object is only available when the algorithm is run"
677
+ )
678
+ return self._analysis
679
+
680
+ def run(self, analysis, force=False):
681
+ """Calculate the _segmentation
682
+
683
+ :param analysis:
684
+ The analysis object we are working with.
685
+ :type analysis: py4dgeo.segmentation.SpatiotemporalAnalysis
686
+ :param force:
687
+ Force recalculation of results. If false, some intermediate results will be
688
+ restored from the analysis object instead of being recalculated.
689
+ """
690
+
691
+ # Make the analysis object known to all members
692
+ self._analysis = analysis
693
+
694
+ # Enforce the removal of intermediate results
695
+ if force:
696
+ analysis.invalidate_results()
697
+
698
+ # Return pre-calculated objects if they are available
699
+ # precalculated = analysis.objects
700
+ # if precalculated is not None:
701
+ # logger.info("Reusing objects by change stored in analysis object")
702
+ # return precalculated
703
+
704
+ # Check if there are pre-calculated objects.
705
+ # If so, create objects list from these and continue growing objects, taking into consideration objects that are already grown.
706
+         # If not, initialize a new empty objects list
707
+ precalculated = analysis.objects # TODO: do not assign to new object
708
+ if precalculated is not None:
709
+ logger.info("Reusing objects by change stored in analysis object")
710
+ objects = (
711
+ precalculated.copy()
712
+ ) # test if .copy() solves memory problem, or deepcopy?
713
+ else:
714
+ objects = (
715
+ []
716
+ ) # TODO: test initializing this in the analysis class, see if it crashes instantly
717
+
718
+ # Get corepoints from M3C2 class and build a KDTree on them
719
+ corepoints = as_epoch(analysis.corepoints)
720
+ corepoints.build_kdtree()
721
+
722
+ # Calculate the list of seed points and sort them
723
+ seeds = analysis.seeds
724
+ if seeds is None:
725
+ with logger_context("Find seed candidates in time series"):
726
+ seeds = self.find_seedpoints()
727
+
728
+ # Sort the seed points
729
+ with logger_context("Sort seed candidates by priority"):
730
+ seeds = list(sorted(seeds, key=self.seed_sorting_scorefunction()))
731
+
732
+ # Store the seeds
733
+ analysis.seeds = seeds
734
+ else:
735
+ logger.info("Reusing seed candidates stored in analysis object")
736
+ # write the number of seeds to a separate text file if self.write_nr_seeds is True
737
+ if self.write_nr_seeds:
738
+ with open("number_of_seeds.txt", "w") as f:
739
+ f.write(str(len(seeds)))
740
+
741
+ # Iterate over the seeds to maybe turn them into objects
742
+ for i, seed in enumerate(
743
+ seeds
744
+ ): # [self.resume_from_seed-1:]): # starting seed ranked at the `resume_from_seed` variable (representing 1 for index 0)
745
+ # or to keep within the same index range when resuming from seed:
746
+ if i < (
747
+ self.resume_from_seed - 1
748
+ ): # resume from index 0 when `resume_from_seed` == 1
749
+ continue
750
+ if i >= (self.stop_at_seed - 1): # stop at index 0 when `stop_at_seed` == 1
751
+ break
752
+
753
+ # save objects to analysis object when at index `intermediate_saving`
754
+ if (
755
+ (self.intermediate_saving)
756
+ and ((i % self.intermediate_saving) == 0)
757
+ and (i != 0)
758
+ ):
759
+ with logger_context(
760
+ f"Intermediate saving of first {len(objects)} objects, grown from first {i+1}/{len(seeds)} seeds"
761
+ ):
762
+                     analysis.objects = objects  # Persist the current objects to the analysis archive
763
+
764
+             # Check whether any already calculated object overlaps with this seed.
765
+ found = False
766
+ for obj in objects:
767
+ if seed.index in obj.indices and (
768
+ obj.end_epoch > seed.start_epoch
769
+ and seed.end_epoch > obj.start_epoch
770
+ ):
771
+ found = True
772
+ break
773
+
774
+ # If we found an overlap, we skip this seed
775
+ if found:
776
+ continue
777
+
778
+ # Apply a numeric default to the max_segments parameter
779
+ max_segments = self.max_segments
780
+ if max_segments is None:
781
+ max_segments = corepoints.cloud.shape[0] + 1
782
+
783
+ data = _py4dgeo.RegionGrowingAlgorithmData(
784
+ analysis.distances_for_compute,
785
+ corepoints,
786
+ self.neighborhood_radius,
787
+ seed._seed,
788
+ self.thresholds,
789
+ self.min_segments,
790
+ max_segments,
791
+ )
792
+
793
+ # Perform the region growing
794
+ with logger_context(
795
+ f"Performing region growing on seed candidate {i+1}/{len(seeds)}"
796
+ ):
797
+ objdata = _py4dgeo.region_growing(data, self.distance_measure())
798
+
799
+ # If the returned object has 0 indices, the min_segments threshold was violated
800
+ if objdata.indices_distances:
801
+ obj = ObjectByChange(
802
+ objdata, seed, analysis
803
+ ) # TODO: check, does it copy the whole analysis object when initializing
804
+ if self.filter_objects(obj):
805
+ objects.append(obj)
806
+
807
+ # If the returned object is larger than max_segments we issue a warning
808
+ if len(objdata.indices_distances) >= max_segments:
809
+ logger.warning(
810
+ f"An object by change exceeded the given maximum size of {max_segments}"
811
+ )
812
+
813
+ # Store the results in the analysis object
814
+ analysis.objects = objects
815
+
816
+ # Potentially remove objects from memory
817
+ del analysis.smoothed_distances
818
+ del analysis.distances
819
+
820
+ return objects
821
+
822
+
823
+ class RegionGrowingAlgorithm(RegionGrowingAlgorithmBase):
824
+ def __init__(
825
+ self,
826
+ seed_subsampling=1,
827
+ seed_candidates=None,
828
+ window_width=24,
829
+ window_min_size=12,
830
+ window_jump=1,
831
+ window_penalty=1.0,
832
+ minperiod=24,
833
+ height_threshold=0.0,
834
+ use_unfinished=True,
835
+ intermediate_saving=0,
836
+ resume_from_seed=0,
837
+ stop_at_seed=np.inf,
838
+ write_nr_seeds=False,
839
+ **kwargs,
840
+ ):
841
+ """Construct the 4D-OBC algorithm.
842
+
843
+ :param seed_subsampling:
844
+ A subsampling factor for the set of core points for the generation
845
+             of segmentation seed candidates. This can be used to speed up
846
+ the generation of seeds. The default of 1 does not perform any
847
+ subsampling, a value of, e.g., 10 would only consider every 10th
848
+ corepoint for adding seeds.
849
+ :type seed_subsampling: int
850
+ :param seed_candidates:
851
+             A set of indices specifying which core points should be used for seed detection. This can be used to perform segmentation for selected locations. The default of None does not perform any selection and uses all corepoints. The subsampling parameter is applied additionally.
852
+ :type seed_candidates: list
853
+ :param window_width:
854
+ The width of the sliding temporal window for change point detection. The sliding window
855
+ moves along the signal and determines the discrepancy between the first and the second
856
+ half of the window (i.e. subsequent time series segments within the window width). The
857
+ default value is 24, corresponding to one day in case of hourly data.
858
+ :type window_width: int
859
+ :param window_min_size:
860
+ The minimum temporal distance needed between two seed candidates, for the second one to be considered.
861
+             The default value is 12.
862
+ :type window_min_size: int
863
+ :param window_jump:
864
+ The interval on which the sliding temporal window moves and checks for seed candidates.
865
+ The default value is 1, corresponding to a check for every epoch in the time series.
866
+ :type window_jump: int
867
+ :param window_penalty:
868
+ A complexity penalty that determines how strict the change point detection is.
869
+             A higher penalty results in stricter change point detection (i.e., fewer points are detected), while a low
870
+ value results in a large amount of detected change points. The default value is 1.0.
871
+ :type window_penalty: float
872
+ :param minperiod:
873
+ The minimum period of a detected change to be considered as seed candidate for subsequent
874
+             segmentation. The default is 24, corresponding to one day for hourly data.
875
+ :type minperiod: int
876
+ :param height_threshold:
877
+ The height threshold represents the required magnitude of a detected change to be considered
878
+             as a seed candidate for subsequent segmentation. The magnitude of a detected change is derived
+             as the unsigned difference between the magnitude (i.e. distance) at the start epoch and the peak magnitude.
880
+ The default is 0.0, in which case all detected changes are used as seed candidates.
881
+ :type height_threshold: float
882
+ :param use_unfinished:
883
+ If False, seed candidates that are not finished by the end of the time series are not considered in further
884
+             analysis. The default is True, in which case unfinished seed candidates are regarded as seeds for region growing.
885
+ :type use_unfinished: bool
886
+ :param intermediate_saving:
887
+             Parameter that determines after how many considered seeds the resulting list of 4D-OBCs is saved to the SpatiotemporalAnalysis object.
+             This ensures that not all results are lost if the algorithm terminates unexpectedly. If set to 0, no intermediate saving is done.
889
+ :type intermediate_saving: int
890
+ :param resume_from_seed:
891
+ Parameter specifying from which seed index the region growing algorithm must resume. If zero all seeds are considered, starting from the highest ranked seed.
892
+ Default is 0.
893
+ :type resume_from_seed: int
894
+ :param stop_at_seed:
895
+ Parameter specifying at which seed to stop region growing and terminate the run function.
896
+ Default is np.inf, meaning all seeds are considered.
897
+ :type stop_at_seed: int
898
+ :param write_nr_seeds:
899
+ If True, after seed detection, a text file is written in the working directory containing the total number of detected seeds.
900
+ This can be used to split up the consecutive 4D-OBC segmentation into different subsets.
901
+ Default is False, meaning no txt file is written.
902
+ :type write_nr_seeds: bool
903
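+
+         Example: an illustrative sketch, assuming :code:`analysis` is a
+         :code:`SpatiotemporalAnalysis` that already contains distances::
+
+             algorithm = RegionGrowingAlgorithm(
+                 neighborhood_radius=1.0,  # parameter of the base class
+                 seed_subsampling=10,
+                 window_width=24,
+             )
+             objects = algorithm.run(analysis)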
+ """
904
+
905
+ # Initialize base class
906
+ super().__init__(**kwargs)
907
+
908
+ # Store the given parameters
909
+ self.seed_subsampling = seed_subsampling
910
+ self.seed_candidates = seed_candidates
911
+ self.window_width = window_width
912
+ self.window_min_size = window_min_size
913
+ self.window_jump = window_jump
914
+ self.window_penalty = window_penalty
915
+ self.minperiod = minperiod
916
+ self.height_threshold = height_threshold
917
+ self.use_unfinished = use_unfinished
918
+ self.intermediate_saving = intermediate_saving
919
+ self.resume_from_seed = resume_from_seed
920
+ self.stop_at_seed = stop_at_seed
921
+ self.write_nr_seeds = write_nr_seeds
922
+
923
+ def find_seedpoints(self):
924
+ """Calculate seedpoints for the region growing algorithm"""
925
+
926
+ # These are some arguments used below that we might consider
927
+ # exposing to the user in the future. For now, they are considered
928
+ # internal, but they are still defined here for readability.
929
+ window_costmodel = "l1"
930
+ # window_min_size = 12
931
+ # window_jump = 1
932
+ # window_penalty = 1.0
933
+
934
+ # The list of generated seeds
935
+ seeds = []
936
+
937
+ # The list of core point indices to check as seeds
938
+ if self.seed_candidates is None:
939
+ if self.seed_subsampling == 0:
940
+ raise Py4DGeoError(
941
+ "Subsampling factor cannot be 0, use 1 or any integer larger than 1"
942
+ )
943
+ # Use all corepoints if no selection specified, considering subsampling
944
+ seed_candidates_curr = range(
945
+ 0, self.analysis.distances_for_compute.shape[0], self.seed_subsampling
946
+ )
947
+ else:
948
+ # Use the specified corepoint indices, but consider subsampling
949
+ seed_candidates_curr = self.seed_candidates # [::self.seed_subsampling]
950
+
951
+ # Iterate over all time series to analyse their change points
952
+ for i in seed_candidates_curr:
953
+ # Extract the time series and interpolate its nan values
954
+ timeseries = self.analysis.distances_for_compute[i, :]
955
+ bad_indices = np.isnan(timeseries)
956
+ num_nans = np.count_nonzero(bad_indices)
957
+
958
+             # If there are too many NaNs, this timeseries does not make sense
959
+ if num_nans > timeseries.shape[0] - 3:
960
+ continue
961
+
962
+ # If there are nan values, we try fixing things by interpolation
963
+ if num_nans > 0:
964
+ good_indices = np.logical_not(bad_indices)
965
+ timeseries[bad_indices] = np.interp(
966
+ bad_indices.nonzero()[0],
967
+ good_indices.nonzero()[0],
968
+ timeseries[good_indices],
969
+ )
970
+
971
+ # Run detection of change points
972
+ cpdata = _py4dgeo.ChangePointDetectionData(
973
+ ts=timeseries,
974
+ window_size=self.window_width,
975
+ min_size=self.window_min_size,
976
+ jump=self.window_jump,
977
+ penalty=self.window_penalty,
978
+ )
979
+ changepoints = _py4dgeo.change_point_detection(cpdata)[:-1]
980
+
981
+ # Shift the time series to positive values
982
+             timeseries = timeseries + abs(np.nanmin(timeseries)) + 0.1
983
+ # create a flipped version for negative change volumes
984
+ timeseries_flipped = timeseries * -1.0 + abs(np.nanmax(timeseries)) + 0.1
985
+
986
+ # Create seeds for this timeseries
987
+ corepoint_seeds = []
988
+ for start_idx in changepoints:
989
+ # Skip this changepoint if it was included into a previous seed
990
+ if corepoint_seeds and start_idx <= corepoint_seeds[-1].end_epoch:
991
+ continue
992
+
993
+                 # Skip this changepoint if it is too close to the end
994
+ if start_idx >= timeseries.shape[0] - self.minperiod:
995
+ break
996
+
997
+                 # Decide whether we need to use the flipped timeseries
998
+ used_timeseries = timeseries
999
+ if timeseries[start_idx] >= timeseries[start_idx + self.minperiod]:
1000
+ used_timeseries = timeseries_flipped
1001
+
1002
+ previous_volume = -999.9
1003
+ for target_idx in range(start_idx + 1, timeseries.shape[0]):
1004
+ # Calculate the change volume
1005
+ height = used_timeseries[start_idx]
1006
+ volume = np.nansum(
1007
+ used_timeseries[start_idx : target_idx + 1] - height
1008
+ )
1009
+
1010
+ # Check whether the volume started decreasing
1011
+ if previous_volume > volume:
1012
+                         # Only add a seed if its duration is at least the minimum period and the height of the change form is at least the threshold
1013
+ if (target_idx - start_idx >= self.minperiod) and (
1014
+ np.abs(
1015
+ np.max(used_timeseries[start_idx : target_idx + 1])
1016
+ - np.min(used_timeseries[start_idx : target_idx + 1])
1017
+ )
1018
+ >= self.height_threshold
1019
+ ):
1020
+ corepoint_seeds.append(
1021
+ RegionGrowingSeed(i, start_idx, target_idx)
1022
+ )
1023
+ break
1024
+ else:
1025
+ previous_volume = volume
1026
+
1027
+ # This causes a seed to always be detected if the volume doesn't decrease before present
1028
+ # Useful when used in an online setting, can be filtered before region growing
1029
+                     # Only if the last epoch is reached do we use the segment as a seed
1030
+ if (target_idx == timeseries.shape[0] - 1) and self.use_unfinished:
1031
+ # We reached the present and add a seed based on it
1032
+ corepoint_seeds.append(
1033
+ RegionGrowingSeed(i, start_idx, timeseries.shape[0] - 1)
1034
+ )
1035
+
1036
+ # Add all the seeds found for this corepoint to the full list
1037
+ seeds.extend(corepoint_seeds)
1038
+
1039
+ return seeds
1040
+
1041
+ def seed_sorting_scorefunction(self):
1042
+ """Neighborhood similarity sorting function"""
1043
+
1044
+ # The 4D-OBC algorithm sorts by similarity in the neighborhood
1045
+ # of the seed.
1046
+ def neighborhood_similarity(seed):
1047
+ neighbors = self.analysis.corepoints.kdtree.radius_search(
1048
+ self.analysis.corepoints.cloud[seed.index, :], self.neighborhood_radius
1049
+ )
1050
+ # if no neighbors are found make sure the algorithm continues its search but with a large dissimilarity
1051
+ if len(neighbors) < 2:
1052
+ return 9999999.0 # return very large number? or delete the seed point, but then also delete from the seeds list
1053
+
1054
+ similarities = []
1055
+ for n in neighbors:
1056
+ data = _py4dgeo.TimeseriesDistanceFunctionData(
1057
+ self.analysis.distances_for_compute[
1058
+ seed.index, seed.start_epoch : seed.end_epoch + 1
1059
+ ],
1060
+ self.analysis.distances_for_compute[
1061
+ n, seed.start_epoch : seed.end_epoch + 1
1062
+ ],
1063
+ )
1064
+ similarities.append(self.distance_measure()(data))
1065
+
1066
+ return sum(similarities, 0.0) / (len(neighbors) - 1)
1067
+
1068
+ return neighborhood_similarity
1069
+
1070
+ def filter_objects(self, obj):
1071
+ """A filter for objects produced by the region growing algorithm"""
1072
+
1073
+ # Filter based on coefficient of variation
1074
+ distarray = np.fromiter(obj._data.indices_distances.values(), np.float64)
1075
+
1076
+ # Check if mean is 0.0, if so, set to very small value to avoid division by 0
1077
+ mean_distarray = np.mean(distarray)
1078
+ if mean_distarray == 0.0:
1079
+ mean_distarray = 10**-10
1080
+
1081
+ # Calculate coefficient of variation
1082
+ cv = np.std(distarray) / mean_distarray
1083
+
1084
+ # TODO: Make this threshold configurable?
1085
+ return cv <= 0.8
1086
+
1087
+
1088
+ class RegionGrowingSeed:
1089
+ def __init__(self, index, start_epoch, end_epoch):
1090
+ self._seed = _py4dgeo.RegionGrowingSeed(index, start_epoch, end_epoch)
1091
+
1092
+ @property
1093
+ def index(self):
1094
+ return self._seed.index
1095
+
1096
+ @property
1097
+ def start_epoch(self):
1098
+ return self._seed.start_epoch
1099
+
1100
+ @property
1101
+ def end_epoch(self):
1102
+ return self._seed.end_epoch
1103
+
1104
+
1105
+ class ObjectByChange:
1106
+ """Representation a change object in the spatiotemporal domain"""
1107
+
1108
+ def __init__(self, data, seed, analysis=None):
1109
+ self._data = data
1110
+ self._analysis = analysis
1111
+ self.seed = seed
1112
+
1113
+ @property
1114
+ def indices(self):
1115
+ """The set of corepoint indices that compose the object by change"""
1116
+ return list(self._data.indices_distances.keys())
1117
+
1118
+ def distance(self, index):
1119
+ return self._data.indices_distances[index]
1120
+
1121
+ @property
1122
+ def start_epoch(self):
1123
+ """The index of the start epoch of the change object"""
1124
+ return self._data.start_epoch
1125
+
1126
+ @property
1127
+ def end_epoch(self):
1128
+ """The index of the end epoch of the change object"""
1129
+ return self._data.end_epoch
1130
+
1131
+ @property
1132
+ def threshold(self):
1133
+ """The distance threshold that produced this object"""
1134
+ return self._data.threshold
1135
+
1136
+ def plot(self, filename=None):
1137
+ """Create an informative visualization of the Object By Change
1138
+
1139
+ :param filename:
1140
+ The filename to use to store the plot. Can be omitted to only show
1141
+ plot in a Jupyter notebook session.
1142
+ :type filename: str
1143
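+
+         Example: an illustrative sketch, assuming :code:`analysis` is a
+         :code:`SpatiotemporalAnalysis` with previously extracted objects::
+
+             for i, obj in enumerate(analysis.objects or []):
+                 obj.plot(filename=f"object_{i}.png")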
+ """
1144
+
1145
+ # Extract DTW distances from this object
1146
+ indexarray = np.fromiter(self.indices, np.int32)
1147
+ distarray = np.fromiter((self.distance(i) for i in indexarray), np.float64)
1148
+
1149
+         # Initialize the figure and all of its subfigures
1150
+ fig = plt.figure(figsize=plt.figaspect(0.3))
1151
+ tsax = fig.add_subplot(1, 3, 1)
1152
+ histax = fig.add_subplot(1, 3, 2)
1153
+ mapax = fig.add_subplot(1, 3, 3)
1154
+
1155
+ # The first plot (tsax) prints all time series of chosen corepoints
1156
+ # and colors them according to distance.
1157
+ tsax.set_ylabel("Height change [m]")
1158
+ tsax.set_xlabel("Time [h]")
1159
+
1160
+ # We pad the time series visualization with a number of data
1161
+ # points on both sides. TODO: Expose as argument to plot?
1162
+ timeseries_padding = 10
1163
+ start_epoch = max(self.start_epoch - timeseries_padding, 0)
1164
+ end_epoch = min(
1165
+ self.end_epoch + timeseries_padding,
1166
+ self._analysis.distances_for_compute.shape[1],
1167
+ )
1168
+
1169
+ # We use the seed's timeseries to set good axis limits
1170
+ seed_ts = self._analysis.distances_for_compute[
1171
+ self.seed.index, start_epoch:end_epoch
1172
+ ]
1173
+ tsax.set_ylim(np.nanmin(seed_ts) * 0.5, np.nanmax(seed_ts) * 1.5)
1174
+
1175
+ # Create a colormap with distance for this object
1176
+ cmap = matplotlib.colormaps.get_cmap("viridis")
1177
+ maxdist = np.nanmax(distarray)
1178
+
1179
+ # Plot each time series individually
1180
+ for index in self.indices:
1181
+ tsax.plot(
1182
+ self._analysis.distances_for_compute[index, start_epoch:end_epoch],
1183
+ linewidth=0.7,
1184
+ alpha=0.3,
1185
+ color=cmap(self.distance(index) / maxdist),
1186
+ )
1187
+
1188
+ # Plot the seed timeseries again, but with a thicker line
1189
+ tsax.plot(seed_ts, linewidth=2.0, zorder=10, color="blue")
1190
+
1191
+ # Next, we add a histogram plot with the distance values (using seaborn)
1192
+ seaborn.histplot(distarray, ax=histax, kde=True, color="r")
1193
+
1194
+ # Add labels to the histogram plot
1195
+ histax.set_title(f"Segment size: {distarray.shape[0]}")
1196
+ histax.set_xlabel("DTW distance")
1197
+
1198
+ # Create a 2D view of the segment
1199
+ locations = self._analysis.corepoints.cloud[indexarray, 0:2]
1200
+ mapax.scatter(locations[:, 0], locations[:, 1], c=distarray)
1201
+
1202
+ # Some global settings of the generated figure
1203
+ fig.tight_layout()
1204
+
1205
+ # Maybe save to file
1206
+ if filename is not None:
1207
+ plt.savefig(filename)
1208
+
1209
+
1210
+ def check_epoch_timestamp(epoch):
1211
+ """Validate an epoch to be used with SpatiotemporalSegmentation"""
1212
+ if epoch.timestamp is None:
1213
+ raise Py4DGeoError(
1214
+ "Epochs need to define a timestamp to be usable in SpatiotemporalSegmentation"
1215
+ )
1216
+
1217
+ return epoch
1218
+
1219
+
1220
+ def regular_corepoint_grid(lowerleft, upperright, num_points, zval=0.0):
1221
+ """A helper function to create a regularly spaced grid for the analysis
1222
+
1223
+ :param lowerleft:
1224
+ The lower left corner of the grid. Given as a 2D coordinate.
1225
+ :type lowerleft: np.ndarray
1226
+ :param upperright:
1227
+ The upper right corner of the grid. Given as a 2D coordinate.
1228
+ :type upperright: np.ndarray
1229
+ :param num_points:
1230
+ A tuple with two entries denoting the number of points to be used in
1231
+ x and y direction
1232
+ :type num_points: tuple
1233
+ :param zval:
1234
+ The value to fill for the z-direction.
1235
+ :type zval: double
1236
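+
+     Example: an illustrative sketch creating a 5 x 5 grid over the unit square
+     at z = 0, yielding a (25, 3) array that can be used e.g. as corepoints::
+
+         corepoints = regular_corepoint_grid(
+             np.array([0.0, 0.0]), np.array([1.0, 1.0]), (5, 5)
+         )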
+ """
1237
+ xspace = np.linspace(
1238
+ lowerleft[0], upperright[0], num=num_points[0], dtype=np.float64
1239
+ )
1240
+ yspace = np.linspace(
1241
+ lowerleft[1], upperright[1], num=num_points[1], dtype=np.float64
1242
+ )
1243
+
1244
+ grid = np.empty(shape=(num_points[0] * num_points[1], 3), dtype=np.float64)
1245
+ for i, x in enumerate(xspace):
1246
+ for j, y in enumerate(yspace):
1247
+             # Flatten the 2D grid index: the x index i varies slowest, the y index j fastest
+             grid[i * num_points[1] + j, 0] = x
+             grid[i * num_points[1] + j, 1] = y
+             grid[i * num_points[1] + j, 2] = zval
1250
+
1251
+ return grid
1252
+
1253
+
1254
+ def temporal_averaging(distances, smoothing_window=24):
1255
+ """Smoothen a space-time array of distance change using a sliding window approach
1256
+
1257
+ :param distances:
1258
+         The raw data to smooth.
1259
+ :type distances: np.ndarray
1260
+ :param smoothing_window:
1261
+         The size of the sliding window used for smoothing the data. The
+         default value is 24.
+     :type smoothing_window: int
1264
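+
+     Example: an illustrative sketch, assuming :code:`analysis` is a
+     :code:`SpatiotemporalAnalysis` with M3C2 distances already computed. The
+     smoothed array is then preferred by :code:`distances_for_compute`::
+
+         analysis.smoothed_distances = temporal_averaging(
+             analysis.distances, smoothing_window=24
+         )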
+ """
1265
+
1266
+ with logger_context("Smoothing temporal data"):
1267
+ smoothed = np.empty_like(distances)
1268
+ eps = smoothing_window // 2
1269
+
1270
+ for i in range(distances.shape[1]):
1271
+ smoothed[:, i] = np.nanmedian(
1272
+ distances[
1273
+ :,
1274
+ max(0, i - eps) : min(distances.shape[1] - 1, i + eps),
1275
+ ],
1276
+ axis=1,
1277
+ )
1278
+
1279
+     # Return the smoothed space-time array
1280
+ return smoothed