py4dgeo 1.0.0__cp314-cp314t-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1325 @@
1
+ from py4dgeo.epoch import Epoch, as_epoch
2
+ from py4dgeo.logger import logger_context
3
+ from py4dgeo.util import Py4DGeoError, find_file
4
+ from py4dgeo.UpdateableZipFile import UpdateableZipFile
5
+
6
+ import datetime
7
+ import json
8
+ import logging
9
+ import matplotlib
10
+ import matplotlib.pyplot as plt
11
+ import numpy as np
12
+ import os
13
+ import pickle
14
+ import seaborn
15
+ import tempfile
16
+ import zipfile
17
+ import _py4dgeo
18
+
19
+ # Get the py4dgeo logger instance
20
+ logger = logging.getLogger("py4dgeo")
21
+
22
+
23
+ # This integer controls the versioning of the segmentation file format. Whenever the
24
+ # format is changed, this version should be increased, so that py4dgeo can warn
25
+ # about incompatibilities of py4dgeo with loaded data. This version is intentionally
26
+ # different from py4dgeo's version, because not all releases of py4dgeo necessarily
27
+ # change the segmentation file format and we want to be as compatible as possible.
28
+ PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION = 0
29
+
30
+
31
+ class SpatiotemporalAnalysis:
32
+ def __init__(self, filename, compress=True, allow_pickle=True, force=False):
33
+ """Construct a spatiotemporal segmentation object
34
+
35
+ This is the basic data structure for the 4D objects by change algorithm
36
+ and its derived variants. It manages storage of M3C2 distances and other
37
+ intermediate results for a time series of epochs. The original point clouds
38
+ themselves are not needed after initial distance calculation and additional
39
+ epochs can be added to an existing analysis. The class uses a disk backend
40
+ to store information and allows lazy loading of additional data such as
41
+ M3C2 uncertainty values for postprocessing.
42
+
43
+ :param filename:
44
+ The filename used for this analysis. If it does not exist on the file
45
+ system, a new analysis is created. Otherwise, the data is loaded from the existing file.
46
+ :type filename: str
47
+ :param compress:
48
+ Whether to compress the stored data. This is a tradeoff decision between
49
+ disk space and runtime. Especially appending new epochs to an existing
50
+ analysis is an operation whose runtime can easily be dominated by
51
+ decompression/compression of data.
52
+ :type compress: bool
53
+ :param allow_pickle:
54
+ Whether py4dgeo is allowed to use the pickle module to store some data
55
+ in the file representation of the analysis. If set to False, some data
56
+ may not be stored and needs to be recomputed instead.
57
+ :type allow_pickle: bool
58
+ :param force:
59
+ Force creation of a new analysis object, even if a file of this name
60
+ already exists.
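+
+ Example (a minimal usage sketch; the file names, timestamps and the exact
+ M3C2 constructor arguments are illustrative assumptions)::
+
+     import datetime
+     import py4dgeo
+
+     reference = py4dgeo.read_from_xyz("epoch_0.xyz")
+     reference.timestamp = datetime.datetime(2020, 1, 1)
+     epoch1 = py4dgeo.read_from_xyz("epoch_1.xyz")
+     epoch1.timestamp = datetime.datetime(2020, 1, 2)
+
+     analysis = py4dgeo.SpatiotemporalAnalysis("analysis.zip")
+     analysis.reference_epoch = reference
+     analysis.corepoints = reference.cloud[::100]
+     analysis.m3c2 = py4dgeo.M3C2(cyl_radius=1.0, normal_radii=[1.0])
+     analysis.add_epochs(epoch1)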
61
+ """
62
+
63
+ # Store the given parameters
64
+ self.filename = find_file(filename, fatal=False)
65
+ self.compress = compress
66
+ self.allow_pickle = allow_pickle
67
+
68
+ # Instantiate some properties used later on
69
+ self._m3c2 = None
70
+
71
+ # This is the cache for lazily loaded data
72
+ self._corepoints = None
73
+ self._distances = None
74
+ self._smoothed_distances = None
75
+ self._uncertainties = None
76
+ self._reference_epoch = None
77
+
78
+ # If the filename does not already exist, we create a new archive
79
+ if force or not os.path.exists(self.filename):
80
+ logger.info(f"Creating analysis file {self.filename}")
81
+ with zipfile.ZipFile(self.filename, mode="w") as zf:
82
+ # Write the segmentation file format version number
83
+ zf.writestr(
84
+ "SEGMENTATION_FILE_FORMAT",
85
+ str(PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION),
86
+ )
87
+
88
+ # Write the compression flag used for all suboperations
89
+ zf.writestr("USE_COMPRESSION", str(self.compress))
90
+
91
+ # Assert that the segmentation file format is still valid
92
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
93
+ # Read the segmentation file version number and compare to current
94
+ version = int(zf.read("SEGMENTATION_FILE_FORMAT").decode())
95
+ if version != PY4DGEO_SEGMENTATION_FILE_FORMAT_VERSION:
96
+ raise Py4DGeoError("Segmentation file format is out of date!")
97
+
98
+ # Read back the compression flag
99
+ self.compress = zf.read("USE_COMPRESSION").decode() == "True"
100
+
101
+ @property
102
+ def reference_epoch(self):
103
+ """Access the reference epoch of this analysis"""
104
+
105
+ if self._reference_epoch is None:
106
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
107
+ # Double check that the reference has already been set
108
+ if "reference_epoch.zip" not in zf.namelist():
109
+ raise Py4DGeoError("Reference epoch for analysis not yet set")
110
+
111
+ # Extract it from the archive
112
+ with tempfile.TemporaryDirectory() as tmp_dir:
113
+ ref_epochfile = zf.extract("reference_epoch.zip", path=tmp_dir)
114
+ self._reference_epoch = Epoch.load(ref_epochfile)
115
+
116
+ return self._reference_epoch
117
+
118
+ @reference_epoch.setter
119
+ def reference_epoch(self, epoch):
120
+ """Set the reference epoch of this analysis (only possible once)"""
121
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
122
+ # If we already have a reference epoch, the user should start a
123
+ # new analysis instead
124
+ if "reference_epoch.zip" in zf.namelist():
125
+ raise Py4DGeoError(
126
+ "Reference epoch cannot be changed - please start a new analysis"
127
+ )
128
+
129
+ # Ensure that we do have a timestamp on the epoch
130
+ epoch = check_epoch_timestamp(epoch)
131
+
132
+ # Ensure that the search tree is built - no-op if it was already built by the user
133
+ epoch._validate_search_tree()
134
+
135
+ # Write the reference epoch into the archive
136
+ with tempfile.TemporaryDirectory() as tmp_dir:
137
+ epochfilename = os.path.join(tmp_dir, "reference_epoch.zip")
138
+ epoch.save(epochfilename)
139
+ zf.write(epochfilename, arcname="reference_epoch.zip")
140
+
141
+ # Also cache it directly
142
+ self._reference_epoch = epoch
143
+
144
+ @reference_epoch.deleter
145
+ def reference_epoch(self):
146
+ self._reference_epoch = None
147
+
148
+ @property
149
+ def corepoints(self):
150
+ """Access the corepoints of this analysis"""
151
+ if self._corepoints is None:
152
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
153
+ # Double check that the corepoints have already been set
154
+ if "corepoints.zip" not in zf.namelist():
155
+ raise Py4DGeoError("Corepoints for analysis not yet set")
156
+
157
+ # Extract it from the archive
158
+ with tempfile.TemporaryDirectory() as tmp_dir:
159
+ cpfile = zf.extract("corepoints.zip", path=tmp_dir)
160
+ self._corepoints = Epoch.load(cpfile)
161
+
162
+ return self._corepoints
163
+
164
+ @corepoints.setter
165
+ def corepoints(self, _corepoints):
166
+ """Set the corepoints for this analysis (only possible once)"""
167
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
168
+ # If we already have corepoints in the archive, the user should start a
169
+ # new analysis instead
170
+ if "corepoints.zip" in zf.namelist():
171
+ raise Py4DGeoError(
172
+ "Corepoints cannot be changed - please start a new analysis"
173
+ )
174
+
175
+ # Ensure that the corepoints are stored as an epoch and that its search tree is built
176
+ self._corepoints = as_epoch(_corepoints)
177
+ self._corepoints._validate_search_tree()
178
+
179
+ # Write the corepoints into the archive
180
+ with tempfile.TemporaryDirectory() as tmp_dir:
181
+ cpfilename = os.path.join(tmp_dir, "corepoints.zip")
182
+ self._corepoints.save(cpfilename)
183
+ zf.write(cpfilename, arcname="corepoints.zip")
184
+
185
+ @corepoints.deleter
186
+ def corepoints(self):
187
+ self._corepoints = None
188
+
189
+ @property
190
+ def m3c2(self):
191
+ """Access the M3C2 algorithm of this analysis"""
192
+ # Return the M3C2 instance (None if it has not been set)
193
+ return self._m3c2
194
+
195
+ @m3c2.setter
196
+ def m3c2(self, _m3c2):
197
+ """Set the M3C2 algorithm of this analysis"""
198
+ self._m3c2 = _m3c2
199
+
200
+ @property
201
+ def timedeltas(self):
202
+ """Access the sequence of time stamp deltas for the time series"""
203
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
204
+ if "timestamps.json" not in zf.namelist():
205
+ return []
206
+
207
+ # Read timedeltas
208
+ with tempfile.TemporaryDirectory() as tmp_dir:
209
+ timestampsfile = zf.extract("timestamps.json", path=tmp_dir)
210
+ with open(timestampsfile) as f:
211
+ timedeltas = json.load(f)
212
+
213
+ # Convert the serialized deltas to datetime.timedelta
214
+ return [datetime.timedelta(**data) for data in timedeltas]
215
+
216
+ @timedeltas.setter
217
+ def timedeltas(self, _timedeltas):
218
+ """Set the timedeltas manually
219
+
220
+ This is only possible exactly once and mutually exclusive with adding
221
+ epochs via the :ref:`add_epochs` method.
222
+ """
223
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
224
+ # If we already have timestamps in the archive, this is not possible
225
+ if "timestamps.json" in zf.namelist():
226
+ raise Py4DGeoError(
227
+ "Timestamps can only be set on freshly created analysis instances"
228
+ )
229
+
230
+ with tempfile.TemporaryDirectory() as tmp_dir:
231
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
232
+ with open(timestampsfile, "w") as f:
233
+ json.dump(
234
+ [
235
+ {
236
+ "days": td.days,
237
+ "seconds": td.seconds,
238
+ "microseconds": td.microseconds,
239
+ }
240
+ for td in _timedeltas
241
+ ],
242
+ f,
243
+ )
244
+ zf.write(timestampsfile, arcname="timestamps.json")
245
+
246
+ @property
247
+ def distances(self):
248
+ """Access the M3C2 distances of this analysis"""
249
+
250
+ if self._distances is None:
251
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
252
+ filename = self._numpy_filename("distances")
253
+ if filename not in zf.namelist():
254
+ self.distances = np.empty(
255
+ (self.corepoints.cloud.shape[0], 0), dtype=np.float64
256
+ )
257
+ return self._distances
258
+
259
+ with tempfile.TemporaryDirectory() as tmp_dir:
260
+ distancefile = zf.extract(filename, path=tmp_dir)
261
+ read_func = (
262
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
263
+ )
264
+ self._distances = read_func(distancefile)
265
+
266
+ return self._distances
267
+
268
+ @distances.setter
269
+ def distances(self, _distances):
270
+ """Set the distances manually
271
+
272
+ This is only possible exactly once and mutually exclusive with adding
273
+ epochs via the :ref:`add_epochs` method.
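+
+ Example (a sketch, assuming a precomputed space-time array ``arr`` of
+ shape ``(n_corepoints, n_epochs)`` and a matching list ``deltas`` of
+ ``datetime.timedelta`` objects)::
+
+     analysis.timedeltas = deltas
+     analysis.distances = arr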
274
+ """
275
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
276
+ filename = self._numpy_filename("distances")
277
+ write_func = np.savez_compressed if self.compress else np.save
278
+
279
+ # If we already have distances in the archive, this is not possible
280
+ if filename in zf.namelist():
281
+ raise Py4DGeoError(
282
+ "Distances can only be set on freshly created analysis instances, use add_epochs instead."
283
+ )
284
+
285
+ with tempfile.TemporaryDirectory() as tmp_dir:
286
+ distancesfile = os.path.join(tmp_dir, filename)
287
+ write_func(distancesfile, _distances)
288
+ zf.write(distancesfile, arcname=filename)
289
+
290
+ self._distances = _distances
291
+
292
+ @distances.deleter
293
+ def distances(self):
294
+ self._distances = None
295
+
296
+ @property
297
+ def smoothed_distances(self):
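+ """Access the smoothed M3C2 distances of this analysis"""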
298
+ if self._smoothed_distances is None:
299
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
300
+ filename = self._numpy_filename("smoothed_distances")
301
+ if filename in zf.namelist():
302
+ with tempfile.TemporaryDirectory() as tmp_dir:
303
+ smoothedfile = zf.extract(filename, path=tmp_dir)
304
+ read_func = (
305
+ (lambda f: np.load(f)["arr_0"])
306
+ if self.compress
307
+ else np.load
308
+ )
309
+ self._smoothed_distances = read_func(smoothedfile)
310
+
311
+ return self._smoothed_distances
312
+
313
+ @smoothed_distances.setter
314
+ def smoothed_distances(self, _smoothed_distances):
315
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
316
+ filename = self._numpy_filename("smoothed_distances")
317
+ write_func = np.savez_compressed if self.compress else np.save
318
+
319
+ with tempfile.TemporaryDirectory() as tmp_dir:
320
+ smoothedfile = os.path.join(tmp_dir, filename)
321
+ write_func(smoothedfile, _smoothed_distances)
322
+ zf.write(smoothedfile, arcname=filename)
323
+
324
+ self._smoothed_distances = _smoothed_distances
325
+
326
+ @smoothed_distances.deleter
327
+ def smoothed_distances(self):
328
+ self._smoothed_distances = None
329
+
330
+ @property
331
+ def uncertainties(self):
332
+ """Access the M3C2 uncertainties of this analysis"""
333
+
334
+ if self._uncertainties is None:
335
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
336
+ filename = self._numpy_filename("uncertainties")
337
+ if filename not in zf.namelist():
338
+ self.uncertainties = np.empty(
339
+ (self.corepoints.cloud.shape[0], 0),
340
+ dtype=np.dtype(
341
+ [
342
+ ("lodetection", "<f8"),
343
+ ("spread1", "<f8"),
344
+ ("num_samples1", "<i8"),
345
+ ("spread2", "<f8"),
346
+ ("num_samples2", "<i8"),
347
+ ]
348
+ ),
349
+ )
350
+ return self._uncertainties
351
+
352
+ with tempfile.TemporaryDirectory() as tmp_dir:
353
+ uncertaintyfile = zf.extract(filename, path=tmp_dir)
354
+ read_func = (
355
+ (lambda f: np.load(f)["arr_0"]) if self.compress else np.load
356
+ )
357
+ self._uncertainties = read_func(uncertaintyfile)
358
+
359
+ return self._uncertainties
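+
+ # A sketch of working with the structured uncertainties array defined above,
+ # e.g. extracting the level of detection per corepoint and epoch for an
+ # analysis instance:
+ #
+ #   lodetection = analysis.uncertainties["lodetection"]  # shape (n_corepoints, n_epochs)
+ #   spread_reference = analysis.uncertainties["spread1"]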
360
+
361
+ @uncertainties.setter
362
+ def uncertainties(self, _uncertainties):
363
+ """Set the uncertainties manually
364
+
365
+ This is only possible exactly once and mutually exclusive with adding
366
+ epochs via the :ref:`add_epochs` method.
367
+ """
368
+ with zipfile.ZipFile(self.filename, mode="a") as zf:
369
+ filename = self._numpy_filename("uncertainties")
370
+ write_func = np.savez_compressed if self.compress else np.save
371
+
372
+ # If we already have uncertainties in the archive, this is not possible
373
+ if filename in zf.namelist():
374
+ raise Py4DGeoError(
375
+ "Uncertainties can only be set on freshly created analysis instances, use add_epochs instead."
376
+ )
377
+
378
+ with tempfile.TemporaryDirectory() as tmp_dir:
379
+ uncertaintiesfile = os.path.join(tmp_dir, filename)
380
+ write_func(uncertaintiesfile, _uncertainties)
381
+ zf.write(uncertaintiesfile, arcname=filename)
382
+
383
+ self._uncertainties = _uncertainties
384
+
385
+ @uncertainties.deleter
386
+ def uncertainties(self):
387
+ self._uncertainties = None
388
+
389
+ def add_epochs(self, *epochs):
390
+ """Add a number of epochs to the existing analysis"""
391
+
392
+ # Remove intermediate results from the archive
393
+ self.invalidate_results()
394
+
395
+ # Assert that all epochs have a timestamp
396
+ for epoch in epochs:
397
+ check_epoch_timestamp(epoch)
398
+
399
+ # Lazily fetch required data
400
+ reference_epoch = self.reference_epoch
401
+ timedeltas = self.timedeltas
402
+
403
+ # Collect the calculated results to only add them once to the archive
404
+ new_distances = []
405
+ new_uncertainties = []
406
+
407
+ # Iterate over the given epochs
408
+ for i, epoch in enumerate(sorted(epochs, key=lambda e: e.timestamp)):
409
+ with logger_context(f"Adding epoch {i+1}/{len(epochs)} to analysis object"):
410
+ # Prepare the M3C2 instance
411
+ self.m3c2.corepoints = self.corepoints.cloud
412
+ self.m3c2.epochs = (reference_epoch, epoch)
413
+
414
+ # Calculate the M3C2 distances
415
+ d, u = self.m3c2.calculate_distances(reference_epoch, epoch)
416
+ new_distances.append(d)
417
+ new_uncertainties.append(u)
418
+ timedeltas.append(epoch.timestamp - reference_epoch.timestamp)
419
+
420
+ # We do not need the reference_epoch at this point
421
+ del self.reference_epoch
422
+
423
+ # Prepare all archive data in a temporary directory
424
+ with tempfile.TemporaryDirectory() as tmp_dir:
425
+ # Write a new timestamps file
426
+ timestampsfile = os.path.join(tmp_dir, "timestamps.json")
427
+ with open(timestampsfile, "w") as f:
428
+ json.dump(
429
+ [
430
+ {
431
+ "days": td.days,
432
+ "seconds": td.seconds,
433
+ "microseconds": td.microseconds,
434
+ }
435
+ for td in timedeltas
436
+ ],
437
+ f,
438
+ )
439
+
440
+ # Depending on whether we compress, we use different numpy functionality
441
+ write_func = np.savez_compressed if self.compress else np.save
442
+ distance_filename = self._numpy_filename("distances")
443
+ uncertainty_filename = self._numpy_filename("uncertainties")
444
+
445
+ with logger_context("Rearranging space-time array in memory"):
446
+ # Load the distance array and append new data
447
+ distance_file = os.path.join(tmp_dir, distance_filename)
448
+ write_func(
449
+ distance_file,
450
+ np.concatenate(
451
+ (self.distances, np.column_stack(tuple(new_distances))), axis=1
452
+ ),
453
+ )
454
+
455
+ # Load the uncertainty array and append new data
456
+ uncertainty_file = os.path.join(tmp_dir, uncertainty_filename)
457
+ write_func(
458
+ uncertainty_file,
459
+ np.concatenate(
460
+ (self.uncertainties, np.column_stack(tuple(new_uncertainties))),
461
+ axis=1,
462
+ ),
463
+ )
464
+
465
+ # Invalidate potential caches for distances/uncertainties
466
+ self._distances = None
467
+ self._uncertainties = None
468
+
469
+ # Dump the updated files into the archive
470
+ with logger_context("Updating disk-based analysis archive with new epochs"):
471
+ with UpdateableZipFile(self.filename, mode="a") as zf:
472
+ if "timestamps.json" in zf.namelist():
473
+ zf.remove("timestamps.json")
474
+ zf.write(timestampsfile, arcname="timestamps.json")
475
+ if distance_filename in zf.namelist():
476
+ zf.remove(distance_filename)
477
+ zf.write(distance_file, arcname=distance_filename)
478
+ if uncertainty_filename in zf.namelist():
479
+ zf.remove(uncertainty_filename)
480
+ zf.write(uncertainty_file, arcname=uncertainty_filename)
481
+
482
+ # (Potentially) remove caches
483
+ del self.distances
484
+ del self.uncertainties
485
+
486
+ @property
487
+ def seeds(self):
488
+ """The list of seed candidates for this analysis"""
489
+
490
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
491
+ if "seeds.pickle" not in zf.namelist():
492
+ return None
493
+
494
+ with tempfile.TemporaryDirectory() as tmp_dir:
495
+ zf.extract("seeds.pickle", path=tmp_dir)
496
+ with open(os.path.join(tmp_dir, "seeds.pickle"), "rb") as f:
497
+ return pickle.load(f)
498
+
499
+ @seeds.setter
500
+ def seeds(self, _seeds):
501
+ # Assert that we received the correct type
502
+ for seed in _seeds:
503
+ if not isinstance(seed, RegionGrowingSeed):
504
+ raise Py4DGeoError(
505
+ "Seeds are expected to inherit from RegionGrowingSeed"
506
+ )
507
+
508
+ if not self.allow_pickle:
509
+ return
510
+
511
+ with UpdateableZipFile(self.filename, mode="a") as zf:
512
+ if "seeds.pickle" in zf.namelist():
513
+ zf.remove("seeds.pickle")
514
+
515
+ with tempfile.TemporaryDirectory() as tmp_dir:
516
+ seedsfile = os.path.join(tmp_dir, "seeds.pickle")
517
+ with open(seedsfile, "wb") as f:
518
+ pickle.dump(_seeds, f)
519
+
520
+ zf.write(seedsfile, arcname="seeds.pickle")
521
+
522
+ @property
523
+ def objects(self):
524
+ """The list of objects by change for this analysis"""
525
+
526
+ with zipfile.ZipFile(self.filename, mode="r") as zf:
527
+ if "objects.pickle" not in zf.namelist():
528
+ return None
529
+
530
+ with tempfile.TemporaryDirectory() as tmp_dir:
531
+ zf.extract("objects.pickle", path=tmp_dir)
532
+ with open(os.path.join(tmp_dir, "objects.pickle"), "rb") as f:
533
+ objects = pickle.load(f)
534
+
535
+ # Re-attach analysis backlink after unpickling
536
+ if objects is not None:
537
+ for obj in objects:
538
+ if hasattr(obj, "_analysis"):
539
+ obj.attach_analysis(self)
540
+
541
+ return objects
542
+
543
+ @objects.setter
544
+ def objects(self, _objects):
545
+ if _objects is None:
546
+ return
547
+
548
+ # Assert that we received the correct type
549
+ for obj in _objects:
550
+ if not isinstance(obj, ObjectByChange):
551
+ raise Py4DGeoError(
552
+ "Objects are expected to inherit from ObjectByChange"
553
+ )
554
+
555
+ if not self.allow_pickle:
556
+ return
557
+
558
+ with UpdateableZipFile(self.filename, mode="a") as zf:
559
+ if "objects.pickle" in zf.namelist():
560
+ zf.remove("objects.pickle")
561
+
562
+ with tempfile.TemporaryDirectory() as tmp_dir:
563
+ objectsfile = os.path.join(tmp_dir, "objects.pickle")
564
+ with open(objectsfile, "wb") as f:
565
+ pickle.dump(_objects, f)
566
+
567
+ zf.write(objectsfile, arcname="objects.pickle")
568
+
569
+ def invalidate_results(self, seeds=True, objects=True, smoothed_distances=False):
570
+ """Invalidate (and remove) calculated results
571
+
572
+ This is automatically called when new epochs are added or when
573
+ an algorithm sets the :code:`force` option.
574
+ """
575
+
576
+ logger.info(
577
+ f"Removing intermediate results from the analysis file {self.filename}"
578
+ )
579
+ with UpdateableZipFile(self.filename, mode="a") as zf:
580
+ if seeds and "seeds.pickle" in zf.namelist():
581
+ zf.remove("seeds.pickle")
582
+
583
+ if objects and "objects.pickle" in zf.namelist():
584
+ zf.remove("objects.pickle")
585
+
586
+ smoothed_file = self._numpy_filename("smoothed_distances")
587
+ if smoothed_distances and smoothed_file in zf.namelist():
588
+ zf.remove(smoothed_file)
589
+
590
+ def _numpy_filename(self, name):
591
+ extension = "npz" if self.compress else "npy"
592
+ return f"{name}.{extension}"
593
+
594
+ @property
595
+ def distances_for_compute(self):
596
+ """Retrieve the distance array used for computation
597
+
598
+ This might be the raw data or smoothed data, based on whether
599
+ a smoothing was provided by the user.
600
+ """
601
+ distances = self.smoothed_distances
602
+ if distances is None:
603
+ distances = self.distances
604
+ return distances
605
+
606
+
607
+ class RegionGrowingAlgorithmBase:
608
+ def __init__(
609
+ self,
610
+ neighborhood_radius=1.0,
611
+ thresholds=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
612
+ min_segments=20,
613
+ max_segments=None,
614
+ ):
615
+ """Construct a spatiotemporal segmentation algorithm.
616
+
617
+ This class can be derived from to customize the algorithm behaviour.
618
+
619
+ :param neighborhood_radius:
620
+ The size of the neighborhood of a core point. All core points within
621
+ this radius are considered adjacent and are therefore considered as
622
+ candidates for inclusion in the region growing algorithm.
623
+ :type neighborhood_radius: float
624
+ :param thresholds:
625
+ A list of thresholds to use as candidates in 4D-OBC's adaptive
626
+ thresholding procedure.
627
+ :type thresholds: list
628
+ :param min_segments:
629
+ The minimum number of core points in an object-by-change. Defaults to
630
+ 20.
631
+ :type min_segments: int
632
+ :param max_segments:
633
+ The maximum number of core points in an object-by-change. This is mainly
634
+ used to bound the runtime of expensive region growing. By default, no
635
+ maximum is applied.
636
+ :type max_segments: int
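+
+ Example (a sketch of customizing the behaviour by subclassing; the score and
+ filter used here are purely illustrative, and a real subclass must also
+ implement :meth:`find_seedpoints`)::
+
+     class EarlySeedsAlgorithm(RegionGrowingAlgorithmBase):
+         def seed_sorting_scorefunction(self):
+             # Prefer seeds that start early in the time series
+             return lambda seed: float(seed.start_epoch)
+
+         def filter_objects(self, obj):
+             # Keep only objects that span at least 10 epochs
+             return obj.end_epoch - obj.start_epoch >= 10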
637
+ """
638
+
639
+ self.neighborhood_radius = neighborhood_radius
640
+ self.thresholds = thresholds
641
+ self.min_segments = min_segments
642
+ self.max_segments = max_segments
643
+
644
+ self._analysis = None
645
+
646
+ def distance_measure(self):
647
+ """Distance measure between two time series
648
+
649
+ Expected to return a function that accepts two time series and returns
650
+ the distance.
651
+ """
652
+
653
+ return _py4dgeo.normalized_dtw_distance
654
+
655
+ def find_seedpoints(self):
656
+ """Calculate seedpoints for the region growing algorithm"""
657
+
658
+ raise NotImplementedError
659
+
660
+ def seed_sorting_scorefunction(self):
661
+ """A function that computes a score for a seed candidate
662
+
663
+ This function is used to prioritize seed candidates.
664
+ """
665
+
666
+ # The base class does not perform sorting.
667
+ return lambda seed: 0.0
668
+
669
+ def filter_objects(self, obj):
670
+ """A filter for objects produced by the region growing algorithm
671
+
672
+ Objects are discarded if this method returns False.
673
+ """
674
+
675
+ # The base class does not perform filtering
676
+ return True
677
+
678
+ @property
679
+ def analysis(self):
680
+ """Access the analysis object that the algorithm operates on
681
+
682
+ This is only available after :ref:`run` has been called.
683
+ """
684
+ if self._analysis is None:
685
+ raise Py4DGeoError(
686
+ "Analysis object is only available when the algorithm is run"
687
+ )
688
+ return self._analysis
689
+
690
+ def run(self, analysis, force=False):
691
+ """Calculate the segmentation
692
+
693
+ :param analysis:
694
+ The analysis object we are working with.
695
+ :type analysis: py4dgeo.segmentation.SpatiotemporalAnalysis
696
+ :param force:
697
+ Force recalculation of results. If False, some intermediate results will be
698
+ restored from the analysis object instead of being recalculated.
699
+ """
700
+
701
+ # Make the analysis object known to all members
702
+ self._analysis = analysis
703
+
704
+ # Enforce the removal of intermediate results
705
+ if force:
706
+ analysis.invalidate_results()
707
+
708
+ # Check whether pre-calculated objects are available. If so, continue growing
+ # from this list, taking already grown objects into account; otherwise start
+ # from a new, empty list of objects.
717
+ precalculated = analysis.objects # TODO: do not assign to new object
718
+ if precalculated is not None:
719
+ logger.info("Reusing objects by change stored in analysis object")
720
+ objects = (
721
+ precalculated.copy()
722
+ ) # test if .copy() solves memory problem, or deepcopy?
723
+ else:
724
+ objects = (
725
+ []
726
+ ) # TODO: test initializing this in the analysis class, see if it crashes instantly
727
+
728
+ # Get corepoints from the analysis object and build a search tree on them
729
+ corepoints = as_epoch(analysis.corepoints)
730
+ corepoints._validate_search_tree()
731
+
732
+ # Calculate the list of seed points and sort them
733
+ seeds = analysis.seeds
734
+ if seeds is None:
735
+ with logger_context("Find seed candidates in time series"):
736
+ seeds = self.find_seedpoints()
737
+
738
+ # Sort the seed points
739
+ with logger_context("Sort seed candidates by priority"):
740
+ seeds = list(sorted(seeds, key=self.seed_sorting_scorefunction()))
741
+
742
+ # Store the seeds
743
+ analysis.seeds = seeds
744
+ else:
745
+ logger.info("Reusing seed candidates stored in analysis object")
746
+ # write the number of seeds to a separate text file if self.write_nr_seeds is True
747
+ if self.write_nr_seeds:
748
+ with open("number_of_seeds.txt", "w") as f:
749
+ f.write(str(len(seeds)))
750
+
751
+ # Iterate over the seeds to maybe turn them into objects
752
+ for i, seed in enumerate(
753
+ seeds
754
+ ):
+ # Skip seeds that were already handled when resuming from a given seed index
756
+ if i < (
757
+ self.resume_from_seed - 1
758
+ ): # resume from index 0 when `resume_from_seed` == 1
759
+ continue
760
+ if i >= (self.stop_at_seed - 1): # stop at index 0 when `stop_at_seed` == 1
761
+ break
762
+
763
+ # Save objects to the analysis object every `intermediate_saving` seeds
764
+ if (
765
+ (self.intermediate_saving)
766
+ and ((i % self.intermediate_saving) == 0)
767
+ and (i != 0)
768
+ ):
769
+ with logger_context(
770
+ f"Intermediate saving of first {len(objects)} objects, grown from first {i+1}/{len(seeds)} seeds"
771
+ ):
772
+ analysis.objects = objects # write the current list of objects to the analysis archive
773
+
774
+ # Check whether any already calculated object overlaps with this seed.
775
+ found = False
776
+ for obj in objects:
777
+ if seed.index in obj.indices and (
778
+ obj.end_epoch > seed.start_epoch
779
+ and seed.end_epoch > obj.start_epoch
780
+ ):
781
+ found = True
782
+ break
783
+
784
+ # If we found an overlap, we skip this seed
785
+ if found:
786
+ continue
787
+
788
+ # Apply a numeric default to the max_segments parameter
789
+ max_segments = self.max_segments
790
+ if max_segments is None:
791
+ max_segments = corepoints.cloud.shape[0] + 1
792
+
793
+ data = _py4dgeo.RegionGrowingAlgorithmData(
794
+ analysis.distances_for_compute,
795
+ corepoints,
796
+ self.neighborhood_radius,
797
+ seed._seed,
798
+ self.thresholds,
799
+ self.min_segments,
800
+ max_segments,
801
+ )
802
+
803
+ # Perform the region growing
804
+ with logger_context(
805
+ f"Performing region growing on seed candidate {i+1}/{len(seeds)}"
806
+ ):
807
+ objdata = _py4dgeo.region_growing(data, self.distance_measure())
808
+
809
+ # If the returned object has 0 indices, the min_segments threshold was violated
810
+ if objdata.indices_distances:
811
+ obj = ObjectByChange(
812
+ objdata, seed, analysis
813
+ ) # TODO: check, does it copy the whole analysis object when initializing
814
+ if self.filter_objects(obj):
815
+ objects.append(obj)
816
+
817
+ # If the returned object is larger than max_segments we issue a warning
818
+ if len(objdata.indices_distances) >= max_segments:
819
+ logger.warning(
820
+ f"An object by change exceeded the given maximum size of {max_segments}"
821
+ )
822
+
823
+ # Store the results in the analysis object
824
+ analysis.objects = objects
825
+
826
+ # Potentially remove cached distance arrays from memory
827
+ del analysis.smoothed_distances
828
+ del analysis.distances
829
+
830
+ return objects
831
+
832
+
833
+ class RegionGrowingAlgorithm(RegionGrowingAlgorithmBase):
834
+ def __init__(
835
+ self,
836
+ seed_subsampling=1,
837
+ seed_candidates=None,
838
+ window_width=24,
839
+ window_min_size=12,
840
+ window_jump=1,
841
+ window_penalty=1.0,
842
+ minperiod=24,
843
+ height_threshold=0.0,
844
+ use_unfinished=True,
845
+ intermediate_saving=0,
846
+ resume_from_seed=0,
847
+ stop_at_seed=np.inf,
848
+ write_nr_seeds=False,
849
+ **kwargs,
850
+ ):
851
+ """Construct the 4D-OBC algorithm.
852
+
853
+ :param seed_subsampling:
854
+ A subsampling factor for the set of core points for the generation
855
+ of segmentation seed candidates. This can be used to speed up
856
+ the generation of seeds. The default of 1 does not perform any
857
+ subsampling, a value of, e.g., 10 would only consider every 10th
858
+ corepoint for adding seeds.
859
+ :type seed_subsampling: int
860
+ :param seed_candidates:
861
+ A set of indices specifying which core points should be used for seed
+ detection. This can be used to perform segmentation for selected locations.
+ The default of None does not perform any selection and uses all corepoints.
+ The subsampling parameter is applied additionally.
862
+ :type seed_candidates: list
863
+ :param window_width:
864
+ The width of the sliding temporal window for change point detection. The sliding window
865
+ moves along the signal and determines the discrepancy between the first and the second
866
+ half of the window (i.e. subsequent time series segments within the window width). The
867
+ default value is 24, corresponding to one day in case of hourly data.
868
+ :type window_width: int
869
+ :param window_min_size:
870
+ The minimum temporal distance needed between two seed candidates, for the second one to be considered.
871
+ The default value is 12.
872
+ :type window_min_size: int
873
+ :param window_jump:
874
+ The interval on which the sliding temporal window moves and checks for seed candidates.
875
+ The default value is 1, corresponding to a check for every epoch in the time series.
876
+ :type window_jump: int
877
+ :param window_penalty:
878
+ A complexity penalty that determines how strict the change point detection is.
879
+ A higher penalty results in stricter change point detection (i.e., fewer points are detected), while a low
880
+ value results in a large amount of detected change points. The default value is 1.0.
881
+ :type window_penalty: float
882
+ :param minperiod:
883
+ The minimum period of a detected change to be considered as seed candidate for subsequent
884
+ segmentation. The default is 24, corresponding to one day for hourly data.
885
+ :type minperiod: int
886
+ :param height_threshold:
887
+ The height threshold represents the required magnitude of a detected change to be considered
888
+ as seed candidate for subsequent segmentation. The magnitude of a detected change is derived
889
+ as unsigned difference between magnitude (i.e. distance) at start epoch and peak magnitude.
890
+ The default is 0.0, in which case all detected changes are used as seed candidates.
891
+ :type height_threshold: float
892
+ :param use_unfinished:
893
+ If False, seed candidates that are not finished by the end of the time series are not considered in further
894
+ analysis. The default is True, in which case unfinished seed candidates are regarded as seeds for region growing.
895
+ :type use_unfinished: bool
896
+ :param intermediate_saving:
897
+ Parameter that determines after how many considered seeds the resulting list of 4D-OBCs is saved to the SpatiotemporalAnalysis object.
898
+ This is to ensure that if the algorithm is terminated unexpectedly not all results are lost. If set to 0 no intermediate saving is done.
899
+ :type intermediate_saving: int
900
+ :param resume_from_seed:
901
+ Parameter specifying from which seed index the region growing algorithm must resume. If zero all seeds are considered, starting from the highest ranked seed.
902
+ Default is 0.
903
+ :type resume_from_seed: int
904
+ :param stop_at_seed:
905
+ Parameter specifying at which seed to stop region growing and terminate the run function.
906
+ Default is np.inf, meaning all seeds are considered.
907
+ :type stop_at_seed: int
908
+ :param write_nr_seeds:
909
+ If True, after seed detection, a text file is written in the working directory containing the total number of detected seeds.
910
+ This can be used to split up the consecutive 4D-OBC segmentation into different subsets.
911
+ Default is False, meaning no txt file is written.
912
+ :type write_nr_seeds: bool
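+
+ Example (a minimal sketch; the parameter values are illustrative and assume
+ hourly epochs in an existing ``analysis`` object)::
+
+     algorithm = RegionGrowingAlgorithm(
+         neighborhood_radius=1.0,
+         seed_subsampling=10,
+         window_width=24,
+         minperiod=24,
+         height_threshold=0.05,
+     )
+     objects = algorithm.run(analysis)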
913
+ """
914
+
915
+ # Initialize base class
916
+ super().__init__(**kwargs)
917
+
918
+ # Store the given parameters
919
+ self.seed_subsampling = seed_subsampling
920
+ self.seed_candidates = seed_candidates
921
+ self.window_width = window_width
922
+ self.window_min_size = window_min_size
923
+ self.window_jump = window_jump
924
+ self.window_penalty = window_penalty
925
+ self.minperiod = minperiod
926
+ self.height_threshold = height_threshold
927
+ self.use_unfinished = use_unfinished
928
+ self.intermediate_saving = intermediate_saving
929
+ self.resume_from_seed = resume_from_seed
930
+ self.stop_at_seed = stop_at_seed
931
+ self.write_nr_seeds = write_nr_seeds
932
+
933
+ def find_seedpoints(self):
934
+ """Calculate seedpoints for the region growing algorithm"""
935
+
936
+ # This argument of the change point detection might be exposed to the
+ # user in the future. For now, it is considered internal.
+ window_costmodel = "l1"
943
+
944
+ # Before starting the process, we check if the user has set a reasonable window width parameter
945
+ if self.window_width >= self.analysis.distances_for_compute.shape[1]:
946
+ raise Py4DGeoError(
947
+ "Window width cannot be larger than the length of the time series - please adapt parameter"
948
+ )
949
+
950
+ # The list of generated seeds
951
+ seeds = []
952
+
953
+ # The list of core point indices to check as seeds
954
+ if self.seed_candidates is None:
955
+ if self.seed_subsampling == 0:
956
+ raise Py4DGeoError(
957
+ "Subsampling factor cannot be 0, use 1 or any integer larger than 1"
958
+ )
959
+ # Use all corepoints if no selection specified, considering subsampling
960
+ seed_candidates_curr = range(
961
+ 0, self.analysis.distances_for_compute.shape[0], self.seed_subsampling
962
+ )
963
+ else:
964
+ # Use the specified corepoint indices, but consider subsampling
965
+ seed_candidates_curr = self.seed_candidates # [::self.seed_subsampling]
966
+
967
+ # Iterate over all time series to analyse their change points
968
+ for i in seed_candidates_curr:
969
+ # Extract the time series and interpolate its nan values
970
+ timeseries = self.analysis.distances_for_compute[i, :]
971
+ bad_indices = np.isnan(timeseries)
972
+ num_nans = np.count_nonzero(bad_indices)
973
+
974
+ # If there are too many nans, this timeseries does not make sense
975
+ if num_nans > timeseries.shape[0] - 3:
976
+ continue
977
+
978
+ # If there are nan values, we try fixing things by interpolation
979
+ if num_nans > 0:
980
+ good_indices = np.logical_not(bad_indices)
981
+ timeseries[bad_indices] = np.interp(
982
+ bad_indices.nonzero()[0],
983
+ good_indices.nonzero()[0],
984
+ timeseries[good_indices],
985
+ )
986
+
987
+ # Run detection of change points
988
+ cpdata = _py4dgeo.ChangePointDetectionData(
989
+ ts=timeseries,
990
+ window_size=self.window_width,
991
+ min_size=self.window_min_size,
992
+ jump=self.window_jump,
993
+ penalty=self.window_penalty,
994
+ )
995
+ changepoints = _py4dgeo.change_point_detection(cpdata)[:-1]
996
+
997
+ # Shift the time series to positive values
998
+ timeseries = timeseries + abs(np.nanmin(timeseries) + 0.1)
999
+ # create a flipped version for negative change volumes
1000
+ timeseries_flipped = timeseries * -1.0 + abs(np.nanmax(timeseries)) + 0.1
1001
+
1002
+ # Create seeds for this timeseries
1003
+ corepoint_seeds = []
1004
+ for start_idx in changepoints:
1005
+ # Skip this changepoint if it was included into a previous seed
1006
+ if corepoint_seeds and start_idx <= corepoint_seeds[-1].end_epoch:
1007
+ continue
1008
+
1009
+ # Skip this changepoint if it is too close to the end
1010
+ if start_idx >= timeseries.shape[0] - self.minperiod:
1011
+ break
1012
+
1013
+ # Decide whether we need to use the flipped timeseries
1014
+ used_timeseries = timeseries
1015
+ if timeseries[start_idx] >= timeseries[start_idx + self.minperiod]:
1016
+ used_timeseries = timeseries_flipped
1017
+
1018
+ previous_volume = -999.9
1019
+ for target_idx in range(start_idx + 1, timeseries.shape[0]):
1020
+ # Calculate the change volume
1021
+ height = used_timeseries[start_idx]
1022
+ volume = np.nansum(
1023
+ used_timeseries[start_idx : target_idx + 1] - height
1024
+ )
1025
+
1026
+ # Check whether the volume started decreasing
1027
+ if previous_volume > volume:
1028
+ # Only add the seed if it spans at least the minimum period and the height of the change form exceeds the threshold
1029
+ if (target_idx - start_idx >= self.minperiod) and (
1030
+ np.abs(
1031
+ np.max(used_timeseries[start_idx : target_idx + 1])
1032
+ - np.min(used_timeseries[start_idx : target_idx + 1])
1033
+ )
1034
+ >= self.height_threshold
1035
+ ):
1036
+ corepoint_seeds.append(
1037
+ RegionGrowingSeed(i, start_idx, target_idx)
1038
+ )
1039
+ break
1040
+ else:
1041
+ previous_volume = volume
1042
+
1043
+ # This causes a seed to always be detected if the volume doesn't decrease before present
1044
+ # Useful when used in an online setting, can be filtered before region growing
1045
+ # Only if the last epoch is reached do we use the segment as a seed
1046
+ if (target_idx == timeseries.shape[0] - 1) and self.use_unfinished:
1047
+ # We reached the present and add a seed based on it
1048
+ corepoint_seeds.append(
1049
+ RegionGrowingSeed(i, start_idx, timeseries.shape[0] - 1)
1050
+ )
1051
+
1052
+ # Add all the seeds found for this corepoint to the full list
1053
+ seeds.extend(corepoint_seeds)
1054
+
1055
+ return seeds
1056
+
1057
+ def seed_sorting_scorefunction(self):
1058
+ """Neighborhood similarity sorting function"""
1059
+
1060
+ # The 4D-OBC algorithm sorts by similarity in the neighborhood
1061
+ # of the seed.
1062
+ def neighborhood_similarity(seed):
1063
+ self.analysis.corepoints._validate_search_tree()
1064
+ neighbors = self.analysis.corepoints._radius_search(
1065
+ self.analysis.corepoints.cloud[seed.index, :], self.neighborhood_radius
1066
+ )
1067
+ # If no neighbors are found, make sure the algorithm continues its search, but with a large dissimilarity
1068
+ if len(neighbors) < 2:
1069
+ return 9999999.0 # a very large score pushes this seed to the end of the priority order
1070
+
1071
+ similarities = []
1072
+ for n in neighbors:
1073
+ data = _py4dgeo.TimeseriesDistanceFunctionData(
1074
+ self.analysis.distances_for_compute[
1075
+ seed.index, seed.start_epoch : seed.end_epoch + 1
1076
+ ],
1077
+ self.analysis.distances_for_compute[
1078
+ n, seed.start_epoch : seed.end_epoch + 1
1079
+ ],
1080
+ )
1081
+ similarities.append(self.distance_measure()(data))
1082
+
1083
+ return sum(similarities, 0.0) / (len(neighbors) - 1)
1084
+
1085
+ return neighborhood_similarity
1086
+
1087
+ def filter_objects(self, obj):
1088
+ """A filter for objects produced by the region growing algorithm"""
1089
+
1090
+ # Filter based on coefficient of variation
1091
+ distarray = np.fromiter(obj._data.indices_distances.values(), np.float64)
1092
+
1093
+ # Check if mean is 0.0, if so, set to very small value to avoid division by 0
1094
+ mean_distarray = np.mean(distarray)
1095
+ if mean_distarray == 0.0:
1096
+ mean_distarray = 10**-10
1097
+
1098
+ # Calculate coefficient of variation
1099
+ cv = np.std(distarray) / mean_distarray
1100
+
1101
+ # TODO: Make this threshold configurable?
1102
+ return cv <= 0.8
1103
+
1104
+
1105
+ class RegionGrowingSeed:
1106
+ def __init__(self, index, start_epoch, end_epoch):
1107
+ self._seed = _py4dgeo.RegionGrowingSeed(index, start_epoch, end_epoch)
1108
+
1109
+ @property
1110
+ def index(self):
1111
+ return self._seed.index
1112
+
1113
+ @property
1114
+ def start_epoch(self):
1115
+ return self._seed.start_epoch
1116
+
1117
+ @property
1118
+ def end_epoch(self):
1119
+ return self._seed.end_epoch
1120
+
1121
+
1122
+ class ObjectByChange:
1123
+ """Representation of a change object in the spatiotemporal domain"""
1124
+
1125
+ def __init__(self, data, seed, analysis=None):
1126
+ self._data = data
1127
+ self._analysis = analysis
1128
+ self.seed = seed
1129
+
1130
+ @property
1131
+ def indices(self):
1132
+ """The set of corepoint indices that compose the object by change"""
1133
+ return list(self._data.indices_distances.keys())
1134
+
1135
+ def distance(self, index):
1136
+ return self._data.indices_distances[index]
1137
+
1138
+ @property
1139
+ def start_epoch(self):
1140
+ """The index of the start epoch of the change object"""
1141
+ return self._data.start_epoch
1142
+
1143
+ @property
1144
+ def end_epoch(self):
1145
+ """The index of the end epoch of the change object"""
1146
+ return self._data.end_epoch
1147
+
1148
+ @property
1149
+ def threshold(self):
1150
+ """The distance threshold that produced this object"""
1151
+ return self._data.threshold
1152
+
1153
+ def attach_analysis(self, analysis):
1154
+ self._analysis = analysis
1155
+
1156
+ def __getstate__(self):
1157
+ """
1158
+ Return the pickle state for this object.
1159
+
1160
+ We explicitly omit `_analysis` because it can contain a back-link to the
1161
+ SpatiotemporalAnalysis object, which would cause the full analysis to be
1162
+ pickled into every ObjectByChange instance.
1163
+ """
1164
+ return {
1165
+ "_data": self._data,
1166
+ "seed": self.seed,
1167
+ # intentionally NOT storing "_analysis"
1168
+ }
1169
+
1170
+ def __setstate__(self, state):
1171
+ """
1172
+ Restore the object from pickle state.
1173
+
1174
+ `_analysis` is always reset to None. It can later be re-attached by the
1175
+ algorithm / analysis code if needed.
1176
+ """
1177
+ self._data = state["_data"]
1178
+ self.seed = state["seed"]
1179
+ self._analysis = None
1180
+
1181
+ def plot(self, filename=None):
1182
+ """Create an informative visualization of the Object By Change
1183
+
1184
+ :param filename:
1185
+ The filename to use to store the plot. Can be omitted to only show
1186
+ plot in a Jupyter notebook session.
1187
+ :type filename: str
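+
+ Example (a sketch, assuming ``objects`` was returned by a region growing run)::
+
+     objects[0].plot(filename="object_0.png")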
1188
+ """
1189
+
1190
+ # Extract DTW distances from this object
1191
+ indexarray = np.fromiter(self.indices, np.int32)
1192
+ distarray = np.fromiter((self.distance(i) for i in indexarray), np.float64)
1193
+
1194
+ # Initialize the figure and all of its subfigures
1195
+ fig = plt.figure(figsize=plt.figaspect(0.3))
1196
+ tsax = fig.add_subplot(1, 3, 1)
1197
+ histax = fig.add_subplot(1, 3, 2)
1198
+ mapax = fig.add_subplot(1, 3, 3)
1199
+
1200
+ # The first plot (tsax) shows all time series of chosen corepoints
1201
+ # and colors them according to distance.
1202
+ tsax.set_ylabel("Height change [m]")
1203
+ tsax.set_xlabel("Time [h]")
1204
+
1205
+ # We pad the time series visualization with a number of data
1206
+ # points on both sides. TODO: Expose as argument to plot?
1207
+ timeseries_padding = 10
1208
+ start_epoch = max(self.start_epoch - timeseries_padding, 0)
1209
+ end_epoch = min(
1210
+ self.end_epoch + timeseries_padding,
1211
+ self._analysis.distances_for_compute.shape[1],
1212
+ )
1213
+
1214
+ # We use the seed's timeseries to set good axis limits
1215
+ seed_ts = self._analysis.distances_for_compute[
1216
+ self.seed.index, start_epoch:end_epoch
1217
+ ]
1218
+ tsax.set_ylim(np.nanmin(seed_ts) * 0.5, np.nanmax(seed_ts) * 1.5)
1219
+
1220
+ # Create a colormap with distance for this object
1221
+ cmap = matplotlib.colormaps.get_cmap("viridis")
1222
+ maxdist = np.nanmax(distarray)
1223
+
1224
+ # Plot each time series individually
1225
+ for index in self.indices:
1226
+ tsax.plot(
1227
+ self._analysis.distances_for_compute[index, start_epoch:end_epoch],
1228
+ linewidth=0.7,
1229
+ alpha=0.3,
1230
+ color=cmap(self.distance(index) / maxdist),
1231
+ )
1232
+
1233
+ # Plot the seed timeseries again, but with a thicker line
1234
+ tsax.plot(seed_ts, linewidth=2.0, zorder=10, color="blue")
1235
+
1236
+ # Next, we add a histogram plot with the distance values (using seaborn)
1237
+ seaborn.histplot(distarray, ax=histax, kde=True, color="r")
1238
+
1239
+ # Add labels to the histogram plot
1240
+ histax.set_title(f"Segment size: {distarray.shape[0]}")
1241
+ histax.set_xlabel("DTW distance")
1242
+
1243
+ # Create a 2D view of the segment
1244
+ locations = self._analysis.corepoints.cloud[indexarray, 0:2]
1245
+ mapax.scatter(locations[:, 0], locations[:, 1], c=distarray)
1246
+
1247
+ # Some global settings of the generated figure
1248
+ fig.tight_layout()
1249
+
1250
+ # Maybe save to file
1251
+ if filename is not None:
1252
+ plt.savefig(filename)
1253
+
1254
+
1255
+ def check_epoch_timestamp(epoch):
1256
+ """Validate an epoch to be used with a SpatiotemporalAnalysis"""
1257
+ if epoch.timestamp is None:
1258
+ raise Py4DGeoError(
1259
+ "Epochs need to define a timestamp to be usable in a SpatiotemporalAnalysis"
1260
+ )
1261
+
1262
+ return epoch
1263
+
1264
+
1265
+ def regular_corepoint_grid(lowerleft, upperright, num_points, zval=0.0):
1266
+ """A helper function to create a regularly spaced grid for the analysis
1267
+
1268
+ :param lowerleft:
1269
+ The lower left corner of the grid. Given as a 2D coordinate.
1270
+ :type lowerleft: np.ndarray
1271
+ :param upperright:
1272
+ The upper right corner of the grid. Given as a 2D coordinate.
1273
+ :type upperright: np.ndarray
1274
+ :param num_points:
1275
+ A tuple with two entries denoting the number of points to be used in
1276
+ x and y direction
1277
+ :type num_points: tuple
1278
+ :param zval:
1279
+ The value to fill for the z-direction.
1280
+ :type zval: double
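+
+ Example (a sketch creating a 100 x 50 corepoint grid at z = 0 and assigning
+ it to an existing ``analysis`` object)::
+
+     import numpy as np
+
+     grid = regular_corepoint_grid(
+         np.array([0.0, 0.0]), np.array([10.0, 5.0]), (100, 50)
+     )
+     analysis.corepoints = grid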
1281
+ """
1282
+ xspace = np.linspace(
1283
+ lowerleft[0], upperright[0], num=num_points[0], dtype=np.float64
1284
+ )
1285
+ yspace = np.linspace(
1286
+ lowerleft[1], upperright[1], num=num_points[1], dtype=np.float64
1287
+ )
1288
+
1289
+ grid = np.empty(shape=(num_points[0] * num_points[1], 3), dtype=np.float64)
1290
+ for i, x in enumerate(xspace):
1291
+ for j, y in enumerate(yspace):
1292
+ grid[i * num_points[1] + j, 0] = x
1293
+ grid[i * num_points[1] + j, 1] = y
1294
+ grid[i * num_points[1] + j, 2] = zval
1295
+
1296
+ return grid
1297
+
1298
+
1299
+ def temporal_averaging(distances, smoothing_window=24):
1300
+ """Smooth a space-time array of distance changes using a sliding window approach
1301
+
1302
+ :param distances:
1303
+ The raw data to smoothen.
1304
+ :type distances: np.ndarray
1305
+ :param smoothing_window:
1306
+ The size of the sliding window used in smoothing the data. The
1307
+ default value is 24, corresponding to one day in case of hourly data.
1308
+ :type smoothing_window: int
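+
+ Example (a sketch, smoothing the distances of an existing ``analysis``
+ object with a one-day window for hourly data)::
+
+     analysis.smoothed_distances = temporal_averaging(
+         analysis.distances, smoothing_window=24
+     )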
1309
+ """
1310
+
1311
+ with logger_context("Smoothing temporal data"):
1312
+ smoothed = np.empty_like(distances)
1313
+ eps = smoothing_window // 2
1314
+
1315
+ for i in range(distances.shape[1]):
1316
+ smoothed[:, i] = np.nanmedian(
1317
+ distances[
1318
+ :,
1319
+ max(0, i - eps) : min(distances.shape[1] - 1, i + eps),
1320
+ ],
1321
+ axis=1,
1322
+ )
1323
+
1324
+ # Return the smoothed space-time array
1325
+ return smoothed