PyEvoMotion 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyEvoMotion/core/base.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import r2_score
 from scipy.optimize import curve_fit
-from scipy.stats import f as snedecor_f
+from scipy.stats import f as snedecor_f, t as t_dist
 from sklearn.linear_model import LinearRegression
 
 
@@ -102,7 +102,7 @@ class PyEvoMotionBase():
         print(f"Method {method} not found in {instance}")
 
     @staticmethod
-    def _remove_nan(x: pd.Series, y: pd.Series) -> tuple[np.ndarray, np.ndarray]:
+    def _remove_nan(x: pd.Series, y: pd.Series, z: pd.Series) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
         """
         Remove NaN values from two pandas Series and return them as numpy arrays.
 
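The extended helper drops any row where at least one of the three series is NaN, so the returned arrays stay aligned. A minimal sketch of that dropna behaviour, with made-up values:

    import pandas as pd

    x = pd.Series([1.0, 2.0, None, 4.0])
    y = pd.Series([1.0, None, 3.0, 4.0])
    z = pd.Series([1.0, 1.0, 1.0, None])

    # Rows 1, 2 and 3 each contain a NaN somewhere, so only row 0 survives:
    print(pd.DataFrame({"x": x, "y": y, "z": z}).dropna())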
@@ -110,22 +110,77 @@ class PyEvoMotionBase():
         :type x: pd.Series
         :param y: the second pandas Series.
         :type y: pd.Series
+        :param z: the third pandas Series.
+        :type z: pd.Series
         :return: a tuple with the two pandas Series without NaN values.
         :rtype: tuple[np.ndarray,np.ndarray]
         """
 
-        data = pd.DataFrame({"x": x, "y": y}).dropna()
+        data = pd.DataFrame({"x": x, "y": y, "z": z}).dropna()
 
         x = data["x"].to_numpy().reshape(-1, 1)
         y = data["y"].to_numpy().reshape(-1, 1)
+        z = data["z"].to_numpy().reshape(-1, 1)
+        return x, y, z
 
-        return x, y
+    @staticmethod
+    def _weighting_function(n: int, n_0: int = 30) -> np.ndarray:
+        """
+        Weighting function for the data points.
+
+        :param n: The number of data points.
+        :type n: int
+        :param n_0: The number of data points at which the weighting function approximates the constant 1. Default is 30.
+        :type n_0: int
+        :return: The weighting function.
+        :rtype: np.ndarray
+        """
+
+        return np.tanh(2*n/n_0)
+
+    @staticmethod
+    def _compute_confidence_intervals(
+        parameters: dict[str, float],
+        standard_errors: dict[str, float],
+        degrees_of_freedom: int,
+        confidence_level: float = 0.95
+    ) -> dict[str, tuple[float, float]]:
+        """
+        Compute confidence intervals for parameters using the t-distribution.
+
+        :param parameters: Dictionary of parameter names and their estimated values.
+        :type parameters: dict[str, float]
+        :param standard_errors: Dictionary of parameter names and their standard errors.
+        :type standard_errors: dict[str, float]
+        :param degrees_of_freedom: Degrees of freedom for the t-distribution.
+        :type degrees_of_freedom: int
+        :param confidence_level: Confidence level for the intervals (default 0.95 for 95% CI).
+        :type confidence_level: float
+        :return: Dictionary with parameter names as keys and (lower_bound, upper_bound) tuples as values.
+        :rtype: dict[str, tuple[float, float]]
+        """
+        alpha = 1 - confidence_level
+        t_val = t_dist.ppf(1 - alpha/2, degrees_of_freedom)
+
+        confidence_intervals = {}
+        for param_name in parameters.keys():
+            param_value = parameters[param_name]
+            param_se = standard_errors[param_name]
+            margin_of_error = t_val * param_se
+            confidence_intervals[param_name] = (
+                param_value - margin_of_error,
+                param_value + margin_of_error
+            )
+
+        return confidence_intervals
 
     @classmethod
     def linear_regression(cls,
         x: np.ndarray,
         y: np.ndarray,
-        fit_intercept=True
+        weights: np.ndarray | None = None,
+        fit_intercept: bool = True,
+        confidence_level: float = 0.95
     ) -> dict[str, any]:
         """
         Perform a linear regression on a set of data.
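Two details of the helpers added above: the tanh weight already reaches about 0.96 at n = n_0 (tanh(2) ≈ 0.964) and saturates toward 1 beyond it, and the intervals are the usual symmetric t-intervals, estimate ± t(1-alpha/2, df) · SE. A minimal standalone sketch of that interval computation, with hypothetical numbers:

    import numpy as np
    from scipy.stats import t as t_dist

    print(np.tanh(2 * 30 / 30))                 # ~0.964: weight near 1 at n_0

    m, se_m, df = 1.8, 0.2, 28                  # hypothetical estimate, SE, dof
    t_val = t_dist.ppf(1 - (1 - 0.95) / 2, df)  # ~2.048 for a 95% CI
    print((m - t_val * se_m, m + t_val * se_m)) # ~ (1.39, 2.21)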
@@ -136,16 +191,58 @@ class PyEvoMotionBase():
         :type y: np.ndarray
         :param fit_intercept: Whether to fit the intercept. Default is ``True``.
         :type fit_intercept: bool
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: np.ndarray | None
+        :param confidence_level: Confidence level for parameter confidence intervals (default 0.95 for 95% CI).
+        :type confidence_level: float
         :return: A dictionary containing:
 
         * ``model``: A ``lambda`` function that computes predictions based on the fitted model.
         * ``parameters``: A dictionary with the slope of the regression line.
+        * ``confidence_intervals``: A dictionary with confidence intervals for each parameter.
         * ``expression``: A string representation of the regression equation.
         * ``r2``: The :math:`R^2` score of the regression.
         :rtype: ``dict[str, any]``
         """
 
-        reg = LinearRegression(fit_intercept=fit_intercept).fit(x,y)
+        _weights = cls._weighting_function(weights).flatten() if weights is not None else None
+
+        reg = LinearRegression(fit_intercept=fit_intercept).fit(x, y, sample_weight=_weights)
+
+        # Calculate confidence intervals
+        n = len(x)
+        _df = n - (2 if fit_intercept else 1) # degrees of freedom
+
+        # Calculate residuals and MSE
+        y_pred = reg.predict(x)
+        residuals = y.flatten() - y_pred.flatten()
+
+        if _weights is not None:
+            # Weighted MSE
+            mse = np.sum(_weights * residuals**2) / (np.sum(_weights) - (2 if fit_intercept else 1))
+        else:
+            mse = np.sum(residuals**2) / _df
+
+        # Calculate standard errors
+        x_flat = x.flatten()
+        x_mean = np.mean(x_flat)
+        sxx = np.sum((x_flat - x_mean)**2)
+
+        # Standard error for slope
+        se_slope = np.sqrt(mse / sxx)
+
+        parameters = {"m": reg.coef_[0][0]}
+        standard_errors = {"m": se_slope}
+
+        if fit_intercept:
+            se_intercept = np.sqrt(mse * (1/n + x_mean**2/sxx))
+            parameters["b"] = reg.intercept_[0]
+            standard_errors["b"] = se_intercept
+
+        # Compute confidence intervals using the abstracted method
+        confidence_intervals = cls._compute_confidence_intervals(
+            parameters, standard_errors, _df, confidence_level
+        )
 
         if fit_intercept:
             model = {
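The rewritten body is standard weighted least squares: the tanh-scaled weights are passed to scikit-learn as sample_weight, and the slope's standard error is the textbook sqrt(MSE/Sxx). A self-contained sketch on synthetic data (not part of the package):

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    x = np.arange(1.0, 21.0).reshape(-1, 1)
    y = 2.0 * x + rng.normal(0, 1, x.shape)
    counts = np.full(len(x), 40)               # hypothetical samples per time bin
    w = np.tanh(2 * counts / 30)               # the weighting function above

    reg = LinearRegression(fit_intercept=False).fit(x, y, sample_weight=w)
    print(reg.coef_[0][0])                     # slope m, close to 2.0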
@@ -154,7 +251,9 @@ class PyEvoMotionBase():
                     "m": reg.coef_[0][0],
                     "b": reg.intercept_[0]
                 },
-                "expression": "mx + b"
+                "confidence_intervals": confidence_intervals,
+                "expression": "mx + b",
+                "confidence_level": confidence_level
             }
 
         else:
@@ -163,10 +262,12 @@ class PyEvoMotionBase():
                 "parameters": {
                     "m": reg.coef_[0][0],
                 },
-                "expression": "mx"
+                "confidence_intervals": confidence_intervals,
+                "expression": "mx",
+                "confidence_level": confidence_level
             }
 
-        model["r2"] = r2_score(y, reg.predict(x))
+        model["r2"] = r2_score(y, reg.predict(x), sample_weight=_weights)
 
         return model
 
@@ -192,36 +293,97 @@ class PyEvoMotionBase():
         return a*np.power(x, b)
 
     @classmethod
-    def power_law_fit(cls, x: np.ndarray, y: np.ndarray) -> dict[str, any]:
+    def power_law_fit(cls, x: np.ndarray, y: np.ndarray, weights: np.ndarray | None = None, confidence_level: float = 0.95) -> dict[str, any]:
         """
         Perform a power law fit on a set of data.
+
+        This method fits a power law model of the form :math:`y = d \\cdot x^{\\alpha}` to the data.
+        Initial parameter estimates are obtained via linear regression on log-transformed data,
+        which provides better convergence than default initialization.
 
         :param x: A numpy array of the features.
         :type x: np.ndarray
         :param y: A numpy array of the target.
         :type y: np.ndarray
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: np.ndarray | None
+        :param confidence_level: Confidence level for parameter confidence intervals (default 0.95 for 95% CI).
+        :type confidence_level: float
         :return: A dictionary containing:
 
         * ``model``: A ``lambda`` function that computes predictions based on the fitted model.
-        * ``parameters``: A dictionary with the parameters of the fitted power law.
+        * ``parameters``: A dictionary with the parameters of the fitted power law (``d`` and ``alpha``).
+        * ``confidence_intervals``: A dictionary with confidence intervals for each parameter.
         * ``expression``: A string representation of the regression equation.
         * ``r2``: The :math:`R^2` score of the regression.
+        * ``confidence_level``: The confidence level used for the confidence intervals.
         :rtype: ``dict[str, any]``
         """
 
+        _weights = cls._weighting_function(weights).flatten() if weights is not None else None
+
+        # Provide good initial parameter guesses for power law
+        # Use linear regression on log-transformed data to get initial estimates
+        x_flat = x.T.tolist()[0]
+        y_flat = y.T.tolist()[0]
+        mask = (np.array(x_flat) > 0) & (np.array(y_flat) > 0)
+        x_log = np.log(np.array(x_flat)[mask])
+        y_log = np.log(np.array(y_flat)[mask])
+
+        # Linear regression on log-transformed data: log(y) = log(d) + alpha*log(x)
+        # This gives us initial estimates for d and alpha
+        if len(x_log) > 1:
+            reg = LinearRegression(fit_intercept=True).fit(x_log.reshape(-1, 1), y_log.reshape(-1, 1))
+            p0 = [np.exp(reg.intercept_[0]), reg.coef_[0][0]] # [d, alpha]
+        else:
+            p0 = [1.0, 1.0] # Default fallback
+
+        # Set reasonable bounds for power law parameters
+        # d > 0 (coefficient must be positive)
+        # alpha can be any real number, but constrain to reasonable range
+        bounds = ([1e-10, -10], [np.inf, 10]) # [d_min, alpha_min], [d_max, alpha_max]
+
         try:
-            _popt, _, _, _msg, _ier = curve_fit(
+            _popt, _pcov, _, _msg, _ier = curve_fit(
                 cls._power_law,
-                x.T.tolist()[0], y.T.tolist()[0],
+                x_flat, y_flat,
+                p0=p0,
+                bounds=bounds,
+                sigma=1/np.sqrt(_weights) if _weights is not None else None,
                 full_output=True
             )
         except RuntimeError as e:
             _ier = 0
             _msg = str(e)
+            _pcov = np.array([[np.inf, 0], [0, np.inf]])
 
         if _ier not in range(1, 5):
             print(f"{_msg}")
             _popt = [0, 0]
+            _pcov = np.array([[np.inf, 0], [0, np.inf]])
+
+        # Calculate confidence intervals from covariance matrix
+        n = len(x)
+        df = n - 2 # degrees of freedom for 2 parameters
+
+        # Standard errors from covariance matrix diagonal
+        param_errors = np.sqrt(np.diag(_pcov))
+
+        # Prepare parameters and standard errors for confidence interval computation
+        parameters = {
+            "d": _popt[0],
+            "alpha": _popt[1]
+        }
+        standard_errors = {
+            "d": param_errors[0],
+            "alpha": param_errors[1]
+        }
+
+        # Compute confidence intervals using the abstracted method
+        confidence_intervals = cls._compute_confidence_intervals(
+            parameters, standard_errors, df, confidence_level
+        )
 
         model = {
             "model": lambda x: _popt[0]*np.power(x, _popt[1]),
@@ -229,17 +391,21 @@ class PyEvoMotionBase():
                 "d": _popt[0],
                 "alpha": _popt[1]
             },
+            "confidence_intervals": confidence_intervals,
             "expression": "d*x^alpha",
-            "r2": r2_score(y, cls._power_law(x, *_popt))
+            "confidence_level": confidence_level,
+            "r2": r2_score(y, cls._power_law(x, *_popt), sample_weight=_weights)
         }
 
         return model
 
-    @staticmethod
+    @classmethod
     def F_test(
+        cls,
         model1: dict[str,any],
         model2: dict[str,any],
-        data: np.ndarray
+        data: np.ndarray,
+        weights: np.ndarray | None = None
     ) -> tuple[float, float]:
         """
         Perform an F-test between two models.
@@ -257,6 +423,11 @@ class PyEvoMotionBase():
         """
 
         data = data.flatten()
+
+        if weights is not None:
+            _weights = cls._weighting_function(weights.flatten())
+        else:
+            _weights = np.ones(len(data))
 
         # Note that p1 < p2 always. Won't do an assertion because I'm making sure elsewhere that the linear model does not have an intercept, i.e. it only has the slope
         p1 = len(model1["parameters"])
@@ -278,20 +449,112 @@ class PyEvoMotionBase():
         )
 
         # Sum the residuals without the infinite values
-        RSS1 = RS1.sum(where=~mask)
-        RSS2 = RS2.sum(where=~mask)
+        RSS1 = np.sum(_weights*RS1, where=~mask)
+        RSS2 = np.sum(_weights*RS2, where=~mask)
 
         F = ((RSS1 - RSS2)/(p2 - p1))/(RSS2/(n - p2))
 
-        return F, 1 - snedecor_f.cdf(F, p2 - p1, n - p2)
+        return F, 1 - snedecor_f.cdf(F, p2 - p1, n - p2)
 
+    @classmethod
+    def AIC(
+        cls,
+        model1: dict[str,any],
+        model2: dict[str,any],
+        data: np.ndarray,
+        weights: np.ndarray | None = None
+    ) -> tuple[float, float, float, float, float, float]:
+        """
+        Perform an AIC comparison between two models.
+
+        Uses the small-sample corrected AIC with full constant terms:
+        AICc = n*ln(2*pi) + n*ln(RSS/n) + n + 2k + [2k(k+1)]/(n-k-1)
+
+        See https://en.wikipedia.org/wiki/Akaike_information_criterion for more details.
+
+        :param model1: The first model.
+        :type model1: dict[str, any]
+        :param model2: The second model.
+        :type model2: dict[str, any]
+        :param data: The data to test the models.
+        :type data: np.ndarray
+        :param weights: Optional weights for the data points. Default is ``None``.
+        :type weights: np.ndarray | None
+        :return: A tuple with both AICc values, both delta AICc values, and both Akaike weights: ``(AICc1, AICc2, dAICc1, dAICc2, w1, w2)``.
+        :rtype: ``tuple[float, float, float, float, float, float]``
+        """
+
+        data = data.flatten()
+
+        if weights is not None:
+            _weights = cls._weighting_function(weights.flatten())
+        else:
+            _weights = np.ones(len(data))
+
+        k1 = len(model1["parameters"])
+        k2 = len(model2["parameters"])
+        n = len(data)
+
+        model1 = np.vectorize(model1["model"])
+        model2 = np.vectorize(model2["model"])
+
+        RS1 = (data - model1(range(n)))**2
+        RS2 = (data - model2(range(n)))**2
+
+        # Mask the infinite and nan values
+        mask = (
+            np.isinf(RS1)
+            | np.isinf(RS2)
+            | np.isnan(RS1)
+            | np.isnan(RS2)
+        )
+
+        # Sum the residuals without the infinite values
+        RSS1 = np.sum(_weights*RS1, where=~mask)
+        RSS2 = np.sum(_weights*RS2, where=~mask)
+
+        # Handle edge case where RSS is 0 (perfect fit) to avoid log(0)
+        if RSS1 == 0:
+            RSS1 = 1e-10 # Small positive value to avoid log(0)
+        if RSS2 == 0:
+            RSS2 = 1e-10 # Small positive value to avoid log(0)
+
+        const_term = n * (np.log(2*np.pi) + 1.0)
+        denom1 = n - k1 - 1
+        denom2 = n - k2 - 1
+
+        # If denom <= 0, AICc is undefined; treat as +inf (no support)
+        if denom1 <= 0:
+            AICc1 = np.inf
+        else:
+            AICc1 = const_term + n * np.log(RSS1 / n) + 2 * k1 + (2 * k1 * (k1 + 1)) / denom1
+
+        if denom2 <= 0:
+            AICc2 = np.inf
+        else:
+            AICc2 = const_term + n * np.log(RSS2 / n) + 2 * k2 + (2 * k2 * (k2 + 1)) / denom2
+
+        # ΔAIC: relative to best (lowest AIC)
+        min_aicc = min(AICc1, AICc2)
+        dAICc1 = AICc1 - min_aicc
+        dAICc2 = AICc2 - min_aicc
+
+        # Akaike weights
+        rel1 = np.exp(-0.5 * dAICc1) if np.isfinite(dAICc1) else 0
+        rel2 = np.exp(-0.5 * dAICc2) if np.isfinite(dAICc2) else 0
+        denom = rel1 + rel2 if (rel1 + rel2) > 0 else 1.0
+        w1 = rel1 / denom
+        w2 = rel2 / denom
+
+        return AICc1, AICc2, dAICc1, dAICc2, w1, w2
+
     @classmethod
     def adjust_model(cls,
         x: pd.Series,
         y: pd.Series,
-        name: str = None
+        name: str = None,
+        weights: pd.Series | None = None,
+        confidence_level: float = 0.95
    ) -> dict[str, any]:
-        """Adjust a model to the data.
+        """Adjust a model to the data using AIC for model selection.
 
         :param x: The features. It is a single pandas Series.
         :type x: pd.Series
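The docstring formula is the small-sample AICc with the Gaussian constant term kept, and each Akaike weight is exp(-delta/2) normalised over the two models. A worked standalone sketch with hypothetical residual sums of squares:

    import numpy as np

    n = 20
    rss = {"linear": 12.0, "power_law": 9.5}   # hypothetical RSS values
    k = {"linear": 1, "power_law": 2}          # parameters per model

    aicc = {
        m: n * (np.log(2 * np.pi) + 1) + n * np.log(rss[m] / n)
           + 2 * k[m] + 2 * k[m] * (k[m] + 1) / (n - k[m] - 1)
        for m in rss
    }
    delta = {m: a - min(aicc.values()) for m, a in aicc.items()}
    rel = {m: np.exp(-0.5 * d) for m, d in delta.items()}
    weights = {m: r / sum(rel.values()) for m, r in rel.items()}
    print(aicc)
    print(weights)                             # weights sum to 1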
@@ -299,12 +562,25 @@ class PyEvoMotionBase():
         :type y: pd.Series
         :param name: The name of the data. Default is ``None``.
         :type name: str
-        :return: A dictionary with the model.
+        :param weights: Optional weights for the data points. If provided, points with higher weights will have more influence on the fit. These weights are scaled by the weighting function tanh(2*n/n_0), where n is the number of data points and n_0 is the number of data points at which the weighting function approximates the constant 1. Default is ``None``.
+        :type weights: pd.Series | None
+        :param confidence_level: Confidence level for parameter confidence intervals (default 0.95 for 95% CI).
+        :type confidence_level: float
+        :return: A dictionary containing:
+
+        * If name is provided: A dictionary with the name as key and the result dictionary as value
+        * If name is None: A dictionary containing:
+
+            * ``selected_model``: The selected model based on lowest AIC
+            * ``linear_model``: The linear regression model with AIC statistics
+            * ``power_law_model``: The power law model with AIC statistics
+            * ``model_selection``: Dictionary with AIC comparison results
+
         :rtype: ``dict[str, any]``
         :raises ValueError: If the dataset is empty or full of NaN values. This may occur if the grouped data contains only one entry per group, indicating that the variance cannot be computed.
         """
 
-        x,y = cls._remove_nan(x, y)
+        x,y,w = cls._remove_nan(x, y, weights)
 
         # Raises an error if the dataset is (almost) empty at this point
         if (x.size <= 1) or (y.size <= 1):
@@ -313,20 +589,57 @@ class PyEvoMotionBase():
                 f"Dataset length after filtering is: x: {x.size} elements; y: {y.size} elements. In particular:\n\nx: {x}\ny: {y}\n\nPerhaps NaN appeared for certain entries. Check if the grouped data contains only one entry per group, as this may cause NaN values when computing the variance. Also, consider widening the time window."
             )
 
-        model1 = cls.linear_regression(x, y, fit_intercept=False) # Not fitting the intercept because data is passed scaled to the minimum
-        model2 = cls.power_law_fit(x, y)
+        model1 = cls.linear_regression(x, y, weights=w, fit_intercept=False, confidence_level=confidence_level) # Not fitting the intercept because data is passed scaled to the minimum
+        model2 = cls.power_law_fit(x, y, weights=w, confidence_level=confidence_level)
 
-        _, p = cls.F_test(model1, model2, y)
+        # Compute AIC statistics for both models
+        AIC1, AIC2, dAIC1, dAIC2, w1, w2 = cls.AIC(model1, model2, y, weights=w)
 
-        if p < 0.05:
-            model = model2
+        # Select model with lowest AIC (highest Akaike weight)
+        if AIC1 <= AIC2:
+            selected_model = model1
+            selected_model_name = "linear"
         else:
-            model = model1
+            selected_model = model2
+            selected_model_name = "power_law"
+
+        # Add AIC statistics to each model
+        model1_with_aic = model1.copy()
+        model1_with_aic.update({
+            "AIC": AIC1,
+            "delta_AIC": dAIC1,
+            "akaike_weight": w1,
+            "confidence_level": confidence_level
+        })
+
+        model2_with_aic = model2.copy()
+        model2_with_aic.update({
+            "AIC": AIC2,
+            "delta_AIC": dAIC2,
+            "akaike_weight": w2,
+            "confidence_level": confidence_level
+        })
+
+        # Create comprehensive result dictionary
+        result = {
+            "selected_model": selected_model,
+            "linear_model": model1_with_aic,
+            "power_law_model": model2_with_aic,
+            "model_selection": {
+                "selected": selected_model_name,
+                "linear_AIC": AIC1,
+                "power_law_AIC": AIC2,
+                "delta_AIC_linear": dAIC1,
+                "delta_AIC_power_law": dAIC2,
+                "akaike_weight_linear": w1,
+                "akaike_weight_power_law": w2
+            }
+        }
 
         if name:
-            return {name: model}
+            return {name: result}
         else:
-            return model
+            return result
 
     @staticmethod
     def plot_single_data_and_model(
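Because adjust_model now returns a result dictionary instead of the bare winning model, callers that previously read adjust_model(x, y)["parameters"] need to go through the "selected_model" key. A hypothetical usage sketch on synthetic series (the import path follows the file header above):

    import pandas as pd
    from PyEvoMotion.core.base import PyEvoMotionBase

    x = pd.Series(range(1, 15), dtype=float)
    y = pd.Series([2.1 * i + 0.1 * (i % 3) for i in range(1, 15)])
    w = pd.Series([25] * 14)                        # e.g. sequences per time bin

    result = PyEvoMotionBase.adjust_model(x, y, weights=w)
    print(result["model_selection"]["selected"])    # "linear" or "power_law"
    print(result["selected_model"]["parameters"])
    print(result["selected_model"]["confidence_intervals"])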
@@ -337,6 +650,7 @@ class PyEvoMotionBase():
         model_label: str,
         data_xlabel_units: str,
         ax: any,
+        dt_ratio: float,
         **kwargs: dict[str, any]
     ) -> None:
         """
@@ -376,13 +690,13 @@ class PyEvoMotionBase():
                 point_kwargs[_k] = kwargs[k]
 
         ax.scatter(
-            data_x,
+            data_x.to_numpy()*dt_ratio,
             data_y,
             **point_kwargs
         )
         ax.plot(
-            data_x,
-            model(data_x),
+            data_x.to_numpy()*dt_ratio,
+            model(data_x.to_numpy()*dt_ratio),
             label=model_label,
             **line_kwargs
         )
@@ -404,3 +718,28 @@ class PyEvoMotionBase():
             raise ValueError(
                 f"The dataset is (almost) empty at this point of the analysis.\n{msg}"
             )
+
+    @staticmethod
+    def _get_time_ratio(dt: str, reference: str = "7D") -> float:
+        """Get the ratio of a time interval with respect to a reference interval.
+
+        :param dt: Time interval string (e.g. "5D", "7D", "10D", "14D", "12H")
+        :type dt: str
+        :param reference: Reference time interval string. Default is "7D".
+        :type reference: str
+        :return: The ratio of dt to reference
+        :rtype: float
+        """
+
+        return pd.Timedelta(dt) / pd.Timedelta(reference)
+
+    @classmethod
+    def _verify_dt(cls, dt: str) -> None:
+        """Verify that the time window string is greater than 1 day.
+
+        :param dt: Time window string (e.g. "5D", "7D", "10D", "14D")
+        :type dt: str
+        :raises ValueError: If the time window is not greater than 1 day
+        """
+        if cls._get_time_ratio(dt, "1D") <= 1:
+            raise ValueError(f"Time window must be greater than 1 day. Got {dt}")
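pandas Timedelta division returns a plain float, which is all _get_time_ratio relies on; _verify_dt then rejects any window of one day or less. For illustration:

    import pandas as pd

    print(pd.Timedelta("14D") / pd.Timedelta("7D"))   # 2.0
    print(pd.Timedelta("12H") / pd.Timedelta("1D"))   # 0.5 -> _verify_dt("12H") raises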