dragon-ml-toolbox 13.3.0__py3-none-any.whl → 14.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/METADATA +12 -2
- dragon_ml_toolbox-14.7.0.dist-info/RECORD +49 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
- ml_tools/MICE_imputation.py +207 -5
- ml_tools/ML_configuration.py +108 -0
- ml_tools/ML_datasetmaster.py +106 -206
- ml_tools/ML_evaluation.py +229 -76
- ml_tools/ML_evaluation_multi.py +45 -16
- ml_tools/ML_inference.py +0 -1
- ml_tools/ML_models.py +22 -6
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_trainer.py +498 -29
- ml_tools/ML_utilities.py +351 -4
- ml_tools/ML_vision_datasetmaster.py +1492 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +641 -0
- ml_tools/ML_vision_transformers.py +203 -0
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/custom_logger.py +37 -14
- ml_tools/data_exploration.py +502 -93
- ml_tools/ensemble_evaluation.py +53 -10
- ml_tools/keys.py +39 -0
- ml_tools/math_utilities.py +1 -1
- ml_tools/serde.py +2 -2
- ml_tools/utilities.py +192 -3
- dragon_ml_toolbox-13.3.0.dist-info/RECORD +0 -41
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-14.7.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_datasetmaster.py
CHANGED
@@ -1,13 +1,10 @@
 import torch
-from torch.utils.data import Dataset, Subset
+from torch.utils.data import Dataset
 import pandas
 import numpy
 from sklearn.model_selection import train_test_split
 from typing import Literal, Union, Tuple, List, Optional
 from abc import ABC, abstractmethod
-from PIL import Image, ImageOps
-from torchvision.datasets import ImageFolder
-from torchvision import transforms
 import matplotlib.pyplot as plt
 from pathlib import Path
 
@@ -23,9 +20,7 @@ from ._schema import FeatureSchema
 __all__ = [
     "DatasetMaker",
     "DatasetMakerMulti",
-    "VisionDatasetMaker",
-    "SequenceMaker",
-    "ResizeAspectFill",
+    "SequenceMaker"
 ]
 
 
@@ -126,8 +121,8 @@ class _BaseDatasetMaker(ABC):
         else:
             _LOGGER.info("No continuous features listed in schema. Scaler will not be fitted.")
 
-        X_train_values = X_train.values
-        X_test_values = X_test.values
+        X_train_values = X_train.to_numpy()
+        X_test_values = X_test.to_numpy()
 
         # continuous_feature_indices is derived
         if self.scaler is None and continuous_feature_indices:
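The `.values` → `.to_numpy()` change in `_BaseDatasetMaker` follows current pandas guidance, which prefers the explicit method over the `values` attribute. For the homogeneous numeric frames this class handles, the two calls return the same array; a minimal sketch, independent of this package:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
    # Identical ndarrays for homogeneous numeric data; to_numpy() also
    # exposes dtype= and copy= for finer control.
    assert np.array_equal(df.values, df.to_numpy())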
@@ -253,26 +248,42 @@ class DatasetMaker(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  schema: FeatureSchema,
                  kind: Literal["regression", "classification"],
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42,
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):
                 The pre-processed input DataFrame containing all columns. (features and single target).
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
-            kind (
+            kind ("regression" | "classification"):
                 The type of ML task. This determines the data type of the labels.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number of generator for reproducibility.
-            scaler (PytorchScaler | None):
-                A pre-fitted PytorchScaler instance, if None a new scaler will be created.
+
         """
         super().__init__()
-
+
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None # To be created
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
 
         # --- 1. Identify features (from schema) ---
         self._feature_names = list(schema.feature_names)
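Because `scaler` is now a required parameter placed before `test_size`, existing 13.x call sites must be updated. A minimal sketch of the new signature; `df` and `schema` are hypothetical stand-ins for a prepared DataFrame and its FeatureSchema:

    from ml_tools.ML_datasetmaster import DatasetMaker

    # df: pre-processed pandas DataFrame (features plus one target column)
    # schema: FeatureSchema produced by data_exploration
    maker = DatasetMaker(
        pandas_df=df,
        schema=schema,
        kind="classification",
        scaler="fit",      # or "none", or a pre-fitted PytorchScaler
        test_size=0.2,
        random_state=42,
    )
    print(maker)           # the __repr__ added in the next hunk prints a summary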
@@ -310,14 +321,32 @@ class DatasetMaker(_BaseDatasetMaker):
         label_dtype = torch.float32 if kind == "regression" else torch.int64
 
         # --- 4. Scale (using the schema) ---
-        X_train_final, X_test_final = self._prepare_scaler(
-            X_train, y_train, X_test, label_dtype, schema
-        )
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
 
         # --- 5. Create Datasets ---
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
         self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
-
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__} (ID: '{self.id}')>\n"
+        s += f" Target: {self.target_names[0]}\n"
+        s += f" Features: {self.number_of_features}\n"
+        s += f" Scaler: {'Fitted' if self.scaler else 'None'}\n"
+
+        if self._train_ds:
+            s += f" Train Samples: {len(self._train_ds)}\n" # type: ignore
+        if self._test_ds:
+            s += f" Test Samples: {len(self._test_ds)}\n" # type: ignore
+
+        return s
+
 
 # --- Multi-Target Class ---
 class DatasetMakerMulti(_BaseDatasetMaker):
@@ -336,9 +365,9 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  target_columns: List[str],
                  schema: FeatureSchema,
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42,
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):
@@ -348,20 +377,35 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                 List of target column names.
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number generator for reproducibility.
-            scaler (PytorchScaler | None):
-                A pre-fitted PytorchScaler instance.
 
         ## Note:
         For multi-binary classification, the most common PyTorch loss function is nn.BCEWithLogitsLoss.
         This loss function requires the labels to be torch.float32 which is the same type required for regression (multi-regression) tasks.
         """
         super().__init__()
-
-
+
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
+
         # --- 1. Get features and targets from schema/args ---
         self._feature_names = list(schema.feature_names)
         self._target_names = target_columns
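`DatasetMakerMulti` gains the same three scaling strategies. A sketch reusing a previously fitted scaler; `df_multi`, `fitted_scaler`, and the target names are placeholders, and labels come out as `torch.float32`, which matches `nn.BCEWithLogitsLoss` for multi-binary targets as the docstring notes:

    from ml_tools.ML_datasetmaster import DatasetMakerMulti

    maker = DatasetMakerMulti(
        pandas_df=df_multi,                       # features plus several target columns
        target_columns=["target_a", "target_b"],  # hypothetical names
        schema=schema,
        scaler=fitted_scaler,                     # a PytorchScaler fitted on earlier data
        test_size=0.2,
        random_state=42,
    )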
@@ -403,15 +447,33 @@ class DatasetMakerMulti(_BaseDatasetMaker):
         label_dtype = torch.float32
 
         # --- 4. Scale (using the schema) ---
-        X_train_final, X_test_final = self._prepare_scaler(
-            X_train, y_train, X_test, label_dtype, schema
-        )
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
 
         # --- 5. Create Datasets ---
         # _PytorchDataset now correctly handles y_train (a DataFrame)
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
         self._test_ds = _PytorchDataset(X_test_final, y_test, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)
 
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__} (ID: '{self.id}')>\n"
+        s += f" Targets: {self.number_of_targets}\n"
+        s += f" Features: {self.number_of_features}\n"
+        s += f" Scaler: {'Fitted' if self.scaler else 'None'}\n"
+
+        if self._train_ds:
+            s += f" Train Samples: {len(self._train_ds)}\n" # type: ignore
+        if self._test_ds:
+            s += f" Test Samples: {len(self._test_ds)}\n" # type: ignore
+
+        return s
+
 
 # --- Private Base Class ---
 class _BaseMaker(ABC):
@@ -432,149 +494,6 @@ class _BaseMaker(ABC):
         pass
 
 
-# --- VisionDatasetMaker ---
-class VisionDatasetMaker(_BaseMaker):
-    """
-    Creates processed PyTorch datasets for computer vision tasks from an
-    image folder directory.
-
-    Uses online augmentations per epoch (image augmentation without creating new files).
-    """
-    def __init__(self, full_dataset: ImageFolder):
-        super().__init__()
-        self.full_dataset = full_dataset
-        self.labels = [s[1] for s in self.full_dataset.samples]
-        self.class_map = full_dataset.class_to_idx
-
-        self._is_split = False
-        self._are_transforms_configured = False
-
-    @classmethod
-    def from_folder(cls, root_dir: str) -> 'VisionDatasetMaker':
-        """Creates a maker instance from a root directory of images."""
-        initial_transform = transforms.Compose([transforms.ToTensor()])
-        full_dataset = ImageFolder(root=root_dir, transform=initial_transform)
-        _LOGGER.info(f"Found {len(full_dataset)} images in {len(full_dataset.classes)} classes.")
-        return cls(full_dataset)
-
-    @staticmethod
-    def inspect_folder(path: Union[str, Path]):
-        """
-        Logs a report of the types, sizes, and channels of image files
-        found in the directory and its subdirectories.
-        """
-        path_obj = make_fullpath(path)
-
-        non_image_files = set()
-        img_types = set()
-        img_sizes = set()
-        img_channels = set()
-        img_counter = 0
-
-        _LOGGER.info(f"Inspecting folder: {path_obj}...")
-        # Use rglob to recursively find all files
-        for filepath in path_obj.rglob('*'):
-            if filepath.is_file():
-                try:
-                    # Using PIL to open is a more reliable check
-                    with Image.open(filepath) as img:
-                        img_types.add(img.format)
-                        img_sizes.add(img.size)
-                        img_channels.update(img.getbands())
-                        img_counter += 1
-                except (IOError, SyntaxError):
-                    non_image_files.add(filepath.name)
-
-        if non_image_files:
-            _LOGGER.warning(f"Non-image or corrupted files found and ignored: {non_image_files}")
-
-        report = (
-            f"\n--- Inspection Report for '{path_obj.name}' ---\n"
-            f"Total images found: {img_counter}\n"
-            f"Image formats: {img_types or 'None'}\n"
-            f"Image sizes (WxH): {img_sizes or 'None'}\n"
-            f"Image channels (bands): {img_channels or 'None'}\n"
-            f"--------------------------------------"
-        )
-        print(report)
-
-    def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
-        """Splits the dataset into training, validation, and optional test sets."""
-        if self._is_split:
-            _LOGGER.warning("Data has already been split.")
-            return self
-
-        if val_size + test_size >= 1.0:
-            _LOGGER.error("The sum of val_size and test_size must be less than 1.")
-            raise ValueError()
-
-        indices = list(range(len(self.full_dataset)))
-        labels_for_split = self.labels if stratify else None
-
-        train_indices, val_test_indices = train_test_split(
-            indices, test_size=(val_size + test_size), random_state=random_state, stratify=labels_for_split
-        )
-
-        if test_size > 0:
-            val_test_labels = [self.labels[i] for i in val_test_indices]
-            stratify_val_test = val_test_labels if stratify else None
-            val_indices, test_indices = train_test_split(
-                val_test_indices, test_size=(test_size / (val_size + test_size)),
-                random_state=random_state, stratify=stratify_val_test
-            )
-            self._test_dataset = Subset(self.full_dataset, test_indices)
-            _LOGGER.info(f"Test set created with {len(self._test_dataset)} images.")
-        else:
-            val_indices = val_test_indices
-
-        self._train_dataset = Subset(self.full_dataset, train_indices)
-        self._val_dataset = Subset(self.full_dataset, val_indices)
-        self._is_split = True
-
-        _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
-        return self
-
-    def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225],
-                             extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
-        """Configures and applies the image transformations (augmentations)."""
-        if not self._is_split:
-            _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
-            raise RuntimeError()
-
-        base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
-        if extra_train_transforms:
-            base_train_transforms.extend(extra_train_transforms)
-
-        final_transforms = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
-
-        val_transform = transforms.Compose([transforms.Resize(resize_size), transforms.CenterCrop(crop_size), *final_transforms])
-        train_transform = transforms.Compose([*base_train_transforms, *final_transforms])
-
-        self._train_dataset.dataset.transform = train_transform # type: ignore
-        self._val_dataset.dataset.transform = val_transform # type: ignore
-        if self._test_dataset:
-            self._test_dataset.dataset.transform = val_transform # type: ignore
-
-        self._are_transforms_configured = True
-        _LOGGER.info("Image transforms configured and applied.")
-        return self
-
-    def get_datasets(self) -> Tuple[Dataset, ...]:
-        """Returns the final train, validation, and optional test datasets."""
-        if not self._is_split:
-            _LOGGER.error("Data has not been split. Call .split_data() first.")
-            raise RuntimeError()
-        if not self._are_transforms_configured:
-            _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")
-
-        if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
-
-
 # --- SequenceMaker ---
 class SequenceMaker(_BaseMaker):
     """
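The deleted vision tooling is relocated rather than dropped: the file summary above adds `ml_tools/ML_vision_datasetmaster.py` (+1492 lines) plus dedicated vision evaluation, inference, model, and transformer modules. Downstream imports would migrate along these lines, assuming the class name and `from_folder` API survive unchanged in the new module (not confirmed by this diff):

    # 13.3.0
    # from ml_tools.ML_datasetmaster import VisionDatasetMaker

    # 14.7.0 -- module path from the file list; class name and API are assumptions
    from ml_tools.ML_vision_datasetmaster import VisionDatasetMaker

    maker = VisionDatasetMaker.from_folder("data/images")  # hypothetical directory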
@@ -761,41 +680,22 @@ class SequenceMaker(_BaseMaker):
             _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
             raise RuntimeError()
         return self._train_dataset, self._test_dataset
[16 deleted lines unrecoverable from this view: the ResizeAspectFill class declaration, docstring, and __init__ that preceded the __call__ body below]
-        if not isinstance(image, Image.Image):
-            _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
-            raise TypeError()
-
-        w, h = image.size
-        if w == h:
-            return image
-
-        # Determine padding to center the image
-        if w > h:
-            top_padding = (w - h) // 2
-            bottom_padding = w - h - top_padding
-            padding = (0, top_padding, 0, bottom_padding)
-        else: # h > w
-            left_padding = (h - w) // 2
-            right_padding = h - w - left_padding
-            padding = (left_padding, 0, right_padding, 0)
-
-        return ImageOps.expand(image, padding, fill=self.pad_color)
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Sequence Length (Window): {self.sequence_length}\n"
+        s += f" Total Data Points: {len(self.sequence)}\n"
+        s += " --- Status ---\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Normalized: {self._is_normalized}\n"
+        s += f" Windows Generated: {self._are_windows_generated}\n"
+
+        if self._are_windows_generated:
+            train_len = len(self._train_dataset) if self._train_dataset else 0 # type: ignore
+            test_len = len(self._test_dataset) if self._test_dataset else 0 # type: ignore
+            s += f" Datasets (Train/Test): {train_len} / {test_len} windows\n"
+
+        return s
 
 
 def info():