hafnia 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,15 +10,12 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
 import polars as pl
 from packaging.version import Version

+from hafnia import utils
 from hafnia.dataset import dataset_helpers
+from hafnia.dataset.dataset_helpers import is_valid_version_string, version_from_string
 from hafnia.dataset.dataset_names import (
-    DATASET_FILENAMES_REQUIRED,
-    FILENAME_ANNOTATIONS_JSONL,
-    FILENAME_ANNOTATIONS_PARQUET,
-    FILENAME_DATASET_INFO,
     FILENAME_RECIPE_JSON,
     TAG_IS_SAMPLE,
-    AwsCredentials,
     PrimitiveField,
     SampleField,
     SplitName,
@@ -29,7 +26,7 @@ from hafnia.dataset.format_conversions import (
     format_image_classification_folder,
     format_yolo,
 )
-from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample
+from hafnia.dataset.hafnia_dataset_types import DatasetInfo, DatasetMetadataFilePaths, Sample
 from hafnia.dataset.operations import (
     dataset_stats,
     dataset_transformations,
@@ -37,7 +34,11 @@ from hafnia.dataset.operations import (
 )
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.log import user_logger
+from hafnia.platform import s5cmd_utils
+from hafnia.platform.datasets import get_read_credentials_by_name
+from hafnia.platform.s5cmd_utils import AwsCredentials, ResourceCredentials
 from hafnia.utils import progress_bar
+from hafnia_cli.config import Config


 @dataclass
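
Net effect of the import hunks: `AwsCredentials` moves out of `hafnia.dataset.dataset_names` and now ships from `hafnia.platform.s5cmd_utils` together with the new `ResourceCredentials`, so downstream imports need updating. A one-line migration sketch:

    # hafnia 0.4.3:  from hafnia.dataset.dataset_names import AwsCredentials
    # hafnia 0.5.1:
    from hafnia.platform.s5cmd_utils import AwsCredentials, ResourceCredentials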
@@ -89,10 +90,11 @@ class HafniaDataset:
     @staticmethod
     def from_path(path_folder: Path, check_for_images: bool = True) -> "HafniaDataset":
         path_folder = Path(path_folder)
-        HafniaDataset.check_dataset_path(path_folder, raise_error=True)
+        metadata_file_paths = DatasetMetadataFilePaths.from_path(path_folder)
+        metadata_file_paths.exists(raise_error=True)

-        dataset_info = DatasetInfo.from_json_file(path_folder / FILENAME_DATASET_INFO)
-        samples = table_transformations.read_samples_from_path(path_folder)
+        dataset_info = DatasetInfo.from_json_file(Path(metadata_file_paths.dataset_info))
+        samples = metadata_file_paths.read_samples()
         samples, dataset_info = _dataset_corrections(samples, dataset_info)

         # Convert from relative paths to absolute paths
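
Loading now goes through the new `DatasetMetadataFilePaths` helper, which locates and validates the metadata files instead of the removed required-files check. A minimal usage sketch (the module path and folder are assumptions, not confirmed by this diff):

    from pathlib import Path
    from hafnia.dataset.hafnia_dataset import HafniaDataset  # module path assumed

    # Resolves dataset_info + annotations via DatasetMetadataFilePaths and raises if they are missing.
    dataset = HafniaDataset.from_path(Path("/data/my-dataset"))  # hypothetical local folder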
@@ -103,14 +105,24 @@ class HafniaDataset:
         return HafniaDataset(samples=samples, info=dataset_info)

     @staticmethod
-    def from_name(name: str, force_redownload: bool = False, download_files: bool = True) -> "HafniaDataset":
+    def from_name(
+        name: str,
+        version: Optional[str] = None,
+        force_redownload: bool = False,
+        download_files: bool = True,
+    ) -> "HafniaDataset":
         """
         Load a dataset by its name. The dataset must be registered in the Hafnia platform.
         """
-        from hafnia.platform.datasets import download_or_get_dataset_path
-
+        if ":" in name:
+            name, version = dataset_helpers.dataset_name_and_version_from_string(name)
+            raise ValueError(
+                "The 'from_name' does not support the 'name:version' format. Please provide the version separately.\n"
+                f"E.g., HafniaDataset.from_name(name='{name}', version='{version}')"
+            )
         dataset_path = download_or_get_dataset_path(
             dataset_name=name,
+            version=version,
             force_redownload=force_redownload,
             download_files=download_files,
         )
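
With the new signature, a dataset version can be pinned explicitly, and `name:version` strings are rejected with the error shown above. A minimal sketch, assuming the dataset is registered on the platform (the names below are placeholders):

    from hafnia.dataset.hafnia_dataset import HafniaDataset  # module path assumed

    dataset = HafniaDataset.from_name(name="my-dataset", version="1.0.0")   # pinned version, can reuse local cache
    latest = HafniaDataset.from_name(name="my-dataset", version="latest")   # always re-resolves the newest version
    # HafniaDataset.from_name("my-dataset:1.0.0") raises ValueError per the check above.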
@@ -434,7 +446,7 @@ class HafniaDataset:
         aws_credentials: AwsCredentials,
         force_redownload: bool = False,
     ) -> HafniaDataset:
-        from hafnia.platform.datasets import fast_copy_files_s3
+        from hafnia.platform.s5cmd_utils import fast_copy_files

         remote_src_paths = dataset.samples[SampleField.REMOTE_PATH].unique().to_list()
         update_rows = []
@@ -470,7 +482,7 @@ class HafniaDataset:
             return dataset

         environment_vars = aws_credentials.aws_credentials()
-        fast_copy_files_s3(
+        fast_copy_files(
             src_paths=remote_src_paths,
             dst_paths=local_dst_paths,
             append_envs=environment_vars,
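
`fast_copy_files_s3` from `hafnia.platform.datasets` is replaced by `fast_copy_files` in the new `hafnia.platform.s5cmd_utils` module; the call site above keeps the same keyword arguments. An illustrative call (all values hypothetical):

    from hafnia.platform.s5cmd_utils import fast_copy_files

    fast_copy_files(
        src_paths=["s3://bucket/data/img_0.jpg"],      # hypothetical S3 sources
        dst_paths=["/tmp/my-dataset/data/img_0.jpg"],  # matching local targets
        append_envs={"AWS_ACCESS_KEY_ID": "..."},      # credentials passed as env vars, as above
    )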
@@ -523,30 +535,6 @@ class HafniaDataset:
         table = dataset.samples if isinstance(dataset, HafniaDataset) else dataset
         return table_transformations.has_primitive(table, PrimitiveType)

-    @staticmethod
-    def check_dataset_path(path_dataset: Path, raise_error: bool = True) -> bool:
-        """
-        Checks if the dataset path exists and contains the required files.
-        Returns True if the dataset is valid, otherwise raises an error or returns False.
-        """
-        if not path_dataset.exists():
-            if raise_error:
-                raise FileNotFoundError(f"Dataset path {path_dataset} does not exist.")
-            return False
-
-        required_files = [
-            FILENAME_DATASET_INFO,
-            FILENAME_ANNOTATIONS_JSONL,
-            FILENAME_ANNOTATIONS_PARQUET,
-        ]
-        for filename in required_files:
-            if not (path_dataset / filename).exists():
-                if raise_error:
-                    raise FileNotFoundError(f"Required file {filename} not found in {path_dataset}.")
-                return False
-
-        return True
-
     def copy(self) -> "HafniaDataset":
         return HafniaDataset(info=self.info.model_copy(deep=True), samples=self.samples.clone())

@@ -563,7 +551,7 @@ class HafniaDataset:
             keep_sample_data=keep_sample_data,
         )

-    def write(self, path_folder: Path, add_version: bool = False, drop_null_cols: bool = True) -> None:
+    def write(self, path_folder: Path, drop_null_cols: bool = True) -> None:
         user_logger.info(f"Writing dataset to {path_folder}...")
         path_folder = path_folder.absolute()
         if not path_folder.exists():
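
`write()` drops the `add_version` flag, and the `check_dataset_path` helper removed earlier has no direct replacement on the class; the new pattern (also used by `get_or_create_dataset_path_from_recipe` below) goes through `DatasetMetadataFilePaths`. A migration sketch with a hypothetical path:

    from pathlib import Path
    from hafnia.dataset.hafnia_dataset_types import DatasetMetadataFilePaths

    path_dataset = Path("/data/my-dataset")  # hypothetical
    # 0.4.3: HafniaDataset.check_dataset_path(path_dataset, raise_error=False)
    # 0.5.1:
    metadata_files = DatasetMetadataFilePaths.from_path(path_dataset)
    is_valid = metadata_files.exists(raise_error=False)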
@@ -578,44 +566,124 @@ class HafniaDataset:
             )
             new_paths.append(str(new_path))
         hafnia_dataset.samples = hafnia_dataset.samples.with_columns(pl.Series(new_paths).alias(SampleField.FILE_PATH))
-        hafnia_dataset.write_annotations(
-            path_folder=path_folder,
-            drop_null_cols=drop_null_cols,
-            add_version=add_version,
-        )
+        hafnia_dataset.write_annotations(path_folder=path_folder, drop_null_cols=drop_null_cols)

-    def write_annotations(
-        dataset: HafniaDataset,
-        path_folder: Path,
-        drop_null_cols: bool = True,
-        add_version: bool = False,
-    ) -> None:
+    def write_annotations(dataset: HafniaDataset, path_folder: Path, drop_null_cols: bool = True) -> None:
         """
         Writes only the annotations files (JSONL and Parquet) to the specified folder.
         """
+
         user_logger.info(f"Writing dataset annotations to {path_folder}...")
-        path_folder = path_folder.absolute()
-        if not path_folder.exists():
-            path_folder.mkdir(parents=True)
-        dataset.info.write_json(path_folder / FILENAME_DATASET_INFO)
+        metadata_file_paths = DatasetMetadataFilePaths.from_path(path_folder)
+        path_dataset_info = Path(metadata_file_paths.dataset_info)
+        path_dataset_info.parent.mkdir(parents=True, exist_ok=True)
+        dataset.info.write_json(path_dataset_info)

         samples = dataset.samples
         if drop_null_cols:  # Drops all unused/Null columns
             samples = samples.drop(pl.selectors.by_dtype(pl.Null))

+        path_folder = path_folder.absolute()
         # Store only relative paths in the annotations files
-        absolute_paths = samples[SampleField.FILE_PATH].to_list()
-        relative_paths = [str(Path(path).relative_to(path_folder)) for path in absolute_paths]
-        samples = samples.with_columns(pl.Series(relative_paths).alias(SampleField.FILE_PATH))
+        if SampleField.FILE_PATH in samples.columns:  # We drop column for remote datasets
+            absolute_paths = samples[SampleField.FILE_PATH].to_list()
+            relative_paths = [str(Path(path).relative_to(path_folder)) for path in absolute_paths]
+            samples = samples.with_columns(pl.Series(relative_paths).alias(SampleField.FILE_PATH))
+        else:
+            samples = samples.with_columns(pl.lit("").alias(SampleField.FILE_PATH))
+
+        if metadata_file_paths.annotations_jsonl:
+            samples.write_ndjson(Path(metadata_file_paths.annotations_jsonl))  # Json for readability
+        if metadata_file_paths.annotations_parquet:
+            samples.write_parquet(Path(metadata_file_paths.annotations_parquet))  # Parquet for speed
+
+    def delete_on_platform(dataset: HafniaDataset, interactive: bool = True) -> None:
+        """
+        Delete this dataset from the Hafnia platform.
+        This is a thin wrapper around `hafnia.platform.datasets.delete_dataset_completely_by_name`.
+
+        Args:
+            dataset (HafniaDataset): The :class:`HafniaDataset` instance to delete from the platform. The
+                dataset name is taken from `dataset.info.dataset_name`.
+            interactive (bool): If ``True``, perform the deletion in interactive mode (for example,
+                prompting the user for confirmation where supported). If ``False``,
+                run non-interactively, suitable for automated scripts or CI usage. Defaults to True.
+        """
+        from hafnia.platform.datasets import delete_dataset_completely_by_name
+
+        delete_dataset_completely_by_name(dataset_name=dataset.info.dataset_name, interactive=interactive)
+
+    def upload_to_platform(
+        dataset: HafniaDataset,
+        dataset_sample: Optional[HafniaDataset] = None,
+        allow_version_overwrite: bool = False,
+        interactive: bool = True,
+        gallery_images: Optional[Any] = None,
+        distribution_task_names: Optional[List[str]] = None,
+        cfg: Optional[Config] = None,
+    ) -> dict:
+        """
+        Upload the dataset and dataset details to the Hafnia platform.
+        This method ensures the dataset exists on the platform, synchronizes the
+        dataset files to remote storage, and uploads dataset details and optional gallery images
+        distributions.
+        Args:
+            dataset: The full :class:`HafniaDataset` instance that should be uploaded
+                to the platform.
+            dataset_sample: Optional sample :class:`HafniaDataset` used as a smaller
+                preview or subset of the main dataset on the platform. If provided,
+                it is uploaded alongside the full dataset for demonstration or
+                inspection purposes. Use only this if the sample dataset uses different
+                image files than the main dataset. Otherwise it is sufficient to just provide
+                the main dataset and the platform will create a sample automatically.
+            allow_version_overwrite: If ``True``, allows an existing dataset version
+                with the same name to be overwritten on the platform. If ``False``,
+                an error or confirmation may be required when a version conflict is
+                detected.
+            interactive: If ``True``, the upload process may prompt the user for
+                confirmation or additional input (for example when overwriting
+                existing versions). If ``False``, the upload is performed without
+                interactive prompts.
+            gallery_images: Optional collection of image identifiers or file names
+                that should be marked or displayed as gallery images for the dataset
+                on the platform. These are forwarded as ``gallery_image_names`` to
+                the platform API.
+            distribution_task_names: Optional list of task names associated with the
+                dataset that should be considered when configuring how the dataset is
+                distributed or exposed on the platform.
+            cfg: Optional :class:`hafnia_cli.config.Config` instance providing
+                configuration for platform access and storage. If not supplied, a
+                default configuration is created.
+        Returns:
+            dict: The response returned by the platform after uploading the dataset
+                details. The exact contents depend on the platform API but typically
+                include information about the created or updated dataset (such as
+                identifiers and status).
+        """
+
+        from hafnia.dataset.dataset_details_uploader import upload_dataset_details_to_platform
+        from hafnia.dataset.operations.dataset_s3_storage import sync_dataset_files_to_platform
+        from hafnia.platform.datasets import get_or_create_dataset
+
+        cfg = cfg or Config()
+        get_or_create_dataset(dataset.info.dataset_name, cfg=cfg)
+
+        sync_dataset_files_to_platform(
+            dataset=dataset,
+            sample_dataset=dataset_sample,
+            interactive=interactive,
+            allow_version_overwrite=allow_version_overwrite,
+            cfg=cfg,
+        )

-        samples.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL)  # Json for readability
-        samples.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET)  # Parquet for speed
+        response = upload_dataset_details_to_platform(
+            dataset=dataset,
+            distribution_task_names=distribution_task_names,
+            gallery_image_names=gallery_images,
+            cfg=cfg,
+        )

-        if add_version:
-            path_version = path_folder / "versions" / f"{dataset.info.version}"
-            path_version.mkdir(parents=True, exist_ok=True)
-            for filename in DATASET_FILENAMES_REQUIRED:
-                shutil.copy2(path_folder / filename, path_version / filename)
+        return response

     def __eq__(self, value) -> bool:
         if not isinstance(value, HafniaDataset):
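
The two new platform methods make the upload/cleanup round trip scriptable directly from a dataset object. A minimal sketch, assuming `dataset` is an existing `HafniaDataset` and platform credentials are configured:

    # Ensure the dataset exists on the platform, sync files, and upload details.
    response = dataset.upload_to_platform(interactive=False)

    # Delete it again without confirmation prompts (e.g. CI cleanup).
    dataset.delete_on_platform(interactive=False)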
@@ -632,6 +700,42 @@ class HafniaDataset:
         return True


+def _dataset_corrections(samples: pl.DataFrame, dataset_info: DatasetInfo) -> Tuple[pl.DataFrame, DatasetInfo]:
+    format_version_of_dataset = Version(dataset_info.format_version)
+
+    ## Backwards compatibility fixes for older dataset versions
+    if format_version_of_dataset < Version("0.2.0"):
+        samples = table_transformations.add_dataset_name_if_missing(samples, dataset_info.dataset_name)
+
+    if "file_name" in samples.columns:
+        samples = samples.rename({"file_name": SampleField.FILE_PATH})
+
+    if SampleField.SAMPLE_INDEX not in samples.columns:
+        samples = table_transformations.add_sample_index(samples)
+
+    # Backwards compatibility: If tags-column doesn't exist, create it with empty lists
+    if SampleField.TAGS not in samples.columns:
+        tags_column: List[List[str]] = [[] for _ in range(len(samples))]  # type: ignore[annotation-unchecked]
+        samples = samples.with_columns(pl.Series(tags_column, dtype=pl.List(pl.String)).alias(SampleField.TAGS))
+
+    if SampleField.STORAGE_FORMAT not in samples.columns:
+        samples = samples.with_columns(pl.lit(StorageFormat.IMAGE).alias(SampleField.STORAGE_FORMAT))
+
+    if SampleField.SAMPLE_INDEX in samples.columns and samples[SampleField.SAMPLE_INDEX].dtype != pl.UInt64:
+        samples = samples.cast({SampleField.SAMPLE_INDEX: pl.UInt64})
+
+    if format_version_of_dataset <= Version("0.2.0"):
+        if SampleField.BITMASKS in samples.columns and samples[SampleField.BITMASKS].dtype == pl.List(pl.Struct):
+            struct_schema = samples.schema[SampleField.BITMASKS].inner
+            struct_names = [f.name for f in struct_schema.fields]
+            if "rleString" in struct_names:
+                struct_names[struct_names.index("rleString")] = "rle_string"
+                samples = samples.with_columns(
+                    pl.col(SampleField.BITMASKS).list.eval(pl.element().struct.rename_fields(struct_names))
+                )
+    return samples, dataset_info
+
+
 def check_hafnia_dataset_from_path(path_dataset: Path) -> None:
     dataset = HafniaDataset.from_path(path_dataset, check_for_images=True)
     dataset.check_dataset()
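
The `rleString` to `rle_string` correction above renames a field inside a list-of-structs column. A standalone polars sketch of the same pattern, on hypothetical data:

    import polars as pl

    df = pl.DataFrame({"bitmasks": [[{"rleString": "abc", "width": 4}], [{"rleString": "def", "width": 2}]]})
    new_names = ["rle_string", "width"]  # positional rename of the struct fields
    df = df.with_columns(pl.col("bitmasks").list.eval(pl.element().struct.rename_fields(new_names)))
    print(df.schema)  # bitmasks: List(Struct({'rle_string': String, 'width': Int64}))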
@@ -653,7 +757,8 @@ def get_or_create_dataset_path_from_recipe(
     if force_redownload:
         shutil.rmtree(path_dataset, ignore_errors=True)

-    if HafniaDataset.check_dataset_path(path_dataset, raise_error=False):
+    dataset_metadata_files = DatasetMetadataFilePaths.from_path(path_dataset)
+    if dataset_metadata_files.exists(raise_error=False):
         return path_dataset

     path_dataset.mkdir(parents=True, exist_ok=True)
@@ -666,37 +771,101 @@ def get_or_create_dataset_path_from_recipe(
     return path_dataset


-def _dataset_corrections(samples: pl.DataFrame, dataset_info: DatasetInfo) -> Tuple[pl.DataFrame, DatasetInfo]:
-    format_version_of_dataset = Version(dataset_info.format_version)
+def available_dataset_versions_from_name(dataset_name: str) -> Dict[Version, "DatasetMetadataFilePaths"]:
+    credentials: ResourceCredentials = get_read_credentials_by_name(dataset_name=dataset_name)
+    return available_dataset_versions(credentials=credentials)

-    ## Backwards compatibility fixes for older dataset versions
-    if format_version_of_dataset < Version("0.2.0"):
-        samples = table_transformations.add_dataset_name_if_missing(samples, dataset_info.dataset_name)

-    if "file_name" in samples.columns:
-        samples = samples.rename({"file_name": SampleField.FILE_PATH})
+def available_dataset_versions(
+    credentials: ResourceCredentials,
+) -> Dict[Version, "DatasetMetadataFilePaths"]:
+    envs = credentials.aws_credentials()
+    bucket_prefix_sample_versions = f"{credentials.s3_uri()}/versions"
+    all_s3_annotation_files = s5cmd_utils.list_bucket(bucket_prefix=bucket_prefix_sample_versions, append_envs=envs)
+    available_versions = DatasetMetadataFilePaths.available_versions_from_files_list(all_s3_annotation_files)
+    return available_versions

-    if SampleField.SAMPLE_INDEX not in samples.columns:
-        samples = table_transformations.add_sample_index(samples)

-    # Backwards compatibility: If tags-column doesn't exist, create it with empty lists
-    if SampleField.TAGS not in samples.columns:
-        tags_column: List[List[str]] = [[] for _ in range(len(samples))]  # type: ignore[annotation-unchecked]
-        samples = samples.with_columns(pl.Series(tags_column, dtype=pl.List(pl.String)).alias(SampleField.TAGS))
+def select_version_from_available_versions(
+    available_versions: Dict[Version, "DatasetMetadataFilePaths"],
+    version: Optional[str],
+) -> "DatasetMetadataFilePaths":
+    if len(available_versions) == 0:
+        raise ValueError("No versions were found in the dataset.")

-    if SampleField.STORAGE_FORMAT not in samples.columns:
-        samples = samples.with_columns(pl.lit(StorageFormat.IMAGE).alias(SampleField.STORAGE_FORMAT))
+    if version is None:
+        str_versions = [str(v) for v in available_versions]
+        raise ValueError(f"Version must be specified. Available versions: {str_versions}")
+    elif version == "latest":
+        version_casted = max(available_versions)
+        user_logger.info(f"'latest' version '{version_casted}' has been selected")
+    else:
+        version_casted = version_from_string(version)

-    if SampleField.SAMPLE_INDEX in samples.columns and samples[SampleField.SAMPLE_INDEX].dtype != pl.UInt64:
-        samples = samples.cast({SampleField.SAMPLE_INDEX: pl.UInt64})
+    if version_casted not in available_versions:
+        raise ValueError(f"Selected version '{version}' not found in available versions: {available_versions}")

-    if format_version_of_dataset <= Version("0.2.0"):
-        if SampleField.BITMASKS in samples.columns and samples[SampleField.BITMASKS].dtype == pl.List(pl.Struct):
-            struct_schema = samples.schema[SampleField.BITMASKS].inner
-            struct_names = [f.name for f in struct_schema.fields]
-            if "rleString" in struct_names:
-                struct_names[struct_names.index("rleString")] = "rle_string"
-                samples = samples.with_columns(
-                    pl.col(SampleField.BITMASKS).list.eval(pl.element().struct.rename_fields(struct_names))
-                )
-    return samples, dataset_info
+    return available_versions[version_casted]
+
+
+def download_meta_dataset_files_from_version(
+    resource_credentials: ResourceCredentials, version: Optional[str], path_dataset: Path
+) -> list[str]:
+    envs = resource_credentials.aws_credentials()
+    available_versions = available_dataset_versions(credentials=resource_credentials)
+    metadata_files = select_version_from_available_versions(available_versions=available_versions, version=version)
+
+    s3_files = metadata_files.as_list()
+    path_dataset.mkdir(parents=True, exist_ok=True)
+    local_paths = [(path_dataset / filename.split("/")[-1]).as_posix() for filename in s3_files]
+    s5cmd_utils.fast_copy_files(
+        src_paths=s3_files,
+        dst_paths=local_paths,
+        append_envs=envs,
+        description="Downloading meta dataset files",
+    )
+
+    return local_paths
+
+
+def download_or_get_dataset_path(
+    dataset_name: str,
+    version: Optional[str],
+    cfg: Optional[Config] = None,
+    path_datasets_folder: Optional[str] = None,
+    force_redownload: bool = False,
+    download_files: bool = True,
+) -> Path:
+    """Download or get the path of the dataset."""
+
+    path_datasets = path_datasets_folder or utils.PATH_DATASETS
+    path_dataset = Path(path_datasets) / dataset_name
+    if not is_valid_version_string(version, allow_none=True, allow_latest=True):
+        raise ValueError(
+            f"Invalid version string: {version}. Should be a valid version (e.g. '0.1.0'), 'latest' or None."
+        )
+
+    # Only valid versions (e.g. '0.1.0', '1.0.0') can use local cache. Using either "latest"/None will always redownload
+    if is_valid_version_string(version, allow_none=False, allow_latest=False):
+        dataset_metadata_files = DatasetMetadataFilePaths.from_path(path_dataset)
+        dataset_exists = dataset_metadata_files.exists(version=version, raise_error=False)
+        if dataset_exists and not force_redownload:
+            user_logger.info("Dataset found locally. Set 'force=True' or add `--force` flag with cli to re-download")
+            return path_dataset
+
+    cfg = cfg or Config()
+    resource_credentials = get_read_credentials_by_name(dataset_name=dataset_name, cfg=cfg)
+    if resource_credentials is None:
+        raise ValueError(f"Failed to get read credentials for dataset '{dataset_name}' from the platform.")
+
+    download_meta_dataset_files_from_version(
+        resource_credentials=resource_credentials, version=version, path_dataset=path_dataset
+    )
+
+    if not download_files:
+        return path_dataset
+
+    dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
+    dataset = dataset.download_files_aws(path_dataset, aws_credentials=resource_credentials, force_redownload=True)
+    dataset.write_annotations(path_folder=path_dataset)  # Overwrite annotations as files have been re-downloaded
+    return path_dataset
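
The new module-level helpers separate version discovery from downloading. A hedged end-to-end sketch (the module path and dataset name are assumptions; platform credentials must be configured):

    from hafnia.dataset.hafnia_dataset import (  # module path assumed
        available_dataset_versions_from_name,
        download_or_get_dataset_path,
    )

    # List versions published under the "versions/" prefix of the dataset's bucket.
    versions = available_dataset_versions_from_name("my-dataset")
    print(sorted(versions))  # e.g. [Version('0.1.0'), Version('1.0.0')]

    # "latest" always re-resolves and re-downloads; a pinned version can reuse the local cache.
    path_meta_only = download_or_get_dataset_path("my-dataset", version="latest", download_files=False)
    path_full = download_or_get_dataset_path("my-dataset", version="1.0.0")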