hafnia 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. cli/__main__.py +3 -1
  2. cli/config.py +43 -3
  3. cli/keychain.py +88 -0
  4. cli/profile_cmds.py +5 -2
  5. hafnia/__init__.py +1 -1
  6. hafnia/dataset/dataset_helpers.py +9 -2
  7. hafnia/dataset/dataset_names.py +2 -1
  8. hafnia/dataset/dataset_recipe/dataset_recipe.py +49 -37
  9. hafnia/dataset/dataset_recipe/recipe_transforms.py +18 -2
  10. hafnia/dataset/dataset_upload_helper.py +60 -4
  11. hafnia/dataset/format_conversions/image_classification_from_directory.py +106 -0
  12. hafnia/dataset/format_conversions/torchvision_datasets.py +281 -0
  13. hafnia/dataset/hafnia_dataset.py +176 -50
  14. hafnia/dataset/operations/dataset_stats.py +2 -3
  15. hafnia/dataset/operations/dataset_transformations.py +19 -15
  16. hafnia/dataset/operations/table_transformations.py +4 -3
  17. hafnia/dataset/primitives/bbox.py +25 -12
  18. hafnia/dataset/primitives/bitmask.py +26 -14
  19. hafnia/dataset/primitives/classification.py +16 -8
  20. hafnia/dataset/primitives/point.py +7 -3
  21. hafnia/dataset/primitives/polygon.py +16 -9
  22. hafnia/dataset/primitives/segmentation.py +10 -7
  23. hafnia/experiment/hafnia_logger.py +0 -9
  24. hafnia/platform/dataset_recipe.py +7 -2
  25. hafnia/platform/datasets.py +3 -3
  26. hafnia/platform/download.py +23 -18
  27. hafnia/utils.py +17 -0
  28. hafnia/visualizations/image_visualizations.py +1 -1
  29. {hafnia-0.3.0.dist-info → hafnia-0.4.0.dist-info}/METADATA +8 -6
  30. hafnia-0.4.0.dist-info/RECORD +56 -0
  31. hafnia-0.3.0.dist-info/RECORD +0 -53
  32. {hafnia-0.3.0.dist-info → hafnia-0.4.0.dist-info}/WHEEL +0 -0
  33. {hafnia-0.3.0.dist-info → hafnia-0.4.0.dist-info}/entry_points.txt +0 -0
  34. {hafnia-0.3.0.dist-info → hafnia-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -33,17 +33,17 @@ import json
  import re
  import textwrap
  from pathlib import Path
- from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set, Tuple, Type, Union
+ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Type, Union

  import cv2
  import more_itertools
  import numpy as np
  import polars as pl
  from PIL import Image
- from tqdm import tqdm
+ from rich.progress import track

  from hafnia.dataset import dataset_helpers
- from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS, FieldName
+ from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS, ColumnName, FieldName
  from hafnia.dataset.primitives import get_primitive_type_from_string
  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.utils import remove_duplicates_preserve_order
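
Across this release, tqdm progress bars are replaced with rich.progress. As a rough, self-contained sketch (not taken from the diff), rich's track() wraps an iterable much like tqdm(), with the desc keyword renamed to description:

    from rich.progress import track

    # track() wraps any iterable and renders a progress bar while iterating.
    for file_name in track(["img_0.jpg", "img_1.jpg"], description="Transform images"):
        pass  # process each file here
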
@@ -73,7 +73,8 @@ def transform_images(
  path_image_folder = path_output / "data"
  path_image_folder.mkdir(parents=True, exist_ok=True)

- for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
+ org_paths = dataset.samples[ColumnName.FILE_PATH].to_list()
+ for org_path in track(org_paths, description="Transform images"):
  org_path = Path(org_path)
  if not org_path.exists():
  raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
@@ -86,7 +87,7 @@ def transform_images(
  raise FileNotFoundError(f"Transformed file {new_path} does not exist in the dataset.")
  new_paths.append(str(new_path))

- table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
+ table = dataset.samples.with_columns(pl.Series(new_paths).alias(ColumnName.FILE_PATH))
  return dataset.update_samples(table)

@@ -156,13 +157,16 @@ def get_task_info_from_task_name_and_primitive(

  def class_mapper(
  dataset: "HafniaDataset",
- class_mapping: Dict[str, str],
+ class_mapping: Union[Dict[str, str], List[Tuple[str, str]]],
  method: str = "strict",
  primitive: Optional[Type[Primitive]] = None,
  task_name: Optional[str] = None,
  ) -> "HafniaDataset":
  from hafnia.dataset.hafnia_dataset import HafniaDataset

+ if isinstance(class_mapping, list):
+ class_mapping = dict(class_mapping)
+
  allowed_methods = ("strict", "remove_undefined", "keep_undefined")
  if method not in allowed_methods:
  raise ValueError(f"Method '{method}' is not recognized. Allowed methods are: {allowed_methods}")
@@ -170,7 +174,7 @@ def class_mapper(
  task = dataset.info.get_task_by_task_name_and_primitive(task_name=task_name, primitive=primitive)
  current_names = task.class_names or []

- # Expand wildcard mappings
+ # Expand wildcard mappings e.g. {"Vehicle.*": "Vehicle"} to {"Vehicle.Car": "Vehicle", "Vehicle.Bus": "Vehicle"}
  class_mapping = expand_class_mapping(class_mapping, current_names)

  non_existing_mapping_names = set(class_mapping) - set(current_names)
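
A minimal usage sketch of the updated class_mapper follows; the import path and the dataset construction are assumptions, while the signature, the accepted list-of-tuples mapping, and the wildcard expansion are taken from the diff:

    from hafnia.dataset.hafnia_dataset import HafniaDataset
    from hafnia.dataset.operations.dataset_transformations import class_mapper  # assumed module path

    def remap_vehicle_classes(dataset: HafniaDataset) -> HafniaDataset:
        # Wildcard keys are expanded against the task's class names, e.g.
        # {"Vehicle.*": "Vehicle"} becomes {"Vehicle.Car": "Vehicle", "Vehicle.Bus": "Vehicle"}.
        return class_mapper(
            dataset,
            class_mapping=[("Vehicle.*", "Vehicle"), ("Person", "Pedestrian")],  # list of tuples is now accepted
            method="remove_undefined",  # one of: "strict", "remove_undefined", "keep_undefined"
        )
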
@@ -213,7 +217,6 @@ def class_mapper(
  if OPS_REMOVE_CLASS in new_class_names:
  # Move __REMOVE__ to the end of the list if it exists
  new_class_names.append(new_class_names.pop(new_class_names.index(OPS_REMOVE_CLASS)))
- name_2_idx_mapping: Dict[str, int] = {name: idx for idx, name in enumerate(new_class_names)}

  samples = dataset.samples
  samples_updated = samples.with_columns(
@@ -230,6 +233,7 @@ def class_mapper(
  )

  # Update class indices too
+ name_2_idx_mapping: Dict[str, int] = {name: idx for idx, name in enumerate(new_class_names)}
  samples_updated = samples_updated.with_columns(
  pl.col(task.primitive.column_name())
  .list.eval(
@@ -354,14 +358,14 @@ def _validate_inputs_select_samples_by_class_name(
  name: Union[List[str], str],
  task_name: Optional[str] = None,
  primitive: Optional[Type[Primitive]] = None,
- ) -> Tuple["TaskInfo", Set[str]]:
+ ) -> Tuple["TaskInfo", List[str]]:
  if isinstance(name, str):
  name = [name]
- names = set(name)
+ names = list(name)

  # Check that specified names are available in at least one of the tasks
  available_names_across_tasks = set(more_itertools.flatten([t.class_names for t in dataset.info.tasks]))
- missing_class_names_across_tasks = names - available_names_across_tasks
+ missing_class_names_across_tasks = set(names) - available_names_across_tasks
  if len(missing_class_names_across_tasks) > 0:
  raise ValueError(
  f"The specified names {list(names)} have not been found in any of the tasks. "
@@ -370,15 +374,15 @@ def _validate_inputs_select_samples_by_class_name(

  # Auto infer task if task_name and primitive are not provided
  if task_name is None and primitive is None:
- tasks_with_names = [t for t in dataset.info.tasks if names.issubset(t.class_names or [])]
+ tasks_with_names = [t for t in dataset.info.tasks if set(names).issubset(t.class_names or [])]
  if len(tasks_with_names) == 0:
  raise ValueError(
- f"The specified names {list(names)} have not been found in any of the tasks. "
+ f"The specified names {names} have not been found in any of the tasks. "
  f"Available class names: {available_names_across_tasks}"
  )
  if len(tasks_with_names) > 1:
  raise ValueError(
- f"Found multiple tasks containing the specified names {list(names)}. "
+ f"Found multiple tasks containing the specified names {names}. "
  f"Specify either 'task_name' or 'primitive' to only select from one task. "
  f"Tasks containing all provided names: {[t.name for t in tasks_with_names]}"
  )
@@ -393,7 +397,7 @@ def _validate_inputs_select_samples_by_class_name(
  )

  task_class_names = set(task.class_names or [])
- missing_class_names = names - task_class_names
+ missing_class_names = set(names) - task_class_names
  if len(missing_class_names) > 0:
  raise ValueError(
  f"The specified names {list(missing_class_names)} have not been found for the '{task.name}' task. "
@@ -2,7 +2,7 @@ from pathlib import Path
  from typing import List, Optional, Type

  import polars as pl
- from tqdm import tqdm
+ from rich.progress import track

  from hafnia.dataset.dataset_names import (
  FILENAME_ANNOTATIONS_JSONL,
@@ -144,7 +144,7 @@ def split_primitive_columns_by_task_name(
  return samples_table


- def read_table_from_path(path: Path) -> pl.DataFrame:
+ def read_samples_from_path(path: Path) -> pl.DataFrame:
  path_annotations = path / FILENAME_ANNOTATIONS_PARQUET
  if path_annotations.exists():
  user_logger.info(f"Reading dataset annotations from Parquet file: {path_annotations}")
@@ -162,7 +162,8 @@ def read_table_from_path(path: Path) -> pl.DataFrame:

  def check_image_paths(table: pl.DataFrame) -> bool:
  missing_files = []
- for org_path in tqdm(table["file_name"].to_list(), desc="Check image paths"):
+ org_paths = table[ColumnName.FILE_PATH].to_list()
+ for org_path in track(org_paths, description="Check image paths"):
  org_path = Path(org_path)
  if not org_path.exists():
  missing_files.append(org_path)
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union

  import cv2
  import numpy as np
+ from pydantic import Field

  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.dataset.primitives.utils import (
@@ -17,18 +18,30 @@ from hafnia.dataset.primitives.utils import (

  class Bbox(Primitive):
  # Names should match names in FieldName
- height: float # Height of the bounding box as a fraction of the image height, e.g. 0.1 for 10% of the image height
- width: float # Width of the bounding box as a fraction of the image width, e.g. 0.1 for 10% of the image width
- top_left_x: float # X coordinate of top-left corner of Bbox as a fraction of the image width, e.g. 0.1 for 10% of the image width
- top_left_y: float # Y coordinate of top-left corner of Bbox as a fraction of the image height, e.g. 0.1 for 10% of the image height
- class_name: Optional[str] = None # Class name, e.g. "car"
- class_idx: Optional[int] = None # Class index, e.g. 0 for "car" if it is the first class
- object_id: Optional[str] = None # Unique identifier for the object, e.g. "12345123"
- confidence: Optional[float] = None # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
- ground_truth: bool = True # Whether this is ground truth or a prediction
-
- task_name: str = "" # Task name to support multiple Bbox tasks in the same dataset. "" defaults to "bboxes"
- meta: Optional[Dict[str, Any]] = None # This can be used to store additional information about the bitmask
+ height: float = Field(
+ description="Normalized height of the bounding box (0.0=no height, 1.0=full image height) as a fraction of image height"
+ )
+ width: float = Field(
+ description="Normalized width of the bounding box (0.0=no width, 1.0=full image width) as a fraction of image width"
+ )
+ top_left_x: float = Field(
+ description="Normalized x-coordinate of top-left corner (0.0=left edge, 1.0=right edge) as a fraction of image width"
+ )
+ top_left_y: float = Field(
+ description="Normalized y-coordinate of top-left corner (0.0=top edge, 1.0=bottom edge) as a fraction of image height"
+ )
+ class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
+ class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
+ object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
+ confidence: Optional[float] = Field(
+ default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
+ )
+ ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+ task_name: str = Field(
+ default="", description="Task name to support multiple Bbox tasks in the same dataset. '' defaults to 'bboxes'"
+ )
+ meta: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata for the annotation")

  @staticmethod
  def default_task_name() -> str:
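
For orientation, a small construction example consistent with the new Field descriptions; the values are illustrative, and all coordinates and sizes are normalized fractions of the image dimensions:

    from hafnia.dataset.primitives.bbox import Bbox

    bbox = Bbox(
        top_left_x=0.25,  # 25% in from the left edge
        top_left_y=0.10,  # 10% down from the top edge
        width=0.30,       # 30% of the image width
        height=0.20,      # 20% of the image height
        class_name="car",
        class_idx=0,
    )
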
@@ -5,7 +5,9 @@ from typing import Any, Dict, Optional, Tuple
  import cv2
  import numpy as np
  import pycocotools.mask as coco_mask
+ from pydantic import Field

+ from hafnia.dataset.dataset_names import FieldName
  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.dataset.primitives.utils import (
  anonymize_by_resizing,
@@ -14,23 +16,33 @@ from hafnia.dataset.primitives.utils import (
  text_org_from_left_bottom_to_centered,
  )

+ FieldName
+

  class Bitmask(Primitive):
  # Names should match names in FieldName
- top: int # Bitmask top coordinate in pixels
- left: int # Bitmask left coordinate in pixels
- height: int # Bitmask height of the bounding box in pixels
- width: int # Bitmask width of the bounding box in pixels
- rleString: str # Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left).
- area: Optional[float] = None # Area of the bitmask in pixels is calculated from the RLE string
- class_name: Optional[str] = None # This should match the string in 'FieldName.CLASS_NAME'
- class_idx: Optional[int] = None # This should match the string in 'FieldName.CLASS_IDX'
- object_id: Optional[str] = None # This should match the string in 'FieldName.OBJECT_ID'
- confidence: Optional[float] = None # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
- ground_truth: bool = True # Whether this is ground truth or a prediction
-
- task_name: str = "" # Task name to support multiple Bitmask tasks in the same dataset. "" defaults to "bitmask"
- meta: Optional[Dict[str, Any]] = None # This can be used to store additional information about the bitmask
+ top: int = Field(description="Bitmask top coordinate in pixels ")
+ left: int = Field(description="Bitmask left coordinate in pixels")
+ height: int = Field(description="Bitmask height of the bounding box in pixels")
+ width: int = Field(description="Bitmask width of the bounding box in pixels")
+ rleString: str = Field(
+ description="Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left)."
+ )
+ area: Optional[float] = Field(
+ default=None, description="Area of the bitmask in pixels is calculated from the RLE string"
+ )
+ class_name: Optional[str] = Field(default=None, description="Class name of the object represented by the bitmask")
+ class_idx: Optional[int] = Field(default=None, description="Class index of the object represented by the bitmask")
+ object_id: Optional[str] = Field(default=None, description="Object ID of the instance represented by the bitmask")
+ confidence: Optional[float] = Field(
+ default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
+ )
+ ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+ task_name: str = Field(
+ default="", description="Task name to support multiple Bitmask tasks in the same dataset. Defaults to 'bitmask'"
+ )
+ meta: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata for the annotation")

  @staticmethod
  def default_task_name() -> str:
@@ -1,6 +1,7 @@
  from typing import Any, Dict, Optional, Tuple

  import numpy as np
+ from pydantic import Field

  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_name
@@ -8,14 +9,21 @@ from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_nam

  class Classification(Primitive):
  # Names should match names in FieldName
- class_name: Optional[str] = None # Class name, e.g. "car"
- class_idx: Optional[int] = None # Class index, e.g. 0 for "car" if it is the first class
- object_id: Optional[str] = None # Unique identifier for the object, e.g. "12345123"
- confidence: Optional[float] = None # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification
- ground_truth: bool = True # Whether this is ground truth or a prediction
-
- task_name: str = "" # To support multiple Classification tasks in the same dataset. "" defaults to "classification"
- meta: Optional[Dict[str, Any]] = None # This can be used to store additional information about the bitmask
+ class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
+ class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
+ object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
+ confidence: Optional[float] = Field(
+ default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification"
+ )
+ ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+ task_name: str = Field(
+ default="",
+ description="To support multiple Classification tasks in the same dataset. '' defaults to 'classification'",
+ )
+ meta: Optional[Dict[str, Any]] = Field(
+ default=None, description="This can be used to store additional information about the classification"
+ )

  @staticmethod
  def default_task_name() -> str:
@@ -1,13 +1,17 @@
  from typing import Any, Tuple

- from pydantic import BaseModel
+ from pydantic import BaseModel, Field

  from hafnia.dataset.primitives.utils import clip


  class Point(BaseModel):
- x: float
- y: float
+ x: float = Field(
+ description="Normalized x-coordinate (0.0=left edge, 1.0=right edge) relative to image width",
+ )
+ y: float = Field(
+ description="Normalized y-coordinate (0.0=top edge, 1.0=bottom edge) relative to image height",
+ )

  def to_pixel_coordinates(
  self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple

  import cv2
  import numpy as np
+ from pydantic import Field

  from hafnia.dataset.primitives.bitmask import Bitmask
  from hafnia.dataset.primitives.point import Point
@@ -11,15 +12,21 @@ from hafnia.dataset.primitives.utils import class_color_by_name, get_class_name

  class Polygon(Primitive):
  # Names should match names in FieldName
- points: List[Point]
- class_name: Optional[str] = None # This should match the string in 'FieldName.CLASS_NAME'
- class_idx: Optional[int] = None # This should match the string in 'FieldName.CLASS_IDX'
- object_id: Optional[str] = None # This should match the string in 'FieldName.OBJECT_ID'
- confidence: Optional[float] = None # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
- ground_truth: bool = True # Whether this is ground truth or a prediction
-
- task_name: str = "" # Task name to support multiple Polygon tasks in the same dataset. "" defaults to "polygon"
- meta: Optional[Dict[str, Any]] = None # This can be used to store additional information about the bitmask
+ points: List[Point] = Field(description="List of points defining the polygon")
+ class_name: Optional[str] = Field(default=None, description="Class name of the polygon")
+ class_idx: Optional[int] = Field(default=None, description="Class index of the polygon")
+ object_id: Optional[str] = Field(default=None, description="Object ID of the polygon")
+ confidence: Optional[float] = Field(
+ default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
+ )
+ ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+ task_name: str = Field(
+ default="", description="Task name to support multiple Polygon tasks in the same dataset. Defaults to 'polygon'"
+ )
+ meta: Optional[Dict[str, Any]] = Field(
+ default=None, description="This can be used to store additional information about the polygon"
+ )

  @staticmethod
  def from_list_of_points(
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Tuple

  import cv2
  import numpy as np
+ from pydantic import Field

  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.dataset.primitives.utils import get_class_name
@@ -9,15 +10,17 @@ from hafnia.visualizations.colors import get_n_colors


  class Segmentation(Primitive):
- # mask: np.ndarray
- class_names: Optional[List[str]] = None # This should match the string in 'FieldName.CLASS_NAME'
- ground_truth: bool = True # Whether this is ground truth or a prediction
+ # WARNING: Segmentation masks have not been fully implemented yet
+ class_names: Optional[List[str]] = Field(default=None, description="Class names of the segmentation")
+ ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")

- # confidence: Optional[float] = None # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification
- task_name: str = (
- "" # Task name to support multiple Segmentation tasks in the same dataset. "" defaults to "segmentation"
+ task_name: str = Field(
+ default="",
+ description="Task name to support multiple Segmentation tasks in the same dataset. Defaults to 'segmentation'",
+ )
+ meta: Optional[Dict[str, Any]] = Field(
+ default=None, description="This can be used to store additional information about the segmentation"
  )
- meta: Optional[Dict[str, Any]] = None # This can be used to store additional information about the bitmask

  @staticmethod
  def default_task_name() -> str:
@@ -12,8 +12,6 @@ import pyarrow as pa
  import pyarrow.parquet as pq
  from pydantic import BaseModel, field_validator

- from hafnia.data.factory import load_dataset
- from hafnia.dataset.hafnia_dataset import HafniaDataset
  from hafnia.log import sys_logger, user_logger
  from hafnia.utils import is_hafnia_cloud_job, now_as_str

@@ -136,13 +134,6 @@ class HafniaLogger:
  except Exception as e:
  user_logger.error(f"Failed to initialize MLflow: {e}")

- def load_dataset(self, dataset_name: str) -> HafniaDataset:
- """
- Load a dataset from the specified path.
- """
- self.dataset_name = dataset_name
- return load_dataset(dataset_name)
-
  def path_local_experiment(self) -> Path:
  """Get the path for local experiment."""
  if is_hafnia_cloud_job():
@@ -11,12 +11,17 @@ from hafnia.utils import pretty_print_list_as_table, timed

  @timed("Get or create dataset recipe")
  def get_or_create_dataset_recipe(
- recipe: dict, endpoint: str, api_key: str, name: Optional[str] = None
+ recipe: dict,
+ endpoint: str,
+ api_key: str,
+ name: Optional[str] = None,
+ overwrite: bool = False,
  ) -> Optional[Dict]:
  headers = {"Authorization": api_key}
- data = {"template": {"body": recipe}}
+ data = {"template": {"body": recipe}, "overwrite": overwrite}
  if name is not None:
  data["name"] = name # type: ignore[assignment]
+
  response = http.post(endpoint, headers=headers, data=data)
  return response
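
With the new overwrite flag, the request body assembled by get_or_create_dataset_recipe looks roughly like this (the recipe content and name are placeholders):

    data = {
        "template": {"body": {"...": "..."}},  # the recipe dict supplied by the caller
        "overwrite": True,                     # new flag; defaults to False
        "name": "my-dataset-recipe",           # only included when a name is provided
    }
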
@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Optional

  import rich
  from rich import print as rprint
- from tqdm import tqdm
+ from rich.progress import track

  from cli.config import Config
  from hafnia import http, utils
@@ -122,7 +122,7 @@ def download_dataset_from_access_endpoint(
  try:
  fast_copy_files_s3(
  src_paths=dataset.samples[ColumnName.REMOTE_PATH].to_list(),
- dst_paths=dataset.samples[ColumnName.FILE_NAME].to_list(),
+ dst_paths=dataset.samples[ColumnName.FILE_PATH].to_list(),
  append_envs=envs,
  description="Downloading images",
  )
@@ -196,7 +196,7 @@ def execute_s5cmd_commands(

  error_lines = []
  lines = []
- for line in tqdm(process.stdout, total=len(commands), desc=description):
+ for line in track(process.stdout, total=len(commands), description=description):
  if "ERROR" in line or "error" in line:
  error_lines.append(line.strip())
  lines.append(line.strip())
@@ -1,10 +1,10 @@
  from pathlib import Path
- from typing import Dict
+ from typing import Dict, Optional

  import boto3
  from botocore.exceptions import ClientError
  from pydantic import BaseModel, field_validator
- from tqdm import tqdm
+ from rich.progress import Progress

  from hafnia.http import fetch
  from hafnia.log import sys_logger, user_logger
@@ -125,13 +125,15 @@ def download_single_object(s3_client, bucket: str, object_key: str, output_dir:
  return local_path


- def download_resource(resource_url: str, destination: str, api_key: str) -> Dict:
+ def download_resource(resource_url: str, destination: str, api_key: str, prefix: Optional[str] = None) -> Dict:
  """
  Downloads either a single file from S3 or all objects under a prefix.

  Args:
  resource_url (str): The URL or identifier used to fetch S3 credentials.
  destination (str): Path to local directory where files will be stored.
+ api_key (str): API key for authentication when fetching credentials.
+ prefix (Optional[str]): If provided, only download objects under this prefix.

  Returns:
  Dict[str, Any]: A dictionary containing download info, e.g.:
@@ -147,7 +149,7 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
  res_credentials = get_resource_credentials(resource_url, api_key)

  bucket_name = res_credentials.bucket_name()
- key = res_credentials.object_key()
+ prefix = prefix or res_credentials.object_key()

  output_path = Path(destination)
  output_path.mkdir(parents=True, exist_ok=True)
@@ -159,29 +161,32 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
  )
  downloaded_files = []
  try:
- s3_client.head_object(Bucket=bucket_name, Key=key)
- local_file = download_single_object(s3_client, bucket_name, key, output_path)
+ s3_client.head_object(Bucket=bucket_name, Key=prefix)
+ local_file = download_single_object(s3_client, bucket_name, prefix, output_path)
  downloaded_files.append(str(local_file))
  user_logger.info(f"Downloaded single file: {local_file}")

  except ClientError as e:
  error_code = e.response.get("Error", {}).get("Code")
  if error_code == "404":
- sys_logger.debug(f"Object '{key}' not found; trying as a prefix.")
- response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
+ sys_logger.debug(f"Object '{prefix}' not found; trying as a prefix.")
+ response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
  contents = response.get("Contents", [])

  if not contents:
- raise ValueError(f"No objects found for prefix '{key}' in bucket '{bucket_name}'")
- pbar = tqdm(contents)
- for obj in pbar:
- sub_key = obj["Key"]
- size_mb = obj.get("Size", 0) / 1024 / 1024
- pbar.set_description(f"{sub_key} ({size_mb:.2f} MB)")
- local_file = download_single_object(s3_client, bucket_name, sub_key, output_path)
- downloaded_files.append(local_file.as_posix())
-
- user_logger.info(f"Downloaded folder/prefix '{key}' with {len(downloaded_files)} object(s).")
+ raise ValueError(f"No objects found for prefix '{prefix}' in bucket '{bucket_name}'")
+
+ with Progress() as progress:
+ task = progress.add_task("Downloading files", total=len(contents))
+ for obj in contents:
+ sub_key = obj["Key"]
+ size_mb = obj.get("Size", 0) / 1024 / 1024
+ progress.update(task, description=f"Downloading {sub_key} ({size_mb:.2f} MB)")
+ local_file = download_single_object(s3_client, bucket_name, sub_key, output_path)
+ downloaded_files.append(local_file.as_posix())
+ progress.advance(task)
+
+ user_logger.info(f"Downloaded folder/prefix '{prefix}' with {len(downloaded_files)} object(s).")
  else:
  user_logger.error(f"Error checking object or prefix: {e}")
  raise RuntimeError(f"Failed to check or download S3 resource: {e}") from e
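
A hedged call sketch for the extended download_resource; the URL, API key, and prefix are placeholders, and only the signature and the prefix fallback behaviour come from the diff:

    from hafnia.platform.download import download_resource

    info = download_resource(
        resource_url="https://example.invalid/resources/1234",  # placeholder
        destination="./downloads",
        api_key="YOUR_API_KEY",  # placeholder
        prefix="datasets/sample/",  # optional; falls back to the credential's object key when omitted
    )
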
hafnia/utils.py CHANGED
@@ -63,6 +63,18 @@ def timed(label: str):
  return decorator


+ def get_path_hafnia_cache() -> Path:
+ return Path.home() / "hafnia"
+
+
+ def get_path_torchvision_downloads() -> Path:
+ return get_path_hafnia_cache() / "torchvision_downloads"
+
+
+ def get_path_hafnia_conversions() -> Path:
+ return get_path_hafnia_cache() / "hafnia_conversions"
+
+
  def now_as_str() -> str:
  """Get the current date and time as a string."""
  return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
@@ -205,3 +217,8 @@ def remove_duplicates_preserve_order(seq: Iterable) -> List:
  Remove duplicates from a list while preserving the order of elements.
  """
  return list(more_itertools.unique_everseen(seq))
+
+
+ def is_image_file(file_path: Path) -> bool:
+ image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif")
+ return file_path.suffix.lower() in image_extensions
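
The new helpers in hafnia/utils.py resolve to fixed locations under the user's home directory and perform a simple extension check, for example:

    from pathlib import Path

    from hafnia.utils import get_path_hafnia_cache, get_path_torchvision_downloads, is_image_file

    print(get_path_hafnia_cache())           # <home>/hafnia
    print(get_path_torchvision_downloads())  # <home>/hafnia/torchvision_downloads
    print(is_image_file(Path("photo.JPG")))  # True (the extension check is case-insensitive)
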
@@ -193,7 +193,7 @@ def save_dataset_sample_set_visualizations(
  image = draw_annotations(image, annotations, draw_settings=draw_settings)

  pil_image = Image.fromarray(image)
- path_image = path_output_folder / Path(sample.file_name).name
+ path_image = path_output_folder / Path(sample.file_path).name
  pil_image.save(path_image)
  paths.append(path_image)