hafnia 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (42)
  1. cli/__main__.py +13 -2
  2. cli/config.py +2 -1
  3. cli/consts.py +1 -1
  4. cli/dataset_cmds.py +6 -14
  5. cli/dataset_recipe_cmds.py +78 -0
  6. cli/experiment_cmds.py +226 -43
  7. cli/profile_cmds.py +6 -5
  8. cli/runc_cmds.py +5 -5
  9. cli/trainer_package_cmds.py +65 -0
  10. hafnia/__init__.py +2 -0
  11. hafnia/data/factory.py +1 -2
  12. hafnia/dataset/dataset_helpers.py +0 -12
  13. hafnia/dataset/dataset_names.py +8 -4
  14. hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
  15. hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
  16. hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  17. hafnia/dataset/dataset_upload_helper.py +206 -53
  18. hafnia/dataset/hafnia_dataset.py +432 -194
  19. hafnia/dataset/license_types.py +63 -0
  20. hafnia/dataset/operations/dataset_stats.py +260 -3
  21. hafnia/dataset/operations/dataset_transformations.py +325 -4
  22. hafnia/dataset/operations/table_transformations.py +39 -2
  23. hafnia/dataset/primitives/__init__.py +8 -0
  24. hafnia/dataset/primitives/classification.py +1 -1
  25. hafnia/experiment/hafnia_logger.py +112 -0
  26. hafnia/http.py +16 -2
  27. hafnia/platform/__init__.py +9 -3
  28. hafnia/platform/builder.py +12 -10
  29. hafnia/platform/dataset_recipe.py +99 -0
  30. hafnia/platform/datasets.py +44 -6
  31. hafnia/platform/download.py +2 -1
  32. hafnia/platform/experiment.py +51 -56
  33. hafnia/platform/trainer_package.py +57 -0
  34. hafnia/utils.py +64 -13
  35. hafnia/visualizations/image_visualizations.py +3 -3
  36. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA +34 -30
  37. hafnia-0.3.0.dist-info/RECORD +53 -0
  38. cli/recipe_cmds.py +0 -45
  39. hafnia-0.2.4.dist-info/RECORD +0 -49
  40. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/WHEEL +0 -0
  41. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/entry_points.txt +0 -0
  42. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -18,11 +18,14 @@ class DeploymentStage(Enum):
18
18
  PRODUCTION = "production"
19
19
 
20
20
 
21
+ TAG_IS_SAMPLE = "sample"
22
+
23
+ OPS_REMOVE_CLASS = "__REMOVE__"
24
+
25
+
21
26
  class FieldName:
22
27
  CLASS_NAME: str = "class_name" # Name of the class this primitive is associated with, e.g. "car" for Bbox
23
- CLASS_IDX: str = (
24
- "class_idx" # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class
25
- )
28
+ CLASS_IDX: str = "class_idx" # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class # noqa: E501
26
29
  OBJECT_ID: str = "object_id" # Unique identifier for the object, e.g. "12345123"
27
30
  CONFIDENCE: str = "confidence" # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
28
31
 
@@ -50,8 +53,9 @@ class ColumnName:
50
53
  HEIGHT: str = "height"
51
54
  WIDTH: str = "width"
52
55
  SPLIT: str = "split"
53
- IS_SAMPLE: str = "is_sample"
54
56
  REMOTE_PATH: str = "remote_path" # Path to the file in remote storage, e.g. S3
57
+ ATTRIBUTION: str = "attribution" # Attribution for the sample (image/video), e.g. creator, license, source, etc.
58
+ TAGS: str = "tags"
55
59
  META: str = "meta"
56
60
 
57
61
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  import os
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
6
+ from typing import Any, Callable, Dict, List, Optional, Type, Union
7
7
 
8
8
  from pydantic import (
9
9
  field_serializer,
@@ -14,9 +14,7 @@ from hafnia import utils
14
14
  from hafnia.dataset.dataset_recipe import recipe_transforms
15
15
  from hafnia.dataset.dataset_recipe.recipe_types import RecipeCreation, RecipeTransform, Serializable
16
16
  from hafnia.dataset.hafnia_dataset import HafniaDataset
17
-
18
- if TYPE_CHECKING:
19
- from hafnia.dataset.hafnia_dataset import HafniaDataset
17
+ from hafnia.dataset.primitives.primitive import Primitive
20
18
 
21
19
 
22
20
  class DatasetRecipe(Serializable):
@@ -76,6 +74,42 @@ class DatasetRecipe(Serializable):
76
74
  json_str = path_json.read_text(encoding="utf-8")
77
75
  return DatasetRecipe.from_json_str(json_str)
78
76
 
77
+ @staticmethod
78
+ def from_dict(data: Dict[str, Any]) -> "DatasetRecipe":
79
+ """Deserialize from a dictionary."""
80
+ dataset_recipe = Serializable.from_dict(data)
81
+ return dataset_recipe
82
+
83
+ @staticmethod
84
+ def from_recipe_id(recipe_id: str) -> "DatasetRecipe":
85
+ """Loads a dataset recipe by id from the hafnia platform."""
86
+ from cli.config import Config
87
+ from hafnia.platform.dataset_recipe import get_dataset_recipe_by_id
88
+
89
+ cfg = Config()
90
+ endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
91
+ recipe_dict = get_dataset_recipe_by_id(recipe_id, endpoint=endpoint_dataset, api_key=cfg.api_key)
92
+ recipe_dict = recipe_dict["template"]["body"]
93
+ if isinstance(recipe_dict, str):
94
+ return DatasetRecipe.from_implicit_form(recipe_dict)
95
+
96
+ recipe = DatasetRecipe.from_dict(recipe_dict)
97
+ return recipe
98
+
99
+ @staticmethod
100
+ def from_recipe_name(name: str) -> "DatasetRecipe":
101
+ """Loads a dataset recipe by name from the hafnia platform"""
102
+ from cli.config import Config
103
+ from hafnia.platform.dataset_recipe import get_dataset_recipe_by_name
104
+
105
+ cfg = Config()
106
+ endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
107
+ recipe = get_dataset_recipe_by_name(name=name, endpoint=endpoint_dataset, api_key=cfg.api_key)
108
+ if not recipe:
109
+ raise ValueError(f"Dataset recipe '{name}' not found.")
110
+ recipe_id = recipe["id"]
111
+ return DatasetRecipe.from_recipe_id(recipe_id)
112
+
79
113
  @staticmethod
80
114
  def from_implicit_form(recipe: Any) -> DatasetRecipe:
81
115
  """
@@ -152,6 +186,59 @@ class DatasetRecipe(Serializable):
152
186
 
153
187
  raise ValueError(f"Unsupported recipe type: {type(recipe)}")
154
188
 
189
+ ### Upload, store and recipe conversions ###
190
+ def as_python_code(self, keep_default_fields: bool = False, as_kwargs: bool = True) -> str:
191
+ str_operations = [self.creation.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs)]
192
+ if self.operations:
193
+ for op in self.operations:
194
+ str_operations.append(op.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs))
195
+ operations_str = ".".join(str_operations)
196
+ return operations_str
197
+
198
+ def as_short_name(self) -> str:
199
+ """Return a short name for the transforms."""
200
+
201
+ creation_name = self.creation.as_short_name()
202
+ if self.operations is None or len(self.operations) == 0:
203
+ return creation_name
204
+ short_names = [creation_name]
205
+ for operation in self.operations:
206
+ short_names.append(operation.as_short_name())
207
+ transforms_str = ",".join(short_names)
208
+ return f"Recipe({transforms_str})"
209
+
210
+ def as_json_str(self, indent: int = 2) -> str:
211
+ """Serialize the dataset recipe to a JSON string."""
212
+ dict_data = self.as_dict()
213
+ return json.dumps(dict_data, indent=indent, ensure_ascii=False)
214
+
215
+ def as_json_file(self, path_json: Path, indent: int = 2) -> None:
216
+ """Serialize the dataset recipe to a JSON file."""
217
+ path_json.parent.mkdir(parents=True, exist_ok=True)
218
+ json_str = self.as_json_str(indent=indent)
219
+ path_json.write_text(json_str, encoding="utf-8")
220
+
221
+ def as_dict(self) -> dict:
222
+ """Serialize the dataset recipe to a dictionary."""
223
+ return self.model_dump(mode="json")
224
+
225
+ def as_platform_recipe(self, recipe_name: Optional[str]) -> Dict:
226
+ """Uploads dataset recipe to the hafnia platform."""
227
+ from cli.config import Config
228
+ from hafnia.platform.dataset_recipe import get_or_create_dataset_recipe
229
+
230
+ recipe = self.as_dict()
231
+ cfg = Config()
232
+ endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
233
+ recipe_dict = get_or_create_dataset_recipe(
234
+ recipe=recipe,
235
+ endpoint=endpoint_dataset,
236
+ api_key=cfg.api_key,
237
+ name=recipe_name,
238
+ )
239
+
240
+ return recipe_dict
241
+
155
242
  ### Dataset Recipe Transformations ###
156
243
  def shuffle(recipe: DatasetRecipe, seed: int = 42) -> DatasetRecipe:
157
244
  operation = recipe_transforms.Shuffle(seed=seed)
@@ -184,37 +271,36 @@ class DatasetRecipe(Serializable):
184
271
  recipe.append_operation(operation)
185
272
  return recipe
186
273
 
187
- ### Conversions ###
188
- def as_python_code(self, keep_default_fields: bool = False, as_kwargs: bool = True) -> str:
189
- str_operations = [self.creation.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs)]
190
- if self.operations:
191
- for op in self.operations:
192
- str_operations.append(op.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs))
193
- operations_str = ".".join(str_operations)
194
- return operations_str
195
-
196
- def as_short_name(self) -> str:
197
- """Return a short name for the transforms."""
198
-
199
- creation_name = self.creation.as_short_name()
200
- if self.operations is None or len(self.operations) == 0:
201
- return creation_name
202
- short_names = [creation_name]
203
- for operation in self.operations:
204
- short_names.append(operation.as_short_name())
205
- transforms_str = ",".join(short_names)
206
- return f"Recipe({transforms_str})"
274
+ def class_mapper(
275
+ recipe: DatasetRecipe,
276
+ class_mapping: Dict[str, str],
277
+ method: str = "strict",
278
+ primitive: Optional[Type[Primitive]] = None,
279
+ task_name: Optional[str] = None,
280
+ ) -> DatasetRecipe:
281
+ operation = recipe_transforms.ClassMapper(
282
+ class_mapping=class_mapping,
283
+ method=method,
284
+ primitive=primitive,
285
+ task_name=task_name,
286
+ )
287
+ recipe.append_operation(operation)
288
+ return recipe
207
289
 
208
- def as_json_str(self, indent: int = 2) -> str:
209
- """Serialize the dataset recipe to a JSON string."""
210
- data = self.model_dump(mode="json")
211
- # data = type_as_first_key(data)
212
- return json.dumps(data, indent=indent, ensure_ascii=False)
290
+ def rename_task(recipe: DatasetRecipe, old_task_name: str, new_task_name: str) -> DatasetRecipe:
291
+ operation = recipe_transforms.RenameTask(old_task_name=old_task_name, new_task_name=new_task_name)
292
+ recipe.append_operation(operation)
293
+ return recipe
213
294
 
214
- def as_json_file(self, path_json: Path, indent: int = 2) -> None:
215
- """Serialize the dataset recipe to a JSON file."""
216
- json_str = self.as_json_str(indent=indent)
217
- path_json.write_text(json_str, encoding="utf-8")
295
+ def select_samples_by_class_name(
296
+ recipe: DatasetRecipe,
297
+ name: Union[List[str], str],
298
+ task_name: Optional[str] = None,
299
+ primitive: Optional[Type[Primitive]] = None,
300
+ ) -> DatasetRecipe:
301
+ operation = recipe_transforms.SelectSamplesByClassName(name=name, task_name=task_name, primitive=primitive)
302
+ recipe.append_operation(operation)
303
+ return recipe
218
304
 
219
305
  ### Helper methods ###
220
306
  def get_dataset_names(self) -> List[str]:
@@ -1,10 +1,8 @@
1
- from typing import TYPE_CHECKING, Callable, Dict
1
+ from typing import Callable, Dict, List, Optional, Type, Union
2
2
 
3
3
  from hafnia.dataset.dataset_recipe.recipe_types import RecipeTransform
4
4
  from hafnia.dataset.hafnia_dataset import HafniaDataset
5
-
6
- if TYPE_CHECKING:
7
- pass
5
+ from hafnia.dataset.primitives.primitive import Primitive
8
6
 
9
7
 
10
8
  class Shuffle(RecipeTransform):
@@ -51,3 +49,33 @@ class DefineSampleSetBySize(RecipeTransform):
51
49
  @staticmethod
52
50
  def get_function() -> Callable[..., "HafniaDataset"]:
53
51
  return HafniaDataset.define_sample_set_by_size
52
+
53
+
54
+ class ClassMapper(RecipeTransform):
55
+ class_mapping: Dict[str, str]
56
+ method: str = "strict"
57
+ primitive: Optional[Type[Primitive]] = None
58
+ task_name: Optional[str] = None
59
+
60
+ @staticmethod
61
+ def get_function() -> Callable[..., "HafniaDataset"]:
62
+ return HafniaDataset.class_mapper
63
+
64
+
65
+ class RenameTask(RecipeTransform):
66
+ old_task_name: str
67
+ new_task_name: str
68
+
69
+ @staticmethod
70
+ def get_function() -> Callable[..., "HafniaDataset"]:
71
+ return HafniaDataset.rename_task
72
+
73
+
74
+ class SelectSamplesByClassName(RecipeTransform):
75
+ name: Union[List[str], str]
76
+ task_name: Optional[str] = None
77
+ primitive: Optional[Type[Primitive]] = None
78
+
79
+ @staticmethod
80
+ def get_function() -> Callable[..., "HafniaDataset"]:
81
+ return HafniaDataset.select_samples_by_class_name
@@ -8,7 +8,7 @@ from pydantic import BaseModel, computed_field
8
8
 
9
9
  from hafnia import utils
10
10
 
11
- if TYPE_CHECKING:
11
+ if TYPE_CHECKING: # Using 'TYPE_CHECKING' to avoid circular imports during type checking
12
12
  from hafnia.dataset.hafnia_dataset import HafniaDataset
13
13
 
14
14