hafnia 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the changes between package versions as published to one of the supported public registries. It is provided for informational purposes only.
- cli/__main__.py +13 -2
- cli/config.py +2 -1
- cli/consts.py +1 -1
- cli/dataset_cmds.py +6 -14
- cli/dataset_recipe_cmds.py +78 -0
- cli/experiment_cmds.py +226 -43
- cli/profile_cmds.py +6 -5
- cli/runc_cmds.py +5 -5
- cli/trainer_package_cmds.py +65 -0
- hafnia/__init__.py +2 -0
- hafnia/data/factory.py +1 -2
- hafnia/dataset/dataset_helpers.py +0 -12
- hafnia/dataset/dataset_names.py +8 -4
- hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
- hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
- hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
- hafnia/dataset/dataset_upload_helper.py +206 -53
- hafnia/dataset/hafnia_dataset.py +432 -194
- hafnia/dataset/license_types.py +63 -0
- hafnia/dataset/operations/dataset_stats.py +260 -3
- hafnia/dataset/operations/dataset_transformations.py +325 -4
- hafnia/dataset/operations/table_transformations.py +39 -2
- hafnia/dataset/primitives/__init__.py +8 -0
- hafnia/dataset/primitives/classification.py +1 -1
- hafnia/experiment/hafnia_logger.py +112 -0
- hafnia/http.py +16 -2
- hafnia/platform/__init__.py +9 -3
- hafnia/platform/builder.py +12 -10
- hafnia/platform/dataset_recipe.py +99 -0
- hafnia/platform/datasets.py +44 -6
- hafnia/platform/download.py +2 -1
- hafnia/platform/experiment.py +51 -56
- hafnia/platform/trainer_package.py +57 -0
- hafnia/utils.py +64 -13
- hafnia/visualizations/image_visualizations.py +3 -3
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA +34 -30
- hafnia-0.3.0.dist-info/RECORD +53 -0
- cli/recipe_cmds.py +0 -45
- hafnia-0.2.4.dist-info/RECORD +0 -49
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/WHEEL +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/entry_points.txt +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/licenses/LICENSE +0 -0
hafnia/dataset/dataset_names.py
CHANGED
@@ -18,11 +18,14 @@ class DeploymentStage(Enum):
     PRODUCTION = "production"
 
 
+TAG_IS_SAMPLE = "sample"
+
+OPS_REMOVE_CLASS = "__REMOVE__"
+
+
 class FieldName:
     CLASS_NAME: str = "class_name"  # Name of the class this primitive is associated with, e.g. "car" for Bbox
-    CLASS_IDX: str = (
-        "class_idx"  # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class
-    )
+    CLASS_IDX: str = "class_idx"  # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class  # noqa: E501
     OBJECT_ID: str = "object_id"  # Unique identifier for the object, e.g. "12345123"
     CONFIDENCE: str = "confidence"  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
 
@@ -50,8 +53,9 @@ class ColumnName:
     HEIGHT: str = "height"
     WIDTH: str = "width"
     SPLIT: str = "split"
-    IS_SAMPLE: str = "is_sample"
     REMOTE_PATH: str = "remote_path"  # Path to the file in remote storage, e.g. S3
+    ATTRIBUTION: str = "attribution"  # Attribution for the sample (image/video), e.g. creator, license, source, etc.
+    TAGS: str = "tags"
     META: str = "meta"
 
 
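The `is_sample` column is gone; sample membership now lives in the new `tags` list column via the `TAG_IS_SAMPLE` tag, alongside the new per-sample `attribution` column. Below is a minimal sketch of what that means for downstream code, assuming a polars table shaped like the dataset's sample table; the literal rows are invented for illustration and are not part of this diff.

```python
import polars as pl

from hafnia.dataset.dataset_names import TAG_IS_SAMPLE, ColumnName

# Invented rows; only the column names and the tag value come from this release.
samples = pl.DataFrame(
    {
        ColumnName.SPLIT: ["train", "val"],
        ColumnName.TAGS: [[TAG_IS_SAMPLE], []],
        ColumnName.ATTRIBUTION: ["CC-BY-4.0, Jane Doe", None],
    }
)

# Old: samples.filter(pl.col("is_sample"))
# New: membership is a tag lookup in the `tags` list column.
sample_rows = samples.filter(pl.col(ColumnName.TAGS).list.contains(TAG_IS_SAMPLE))
print(sample_rows)
```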
hafnia/dataset/dataset_recipe/dataset_recipe.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 import json
 import os
 from pathlib import Path
-from typing import
+from typing import Any, Callable, Dict, List, Optional, Type, Union
 
 from pydantic import (
     field_serializer,
@@ -14,9 +14,7 @@ from hafnia import utils
 from hafnia.dataset.dataset_recipe import recipe_transforms
 from hafnia.dataset.dataset_recipe.recipe_types import RecipeCreation, RecipeTransform, Serializable
 from hafnia.dataset.hafnia_dataset import HafniaDataset
-
-if TYPE_CHECKING:
-    from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.primitives.primitive import Primitive
 
 
 class DatasetRecipe(Serializable):
@@ -76,6 +74,42 @@ class DatasetRecipe(Serializable):
         json_str = path_json.read_text(encoding="utf-8")
         return DatasetRecipe.from_json_str(json_str)
 
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "DatasetRecipe":
+        """Deserialize from a dictionary."""
+        dataset_recipe = Serializable.from_dict(data)
+        return dataset_recipe
+
+    @staticmethod
+    def from_recipe_id(recipe_id: str) -> "DatasetRecipe":
+        """Loads a dataset recipe by id from the hafnia platform."""
+        from cli.config import Config
+        from hafnia.platform.dataset_recipe import get_dataset_recipe_by_id
+
+        cfg = Config()
+        endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
+        recipe_dict = get_dataset_recipe_by_id(recipe_id, endpoint=endpoint_dataset, api_key=cfg.api_key)
+        recipe_dict = recipe_dict["template"]["body"]
+        if isinstance(recipe_dict, str):
+            return DatasetRecipe.from_implicit_form(recipe_dict)
+
+        recipe = DatasetRecipe.from_dict(recipe_dict)
+        return recipe
+
+    @staticmethod
+    def from_recipe_name(name: str) -> "DatasetRecipe":
+        """Loads a dataset recipe by name from the hafnia platform"""
+        from cli.config import Config
+        from hafnia.platform.dataset_recipe import get_dataset_recipe_by_name
+
+        cfg = Config()
+        endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
+        recipe = get_dataset_recipe_by_name(name=name, endpoint=endpoint_dataset, api_key=cfg.api_key)
+        if not recipe:
+            raise ValueError(f"Dataset recipe '{name}' not found.")
+        recipe_id = recipe["id"]
+        return DatasetRecipe.from_recipe_id(recipe_id)
+
     @staticmethod
     def from_implicit_form(recipe: Any) -> DatasetRecipe:
         """
@@ -152,6 +186,59 @@ class DatasetRecipe(Serializable):
 
         raise ValueError(f"Unsupported recipe type: {type(recipe)}")
 
+    ### Upload, store and recipe conversions ###
+    def as_python_code(self, keep_default_fields: bool = False, as_kwargs: bool = True) -> str:
+        str_operations = [self.creation.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs)]
+        if self.operations:
+            for op in self.operations:
+                str_operations.append(op.as_python_code(keep_default_fields=keep_default_fields, as_kwargs=as_kwargs))
+        operations_str = ".".join(str_operations)
+        return operations_str
+
+    def as_short_name(self) -> str:
+        """Return a short name for the transforms."""
+
+        creation_name = self.creation.as_short_name()
+        if self.operations is None or len(self.operations) == 0:
+            return creation_name
+        short_names = [creation_name]
+        for operation in self.operations:
+            short_names.append(operation.as_short_name())
+        transforms_str = ",".join(short_names)
+        return f"Recipe({transforms_str})"
+
+    def as_json_str(self, indent: int = 2) -> str:
+        """Serialize the dataset recipe to a JSON string."""
+        dict_data = self.as_dict()
+        return json.dumps(dict_data, indent=indent, ensure_ascii=False)
+
+    def as_json_file(self, path_json: Path, indent: int = 2) -> None:
+        """Serialize the dataset recipe to a JSON file."""
+        path_json.parent.mkdir(parents=True, exist_ok=True)
+        json_str = self.as_json_str(indent=indent)
+        path_json.write_text(json_str, encoding="utf-8")
+
+    def as_dict(self) -> dict:
+        """Serialize the dataset recipe to a dictionary."""
+        return self.model_dump(mode="json")
+
+    def as_platform_recipe(self, recipe_name: Optional[str]) -> Dict:
+        """Uploads dataset recipe to the hafnia platform."""
+        from cli.config import Config
+        from hafnia.platform.dataset_recipe import get_or_create_dataset_recipe
+
+        recipe = self.as_dict()
+        cfg = Config()
+        endpoint_dataset = cfg.get_platform_endpoint("dataset_recipes")
+        recipe_dict = get_or_create_dataset_recipe(
+            recipe=recipe,
+            endpoint=endpoint_dataset,
+            api_key=cfg.api_key,
+            name=recipe_name,
+        )
+
+        return recipe_dict
+
     ### Dataset Recipe Transformations ###
     def shuffle(recipe: DatasetRecipe, seed: int = 42) -> DatasetRecipe:
         operation = recipe_transforms.Shuffle(seed=seed)
@@ -184,37 +271,36 @@ class DatasetRecipe(Serializable):
         recipe.append_operation(operation)
         return recipe
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        short_names = [creation_name]
-        for operation in self.operations:
-            short_names.append(operation.as_short_name())
-        transforms_str = ",".join(short_names)
-        return f"Recipe({transforms_str})"
+    def class_mapper(
+        recipe: DatasetRecipe,
+        class_mapping: Dict[str, str],
+        method: str = "strict",
+        primitive: Optional[Type[Primitive]] = None,
+        task_name: Optional[str] = None,
+    ) -> DatasetRecipe:
+        operation = recipe_transforms.ClassMapper(
+            class_mapping=class_mapping,
+            method=method,
+            primitive=primitive,
+            task_name=task_name,
+        )
+        recipe.append_operation(operation)
+        return recipe
 
-    def
-
-
-
-        return json.dumps(data, indent=indent, ensure_ascii=False)
+    def rename_task(recipe: DatasetRecipe, old_task_name: str, new_task_name: str) -> DatasetRecipe:
+        operation = recipe_transforms.RenameTask(old_task_name=old_task_name, new_task_name=new_task_name)
+        recipe.append_operation(operation)
+        return recipe
 
-    def
-
-
-
+    def select_samples_by_class_name(
+        recipe: DatasetRecipe,
+        name: Union[List[str], str],
+        task_name: Optional[str] = None,
+        primitive: Optional[Type[Primitive]] = None,
+    ) -> DatasetRecipe:
+        operation = recipe_transforms.SelectSamplesByClassName(name=name, task_name=task_name, primitive=primitive)
+        recipe.append_operation(operation)
+        return recipe
 
     ### Helper methods ###
     def get_dataset_names(self) -> List[str]:
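Taken together, the additions give `DatasetRecipe` a full round-trip story: build a recipe, inspect it, serialize it, and sync it with the platform. Below is a minimal sketch using only methods shown in this diff; the dataset and recipe names are placeholders, and the platform calls are commented out because they require an API key configured for the Hafnia CLI.

```python
from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

# "some-dataset" is a placeholder name, not taken from this diff.
recipe = DatasetRecipe.from_implicit_form("some-dataset")
recipe = recipe.shuffle(seed=42)
recipe = recipe.class_mapper(class_mapping={"car": "vehicle", "truck": "vehicle"})

print(recipe.as_short_name())   # e.g. "Recipe(<creation>,Shuffle,ClassMapper)"
print(recipe.as_python_code())  # the recipe as a chained-call Python expression

json_str = recipe.as_json_str()                   # serialize to JSON...
restored = DatasetRecipe.from_json_str(json_str)  # ...and back

# Platform round-trip (needs a configured API key):
# recipe.as_platform_recipe(recipe_name="vehicles-shuffled")
# restored = DatasetRecipe.from_recipe_name("vehicles-shuffled")
```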
hafnia/dataset/dataset_recipe/recipe_transforms.py
CHANGED
@@ -1,10 +1,8 @@
-from typing import
+from typing import Callable, Dict, List, Optional, Type, Union
 
 from hafnia.dataset.dataset_recipe.recipe_types import RecipeTransform
 from hafnia.dataset.hafnia_dataset import HafniaDataset
-
-if TYPE_CHECKING:
-    pass
+from hafnia.dataset.primitives.primitive import Primitive
 
 
 class Shuffle(RecipeTransform):
@@ -51,3 +49,33 @@ class DefineSampleSetBySize(RecipeTransform):
     @staticmethod
     def get_function() -> Callable[..., "HafniaDataset"]:
         return HafniaDataset.define_sample_set_by_size
+
+
+class ClassMapper(RecipeTransform):
+    class_mapping: Dict[str, str]
+    method: str = "strict"
+    primitive: Optional[Type[Primitive]] = None
+    task_name: Optional[str] = None
+
+    @staticmethod
+    def get_function() -> Callable[..., "HafniaDataset"]:
+        return HafniaDataset.class_mapper
+
+
+class RenameTask(RecipeTransform):
+    old_task_name: str
+    new_task_name: str
+
+    @staticmethod
+    def get_function() -> Callable[..., "HafniaDataset"]:
+        return HafniaDataset.rename_task
+
+
+class SelectSamplesByClassName(RecipeTransform):
+    name: Union[List[str], str]
+    task_name: Optional[str] = None
+    primitive: Optional[Type[Primitive]] = None
+
+    @staticmethod
+    def get_function() -> Callable[..., "HafniaDataset"]:
+        return HafniaDataset.select_samples_by_class_name