hafnia 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cli/__main__.py CHANGED
@@ -20,19 +20,15 @@ def configure(cfg: Config) -> None:
 
     profile_name = click.prompt("Profile Name", type=str, default=consts.DEFAULT_PROFILE_NAME)
     profile_name = profile_name.strip()
-    try:
-        cfg.add_profile(profile_name, ConfigSchema(), set_active=True)
-    except ValueError:
-        raise click.ClickException(consts.ERROR_CREATE_PROFILE)
+
+    cfg.check_profile_name(profile_name)
 
     api_key = click.prompt("Hafnia API Key", type=str, hide_input=True)
-    try:
-        cfg.api_key = api_key.strip()
-    except ValueError as e:
-        click.echo(f"Error: {str(e)}", err=True)
-        return
+
     platform_url = click.prompt("Hafnia Platform URL", type=str, default=consts.DEFAULT_API_URL)
-    cfg.platform_url = platform_url.strip()
+
+    cfg_profile = ConfigSchema(api_key=api_key, platform_url=platform_url)
+    cfg.add_profile(profile_name, cfg_profile, set_active=True)
     cfg.save_config()
     profile_cmds.profile_show(cfg)
 
cli/config.py CHANGED
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
 from pydantic import BaseModel, field_validator
 
 import cli.consts as consts
-from hafnia.log import user_logger
+from hafnia.log import sys_logger, user_logger
 
 PLATFORM_API_MAPPING = {
     "recipes": "/api/v1/recipes",
@@ -23,9 +23,17 @@ class ConfigSchema(BaseModel):
     api_key: Optional[str] = None
 
     @field_validator("api_key")
-    def validate_api_key(cls, value: str) -> str:
-        if value is not None and len(value) < 10:
+    def validate_api_key(cls, value: Optional[str]) -> Optional[str]:
+        if value is None:
+            return value
+
+        if len(value) < 10:
             raise ValueError("API key is too short.")
+
+        if not value.startswith("ApiKey "):
+            sys_logger.warning("API key is missing the 'ApiKey ' prefix. Prefix is being added automatically.")
+            value = f"ApiKey {value}"
+
         return value
 
 
@@ -51,6 +59,7 @@ class Config:
         if profile_name not in self.config_data.profiles:
             raise ValueError(f"Profile '{profile_name}' does not exist.")
         self.config_data.active_profile = profile_name
+        self.save_config()
 
     @property
     def config(self) -> ConfigSchema:
@@ -92,13 +101,18 @@ class Config:
 
         return Path.home() / ".hafnia" / "config.json"
 
-    def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
-        profile_name = profile_name.strip()
+    def check_profile_name(self, profile_name: str) -> None:
+        if not profile_name or not isinstance(profile_name, str):
+            raise ValueError("Profile name must be a non-empty string.")
+
         if profile_name in self.config_data.profiles:
             user_logger.warning(
                 f"Profile with name '{profile_name}' already exists, it will be overwritten by the new one."
             )
 
+    def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
+        profile_name = profile_name.strip()
+        self.check_profile_name(profile_name)
         self.config_data.profiles[profile_name] = profile
         if set_active:
             self.config_data.active_profile = profile_name
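
Note: the net effect of the `validate_api_key` change is that keys are normalized on assignment rather than rejected later. A minimal, self-contained sketch of that behavior (an approximation of the validator above, not the packaged code):

```python
from typing import Optional


def normalize_api_key(value: Optional[str]) -> Optional[str]:
    """Mirrors the 0.2.3 validator: keep None, reject short keys,
    and add the 'ApiKey ' prefix when it is missing."""
    if value is None:
        return value
    if len(value) < 10:
        raise ValueError("API key is too short.")
    if not value.startswith("ApiKey "):
        value = f"ApiKey {value}"  # the real validator also logs a warning here
    return value


assert normalize_api_key("0123456789") == "ApiKey 0123456789"
assert normalize_api_key("ApiKey 0123456789") == "ApiKey 0123456789"
```

This also lines up with the masking change in `profile_show` below: with a stored key of the form `ApiKey xxxx...`, the first 11 characters cover the prefix plus the first 4 characters of the key itself.
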
cli/profile_cmds.py CHANGED
@@ -56,6 +56,7 @@ def profile_create(cfg: Config, name: str, api_url: str, api_key: str, activate:
     cfg_profile = ConfigSchema(platform_url=api_url, api_key=api_key)
 
     cfg.add_profile(profile_name=name, profile=cfg_profile, set_active=activate)
+    profile_show(cfg)
 
 
 @profile.command("rm")
@@ -87,7 +88,7 @@ def profile_active(cfg: Config) -> None:
 
 
 def profile_show(cfg: Config) -> None:
-    masked_key = f"{cfg.api_key[:4]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 8 else "****"
+    masked_key = f"{cfg.api_key[:11]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 20 else "****"
     console = Console()
 
     table = Table(title=f"{consts.PROFILE_TABLE_HEADER} {cfg.active_profile}", show_header=False)
hafnia/dataset/dataset_helpers.py CHANGED
@@ -1,6 +1,7 @@
 import io
 import math
 import random
+import shutil
 from pathlib import Path
 from typing import Dict, List
 
@@ -21,7 +22,7 @@ def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items:
 
 
 def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
-    hasher = xxhash.xxh3_64()
+    hasher = xxhash.xxh3_128()
 
     with open(path, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):  # 8192, 16384, 32768, 65536
@@ -30,7 +31,7 @@ def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
 
 
 def hash_from_bytes(data: bytes) -> str:
-    hasher = xxhash.xxh3_64()
+    hasher = xxhash.xxh3_128()
     hasher.update(data)
     return hasher.hexdigest()
 
@@ -40,14 +41,46 @@ def save_image_with_hash_name(image: np.ndarray, path_folder: Path) -> Path:
     buffer = io.BytesIO()
     pil_image.save(buffer, format="PNG")
     hash_value = hash_from_bytes(buffer.getvalue())
-    path_image = Path(path_folder) / f"{hash_value}.png"
+    path_image = Path(path_folder) / relative_path_from_hash(hash=hash_value, suffix=".png")
+    path_image.parent.mkdir(parents=True, exist_ok=True)
     pil_image.save(path_image)
     return path_image
 
 
-def filename_as_hash_from_path(path_image: Path) -> str:
-    hash = hash_file_xxhash(path_image)
-    return f"{hash}{path_image.suffix}"
+def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Path) -> Path:
+    """
+    Copies a file to a dataset root directory with a hash-based name and sub-directory structure.
+
+    E.g. for an "image.png" with hash "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", the image will be copied to
+    'path_dataset_root / "data" / "dfe" / "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png"'
+    Notice that the hash is used for both the filename and the subfolder name.
+
+    Placing image/video files into multiple sub-folders (instead of one large folder) is seemingly
+    unnecessary, but it is actually a requirement when the dataset is later downloaded from S3.
+
+    The reason is that AWS has a rate limit of 3500 ops/sec per prefix (sub-folder) in S3 - meaning we can "only"
+    download 3500 files per second from a single folder (prefix) in S3.
+
+    For even a single user, we found that this limit was being reached when files are stored in single folder (prefix)
+    in S3. To support multiple users and concurrent experiments, we are required to separate files into
+    multiple sub-folders (prefixes) in S3 to not hit the rate limit.
+    """
+
+    if not path_source.exists():
+        raise FileNotFoundError(f"Source file {path_source} does not exist.")
+
+    hash_value = hash_file_xxhash(path_source)
+    path_file = path_dataset_root / relative_path_from_hash(hash=hash_value, suffix=path_source.suffix)
+    path_file.parent.mkdir(parents=True, exist_ok=True)
+    if not path_file.exists():
+        shutil.copy2(path_source, path_file)
+
+    return path_file
+
+
+def relative_path_from_hash(hash: str, suffix: str) -> Path:
+    path_file = Path("data") / hash[:3] / f"{hash}{suffix}"
+    return path_file
 
 
 def split_sizes_from_ratios(n_items: int, split_ratios: Dict[str, float]) -> Dict[str, int]:
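
Note: together, `relative_path_from_hash` and `copy_and_rename_file_to_hash_value` move the dataset layout from a single flat `data/` folder to hash-prefixed sub-folders. A small illustration (values are made up; the layout logic matches the helper above):

```python
from pathlib import Path


def relative_path_from_hash(hash: str, suffix: str) -> Path:
    # Same layout as above: data/<first 3 hash characters>/<hash><suffix>
    return Path("data") / hash[:3] / f"{hash}{suffix}"


print(relative_path_from_hash("dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", ".png"))
# e.g. data/dfe/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png
```

Since the xxh3_128 hex digest is hexadecimal, the first three characters give up to 16^3 = 4096 prefixes, which spreads S3 requests across prefixes instead of concentrating them on one, in line with the rate-limit rationale in the docstring.
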
hafnia/dataset/dataset_recipe/dataset_recipe.py CHANGED
@@ -216,6 +216,16 @@ class DatasetRecipe(Serializable):
         json_str = self.as_json_str(indent=indent)
         path_json.write_text(json_str, encoding="utf-8")
 
+    ### Helper methods ###
+    def get_dataset_names(self) -> List[str]:
+        """
+        Get all dataset names added with 'from_name'.
+        Function recursively gathers dataset names.
+        """
+        if self.creation is None:
+            return []
+        return self.creation.get_dataset_names()
+
     ### Validation and Serialization ###
     @field_validator("creation", mode="plain")
     @classmethod
@@ -282,7 +292,10 @@ class FromPath(RecipeCreation):
         return HafniaDataset.from_path
 
     def as_short_name(self) -> str:
-        return f"'{self.path_folder}'".replace(os.sep, "|")
+        return f"'{self.path_folder}'".replace(os.sep, "-")
+
+    def get_dataset_names(self) -> List[str]:
+        return []  # Only counts 'from_name' datasets
 
 
 class FromName(RecipeCreation):
@@ -297,6 +310,9 @@ class FromName(RecipeCreation):
     def as_short_name(self) -> str:
         return self.name
 
+    def get_dataset_names(self) -> List[str]:
+        return [self.name]
+
 
 class FromMerge(RecipeCreation):
     recipe0: DatasetRecipe
@@ -310,6 +326,11 @@ class FromMerge(RecipeCreation):
         merger = FromMerger(recipes=[self.recipe0, self.recipe1])
         return merger.as_short_name()
 
+    def get_dataset_names(self) -> List[str]:
+        """Get the dataset names from the merged recipes."""
+        names = [*self.recipe0.creation.get_dataset_names(), *self.recipe1.creation.get_dataset_names()]
+        return names
+
 
 class FromMerger(RecipeCreation):
     recipes: List[DatasetRecipe]
@@ -325,3 +346,40 @@ class FromMerger(RecipeCreation):
 
     def as_short_name(self) -> str:
         return f"Merger({','.join(recipe.as_short_name() for recipe in self.recipes)})"
+
+    def get_dataset_names(self) -> List[str]:
+        """Get the dataset names from the merged recipes."""
+        names = []
+        for recipe in self.recipes:
+            names.extend(recipe.creation.get_dataset_names())
+        return names
+
+
+def extract_dataset_names_from_json_dict(data: dict) -> list[str]:
+    """
+    Extract dataset names recursively from a JSON dictionary added with 'from_name'.
+
+    Even if the same functionality is achieved with `DatasetRecipe.get_dataset_names()`,
+    we want to keep this function in 'dipdatalib' to extract dataset names from json dictionaries
+    directly.
+    """
+    creation_field = data.get("creation")
+    if creation_field is None:
+        return []
+    if creation_field.get("__type__") == "FromName":
+        return [creation_field["name"]]
+    elif creation_field.get("__type__") == "FromMerge":
+        recipe_names = ["recipe0", "recipe1"]
+        dataset_name = []
+        for recipe_name in recipe_names:
+            recipe = creation_field.get(recipe_name)
+            if recipe is None:
+                continue
+            dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+        return dataset_name
+    elif creation_field.get("__type__") == "FromMerger":
+        dataset_name = []
+        for recipe in creation_field.get("recipes", []):
+            dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+        return dataset_name
+    return []
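
Note: the JSON walked by `extract_dataset_names_from_json_dict` is the serialized form of a `DatasetRecipe`. An illustrative input (field names follow the function above; the dataset names are example values):

```python
recipe_json = {
    "creation": {
        "__type__": "FromMerge",
        "recipe0": {"creation": {"__type__": "FromName", "name": "coco-2017"}},
        "recipe1": {"creation": {"__type__": "FromName", "name": "tiny-dataset"}},
    }
}

# extract_dataset_names_from_json_dict(recipe_json)
# -> ["coco-2017", "tiny-dataset"]
```
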
hafnia/dataset/dataset_recipe/recipe_types.py CHANGED
@@ -108,6 +108,10 @@ class RecipeCreation(Serializable):
     def get_function() -> Callable[..., "HafniaDataset"]:
         pass
 
+    @abstractmethod
+    def get_dataset_names(self) -> List[str]:
+        pass
+
     def build(self) -> "HafniaDataset":
         from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
 
hafnia/dataset/hafnia_dataset.py CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import os
 import shutil
 from dataclasses import dataclass
 from pathlib import Path
@@ -182,9 +181,8 @@ class HafniaDataset:
         table = read_table_from_path(path_folder)
 
         # Convert from relative paths to absolute paths
-        table = table.with_columns(
-            pl.concat_str([pl.lit(str(path_folder.absolute()) + os.sep), pl.col("file_name")]).alias("file_name")
-        )
+        dataset_root = path_folder.absolute().as_posix() + "/"
+        table = table.with_columns((dataset_root + pl.col("file_name")).alias("file_name"))
         if check_for_images:
             check_image_paths(table)
         return HafniaDataset(samples=table, info=dataset_info)
@@ -413,30 +411,18 @@ class HafniaDataset:
 
         return True
 
-    def write(self, path_folder: Path, name_by_hash: bool = True, add_version: bool = False) -> None:
+    def write(self, path_folder: Path, add_version: bool = False) -> None:
         user_logger.info(f"Writing dataset to {path_folder}...")
         if not path_folder.exists():
             path_folder.mkdir(parents=True)
-        path_folder_images = path_folder / "data"
-        path_folder_images.mkdir(parents=True, exist_ok=True)
 
         new_relative_paths = []
         for org_path in tqdm(self.samples["file_name"].to_list(), desc="- Copy images"):
-            org_path = Path(org_path)
-            if not org_path.exists():
-                raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
-            if name_by_hash:
-                filename = dataset_helpers.filename_as_hash_from_path(org_path)
-            else:
-                filename = Path(org_path).name
-            new_path = path_folder_images / filename
-            if not new_path.exists():
-                shutil.copy2(org_path, new_path)
-
-            if not new_path.exists():
-                raise FileNotFoundError(f"File {new_path} does not exist in the dataset.")
+            new_path = dataset_helpers.copy_and_rename_file_to_hash_value(
+                path_source=Path(org_path),
+                path_dataset_root=path_folder,
+            )
             new_relative_paths.append(str(new_path.relative_to(path_folder)))
-
     table = self.samples.with_columns(pl.Series(new_relative_paths).alias("file_name"))
     table.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL)  # Json for readability
     table.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET)  # Parquet for speed
hafnia/platform/datasets.py CHANGED
@@ -2,6 +2,7 @@ import os
 import shutil
 import subprocess
 import tempfile
+import uuid
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 
@@ -61,7 +62,12 @@ def download_or_get_dataset_path(
     dataset_id = get_dataset_id(dataset_name=dataset_name, endpoint=endpoint_dataset, api_key=api_key)
     if dataset_id is None:
         sys_logger.error(f"Dataset '{dataset_name}' not found on the Hafnia platform.")
-    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/temporary-credentials"
+
+    if utils.is_hafnia_cloud_job():
+        credentials_endpoint_suffix = "temporary-credentials-hidden"  # Access to hidden datasets
+    else:
+        credentials_endpoint_suffix = "temporary-credentials"  # Access to sample dataset
+    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"
 
     download_dataset_from_access_endpoint(
         endpoint=access_dataset_endpoint,
@@ -80,7 +86,7 @@ def download_dataset_from_access_endpoint(
 ) -> None:
     resource_credentials = get_resource_credentials(endpoint, api_key)
 
-    local_dataset_paths = [str(path_dataset / filename) for filename in DATASET_FILENAMES_REQUIRED]
+    local_dataset_paths = [(path_dataset / filename).as_posix() for filename in DATASET_FILENAMES_REQUIRED]
     s3_uri = resource_credentials.s3_uri()
     s3_dataset_files = [f"{s3_uri}/{filename}" for filename in DATASET_FILENAMES_REQUIRED]
 
@@ -94,7 +100,6 @@ def download_dataset_from_access_endpoint(
 
     if not download_files:
         return
-
     dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
     fast_copy_files_s3(
         src_paths=dataset.samples[ColumnName.REMOTE_PATH].to_list(),
@@ -124,8 +129,10 @@ def execute_s5cmd_commands(
     description: str = "Executing s5cmd commands",
 ) -> List[str]:
     append_envs = append_envs or {}
-    with tempfile.NamedTemporaryFile(suffix=".txt") as tmp_file:
-        tmp_file_path = Path(tmp_file.name)
+    # In Windows default "Temp" directory can not be deleted that is why we need to create a
+    # temporary directory.
+    with tempfile.TemporaryDirectory() as temp_dir:
+        tmp_file_path = Path(temp_dir, f"{uuid.uuid4().hex}.txt")
         tmp_file_path.write_text("\n".join(commands))
         run_cmds = [
             "s5cmd",
{hafnia-0.2.1.dist-info → hafnia-0.2.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.2.1
+Version: 0.2.3
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
{hafnia-0.2.1.dist-info → hafnia-0.2.3.dist-info}/RECORD RENAMED
@@ -1,27 +1,26 @@
 cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cli/__main__.py,sha256=pKYvJOk0toACDMrgEeUYT3P5EQPFmXdMRIQuLZLd3dc,1603
-cli/config.py,sha256=-BTdljjC42hXHb1P0yewea9knzgSBFsb909qJ5DEkCo,5531
+cli/__main__.py,sha256=WPOiwolX6J5qLBQGv_b64PGYWScrwqbdVh5zs5AbzVk,1436
+cli/config.py,sha256=hkVd1WyrRqLBgJbKWJkXBzRWlvBRr8dt_8f722yZiiM,6063
 cli/consts.py,sha256=sj0MRwbbCT2Yl77FPddck1VWkFxp7QY6I9l1o75j_aE,963
 cli/dataset_cmds.py,sha256=VUMhnHGYPtNNJUK9aobKTx2zpVzLex4gTMmyQXuzCVw,1623
 cli/experiment_cmds.py,sha256=L-k_ZJ4B7I4cA8OvHcheSwXM6nx9aTF9G7eKBzAcOzQ,1961
-cli/profile_cmds.py,sha256=-HQcFgYI6Rqaefi0Nj-91KhiqPKUj7zOaiJWbHx_bac,3196
+cli/profile_cmds.py,sha256=qop9hW4EjbTEQ5d28tiIHCaG6iUM9opQcj289qI-tkg,3220
 cli/recipe_cmds.py,sha256=qnMfF-te47HXNkgyA0hm9X3etDQsqMnrVEGDCrzVjZU,1462
 cli/runc_cmds.py,sha256=QqhQe2sd7tK1Bl2aGfIWRyJjpP6F7Tducg7HULrHsZ4,4958
 hafnia/__init__.py,sha256=Zphq-cQoX95Z11zm4lkrU-YiAJxddR7IBfwDkxeHoDE,108
-hafnia/helper_testing.py,sha256=GnaNhXdY81arjCT9M2RUAmvn2-aIzRqlCtbWwGbOIaY,3901
 hafnia/http.py,sha256=HoPB03IL6e-nglTrw1NGT6sDx1T8VNas5HjTT1QZHnU,3035
 hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
 hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
 hafnia/utils.py,sha256=aTZaeHldXn4Jx_AR2BYATxtLCRrBKBjjDFmpSZTSvV4,5138
 hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
 hafnia/data/factory.py,sha256=OY6l6c9UKk6OUDhG4Akb2VgcSaTRLHlbSndAe1HuW2U,813
-hafnia/dataset/dataset_helpers.py,sha256=WVCpbUfNbHy7MZJqJ3OyJF8k1hSObo3kScxpXT17Sj8,3510
+hafnia/dataset/dataset_helpers.py,sha256=x6jub_aLWJn-sWSvXT_0-nwzzPG1xMM9yBMNDx6Nufw,5190
 hafnia/dataset/dataset_names.py,sha256=mp7A_TOqgoqHUEBCPC4ReKNJ93cxwQB451owoCqD6yM,2120
 hafnia/dataset/dataset_upload_helper.py,sha256=D1BGaeEar4McpUvXj4Yy8nk1tr12IEVhP_Ma47OoWmU,21150
-hafnia/dataset/hafnia_dataset.py,sha256=4SJUq7pAqLkcFzgnOUUx8ERraE_sABctOAsONBJExME,27664
-hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=DbPLlmshF6DC98Cwko04XtBaXgSg966LZKR6JXD_9Sg,13632
+hafnia/dataset/hafnia_dataset.py,sha256=6yy13mU9OnJfIoG8R1ZS7mmkNR_VKOD9B4L2KjdS76I,27078
+hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=3rMMd1xlfQzElxF9P2uNyKqK-GjbCADZgcFDoPviTmU,15796
 hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=wh1y2XyX0PwOwfuzJ3_17KKng2Rk0zLlgdfSHfS1SyM,1305
-hafnia/dataset/dataset_recipe/recipe_types.py,sha256=6LxfanhX9ihof1gGSonoC-56zSWsI8k2aS4Uw_QgXoM,5176
+hafnia/dataset/dataset_recipe/recipe_types.py,sha256=breT8x81FcmiZ82U_D9FBut0F-eWwOeBWBOse9kNAYU,5256
 hafnia/dataset/operations/dataset_stats.py,sha256=tSHPmkXt4WNgjf5-j3jIrsSy1Ajld3619AkUHaesXb4,445
 hafnia/dataset/operations/dataset_transformations.py,sha256=4ibC11upEtRGJgoFLv8lUnglv2xANZVfNdsvI1BMvfM,2960
 hafnia/dataset/operations/table_transformations.py,sha256=kCLbLRdiFSx1JG0IWtaKkhWcMtM7hy8zgm0Ehz0zO_g,7639
@@ -38,13 +37,13 @@ hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M
 hafnia/experiment/hafnia_logger.py,sha256=dnV3VPzJK7DSeUh0g4Hk9w1g-eSXcVqJD9If0h2d2GE,6885
 hafnia/platform/__init__.py,sha256=zJsR6Hy_0iUcC9xL-lBnqR0mLfF4EUr_VXa_XQA7SlA,455
 hafnia/platform/builder.py,sha256=_g8ykQWETz5Y4Np9QU1a6wIzbbJwXCkbiOCA6JcF5Rc,5742
-hafnia/platform/datasets.py,sha256=J252hrejrBWUdS6hY4lRc9_SbYy7CMD92068lLHjPC8,6953
+hafnia/platform/datasets.py,sha256=mRv8A0JSMYdBr3_0qqrw21kKSSPMSYUFWyPVNxYqZrA,7344
 hafnia/platform/download.py,sha256=oJzdxSIDTuw1an7maC6I7A5nZvDaZPhUkuAmyRwN9Kc,6843
 hafnia/platform/experiment.py,sha256=-nAfTmn1c8sE6pHDCTNZvWDTopkXndarJAPIGvsnk60,2389
 hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
 hafnia/visualizations/image_visualizations.py,sha256=RuFFj2fJCm9dxl2Lq0MumJHF81ZnX-IsDsTxm8ZFV9A,7313
-hafnia-0.2.1.dist-info/METADATA,sha256=A1_OEYNslARBFGoYBPm7_-3YivfUeA8adwUUbsM3UsY,19040
-hafnia-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hafnia-0.2.1.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
-hafnia-0.2.1.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
-hafnia-0.2.1.dist-info/RECORD,,
+hafnia-0.2.3.dist-info/METADATA,sha256=7ZwSgwrbliqkHYTy11Sg01su1z-aZiWSGs9Y6qHBQ4k,19040
+hafnia-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hafnia-0.2.3.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+hafnia-0.2.3.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+hafnia-0.2.3.dist-info/RECORD,,
hafnia/helper_testing.py DELETED
@@ -1,108 +0,0 @@
-from inspect import getmembers, isfunction, signature
-from pathlib import Path
-from types import FunctionType
-from typing import Any, Callable, Dict, Union, get_origin
-
-from hafnia import utils
-from hafnia.dataset.dataset_names import FILENAME_ANNOTATIONS_JSONL, DatasetVariant
-from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
-
-MICRO_DATASETS = {
-    "tiny-dataset": utils.PATH_DATASETS / "tiny-dataset",
-    "coco-2017": utils.PATH_DATASETS / "coco-2017",
-}
-
-
-def get_path_workspace() -> Path:
-    return Path(__file__).parents[2]
-
-
-def get_path_expected_images() -> Path:
-    return get_path_workspace() / "tests" / "data" / "expected_images"
-
-
-def get_path_test_data() -> Path:
-    return get_path_workspace() / "tests" / "data"
-
-
-def get_path_micro_hafnia_dataset_no_check() -> Path:
-    return get_path_test_data() / "micro_test_datasets"
-
-
-def get_path_micro_hafnia_dataset(dataset_name: str, force_update=False) -> Path:
-    import pytest
-
-    if dataset_name not in MICRO_DATASETS:
-        raise ValueError(f"Dataset name '{dataset_name}' is not recognized. Available options: {list(MICRO_DATASETS)}")
-    path_dataset = MICRO_DATASETS[dataset_name]
-
-    path_test_dataset = get_path_micro_hafnia_dataset_no_check() / dataset_name
-    path_test_dataset_annotations = path_test_dataset / FILENAME_ANNOTATIONS_JSONL
-    if path_test_dataset_annotations.exists() and not force_update:
-        return path_test_dataset
-
-    hafnia_dataset = HafniaDataset.from_path(path_dataset / DatasetVariant.SAMPLE.value)
-    hafnia_dataset = hafnia_dataset.select_samples(n_samples=3, seed=42)
-    hafnia_dataset.write(path_test_dataset)
-
-    if force_update:
-        pytest.fail(
-            "Sample image and metadata have been updated using 'force_update=True'. Set 'force_update=False' and rerun the test."
-        )
-    pytest.fail("Missing test sample image. Please rerun the test.")
-    return path_test_dataset
-
-
-def get_sample_micro_hafnia_dataset(dataset_name: str, force_update=False) -> Sample:
-    micro_dataset = get_micro_hafnia_dataset(dataset_name=dataset_name, force_update=force_update)
-    sample_dict = micro_dataset[0]
-    sample = Sample(**sample_dict)
-    return sample
-
-
-def get_micro_hafnia_dataset(dataset_name: str, force_update: bool = False) -> HafniaDataset:
-    path_dataset = get_path_micro_hafnia_dataset(dataset_name=dataset_name, force_update=force_update)
-    hafnia_dataset = HafniaDataset.from_path(path_dataset)
-    return hafnia_dataset
-
-
-def is_hafnia_configured() -> bool:
-    """
-    Check if Hafnia is configured by verifying if the API key is set.
-    """
-    from cli.config import Config
-
-    return Config().is_configured()
-
-
-def is_typing_type(annotation: Any) -> bool:
-    return get_origin(annotation) is not None
-
-
-def annotation_as_string(annotation: Union[type, str]) -> str:
-    """Convert type annotation to string."""
-    if isinstance(annotation, str):
-        return annotation.replace("'", "")
-    if is_typing_type(annotation):  # Is using typing types like List, Dict, etc.
-        return str(annotation).replace("typing.", "")
-    if hasattr(annotation, "__name__"):
-        return annotation.__name__
-    return str(annotation)
-
-
-def get_hafnia_functions_from_module(python_module) -> Dict[str, FunctionType]:
-    def dataset_is_first_arg(func: Callable) -> bool:
-        """
-        Check if the function has 'HafniaDataset' as the first parameter.
-        """
-        func_signature = signature(func)
-        params = func_signature.parameters
-        if len(params) == 0:
-            return False
-        first_argument_type = list(params.values())[0]
-
-        annotation_as_str = annotation_as_string(first_argument_type.annotation)
-        return annotation_as_str == "HafniaDataset"
-
-    functions = {func[0]: func[1] for func in getmembers(python_module, isfunction) if dataset_is_first_arg(func[1])}
-    return functions