hafnia-0.2.0-py3-none-any.whl → hafnia-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.0
+ Version: 0.2.1
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -147,22 +147,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
  creating splits, accessing samples, printing statistics, saving to and loading from disk.

  In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
- and `dataset.table` with annotations as a polars DataFrame
+ and `dataset.samples` with annotations as a polars DataFrame

  ```python
  # Annotations are stored in a polars DataFrame
- print(dataset.table.head(2))
+ print(dataset.samples.head(2))
  shape: (2, 14)
- ┌──────────┬────────────────────────────────┬────────┬───────┬───┬───────────────────────────────┬──────────┬──────────┬───────────────────────────────┐
- │ image_id ┆ file_name                      ┆ height ┆ width ┆ … ┆ objects                       ┆ bitmasks ┆ polygons ┆ meta                          │
- │ ---      ┆ ---                            ┆ ---    ┆ ---   ┆   ┆ ---                           ┆ ---      ┆ ---      ┆ ---                           │
- │ str      ┆ str                            ┆ i64    ┆ i64   ┆   ┆ list[struct[12]]              ┆ null     ┆ null     ┆ struct[5]                     │
- ╞══════════╪════════════════════════════════╪════════╪═══════╪═══╪═══════════════════════════════╪══════════╪══════════╪═══════════════════════════════╡
- │ 7800     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23," ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ V…                            ┆          ┆          ┆ 0…                            │
- │ 7900     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0 ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ .…                            ┆          ┆          ┆ 0…                            │
- └──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+ ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+ │ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+ │ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+ │ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+ ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+ │ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ │ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ └──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
  ```

  ```python
@@ -235,16 +233,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
  ```python
  rich.print(sample)
  Sample(
-     image_id='7800',
-     file_name='data/video_0026a86b-2f43-49f2-a17c-59244d10a585_1fps_mp4_frame_0
- 0000.png',
+     sample_index=120,
+     file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
      height=1080,
      width=1920,
-     split='test',
+     split='train',
      is_sample=True,
-     frame_number=None,
-     video_name=None,
-     remote_path=None,
+     collection_index=None,
+     collection_id=None,
+     remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
      classifications=[
          Classification(
              class_name='Clear',
@@ -1,6 +1,6 @@
  cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cli/__main__.py,sha256=pKYvJOk0toACDMrgEeUYT3P5EQPFmXdMRIQuLZLd3dc,1603
- cli/config.py,sha256=R9w0NKIOtIxRKNs7ieeUrIKwRkrTlK5PqOVjc5VYljE,4923
+ cli/config.py,sha256=-BTdljjC42hXHb1P0yewea9knzgSBFsb909qJ5DEkCo,5531
  cli/consts.py,sha256=sj0MRwbbCT2Yl77FPddck1VWkFxp7QY6I9l1o75j_aE,963
  cli/dataset_cmds.py,sha256=VUMhnHGYPtNNJUK9aobKTx2zpVzLex4gTMmyQXuzCVw,1623
  cli/experiment_cmds.py,sha256=L-k_ZJ4B7I4cA8OvHcheSwXM6nx9aTF9G7eKBzAcOzQ,1961
@@ -8,19 +8,23 @@ cli/profile_cmds.py,sha256=-HQcFgYI6Rqaefi0Nj-91KhiqPKUj7zOaiJWbHx_bac,3196
  cli/recipe_cmds.py,sha256=qnMfF-te47HXNkgyA0hm9X3etDQsqMnrVEGDCrzVjZU,1462
  cli/runc_cmds.py,sha256=QqhQe2sd7tK1Bl2aGfIWRyJjpP6F7Tducg7HULrHsZ4,4958
  hafnia/__init__.py,sha256=Zphq-cQoX95Z11zm4lkrU-YiAJxddR7IBfwDkxeHoDE,108
- hafnia/helper_testing.py,sha256=4pIG5sA1FycbZRFmfG7CyhZMdQgWob86JCzX8ALuHfQ,2344
+ hafnia/helper_testing.py,sha256=GnaNhXdY81arjCT9M2RUAmvn2-aIzRqlCtbWwGbOIaY,3901
  hafnia/http.py,sha256=HoPB03IL6e-nglTrw1NGT6sDx1T8VNas5HjTT1QZHnU,3035
  hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
  hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
- hafnia/utils.py,sha256=DFMALTCOxGsTWZ6qIVbm55Wr8xxwukj87sb0oYB6w64,4660
+ hafnia/utils.py,sha256=aTZaeHldXn4Jx_AR2BYATxtLCRrBKBjjDFmpSZTSvV4,5138
  hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
- hafnia/data/factory.py,sha256=RC36LSPCULJqHn93vGeadzNXoItHhsYZA8YxURY1nd0,717
+ hafnia/data/factory.py,sha256=OY6l6c9UKk6OUDhG4Akb2VgcSaTRLHlbSndAe1HuW2U,813
  hafnia/dataset/dataset_helpers.py,sha256=WVCpbUfNbHy7MZJqJ3OyJF8k1hSObo3kScxpXT17Sj8,3510
- hafnia/dataset/dataset_names.py,sha256=DhnFDlaq-PDNeGpvE1pFmhZ2M7HKeQNyRo6PwuchVe8,2074
- hafnia/dataset/dataset_transformation.py,sha256=LyXt4LOQaZ4EkYspKlqp_W3IbHJxB1AxVNkuuShiKg0,7761
+ hafnia/dataset/dataset_names.py,sha256=mp7A_TOqgoqHUEBCPC4ReKNJ93cxwQB451owoCqD6yM,2120
  hafnia/dataset/dataset_upload_helper.py,sha256=D1BGaeEar4McpUvXj4Yy8nk1tr12IEVhP_Ma47OoWmU,21150
- hafnia/dataset/hafnia_dataset.py,sha256=IJfPQivzM0wAZgGP7fx7-Mpoe8mhrEpLzBJ9JP3N6D4,19970
- hafnia/dataset/table_transformations.py,sha256=WuAbEXaNKyacmY4FShNkUoG3-rVC5pTKntO8Z7CJNoI,7628
+ hafnia/dataset/hafnia_dataset.py,sha256=4SJUq7pAqLkcFzgnOUUx8ERraE_sABctOAsONBJExME,27664
+ hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=DbPLlmshF6DC98Cwko04XtBaXgSg966LZKR6JXD_9Sg,13632
+ hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=wh1y2XyX0PwOwfuzJ3_17KKng2Rk0zLlgdfSHfS1SyM,1305
+ hafnia/dataset/dataset_recipe/recipe_types.py,sha256=6LxfanhX9ihof1gGSonoC-56zSWsI8k2aS4Uw_QgXoM,5176
+ hafnia/dataset/operations/dataset_stats.py,sha256=tSHPmkXt4WNgjf5-j3jIrsSy1Ajld3619AkUHaesXb4,445
+ hafnia/dataset/operations/dataset_transformations.py,sha256=4ibC11upEtRGJgoFLv8lUnglv2xANZVfNdsvI1BMvfM,2960
+ hafnia/dataset/operations/table_transformations.py,sha256=kCLbLRdiFSx1JG0IWtaKkhWcMtM7hy8zgm0Ehz0zO_g,7639
  hafnia/dataset/primitives/__init__.py,sha256=LAdTeK5GgmaF1se8f0Yj1lOTNqplXGL87kLeOnv1D8Q,627
  hafnia/dataset/primitives/bbox.py,sha256=HXYYy5BLNZwh-bO7aiAWg3z0OurUev8ISa-vYey8b8A,6055
  hafnia/dataset/primitives/bitmask.py,sha256=mq_wchMqGupJDc-a-mJh9uBO_mjHcXpLH49g591doAM,7619
@@ -31,16 +35,16 @@ hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h
  hafnia/dataset/primitives/segmentation.py,sha256=jUMjOmYr9j4An3YSCw5CJC1W8ihXAbus3CXaTOpc7Xw,1905
  hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
  hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
- hafnia/experiment/hafnia_logger.py,sha256=TNGsb5aP_FE0t2NQofik6chK6dDWckYJMikiGS9GAd0,6855
+ hafnia/experiment/hafnia_logger.py,sha256=dnV3VPzJK7DSeUh0g4Hk9w1g-eSXcVqJD9If0h2d2GE,6885
  hafnia/platform/__init__.py,sha256=zJsR6Hy_0iUcC9xL-lBnqR0mLfF4EUr_VXa_XQA7SlA,455
  hafnia/platform/builder.py,sha256=_g8ykQWETz5Y4Np9QU1a6wIzbbJwXCkbiOCA6JcF5Rc,5742
- hafnia/platform/datasets.py,sha256=Sc2vkT3QYMwaSWp2Pf2E2BLW4YG40Tqdhn6vgkTgpQk,6643
+ hafnia/platform/datasets.py,sha256=J252hrejrBWUdS6hY4lRc9_SbYy7CMD92068lLHjPC8,6953
  hafnia/platform/download.py,sha256=oJzdxSIDTuw1an7maC6I7A5nZvDaZPhUkuAmyRwN9Kc,6843
  hafnia/platform/experiment.py,sha256=-nAfTmn1c8sE6pHDCTNZvWDTopkXndarJAPIGvsnk60,2389
  hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
- hafnia/visualizations/image_visualizations.py,sha256=mNlRfG-ooJw0vqKyNCuAZOBpjiFuqJH3mjd6spInJs4,7318
- hafnia-0.2.0.dist-info/METADATA,sha256=sYxIR2FnabM4KApFnNw_zKHPEDjQ9abU3dpon4RTXOY,19153
- hafnia-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- hafnia-0.2.0.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
- hafnia-0.2.0.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
- hafnia-0.2.0.dist-info/RECORD,,
+ hafnia/visualizations/image_visualizations.py,sha256=RuFFj2fJCm9dxl2Lq0MumJHF81ZnX-IsDsTxm8ZFV9A,7313
+ hafnia-0.2.1.dist-info/METADATA,sha256=A1_OEYNslARBFGoYBPm7_-3YivfUeA8adwUUbsM3UsY,19040
+ hafnia-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ hafnia-0.2.1.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+ hafnia-0.2.1.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+ hafnia-0.2.1.dist-info/RECORD,,
@@ -1,187 +0,0 @@
- import hashlib
- import shutil
- from pathlib import Path
- from random import Random
- from typing import TYPE_CHECKING, Callable, Dict
-
- import cv2
- import numpy as np
- import polars as pl
- from PIL import Image
- from tqdm import tqdm
-
- from hafnia.dataset import dataset_helpers
- from hafnia.dataset.dataset_names import ColumnName
- from hafnia.log import user_logger
-
- if TYPE_CHECKING:
-     from hafnia.dataset.hafnia_dataset import HafniaDataset
-
-
- ### Image transformations ###
- class AnonymizeByPixelation:
-     def __init__(self, resize_factor: float = 0.10):
-         self.resize_factor = resize_factor
-
-     def __call__(self, frame: np.ndarray) -> np.ndarray:
-         org_size = frame.shape[:2]
-         frame = cv2.resize(frame, (0, 0), fx=self.resize_factor, fy=self.resize_factor)
-         frame = cv2.resize(frame, org_size[::-1], interpolation=cv2.INTER_NEAREST)
-         return frame
-
-
- def splits_by_ratios(dataset: "HafniaDataset", split_ratios: Dict[str, float], seed: int = 42) -> "HafniaDataset":
-     """
-     Divides the dataset into splits based on the provided ratios.
-
-     Example: Defining split ratios and applying the transformation
-
-     >>> dataset = HafniaDataset.read_from_path(Path("path/to/dataset"))
-     >>> split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
-     >>> dataset_with_splits = splits_by_ratios(dataset, split_ratios, seed=42)
-     Or use the function as a method on the dataset:
-     >>> dataset_with_splits = dataset.splits_by_ratios(split_ratios, seed=42)
-     """
-     n_items = len(dataset)
-     split_name_column = dataset_helpers.create_split_name_list_from_ratios(
-         split_ratios=split_ratios, n_items=n_items, seed=seed
-     )
-     table = dataset.samples.with_columns(pl.Series(split_name_column).alias("split"))
-     return dataset.update_table(table)
-
-
- def divide_split_into_multiple_splits(
-     dataset: "HafniaDataset",
-     divide_split_name: str,
-     split_ratios: Dict[str, float],
- ) -> "HafniaDataset":
-     """
-     Divides a dataset split ('divide_split_name') into multiple splits based on the provided split
-     ratios ('split_ratios'). This is especially useful for open datasets that provide only two
-     splits or provide annotations for only two splits. This function allows you to create
-     additional splits based on the provided ratios.
-
-     Example: Defining split ratios and applying the transformation
-     >>> dataset = HafniaDataset.read_from_path(Path("path/to/dataset"))
-     >>> divide_split_name = SplitName.TEST
-     >>> split_ratios = {SplitName.TEST: 0.8, SplitName.VAL: 0.2}
-     >>> dataset_with_splits = divide_split_into_multiple_splits(dataset, divide_split_name, split_ratios)
-     """
-     dataset_split_to_be_divided = dataset.create_split_dataset(split_name=divide_split_name)
-     if len(dataset_split_to_be_divided) == 0:
-         split_counts = dict(dataset.samples.select(pl.col(ColumnName.SPLIT).value_counts()).iter_rows())
-         raise ValueError(
-             f"No samples in the '{divide_split_name}' split to divide into multiple splits. {split_counts=}"
-         )
-     assert len(dataset_split_to_be_divided) > 0, f"No samples in the '{divide_split_name}' split!"
-     dataset_split_to_be_divided = dataset_split_to_be_divided.splits_by_ratios(split_ratios=split_ratios, seed=42)
-
-     remaining_data = dataset.samples.filter(pl.col(ColumnName.SPLIT).is_in([divide_split_name]).not_())
-     new_table = pl.concat([remaining_data, dataset_split_to_be_divided.samples], how="vertical")
-     dataset_new = dataset.update_table(new_table)
-     return dataset_new
-
-
- def shuffle_dataset(dataset: "HafniaDataset", seed: int = 42) -> "HafniaDataset":
-     table = dataset.samples.sample(n=len(dataset), with_replacement=False, seed=seed, shuffle=True)
-     return dataset.update_table(table)
-
-
- def sample(dataset: "HafniaDataset", n_samples: int, shuffle: bool = True, seed: int = 42) -> "HafniaDataset":
-     table = dataset.samples.sample(n=n_samples, with_replacement=False, seed=seed, shuffle=shuffle)
-     return dataset.update_table(table)
-
-
- def define_sample_set_by_size(dataset: "HafniaDataset", n_samples: int, seed: int = 42) -> "HafniaDataset":
-     is_sample_indices = Random(seed).sample(range(len(dataset)), n_samples)
-     is_sample_column = [False for _ in range(len(dataset))]
-     for idx in is_sample_indices:
-         is_sample_column[idx] = True
-
-     table = dataset.samples.with_columns(pl.Series(is_sample_column).alias("is_sample"))
-     return dataset.update_table(table)
-
-
- def transform_images(
-     dataset: "HafniaDataset",
-     transform: Callable[[np.ndarray], np.ndarray],
-     path_output: Path,
- ) -> "HafniaDataset":
-     new_paths = []
-     path_image_folder = path_output / "data"
-     path_image_folder.mkdir(parents=True, exist_ok=True)
-
-     for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
-         org_path = Path(org_path)
-         if not org_path.exists():
-             raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
-
-         image = np.array(Image.open(org_path))
-         image_transformed = transform(image)
-         new_path = dataset_helpers.save_image_with_hash_name(image_transformed, path_image_folder)
-
-         if not new_path.exists():
-             raise FileNotFoundError(f"Transformed file {new_path} does not exist in the dataset.")
-         new_paths.append(str(new_path))
-
-     table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
-     return dataset.update_table(table)
-
-
- def rename_to_unique_image_names(dataset: "HafniaDataset", path_output: Path) -> "HafniaDataset":
-     user_logger.info(f"Copy images to have unique filenames. New path is '{path_output}'")
-     shutil.rmtree(path_output, ignore_errors=True)  # Remove the output folder if it exists
-     new_paths = []
-     for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="- Rename/copy images"):
-         org_path = Path(org_path)
-         if not org_path.exists():
-             raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
-
-         # Generate a unique name based on the original file path
-         hash_name = hashlib.md5(str(org_path).encode()).hexdigest()[:6]
-         new_path = path_output / "data" / f"{hash_name}_{org_path.name}"
-         if not new_path.parent.exists():
-             new_path.parent.mkdir(parents=True, exist_ok=True)
-
-         shutil.copyfile(org_path, new_path)  # Copy the original file to the new path
-         new_paths.append(str(new_path))
-
-     table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
-     return dataset.update_table(table)
-
-
- ### Hafnia Dataset Transformations ###
- class SplitsByRatios:
-     def __init__(self, split_ratios: dict, seed: int = 42):
-         self.split_ratios = split_ratios
-         self.seed = seed
-
-     def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-         return splits_by_ratios(dataset, self.split_ratios, self.seed)
-
-
- class ShuffleDataset:
-     def __init__(self, seed: int = 42):
-         self.seed = seed
-
-     def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-         return shuffle_dataset(dataset, self.seed)
-
-
- class SampleSetBySize:
-     def __init__(self, n_samples: int, seed: int = 42):
-         self.n_samples = n_samples
-         self.seed = seed
-
-     def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-         return define_sample_set_by_size(dataset, self.n_samples, self.seed)
-
-
- class TransformImages:
-     def __init__(self, transform: Callable[[np.ndarray], np.ndarray], path_output: Path):
-         self.transform = transform
-         self.path_output = path_output
-
-     def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-         return transform_images(dataset, self.transform, self.path_output)
File without changes