hafnia 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/config.py +17 -4
- hafnia/data/factory.py +13 -10
- hafnia/dataset/dataset_names.py +2 -1
- hafnia/dataset/dataset_recipe/dataset_recipe.py +327 -0
- hafnia/dataset/dataset_recipe/recipe_transforms.py +53 -0
- hafnia/dataset/dataset_recipe/recipe_types.py +140 -0
- hafnia/dataset/hafnia_dataset.py +202 -31
- hafnia/dataset/operations/dataset_stats.py +15 -0
- hafnia/dataset/operations/dataset_transformations.py +82 -0
- hafnia/dataset/{table_transformations.py → operations/table_transformations.py} +1 -1
- hafnia/experiment/hafnia_logger.py +5 -5
- hafnia/helper_testing.py +48 -3
- hafnia/platform/datasets.py +26 -13
- hafnia/utils.py +20 -1
- hafnia/visualizations/image_visualizations.py +1 -1
- {hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/METADATA +17 -20
- {hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/RECORD +20 -16
- hafnia/dataset/dataset_transformation.py +0 -187
- {hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/WHEEL +0 -0
- {hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/entry_points.txt +0 -0
- {hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/licenses/LICENSE +0 -0
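The file list above is the whole migration story: the monolithic `hafnia/dataset/dataset_transformation.py` is removed (the full deletion appears at the bottom of this diff), new `dataset_recipe/` and `operations/` subpackages are added, and `table_transformations.py` moves under `operations/`. A hedged sketch of the import update downstream code would need (module paths are taken from the file list; whether 0.2.0 re-exported these names elsewhere is not visible in this diff):

```python
# 0.2.0 layout (old module locations):
# from hafnia.dataset import table_transformations
# from hafnia.dataset import dataset_transformation

# 0.2.1 layout, per the file list above:
from hafnia.dataset.operations import table_transformations
from hafnia.dataset.operations import dataset_transformations  # assumed new home of the removed helpers
```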
{hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.2.0
+Version: 0.2.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -147,22 +147,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
 creating splits, accessing samples, printing statistics, saving to and loading from disk.
 
 In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
-and `dataset.
+and `dataset.samples` with annotations as a polars DataFrame
 
 ```python
 # Annotations are stored in a polars DataFrame
-print(dataset.
+print(dataset.samples.head(2))
 shape: (2, 14)
-
-│
-│ ---
-│
-
-│
-│ ┆ …
-
-│ ┆ … ┆ ┆ ┆ ┆ .… ┆ ┆ ┆ 0… │
-└──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+│ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+│ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+│ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+│ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+│ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+└──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
 ```
 
 ```python
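Since `dataset.samples` is now documented as a plain polars DataFrame, ordinary polars expressions apply to it directly. A minimal sketch (assumption: `dataset` is a loaded `HafniaDataset`; `objects` is the `list[struct[11]]` column shown in the printout above):

```python
import polars as pl

# Count annotated objects per image; `objects` is a list column,
# so `.list.len()` gives the number of annotations per sample.
n_objects = dataset.samples.select(
    pl.col("sample_index"),
    pl.col("objects").list.len().alias("n_objects"),
)
print(n_objects.head())

# Keep only samples that contain at least one object annotation.
busy_frames = dataset.samples.filter(pl.col("objects").list.len() > 0)
```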
@@ -235,16 +233,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
 ```python
 rich.print(sample)
 Sample(
-
-    file_name='data/
-0000.png',
+    sample_index=120,
+    file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
     height=1080,
     width=1920,
-    split='
+    split='train',
     is_sample=True,
-
-
-    remote_path=
+    collection_index=None,
+    collection_id=None,
+    remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
     classifications=[
         Classification(
             class_name='Clear',
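The new `Sample` printout adds `sample_index`, `collection_index`/`collection_id`, an absolute local `file_name`, and a populated `remote_path`. A hedged access sketch (assumption: `sample` is obtained as in the README; the accessor used to fetch it is not shown in this diff):

```python
# Field names are taken from the rich.print output above.
print(sample.sample_index)   # 120
print(sample.split)          # 'train'
print(sample.remote_path)    # original s3:// location of the local file
for classification in sample.classifications:
    print(classification.class_name)  # e.g. 'Clear'
```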
{hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/RECORD

@@ -1,6 +1,6 @@
 cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cli/__main__.py,sha256=pKYvJOk0toACDMrgEeUYT3P5EQPFmXdMRIQuLZLd3dc,1603
-cli/config.py,sha256
+cli/config.py,sha256=-BTdljjC42hXHb1P0yewea9knzgSBFsb909qJ5DEkCo,5531
 cli/consts.py,sha256=sj0MRwbbCT2Yl77FPddck1VWkFxp7QY6I9l1o75j_aE,963
 cli/dataset_cmds.py,sha256=VUMhnHGYPtNNJUK9aobKTx2zpVzLex4gTMmyQXuzCVw,1623
 cli/experiment_cmds.py,sha256=L-k_ZJ4B7I4cA8OvHcheSwXM6nx9aTF9G7eKBzAcOzQ,1961
@@ -8,19 +8,23 @@ cli/profile_cmds.py,sha256=-HQcFgYI6Rqaefi0Nj-91KhiqPKUj7zOaiJWbHx_bac,3196
 cli/recipe_cmds.py,sha256=qnMfF-te47HXNkgyA0hm9X3etDQsqMnrVEGDCrzVjZU,1462
 cli/runc_cmds.py,sha256=QqhQe2sd7tK1Bl2aGfIWRyJjpP6F7Tducg7HULrHsZ4,4958
 hafnia/__init__.py,sha256=Zphq-cQoX95Z11zm4lkrU-YiAJxddR7IBfwDkxeHoDE,108
-hafnia/helper_testing.py,sha256=
+hafnia/helper_testing.py,sha256=GnaNhXdY81arjCT9M2RUAmvn2-aIzRqlCtbWwGbOIaY,3901
 hafnia/http.py,sha256=HoPB03IL6e-nglTrw1NGT6sDx1T8VNas5HjTT1QZHnU,3035
 hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
 hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
-hafnia/utils.py,sha256=
+hafnia/utils.py,sha256=aTZaeHldXn4Jx_AR2BYATxtLCRrBKBjjDFmpSZTSvV4,5138
 hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
-hafnia/data/factory.py,sha256=
+hafnia/data/factory.py,sha256=OY6l6c9UKk6OUDhG4Akb2VgcSaTRLHlbSndAe1HuW2U,813
 hafnia/dataset/dataset_helpers.py,sha256=WVCpbUfNbHy7MZJqJ3OyJF8k1hSObo3kScxpXT17Sj8,3510
-hafnia/dataset/dataset_names.py,sha256=
-hafnia/dataset/dataset_transformation.py,sha256=LyXt4LOQaZ4EkYspKlqp_W3IbHJxB1AxVNkuuShiKg0,7761
+hafnia/dataset/dataset_names.py,sha256=mp7A_TOqgoqHUEBCPC4ReKNJ93cxwQB451owoCqD6yM,2120
 hafnia/dataset/dataset_upload_helper.py,sha256=D1BGaeEar4McpUvXj4Yy8nk1tr12IEVhP_Ma47OoWmU,21150
-hafnia/dataset/hafnia_dataset.py,sha256=
-hafnia/dataset/
+hafnia/dataset/hafnia_dataset.py,sha256=4SJUq7pAqLkcFzgnOUUx8ERraE_sABctOAsONBJExME,27664
+hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=DbPLlmshF6DC98Cwko04XtBaXgSg966LZKR6JXD_9Sg,13632
+hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=wh1y2XyX0PwOwfuzJ3_17KKng2Rk0zLlgdfSHfS1SyM,1305
+hafnia/dataset/dataset_recipe/recipe_types.py,sha256=6LxfanhX9ihof1gGSonoC-56zSWsI8k2aS4Uw_QgXoM,5176
+hafnia/dataset/operations/dataset_stats.py,sha256=tSHPmkXt4WNgjf5-j3jIrsSy1Ajld3619AkUHaesXb4,445
+hafnia/dataset/operations/dataset_transformations.py,sha256=4ibC11upEtRGJgoFLv8lUnglv2xANZVfNdsvI1BMvfM,2960
+hafnia/dataset/operations/table_transformations.py,sha256=kCLbLRdiFSx1JG0IWtaKkhWcMtM7hy8zgm0Ehz0zO_g,7639
 hafnia/dataset/primitives/__init__.py,sha256=LAdTeK5GgmaF1se8f0Yj1lOTNqplXGL87kLeOnv1D8Q,627
 hafnia/dataset/primitives/bbox.py,sha256=HXYYy5BLNZwh-bO7aiAWg3z0OurUev8ISa-vYey8b8A,6055
 hafnia/dataset/primitives/bitmask.py,sha256=mq_wchMqGupJDc-a-mJh9uBO_mjHcXpLH49g591doAM,7619
@@ -31,16 +35,16 @@ hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h
 hafnia/dataset/primitives/segmentation.py,sha256=jUMjOmYr9j4An3YSCw5CJC1W8ihXAbus3CXaTOpc7Xw,1905
 hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
 hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
-hafnia/experiment/hafnia_logger.py,sha256=
+hafnia/experiment/hafnia_logger.py,sha256=dnV3VPzJK7DSeUh0g4Hk9w1g-eSXcVqJD9If0h2d2GE,6885
 hafnia/platform/__init__.py,sha256=zJsR6Hy_0iUcC9xL-lBnqR0mLfF4EUr_VXa_XQA7SlA,455
 hafnia/platform/builder.py,sha256=_g8ykQWETz5Y4Np9QU1a6wIzbbJwXCkbiOCA6JcF5Rc,5742
-hafnia/platform/datasets.py,sha256=
+hafnia/platform/datasets.py,sha256=J252hrejrBWUdS6hY4lRc9_SbYy7CMD92068lLHjPC8,6953
 hafnia/platform/download.py,sha256=oJzdxSIDTuw1an7maC6I7A5nZvDaZPhUkuAmyRwN9Kc,6843
 hafnia/platform/experiment.py,sha256=-nAfTmn1c8sE6pHDCTNZvWDTopkXndarJAPIGvsnk60,2389
 hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
-hafnia/visualizations/image_visualizations.py,sha256=
-hafnia-0.2.
-hafnia-0.2.
-hafnia-0.2.
-hafnia-0.2.
-hafnia-0.2.
+hafnia/visualizations/image_visualizations.py,sha256=RuFFj2fJCm9dxl2Lq0MumJHF81ZnX-IsDsTxm8ZFV9A,7313
+hafnia-0.2.1.dist-info/METADATA,sha256=A1_OEYNslARBFGoYBPm7_-3YivfUeA8adwUUbsM3UsY,19040
+hafnia-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hafnia-0.2.1.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+hafnia-0.2.1.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+hafnia-0.2.1.dist-info/RECORD,,
hafnia/dataset/dataset_transformation.py (deleted)

@@ -1,187 +0,0 @@
-import hashlib
-import shutil
-from pathlib import Path
-from random import Random
-from typing import TYPE_CHECKING, Callable, Dict
-
-import cv2
-import numpy as np
-import polars as pl
-from PIL import Image
-from tqdm import tqdm
-
-from hafnia.dataset import dataset_helpers
-from hafnia.dataset.dataset_names import ColumnName
-from hafnia.log import user_logger
-
-if TYPE_CHECKING:
-    from hafnia.dataset.hafnia_dataset import HafniaDataset
-
-
-### Image transformations ###
-class AnonymizeByPixelation:
-    def __init__(self, resize_factor: float = 0.10):
-        self.resize_factor = resize_factor
-
-    def __call__(self, frame: np.ndarray) -> np.ndarray:
-        org_size = frame.shape[:2]
-        frame = cv2.resize(frame, (0, 0), fx=self.resize_factor, fy=self.resize_factor)
-        frame = cv2.resize(frame, org_size[::-1], interpolation=cv2.INTER_NEAREST)
-        return frame
-
-
-def splits_by_ratios(dataset: "HafniaDataset", split_ratios: Dict[str, float], seed: int = 42) -> "HafniaDataset":
-    """
-    Divides the dataset into splits based on the provided ratios.
-
-    Example: Defining split ratios and applying the transformation
-
-    >>> dataset = HafniaDataset.read_from_path(Path("path/to/dataset"))
-    >>> split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
-    >>> dataset_with_splits = splits_by_ratios(dataset, split_ratios, seed=42)
-    Or use the function as a
-    >>> dataset_with_splits = dataset.splits_by_ratios(split_ratios, seed=42)
-    """
-    n_items = len(dataset)
-    split_name_column = dataset_helpers.create_split_name_list_from_ratios(
-        split_ratios=split_ratios, n_items=n_items, seed=seed
-    )
-    table = dataset.samples.with_columns(pl.Series(split_name_column).alias("split"))
-    return dataset.update_table(table)
-
-
-def divide_split_into_multiple_splits(
-    dataset: "HafniaDataset",
-    divide_split_name: str,
-    split_ratios: Dict[str, float],
-) -> "HafniaDataset":
-    """
-    Divides a dataset split ('divide_split_name') into multiple splits based on the provided split
-    ratios ('split_ratios'). This is especially useful for some open datasets where they have only provide
-    two splits or only provide annotations for two splits. This function allows you to create additional
-    splits based on the provided ratios.
-
-    Example: Defining split ratios and applying the transformation
-    >>> dataset = HafniaDataset.read_from_path(Path("path/to/dataset"))
-    >>> divide_split_name = SplitName.TEST
-    >>> split_ratios = {SplitName.TEST: 0.8, SplitName.VAL: 0.2}
-    >>> dataset_with_splits = divide_split_into_multiple_splits(dataset, divide_split_name, split_ratios)
-    """
-    dataset_split_to_be_divided = dataset.create_split_dataset(split_name=divide_split_name)
-    if len(dataset_split_to_be_divided) == 0:
-        split_counts = dict(dataset.samples.select(pl.col(ColumnName.SPLIT).value_counts()).iter_rows())
-        raise ValueError(
-            f"No samples in the '{divide_split_name}' split to divide into multiple splits. {split_counts=}"
-        )
-    assert len(dataset_split_to_be_divided) > 0, f"No samples in the '{divide_split_name}' split!"
-    dataset_split_to_be_divided = dataset_split_to_be_divided.split_by_ratios(split_ratios=split_ratios, seed=42)
-
-    remaining_data = dataset.samples.filter(pl.col(ColumnName.SPLIT).is_in([divide_split_name]).not_())
-    new_table = pl.concat([remaining_data, dataset_split_to_be_divided.samples], how="vertical")
-    dataset_new = dataset.update_table(new_table)
-    return dataset_new
-
-
-def shuffle_dataset(dataset: "HafniaDataset", seed: int = 42) -> "HafniaDataset":
-    table = dataset.samples.sample(n=len(dataset), with_replacement=False, seed=seed, shuffle=True)
-    return dataset.update_table(table)
-
-
-def sample(dataset: "HafniaDataset", n_samples: int, shuffle: bool = True, seed: int = 42) -> "HafniaDataset":
-    table = dataset.samples.sample(n=n_samples, with_replacement=False, seed=seed, shuffle=shuffle)
-    return dataset.update_table(table)
-
-
-def define_sample_set_by_size(dataset: "HafniaDataset", n_samples: int, seed: int = 42) -> "HafniaDataset":
-    is_sample_indices = Random(seed).sample(range(len(dataset)), n_samples)
-    is_sample_column = [False for _ in range(len(dataset))]
-    for idx in is_sample_indices:
-        is_sample_column[idx] = True
-
-    table = dataset.samples.with_columns(pl.Series(is_sample_column).alias("is_sample"))
-    return dataset.update_table(table)
-
-
-def transform_images(
-    dataset: "HafniaDataset",
-    transform: Callable[[np.ndarray], np.ndarray],
-    path_output: Path,
-) -> "HafniaDataset":
-    new_paths = []
-    path_image_folder = path_output / "data"
-    path_image_folder.mkdir(parents=True, exist_ok=True)
-
-    for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
-        org_path = Path(org_path)
-        if not org_path.exists():
-            raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
-
-        image = np.array(Image.open(org_path))
-        image_transformed = transform(image)
-        new_path = dataset_helpers.save_image_with_hash_name(image_transformed, path_image_folder)
-
-        if not new_path.exists():
-            raise FileNotFoundError(f"Transformed file {new_path} does not exist in the dataset.")
-        new_paths.append(str(new_path))
-
-    table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
-    return dataset.update_table(table)
-
-
-def rename_to_unique_image_names(dataset: "HafniaDataset", path_output: Path) -> "HafniaDataset":
-    user_logger.info(f"Copy images to have unique filenames. New path is '{path_output}'")
-    shutil.rmtree(path_output, ignore_errors=True)  # Remove the output folder if it exists
-    new_paths = []
-    for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="- Rename/copy images"):
-        org_path = Path(org_path)
-        if not org_path.exists():
-            raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
-
-        hash_name = hashlib.md5(str(org_path).encode()).hexdigest()[
-            :6
-        ]  # Generate a unique name based on the original file name
-        new_path = path_output / "data" / f"{hash_name}_{org_path.name}"
-        if not new_path.parent.exists():
-            new_path.parent.mkdir(parents=True, exist_ok=True)
-
-        shutil.copyfile(org_path, new_path)  # Copy the original file to the new path
-        new_paths.append(str(new_path))
-
-    table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
-    return dataset.update_table(table)
-
-
-### Hafnia Dataset Transformations ###
-class SplitsByRatios:
-    def __init__(self, split_ratios: dict, seed: int = 42):
-        self.split_ratios = split_ratios
-        self.seed = seed
-
-    def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-        return splits_by_ratios(dataset, self.split_ratios, self.seed)
-
-
-class ShuffleDataset:
-    def __init__(self, seed: int = 42):
-        self.seed = seed
-
-    def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-        return shuffle_dataset(dataset, self.seed)
-
-
-class SampleSetBySize:
-    def __init__(self, n_samples: int, seed: int = 42):
-        self.n_samples = n_samples
-        self.seed = seed
-
-    def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-        return define_sample_set_by_size(dataset, self.n_samples, self.seed)
-
-
-class TransformImages:
-    def __init__(self, transform: Callable[[np.ndarray], np.ndarray], path_output: Path):
-        self.transform = transform
-        self.path_output = path_output
-
-    def __call__(self, dataset: "HafniaDataset") -> "HafniaDataset":
-        return transform_images(dataset, self.transform, self.path_output)
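Nothing in the wheel suggests these helpers disappeared outright: the RECORD hunk above adds `hafnia/dataset/operations/dataset_transformations.py` and `hafnia/dataset/dataset_recipe/recipe_transforms.py`, so the split/shuffle/sample logic most likely moved there. The deleted docstrings already show the method-style entry points; a sketch built only from those docstrings (0.2.1 method names are assumed unchanged):

```python
from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset

# Both calls follow the docstring examples in the removed module;
# SplitName constants are replaced with plain strings here.
dataset = HafniaDataset.read_from_path(Path("path/to/dataset"))
dataset_with_splits = dataset.splits_by_ratios(
    split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}, seed=42
)
```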
{hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/WHEEL: file without changes
{hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/entry_points.txt: file without changes
{hafnia-0.2.0.dist-info → hafnia-0.2.1.dist-info}/licenses/LICENSE: file without changes