datago 2025.6.2__tar.gz → 2025.6.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datago-2025.6.2 → datago-2025.6.4}/Cargo.lock +8 -7
- {datago-2025.6.2 → datago-2025.6.4}/Cargo.toml +16 -1
- {datago-2025.6.2 → datago-2025.6.4}/PKG-INFO +1 -1
- datago-2025.6.4/python/test_datago_client.py +444 -0
- datago-2025.6.4/python/test_datago_edge_cases.py +428 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/client.rs +2 -0
- datago-2025.6.4/src/generator_files.rs +472 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/image_processing.rs +354 -0
- datago-2025.6.4/src/structs.rs +406 -0
- datago-2025.6.4/src/worker_files.rs +460 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/worker_http.rs +7 -1
- datago-2025.6.2/src/generator_files.rs +0 -169
- datago-2025.6.2/src/structs.rs +0 -177
- datago-2025.6.2/src/worker_files.rs +0 -158
- {datago-2025.6.2 → datago-2025.6.4}/.github/workflows/ci-cd.yml +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/.github/workflows/rust.yml +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/.gitignore +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/.pre-commit-config.yaml +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/LICENSE +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/README.md +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/assets/447175851-2277afcb-8abf-4d17-b2db-dae27c6056d0.png +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/pyproject.toml +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/benchmark_db.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/benchmark_filesystem.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/benchmark_webdataset.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/dataset.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/raw_types.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/test_datago_db.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/python/test_datago_filesystem.py +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/requirements-tests.txt +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/requirements.txt +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/generator_http.rs +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/generator_wds.rs +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/lib.rs +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/main.rs +0 -0
- {datago-2025.6.2 → datago-2025.6.4}/src/worker_wds.rs +0 -0
|
@@ -500,7 +500,7 @@ dependencies = [
|
|
|
500
500
|
|
|
501
501
|
[[package]]
|
|
502
502
|
name = "datago"
|
|
503
|
-
version = "2025.6.
|
|
503
|
+
version = "2025.6.4"
|
|
504
504
|
dependencies = [
|
|
505
505
|
"async-compression",
|
|
506
506
|
"async-tar",
|
|
@@ -525,6 +525,7 @@ dependencies = [
|
|
|
525
525
|
"serde",
|
|
526
526
|
"serde_json",
|
|
527
527
|
"tar",
|
|
528
|
+
"tempfile",
|
|
528
529
|
"threadpool",
|
|
529
530
|
"tokio",
|
|
530
531
|
"tokio-util",
|
|
@@ -1771,7 +1772,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
|
|
|
1771
1772
|
dependencies = [
|
|
1772
1773
|
"rand_chacha 0.9.0",
|
|
1773
1774
|
"rand_core 0.9.3",
|
|
1774
|
-
"zerocopy 0.8.
|
|
1775
|
+
"zerocopy 0.8.25",
|
|
1775
1776
|
]
|
|
1776
1777
|
|
|
1777
1778
|
[[package]]
|
|
@@ -2981,11 +2982,11 @@ dependencies = [
|
|
|
2981
2982
|
|
|
2982
2983
|
[[package]]
|
|
2983
2984
|
name = "zerocopy"
|
|
2984
|
-
version = "0.8.
|
|
2985
|
+
version = "0.8.25"
|
|
2985
2986
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2986
|
-
checksum = "
|
|
2987
|
+
checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
|
|
2987
2988
|
dependencies = [
|
|
2988
|
-
"zerocopy-derive 0.8.
|
|
2989
|
+
"zerocopy-derive 0.8.25",
|
|
2989
2990
|
]
|
|
2990
2991
|
|
|
2991
2992
|
[[package]]
|
|
@@ -3001,9 +3002,9 @@ dependencies = [
|
|
|
3001
3002
|
|
|
3002
3003
|
[[package]]
|
|
3003
3004
|
name = "zerocopy-derive"
|
|
3004
|
-
version = "0.8.
|
|
3005
|
+
version = "0.8.25"
|
|
3005
3006
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3006
|
-
checksum = "
|
|
3007
|
+
checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
|
|
3007
3008
|
dependencies = [
|
|
3008
3009
|
"proc-macro2",
|
|
3009
3010
|
"quote",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "datago"
|
|
3
3
|
edition = "2021"
|
|
4
|
-
version = "2025.6.
|
|
4
|
+
version = "2025.6.4"
|
|
5
5
|
|
|
6
6
|
[lib]
|
|
7
7
|
# exposed by pyo3
|
|
@@ -43,8 +43,23 @@ bracoxide = "0.1.6"
|
|
|
43
43
|
infer = "0.16.0"
|
|
44
44
|
fast_image_resize = { version ="5.1.3", features=["image"]}
|
|
45
45
|
|
|
46
|
+
[dev-dependencies]
|
|
47
|
+
tempfile = "3.13.0"
|
|
48
|
+
|
|
46
49
|
[profile.release]
|
|
47
50
|
opt-level = 3
|
|
48
51
|
lto = "fat"
|
|
49
52
|
codegen-units = 1
|
|
50
53
|
panic = "abort"
|
|
54
|
+
|
|
55
|
+
[target.x86_64-apple-darwin]
|
|
56
|
+
rustflags = [
|
|
57
|
+
"-C", "link-arg=-undefined",
|
|
58
|
+
"-C", "link-arg=dynamic_lookup",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
[target.aarch64-apple-darwin]
|
|
62
|
+
rustflags = [
|
|
63
|
+
"-C", "link-arg=-undefined",
|
|
64
|
+
"-C", "link-arg=dynamic_lookup",
|
|
65
|
+
]
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import tempfile
|
|
3
|
+
import os
|
|
4
|
+
from datago import DatagoClient, initialize_logging
|
|
5
|
+
from PIL import Image
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_test_images(directory, count=5):
|
|
9
|
+
"""Helper function to create test images in a directory."""
|
|
10
|
+
image_paths = []
|
|
11
|
+
for i in range(count):
|
|
12
|
+
img = Image.new(
|
|
13
|
+
"RGB", (100, 100), color=(i * 50 % 255, (i * 100) % 255, (i * 150) % 255)
|
|
14
|
+
)
|
|
15
|
+
path = os.path.join(directory, f"test_image_{i}.png")
|
|
16
|
+
img.save(path)
|
|
17
|
+
image_paths.append(path)
|
|
18
|
+
return image_paths
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestDatagoClient:
|
|
22
|
+
"""Test cases for DatagoClient functionality."""
|
|
23
|
+
|
|
24
|
+
def test_initialize_logging(self):
|
|
25
|
+
"""Test the initialize_logging function."""
|
|
26
|
+
# Should return True on first call
|
|
27
|
+
result = initialize_logging("info")
|
|
28
|
+
assert isinstance(result, bool)
|
|
29
|
+
|
|
30
|
+
# Test with None parameter
|
|
31
|
+
result = initialize_logging(None)
|
|
32
|
+
assert isinstance(result, bool)
|
|
33
|
+
|
|
34
|
+
def test_client_instantiation_file_source(self):
|
|
35
|
+
"""Test creating a client with file source configuration."""
|
|
36
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
37
|
+
create_test_images(tmpdir, 3)
|
|
38
|
+
|
|
39
|
+
config = {
|
|
40
|
+
"source_type": "file",
|
|
41
|
+
"source_config": {
|
|
42
|
+
"root_path": tmpdir,
|
|
43
|
+
"rank": 0,
|
|
44
|
+
"world_size": 1,
|
|
45
|
+
"random_sampling": False,
|
|
46
|
+
},
|
|
47
|
+
"limit": 3,
|
|
48
|
+
"samples_buffer_size": 10,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
client = DatagoClient(json.dumps(config))
|
|
52
|
+
assert client is not None
|
|
53
|
+
|
|
54
|
+
# We panic out at the moment if the config is invalid, so this test is commented out.
|
|
55
|
+
# Uncomment this test if you want to handle invalid configurations gracefully.
|
|
56
|
+
# def test_client_instantiation_invalid_config(self):
|
|
57
|
+
# """Test that invalid configuration raises an error."""
|
|
58
|
+
# invalid_config = '{"invalid": "config"}'
|
|
59
|
+
|
|
60
|
+
# with pytest.raises((ValueError, RuntimeError)):
|
|
61
|
+
# DatagoClient(invalid_config)
|
|
62
|
+
|
|
63
|
+
def test_client_start_stop_file_source(self):
|
|
64
|
+
"""Test starting and stopping client with file source."""
|
|
65
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
66
|
+
create_test_images(tmpdir, 3)
|
|
67
|
+
|
|
68
|
+
config = {
|
|
69
|
+
"source_type": "file",
|
|
70
|
+
"source_config": {
|
|
71
|
+
"root_path": tmpdir,
|
|
72
|
+
"rank": 0,
|
|
73
|
+
"world_size": 1,
|
|
74
|
+
"random_sampling": False,
|
|
75
|
+
},
|
|
76
|
+
"limit": 3,
|
|
77
|
+
"samples_buffer_size": 10,
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
client = DatagoClient(json.dumps(config))
|
|
81
|
+
client.start()
|
|
82
|
+
client.stop()
|
|
83
|
+
|
|
84
|
+
def test_get_sample_file_source(self):
|
|
85
|
+
"""Test getting samples from file source."""
|
|
86
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
87
|
+
create_test_images(tmpdir, 5)
|
|
88
|
+
|
|
89
|
+
config = {
|
|
90
|
+
"source_type": "file",
|
|
91
|
+
"source_config": {
|
|
92
|
+
"root_path": tmpdir,
|
|
93
|
+
"rank": 0,
|
|
94
|
+
"world_size": 1,
|
|
95
|
+
"random_sampling": False,
|
|
96
|
+
},
|
|
97
|
+
"limit": 3,
|
|
98
|
+
"samples_buffer_size": 10,
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
client = DatagoClient(json.dumps(config))
|
|
102
|
+
|
|
103
|
+
samples_received = []
|
|
104
|
+
for _ in range(3):
|
|
105
|
+
sample = client.get_sample()
|
|
106
|
+
if sample:
|
|
107
|
+
samples_received.append(sample)
|
|
108
|
+
else:
|
|
109
|
+
break
|
|
110
|
+
|
|
111
|
+
assert len(samples_received) <= 3
|
|
112
|
+
|
|
113
|
+
for sample in samples_received:
|
|
114
|
+
assert sample.id != ""
|
|
115
|
+
assert sample.source == "filesystem"
|
|
116
|
+
assert sample.image.width > 0
|
|
117
|
+
assert sample.image.height > 0
|
|
118
|
+
assert len(sample.image.data) > 0
|
|
119
|
+
|
|
120
|
+
def test_client_with_image_transformations(self):
|
|
121
|
+
"""Test client with image transformation configuration."""
|
|
122
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
123
|
+
create_test_images(tmpdir, 3)
|
|
124
|
+
|
|
125
|
+
config = {
|
|
126
|
+
"source_type": "file",
|
|
127
|
+
"source_config": {
|
|
128
|
+
"root_path": tmpdir,
|
|
129
|
+
"rank": 0,
|
|
130
|
+
"world_size": 1,
|
|
131
|
+
"random_sampling": False,
|
|
132
|
+
},
|
|
133
|
+
"image_config": {
|
|
134
|
+
"crop_and_resize": True,
|
|
135
|
+
"default_image_size": 64,
|
|
136
|
+
"downsampling_ratio": 16,
|
|
137
|
+
"min_aspect_ratio": 0.5,
|
|
138
|
+
"max_aspect_ratio": 2.0,
|
|
139
|
+
"pre_encode_images": False,
|
|
140
|
+
"image_to_rgb8": True,
|
|
141
|
+
},
|
|
142
|
+
"limit": 2,
|
|
143
|
+
"samples_buffer_size": 10,
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
client = DatagoClient(json.dumps(config))
|
|
147
|
+
sample = client.get_sample()
|
|
148
|
+
|
|
149
|
+
assert sample is not None
|
|
150
|
+
assert sample.image.width <= 64
|
|
151
|
+
assert sample.image.height <= 64
|
|
152
|
+
assert sample.image.channels == 3 # RGB8
|
|
153
|
+
|
|
154
|
+
def test_client_with_image_encoding(self):
|
|
155
|
+
"""Test client with image encoding enabled."""
|
|
156
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
157
|
+
create_test_images(tmpdir, 3)
|
|
158
|
+
|
|
159
|
+
config = {
|
|
160
|
+
"source_type": "file",
|
|
161
|
+
"source_config": {
|
|
162
|
+
"root_path": tmpdir,
|
|
163
|
+
"rank": 0,
|
|
164
|
+
"world_size": 1,
|
|
165
|
+
"random_sampling": False,
|
|
166
|
+
},
|
|
167
|
+
"image_config": {
|
|
168
|
+
"crop_and_resize": False,
|
|
169
|
+
"pre_encode_images": True,
|
|
170
|
+
"image_to_rgb8": False,
|
|
171
|
+
},
|
|
172
|
+
"limit": 2,
|
|
173
|
+
"samples_buffer_size": 10,
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
client = DatagoClient(json.dumps(config))
|
|
177
|
+
sample = client.get_sample()
|
|
178
|
+
|
|
179
|
+
assert sample is not None
|
|
180
|
+
assert sample.image.channels == -1 # Encoded images have channels = -1
|
|
181
|
+
assert len(sample.image.data) > 0
|
|
182
|
+
|
|
183
|
+
def test_random_sampling(self):
|
|
184
|
+
"""Test that random sampling produces different results."""
|
|
185
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
186
|
+
create_test_images(tmpdir, 10)
|
|
187
|
+
|
|
188
|
+
config_base = {
|
|
189
|
+
"source_type": "file",
|
|
190
|
+
"source_config": {
|
|
191
|
+
"root_path": tmpdir,
|
|
192
|
+
"rank": 0,
|
|
193
|
+
"world_size": 1,
|
|
194
|
+
"random_sampling": True,
|
|
195
|
+
},
|
|
196
|
+
"limit": 5,
|
|
197
|
+
"samples_buffer_size": 10,
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
# Get two sets of samples with random sampling
|
|
201
|
+
client1 = DatagoClient(json.dumps(config_base))
|
|
202
|
+
samples1 = []
|
|
203
|
+
for _ in range(5):
|
|
204
|
+
sample = client1.get_sample()
|
|
205
|
+
if sample:
|
|
206
|
+
samples1.append(sample.id)
|
|
207
|
+
else:
|
|
208
|
+
break
|
|
209
|
+
|
|
210
|
+
client2 = DatagoClient(json.dumps(config_base))
|
|
211
|
+
samples2 = []
|
|
212
|
+
for _ in range(5):
|
|
213
|
+
sample = client2.get_sample()
|
|
214
|
+
if sample:
|
|
215
|
+
samples2.append(sample.id)
|
|
216
|
+
else:
|
|
217
|
+
break
|
|
218
|
+
|
|
219
|
+
# With random sampling, the samples should be different
|
|
220
|
+
assert len(samples1) > 0
|
|
221
|
+
assert len(samples2) > 0
|
|
222
|
+
assert (
|
|
223
|
+
len(set(samples1) & set(samples2)) < 5
|
|
224
|
+
) # Expect some overlap, but not all
|
|
225
|
+
|
|
226
|
+
def test_world_size_and_rank(self):
|
|
227
|
+
"""Test that different ranks get different subsets of data."""
|
|
228
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
229
|
+
create_test_images(tmpdir, 10)
|
|
230
|
+
|
|
231
|
+
config_rank0 = {
|
|
232
|
+
"source_type": "file",
|
|
233
|
+
"source_config": {
|
|
234
|
+
"root_path": tmpdir,
|
|
235
|
+
"rank": 0,
|
|
236
|
+
"world_size": 2,
|
|
237
|
+
"random_sampling": False,
|
|
238
|
+
},
|
|
239
|
+
"limit": 10,
|
|
240
|
+
"samples_buffer_size": 10,
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
config_rank1 = {
|
|
244
|
+
"source_type": "file",
|
|
245
|
+
"source_config": {
|
|
246
|
+
"root_path": tmpdir,
|
|
247
|
+
"rank": 1,
|
|
248
|
+
"world_size": 2,
|
|
249
|
+
"random_sampling": False,
|
|
250
|
+
},
|
|
251
|
+
"limit": 10,
|
|
252
|
+
"samples_buffer_size": 10,
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
client0 = DatagoClient(json.dumps(config_rank0))
|
|
256
|
+
samples0 = []
|
|
257
|
+
for _ in range(10):
|
|
258
|
+
sample = client0.get_sample()
|
|
259
|
+
if sample:
|
|
260
|
+
samples0.append(sample.id)
|
|
261
|
+
else:
|
|
262
|
+
break
|
|
263
|
+
|
|
264
|
+
client1 = DatagoClient(json.dumps(config_rank1))
|
|
265
|
+
samples1 = []
|
|
266
|
+
for _ in range(10):
|
|
267
|
+
sample = client1.get_sample()
|
|
268
|
+
if sample:
|
|
269
|
+
samples1.append(sample.id)
|
|
270
|
+
else:
|
|
271
|
+
break
|
|
272
|
+
|
|
273
|
+
# Different ranks should get different samples
|
|
274
|
+
assert len(samples0) > 0
|
|
275
|
+
assert len(samples1) > 0
|
|
276
|
+
|
|
277
|
+
# No overlap between ranks
|
|
278
|
+
overlap = set(samples0) & set(samples1)
|
|
279
|
+
assert len(overlap) == 0
|
|
280
|
+
|
|
281
|
+
def test_limit_respected(self):
|
|
282
|
+
"""Test that the client respects the limit parameter."""
|
|
283
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
284
|
+
create_test_images(tmpdir, 10)
|
|
285
|
+
|
|
286
|
+
config = {
|
|
287
|
+
"source_type": "file",
|
|
288
|
+
"source_config": {
|
|
289
|
+
"root_path": tmpdir,
|
|
290
|
+
"rank": 0,
|
|
291
|
+
"world_size": 1,
|
|
292
|
+
"random_sampling": False,
|
|
293
|
+
},
|
|
294
|
+
"limit": 3,
|
|
295
|
+
"samples_buffer_size": 10,
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
client = DatagoClient(json.dumps(config))
|
|
299
|
+
samples_received = 0
|
|
300
|
+
|
|
301
|
+
while True:
|
|
302
|
+
sample = client.get_sample()
|
|
303
|
+
if sample:
|
|
304
|
+
samples_received += 1
|
|
305
|
+
else:
|
|
306
|
+
break
|
|
307
|
+
|
|
308
|
+
# Safety valve to prevent infinite loop
|
|
309
|
+
if samples_received > 10:
|
|
310
|
+
break
|
|
311
|
+
|
|
312
|
+
# Should respect the limit (might have small buffer)
|
|
313
|
+
assert samples_received <= 4 # Allow small buffer
|
|
314
|
+
|
|
315
|
+
def test_empty_directory(self):
|
|
316
|
+
"""Test client behavior with empty directory."""
|
|
317
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
318
|
+
config = {
|
|
319
|
+
"source_type": "file",
|
|
320
|
+
"source_config": {
|
|
321
|
+
"root_path": tmpdir,
|
|
322
|
+
"rank": 0,
|
|
323
|
+
"world_size": 1,
|
|
324
|
+
"random_sampling": False,
|
|
325
|
+
},
|
|
326
|
+
"limit": 3,
|
|
327
|
+
"samples_buffer_size": 10,
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
client = DatagoClient(json.dumps(config))
|
|
331
|
+
sample = client.get_sample()
|
|
332
|
+
|
|
333
|
+
# Should return None when no files available
|
|
334
|
+
assert sample is None
|
|
335
|
+
|
|
336
|
+
def test_nonexistent_directory(self):
|
|
337
|
+
"""Test client behavior with nonexistent directory."""
|
|
338
|
+
config = {
|
|
339
|
+
"source_type": "file",
|
|
340
|
+
"source_config": {
|
|
341
|
+
"root_path": "/nonexistent/directory",
|
|
342
|
+
"rank": 0,
|
|
343
|
+
"world_size": 1,
|
|
344
|
+
"random_sampling": False,
|
|
345
|
+
},
|
|
346
|
+
"limit": 3,
|
|
347
|
+
"samples_buffer_size": 10,
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
client = DatagoClient(json.dumps(config))
|
|
351
|
+
sample = client.get_sample()
|
|
352
|
+
|
|
353
|
+
# Should handle gracefully and return None
|
|
354
|
+
assert sample is None
|
|
355
|
+
|
|
356
|
+
def test_client_drop_cleanup(self):
|
|
357
|
+
"""Test that client cleans up properly when dropped."""
|
|
358
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
359
|
+
create_test_images(tmpdir, 3)
|
|
360
|
+
|
|
361
|
+
config = {
|
|
362
|
+
"source_type": "file",
|
|
363
|
+
"source_config": {
|
|
364
|
+
"root_path": tmpdir,
|
|
365
|
+
"rank": 0,
|
|
366
|
+
"world_size": 1,
|
|
367
|
+
"random_sampling": False,
|
|
368
|
+
},
|
|
369
|
+
"limit": 3,
|
|
370
|
+
"samples_buffer_size": 10,
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
client = DatagoClient(json.dumps(config))
|
|
374
|
+
client.start()
|
|
375
|
+
|
|
376
|
+
# Client should clean up when it goes out of scope
|
|
377
|
+
del client
|
|
378
|
+
|
|
379
|
+
def test_multiple_starts_stops(self):
|
|
380
|
+
"""Test that multiple start/stop calls don't cause issues."""
|
|
381
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
382
|
+
create_test_images(tmpdir, 3)
|
|
383
|
+
|
|
384
|
+
config = {
|
|
385
|
+
"source_type": "file",
|
|
386
|
+
"source_config": {
|
|
387
|
+
"root_path": tmpdir,
|
|
388
|
+
"rank": 0,
|
|
389
|
+
"world_size": 1,
|
|
390
|
+
"random_sampling": False,
|
|
391
|
+
},
|
|
392
|
+
"limit": 3,
|
|
393
|
+
"samples_buffer_size": 10,
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
client = DatagoClient(json.dumps(config))
|
|
397
|
+
|
|
398
|
+
# Multiple starts should be safe
|
|
399
|
+
client.start()
|
|
400
|
+
client.start()
|
|
401
|
+
|
|
402
|
+
# Multiple stops should be safe
|
|
403
|
+
client.stop()
|
|
404
|
+
client.stop()
|
|
405
|
+
|
|
406
|
+
def test_various_image_formats(self):
|
|
407
|
+
"""Test client with various image formats."""
|
|
408
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
409
|
+
# Create images with different formats
|
|
410
|
+
formats = [
|
|
411
|
+
("test1.png", "PNG"),
|
|
412
|
+
("test2.jpg", "JPEG"),
|
|
413
|
+
]
|
|
414
|
+
|
|
415
|
+
for filename, format_name in formats:
|
|
416
|
+
img = Image.new("RGB", (50, 50), color="red")
|
|
417
|
+
path = os.path.join(tmpdir, filename)
|
|
418
|
+
img.save(path, format=format_name)
|
|
419
|
+
|
|
420
|
+
config = {
|
|
421
|
+
"source_type": "file",
|
|
422
|
+
"source_config": {
|
|
423
|
+
"root_path": tmpdir,
|
|
424
|
+
"rank": 0,
|
|
425
|
+
"world_size": 1,
|
|
426
|
+
"random_sampling": False,
|
|
427
|
+
},
|
|
428
|
+
"limit": 4,
|
|
429
|
+
"samples_buffer_size": 10,
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
client = DatagoClient(json.dumps(config))
|
|
433
|
+
|
|
434
|
+
samples_received = 0
|
|
435
|
+
while True:
|
|
436
|
+
sample = client.get_sample()
|
|
437
|
+
if sample:
|
|
438
|
+
samples_received += 1
|
|
439
|
+
assert sample.image.width == 50
|
|
440
|
+
assert sample.image.height == 50
|
|
441
|
+
else:
|
|
442
|
+
break
|
|
443
|
+
|
|
444
|
+
assert samples_received == len(formats)
|