datago 2025.3.10__tar.gz → 2025.3.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datago-2025.3.10 → datago-2025.3.11}/Cargo.lock +1 -1
- {datago-2025.3.10 → datago-2025.3.11}/Cargo.toml +1 -1
- {datago-2025.3.10 → datago-2025.3.11}/PKG-INFO +1 -1
- {datago-2025.3.10 → datago-2025.3.11}/python/test_datago_filesystem.py +20 -9
- {datago-2025.3.10 → datago-2025.3.11}/src/client.rs +7 -1
- {datago-2025.3.10 → datago-2025.3.11}/src/image_processing.rs +74 -1
- {datago-2025.3.10 → datago-2025.3.11}/src/main.rs +1 -0
- {datago-2025.3.10 → datago-2025.3.11}/src/worker_files.rs +23 -52
- {datago-2025.3.10 → datago-2025.3.11}/src/worker_http.rs +18 -63
- {datago-2025.3.10 → datago-2025.3.11}/tests/client_test.rs +2 -0
- {datago-2025.3.10 → datago-2025.3.11}/.github/workflows/ci-cd.yml +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/.github/workflows/rust.yml +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/.gitignore +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/.pre-commit-config.yaml +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/LICENSE +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/README.md +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/pyproject.toml +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/python/benchmark_db.py +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/python/benchmark_filesystem.py +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/python/dataset.py +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/python/raw_types.py +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/python/test_datago_db.py +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/requirements-tests.txt +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/requirements.txt +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/src/generator_files.rs +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/src/generator_http.rs +1 -1
- {datago-2025.3.10 → datago-2025.3.11}/src/lib.rs +0 -0
- {datago-2025.3.10 → datago-2025.3.11}/src/structs.rs +0 -0
|
@@ -3,28 +3,31 @@ from datago import DatagoClient
|
|
|
3
3
|
import json
|
|
4
4
|
import tempfile
|
|
5
5
|
import pytest
|
|
6
|
-
import
|
|
6
|
+
from io import BytesIO
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def generate_tmp_files(dir, limit):
|
|
9
|
+
def generate_tmp_files(dir: str, limit: int, rgb16: bool = False, rgba: bool = False):
|
|
10
10
|
for i in range(limit):
|
|
11
11
|
# Prepare an ephemeral test set
|
|
12
|
-
mode = "
|
|
12
|
+
mode = "RGBA" if rgba else "RGB"
|
|
13
13
|
img = Image.new(mode, (100, 100))
|
|
14
14
|
|
|
15
15
|
# Randomly make the image 16 bits
|
|
16
|
-
if
|
|
16
|
+
if rgb16:
|
|
17
17
|
img = img.convert("I;16")
|
|
18
18
|
|
|
19
19
|
img.save(dir + f"/test_{i}.png")
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
@pytest.mark.parametrize(
|
|
23
|
-
|
|
22
|
+
@pytest.mark.parametrize(
|
|
23
|
+
["pre_encode_images", "rgb16", "rgba"],
|
|
24
|
+
[(a, b, c) for a in [True, False] for b in [True, False] for c in [True, False]],
|
|
25
|
+
)
|
|
26
|
+
def test_get_sample_filesystem(pre_encode_images: bool, rgb16: bool, rgba: bool):
|
|
24
27
|
limit = 10
|
|
25
28
|
|
|
26
29
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
27
|
-
generate_tmp_files(tmpdirname, limit)
|
|
30
|
+
generate_tmp_files(tmpdirname, limit, rgb16, rgba)
|
|
28
31
|
|
|
29
32
|
# Check that we can instantiate a client and get a sample, nothing more
|
|
30
33
|
client_config = {
|
|
@@ -39,6 +42,7 @@ def test_get_sample_filesystem(pre_encode_images: bool):
|
|
|
39
42
|
"min_aspect_ratio": 0.5,
|
|
40
43
|
"max_aspect_ratio": 2.0,
|
|
41
44
|
"pre_encode_images": pre_encode_images,
|
|
45
|
+
"image_to_rgb8": rgb16 or rgba,
|
|
42
46
|
},
|
|
43
47
|
"limit": limit,
|
|
44
48
|
"prefetch_buffer_size": 64,
|
|
@@ -58,9 +62,16 @@ def test_get_sample_filesystem(pre_encode_images: bool):
|
|
|
58
62
|
assert data.image.width == 100
|
|
59
63
|
assert data.image.height == 100
|
|
60
64
|
|
|
61
|
-
if
|
|
65
|
+
if rgb16:
|
|
62
66
|
assert data.image.bit_depth == 8
|
|
63
67
|
|
|
68
|
+
# Open the image in python scope and check properties
|
|
69
|
+
if pre_encode_images:
|
|
70
|
+
test_image = Image.open(BytesIO(data.image.data))
|
|
71
|
+
assert test_image.width == 100
|
|
72
|
+
assert test_image.height == 100
|
|
73
|
+
assert test_image.mode == "RGB"
|
|
74
|
+
|
|
64
75
|
assert count == limit
|
|
65
76
|
|
|
66
77
|
|
|
@@ -112,4 +123,4 @@ def test_random_walk():
|
|
|
112
123
|
|
|
113
124
|
|
|
114
125
|
if __name__ == "__main__":
|
|
115
|
-
test_get_sample_filesystem(True)
|
|
126
|
+
test_get_sample_filesystem(True, True, True)
|
|
@@ -33,6 +33,7 @@ pub struct DatagoClient {
|
|
|
33
33
|
// Sample processing
|
|
34
34
|
image_transform: Option<ARAwareTransform>,
|
|
35
35
|
encode_images: bool,
|
|
36
|
+
image_to_rgb8: bool, // Convert all images to RGB 8 bits format
|
|
36
37
|
|
|
37
38
|
// Threads
|
|
38
39
|
pinger: Option<thread::JoinHandle<()>>,
|
|
@@ -55,12 +56,13 @@ impl DatagoClient {
|
|
|
55
56
|
|
|
56
57
|
let mut image_transform: Option<ARAwareTransform> = None;
|
|
57
58
|
let mut encode_images = false;
|
|
58
|
-
|
|
59
|
+
let mut image_to_rgb8 = false;
|
|
59
60
|
if let Some(image_config) = config.image_config {
|
|
60
61
|
if image_config.crop_and_resize {
|
|
61
62
|
image_transform = Some(image_config.get_ar_aware_transform());
|
|
62
63
|
}
|
|
63
64
|
encode_images = image_config.pre_encode_images;
|
|
65
|
+
image_to_rgb8 = image_config.image_to_rgb8;
|
|
64
66
|
}
|
|
65
67
|
|
|
66
68
|
DatagoClient {
|
|
@@ -79,6 +81,7 @@ impl DatagoClient {
|
|
|
79
81
|
samples_rx,
|
|
80
82
|
image_transform,
|
|
81
83
|
encode_images,
|
|
84
|
+
image_to_rgb8,
|
|
82
85
|
pinger: None,
|
|
83
86
|
feeder: None,
|
|
84
87
|
worker: None,
|
|
@@ -157,6 +160,7 @@ impl DatagoClient {
|
|
|
157
160
|
let samples_tx_local = self.samples_tx.clone();
|
|
158
161
|
let local_image_transform = self.image_transform.clone();
|
|
159
162
|
let encode_images = self.encode_images;
|
|
163
|
+
let image_to_rgb8 = self.image_to_rgb8;
|
|
160
164
|
|
|
161
165
|
match self.source_type {
|
|
162
166
|
SourceType::Db => {
|
|
@@ -168,6 +172,7 @@ impl DatagoClient {
|
|
|
168
172
|
samples_tx_local,
|
|
169
173
|
local_image_transform,
|
|
170
174
|
encode_images,
|
|
175
|
+
image_to_rgb8,
|
|
171
176
|
limit,
|
|
172
177
|
);
|
|
173
178
|
}));
|
|
@@ -179,6 +184,7 @@ impl DatagoClient {
|
|
|
179
184
|
samples_tx_local,
|
|
180
185
|
local_image_transform,
|
|
181
186
|
encode_images,
|
|
187
|
+
image_to_rgb8,
|
|
182
188
|
limit,
|
|
183
189
|
);
|
|
184
190
|
}));
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
use crate::structs::ImagePayload;
|
|
2
2
|
use serde::Deserialize;
|
|
3
3
|
use serde::Serialize;
|
|
4
|
+
use std::io::Cursor;
|
|
5
|
+
|
|
6
|
+
// --- Sample data structures - these will be exposed to the Python world ---------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
4
7
|
|
|
5
8
|
#[derive(Debug, Serialize, Deserialize)]
|
|
6
9
|
pub struct ImageTransformConfig {
|
|
@@ -9,7 +12,12 @@ pub struct ImageTransformConfig {
|
|
|
9
12
|
pub downsampling_ratio: i32,
|
|
10
13
|
pub min_aspect_ratio: f64,
|
|
11
14
|
pub max_aspect_ratio: f64,
|
|
15
|
+
|
|
16
|
+
#[serde(default)]
|
|
12
17
|
pub pre_encode_images: bool,
|
|
18
|
+
|
|
19
|
+
#[serde(default)]
|
|
20
|
+
pub image_to_rgb8: bool, // Convert all images to RGB 8 bits format
|
|
13
21
|
}
|
|
14
22
|
|
|
15
23
|
impl ImageTransformConfig {
|
|
@@ -130,6 +138,70 @@ impl ARAwareTransform {
|
|
|
130
138
|
}
|
|
131
139
|
}
|
|
132
140
|
|
|
141
|
+
// ------------------------------------------------------------------------
|
|
142
|
+
pub async fn image_to_payload(
|
|
143
|
+
mut image: image::DynamicImage,
|
|
144
|
+
img_tfm: &Option<ARAwareTransform>,
|
|
145
|
+
aspect_ratio: &String,
|
|
146
|
+
encode_images: bool,
|
|
147
|
+
img_to_rgb8: bool,
|
|
148
|
+
) -> Result<ImagePayload, image::ImageError> {
|
|
149
|
+
let original_height = image.height() as usize;
|
|
150
|
+
let original_width = image.width() as usize;
|
|
151
|
+
let mut channels = image.color().channel_count() as i8;
|
|
152
|
+
let mut bit_depth =
|
|
153
|
+
(image.color().bits_per_pixel() / image.color().channel_count() as u16) as usize;
|
|
154
|
+
|
|
155
|
+
// Optionally transform the additional image in the same way the main image was
|
|
156
|
+
if let Some(img_tfm) = img_tfm {
|
|
157
|
+
let aspect_ratio_input = if aspect_ratio.is_empty() {
|
|
158
|
+
None
|
|
159
|
+
} else {
|
|
160
|
+
Some(aspect_ratio)
|
|
161
|
+
};
|
|
162
|
+
image = img_tfm.crop_and_resize(&image, aspect_ratio_input).await;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
let height = image.height() as usize;
|
|
166
|
+
let width = image.width() as usize;
|
|
167
|
+
|
|
168
|
+
// Image to RGB8 if requested
|
|
169
|
+
if img_to_rgb8 && image.color() != image::ColorType::Rgb8 {
|
|
170
|
+
image = image::DynamicImage::ImageRgb8(image.to_rgb8());
|
|
171
|
+
bit_depth = 8;
|
|
172
|
+
channels = 3;
|
|
173
|
+
assert!((image.color().bits_per_pixel() / image.color().channel_count() as u16) == 8);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Encode the image if needed
|
|
177
|
+
let mut image_bytes: Vec<u8> = Vec::new();
|
|
178
|
+
if encode_images {
|
|
179
|
+
if image
|
|
180
|
+
.write_to(&mut Cursor::new(&mut image_bytes), image::ImageFormat::Png)
|
|
181
|
+
.is_err()
|
|
182
|
+
{
|
|
183
|
+
return Err(image::ImageError::IoError(std::io::Error::new(
|
|
184
|
+
std::io::ErrorKind::Other,
|
|
185
|
+
"Failed to encode image",
|
|
186
|
+
)));
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
channels = -1; // Signal the fact that the image is encoded
|
|
190
|
+
} else {
|
|
191
|
+
image_bytes = image.into_bytes();
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
Ok(ImagePayload {
|
|
195
|
+
data: image_bytes,
|
|
196
|
+
original_height,
|
|
197
|
+
original_width,
|
|
198
|
+
height,
|
|
199
|
+
width,
|
|
200
|
+
channels,
|
|
201
|
+
bit_depth,
|
|
202
|
+
})
|
|
203
|
+
}
|
|
204
|
+
|
|
133
205
|
// ------------------------------------------------------------------------------------------------------------------------
|
|
134
206
|
|
|
135
207
|
#[cfg(test)]
|
|
@@ -147,6 +219,7 @@ mod tests {
|
|
|
147
219
|
min_aspect_ratio: 0.5,
|
|
148
220
|
max_aspect_ratio: 2.0,
|
|
149
221
|
pre_encode_images: false,
|
|
222
|
+
image_to_rgb8: false,
|
|
150
223
|
};
|
|
151
224
|
|
|
152
225
|
let transform = config.get_ar_aware_transform();
|
|
@@ -2,7 +2,6 @@ use crate::image_processing;
|
|
|
2
2
|
use crate::structs::{ImagePayload, Sample};
|
|
3
3
|
use std::cmp::min;
|
|
4
4
|
use std::collections::HashMap;
|
|
5
|
-
use std::io::Cursor;
|
|
6
5
|
use std::sync::Arc;
|
|
7
6
|
|
|
8
7
|
async fn image_from_path(path: &str) -> Result<image::DynamicImage, image::ImageError> {
|
|
@@ -19,58 +18,18 @@ async fn image_payload_from_path(
|
|
|
19
18
|
path: &str,
|
|
20
19
|
img_tfm: &Option<image_processing::ARAwareTransform>,
|
|
21
20
|
encode_images: bool,
|
|
21
|
+
img_to_rgb8: bool,
|
|
22
22
|
) -> Result<ImagePayload, image::ImageError> {
|
|
23
23
|
match image_from_path(path).await {
|
|
24
|
-
Ok(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
if let Some(img_tfm) = img_tfm {
|
|
34
|
-
new_image = img_tfm.crop_and_resize(&new_image, None).await;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
let height = new_image.height() as usize;
|
|
38
|
-
let width = new_image.width() as usize;
|
|
39
|
-
|
|
40
|
-
// Encode the image if needed
|
|
41
|
-
let mut image_bytes: Vec<u8> = Vec::new();
|
|
42
|
-
if encode_images {
|
|
43
|
-
if new_image.color() != image::ColorType::Rgb8 {
|
|
44
|
-
new_image = image::DynamicImage::ImageRgb8(new_image.to_rgb8());
|
|
45
|
-
bit_depth = (new_image.color().bits_per_pixel()
|
|
46
|
-
/ new_image.color().channel_count() as u16)
|
|
47
|
-
as usize;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
if new_image
|
|
51
|
-
.write_to(&mut Cursor::new(&mut image_bytes), image::ImageFormat::Png)
|
|
52
|
-
.is_err()
|
|
53
|
-
{
|
|
54
|
-
return Err(image::ImageError::IoError(std::io::Error::new(
|
|
55
|
-
std::io::ErrorKind::Other,
|
|
56
|
-
"Failed to encode image",
|
|
57
|
-
)));
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
channels = -1; // Signal the fact that the image is encoded
|
|
61
|
-
} else {
|
|
62
|
-
image_bytes = new_image.into_bytes();
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
Ok(ImagePayload {
|
|
66
|
-
data: image_bytes,
|
|
67
|
-
original_height,
|
|
68
|
-
original_width,
|
|
69
|
-
height,
|
|
70
|
-
width,
|
|
71
|
-
channels,
|
|
72
|
-
bit_depth,
|
|
73
|
-
})
|
|
24
|
+
Ok(new_image) => {
|
|
25
|
+
image_processing::image_to_payload(
|
|
26
|
+
new_image,
|
|
27
|
+
img_tfm,
|
|
28
|
+
&"".to_string(),
|
|
29
|
+
encode_images,
|
|
30
|
+
img_to_rgb8,
|
|
31
|
+
)
|
|
32
|
+
.await
|
|
74
33
|
}
|
|
75
34
|
Err(e) => Err(e),
|
|
76
35
|
}
|
|
@@ -80,9 +39,17 @@ async fn pull_sample(
|
|
|
80
39
|
sample_json: serde_json::Value,
|
|
81
40
|
img_tfm: Arc<Option<image_processing::ARAwareTransform>>,
|
|
82
41
|
encode_images: bool,
|
|
42
|
+
img_to_rgb8: bool,
|
|
83
43
|
samples_tx: kanal::Sender<Option<Sample>>,
|
|
84
44
|
) -> Result<(), ()> {
|
|
85
|
-
match image_payload_from_path(
|
|
45
|
+
match image_payload_from_path(
|
|
46
|
+
sample_json.as_str().unwrap(),
|
|
47
|
+
&img_tfm,
|
|
48
|
+
encode_images,
|
|
49
|
+
img_to_rgb8,
|
|
50
|
+
)
|
|
51
|
+
.await
|
|
52
|
+
{
|
|
86
53
|
Ok(image) => {
|
|
87
54
|
let sample = Sample {
|
|
88
55
|
id: sample_json.to_string(),
|
|
@@ -127,6 +94,7 @@ async fn async_pull_samples(
|
|
|
127
94
|
samples_tx: kanal::Sender<Option<Sample>>,
|
|
128
95
|
image_transform: Option<image_processing::ARAwareTransform>,
|
|
129
96
|
encode_images: bool,
|
|
97
|
+
img_to_rgb8: bool,
|
|
130
98
|
limit: usize,
|
|
131
99
|
) {
|
|
132
100
|
// We use async-await here, to better use IO stalls
|
|
@@ -148,6 +116,7 @@ async fn async_pull_samples(
|
|
|
148
116
|
received,
|
|
149
117
|
shareable_img_tfm.clone(),
|
|
150
118
|
encode_images,
|
|
119
|
+
img_to_rgb8,
|
|
151
120
|
samples_tx.clone(),
|
|
152
121
|
)));
|
|
153
122
|
|
|
@@ -177,6 +146,7 @@ pub fn pull_samples(
|
|
|
177
146
|
samples_tx: kanal::Sender<Option<Sample>>,
|
|
178
147
|
image_transform: Option<image_processing::ARAwareTransform>,
|
|
179
148
|
encode_images: bool,
|
|
149
|
+
img_to_rgb8: bool,
|
|
180
150
|
limit: usize,
|
|
181
151
|
) {
|
|
182
152
|
tokio::runtime::Builder::new_multi_thread()
|
|
@@ -189,6 +159,7 @@ pub fn pull_samples(
|
|
|
189
159
|
samples_tx,
|
|
190
160
|
image_transform,
|
|
191
161
|
encode_images,
|
|
162
|
+
img_to_rgb8,
|
|
192
163
|
limit,
|
|
193
164
|
)
|
|
194
165
|
.await;
|
|
@@ -5,7 +5,6 @@ use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
|
|
|
5
5
|
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
|
6
6
|
use serde::{Deserialize, Serialize};
|
|
7
7
|
use std::cmp::min;
|
|
8
|
-
use std::io::Cursor;
|
|
9
8
|
use std::sync::Arc;
|
|
10
9
|
|
|
11
10
|
// We'll share a single connection pool across all worker threads
|
|
@@ -83,72 +82,20 @@ async fn image_payload_from_url(
|
|
|
83
82
|
img_tfm: &Option<image_processing::ARAwareTransform>,
|
|
84
83
|
aspect_ratio: &String,
|
|
85
84
|
encode_images: bool,
|
|
85
|
+
img_to_rgb8: bool,
|
|
86
86
|
) -> Result<ImagePayload, image::ImageError> {
|
|
87
87
|
let retries = 5;
|
|
88
88
|
|
|
89
89
|
match image_from_url(client, url, retries).await {
|
|
90
|
-
Ok(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
if let Some(img_tfm) = img_tfm {
|
|
100
|
-
let aspect_ratio_input = if aspect_ratio.is_empty() {
|
|
101
|
-
None
|
|
102
|
-
} else {
|
|
103
|
-
Some(aspect_ratio)
|
|
104
|
-
};
|
|
105
|
-
|
|
106
|
-
// TODO: tokio::spawn this
|
|
107
|
-
new_image = img_tfm
|
|
108
|
-
.crop_and_resize(&new_image, aspect_ratio_input)
|
|
109
|
-
.await;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
let height = new_image.height() as usize;
|
|
113
|
-
let width = new_image.width() as usize;
|
|
114
|
-
|
|
115
|
-
// Encode the image if needed
|
|
116
|
-
let mut image_bytes: Vec<u8> = Vec::new();
|
|
117
|
-
|
|
118
|
-
if encode_images {
|
|
119
|
-
// If the image is 16 bits or 2 channels, standardize to 8 bits, 3 channels
|
|
120
|
-
if new_image.color() != image::ColorType::Rgb8 {
|
|
121
|
-
new_image = image::DynamicImage::ImageRgb8(new_image.to_rgb8());
|
|
122
|
-
bit_depth = (new_image.color().bits_per_pixel()
|
|
123
|
-
/ new_image.color().channel_count() as u16)
|
|
124
|
-
as usize;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
// Pre-encode the payload as requested, we move everything to PNGs
|
|
128
|
-
if new_image
|
|
129
|
-
.write_to(&mut Cursor::new(&mut image_bytes), image::ImageFormat::Png)
|
|
130
|
-
.is_err()
|
|
131
|
-
{
|
|
132
|
-
return Err(image::ImageError::IoError(std::io::Error::new(
|
|
133
|
-
std::io::ErrorKind::Other,
|
|
134
|
-
"Failed to encode image",
|
|
135
|
-
)));
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
channels = -1; // Signal the fact that the image is encoded
|
|
139
|
-
} else {
|
|
140
|
-
image_bytes = new_image.into_bytes();
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
Ok(ImagePayload {
|
|
144
|
-
data: image_bytes,
|
|
145
|
-
original_height,
|
|
146
|
-
original_width,
|
|
147
|
-
height,
|
|
148
|
-
width,
|
|
149
|
-
channels,
|
|
150
|
-
bit_depth,
|
|
151
|
-
})
|
|
90
|
+
Ok(new_image) => {
|
|
91
|
+
image_processing::image_to_payload(
|
|
92
|
+
new_image,
|
|
93
|
+
img_tfm,
|
|
94
|
+
aspect_ratio,
|
|
95
|
+
encode_images,
|
|
96
|
+
img_to_rgb8,
|
|
97
|
+
)
|
|
98
|
+
.await
|
|
152
99
|
}
|
|
153
100
|
Err(e) => Err(e),
|
|
154
101
|
}
|
|
@@ -159,6 +106,7 @@ async fn pull_sample(
|
|
|
159
106
|
sample_json: serde_json::Value,
|
|
160
107
|
img_tfm: Arc<Option<image_processing::ARAwareTransform>>,
|
|
161
108
|
encode_images: bool,
|
|
109
|
+
img_to_rgb8: bool,
|
|
162
110
|
samples_tx: Arc<kanal::Sender<Option<Sample>>>,
|
|
163
111
|
) -> Result<(), ()> {
|
|
164
112
|
// Deserialize the sample metadata
|
|
@@ -175,6 +123,7 @@ async fn pull_sample(
|
|
|
175
123
|
&img_tfm,
|
|
176
124
|
&String::new(),
|
|
177
125
|
encode_images,
|
|
126
|
+
img_to_rgb8,
|
|
178
127
|
)
|
|
179
128
|
.await
|
|
180
129
|
{
|
|
@@ -211,6 +160,7 @@ async fn pull_sample(
|
|
|
211
160
|
&img_tfm,
|
|
212
161
|
&aspect_ratio,
|
|
213
162
|
encode_images,
|
|
163
|
+
img_to_rgb8,
|
|
214
164
|
)
|
|
215
165
|
.await
|
|
216
166
|
{
|
|
@@ -235,6 +185,7 @@ async fn pull_sample(
|
|
|
235
185
|
&img_tfm,
|
|
236
186
|
&aspect_ratio,
|
|
237
187
|
encode_images,
|
|
188
|
+
false, // Masks are not converted to RGB8
|
|
238
189
|
)
|
|
239
190
|
.await
|
|
240
191
|
{
|
|
@@ -306,6 +257,7 @@ async fn async_pull_samples(
|
|
|
306
257
|
samples_tx: kanal::Sender<Option<Sample>>,
|
|
307
258
|
image_transform: Option<image_processing::ARAwareTransform>,
|
|
308
259
|
encode_images: bool,
|
|
260
|
+
img_to_rgb8: bool,
|
|
309
261
|
limit: usize,
|
|
310
262
|
) {
|
|
311
263
|
// TODO: Join with the other workers' implementation, same logic
|
|
@@ -332,6 +284,7 @@ async fn async_pull_samples(
|
|
|
332
284
|
received,
|
|
333
285
|
shareable_img_tfm.clone(),
|
|
334
286
|
encode_images,
|
|
287
|
+
img_to_rgb8,
|
|
335
288
|
shareable_channel_tx.clone(),
|
|
336
289
|
)));
|
|
337
290
|
|
|
@@ -362,6 +315,7 @@ pub fn pull_samples(
|
|
|
362
315
|
samples_tx: kanal::Sender<Option<Sample>>,
|
|
363
316
|
image_transform: Option<image_processing::ARAwareTransform>,
|
|
364
317
|
encode_images: bool,
|
|
318
|
+
img_to_rgb8: bool,
|
|
365
319
|
limit: usize,
|
|
366
320
|
) {
|
|
367
321
|
tokio::runtime::Builder::new_multi_thread()
|
|
@@ -376,6 +330,7 @@ pub fn pull_samples(
|
|
|
376
330
|
samples_tx,
|
|
377
331
|
image_transform,
|
|
378
332
|
encode_images,
|
|
333
|
+
img_to_rgb8,
|
|
379
334
|
limit,
|
|
380
335
|
)
|
|
381
336
|
.await;
|
|
@@ -172,6 +172,7 @@ fn test_crop_resize() {
|
|
|
172
172
|
min_aspect_ratio: 0.5,
|
|
173
173
|
max_aspect_ratio: 2.0,
|
|
174
174
|
pre_encode_images: false,
|
|
175
|
+
image_to_rgb8: false
|
|
175
176
|
});
|
|
176
177
|
|
|
177
178
|
let mut client = DatagoClient::new(config.to_string());
|
|
@@ -203,6 +204,7 @@ fn test_img_compression() {
|
|
|
203
204
|
min_aspect_ratio: 0.5,
|
|
204
205
|
max_aspect_ratio: 2.0,
|
|
205
206
|
pre_encode_images: true, // new part being tested
|
|
207
|
+
image_to_rgb8: false
|
|
206
208
|
});
|
|
207
209
|
|
|
208
210
|
let mut client = DatagoClient::new(config.to_string());
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -10,6 +10,7 @@ use crate::worker_http::SharedClient;
|
|
|
10
10
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
11
11
|
pub struct SourceDBConfig {
|
|
12
12
|
pub sources: String,
|
|
13
|
+
pub page_size: usize,
|
|
13
14
|
|
|
14
15
|
#[serde(default)]
|
|
15
16
|
pub sources_ne: String,
|
|
@@ -61,7 +62,6 @@ pub struct SourceDBConfig {
|
|
|
61
62
|
|
|
62
63
|
#[serde(default)]
|
|
63
64
|
pub random_sampling: bool,
|
|
64
|
-
pub page_size: usize,
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
// TODO: Derive from the above
|
|
File without changes
|
|
File without changes
|