rapidata 2.41.3__py3-none-any.whl → 2.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +1 -5
- rapidata/api_client/__init__.py +14 -14
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/asset_api.py +851 -0
- rapidata/api_client/api/benchmark_api.py +298 -0
- rapidata/api_client/api/customer_rapid_api.py +29 -43
- rapidata/api_client/api/dataset_api.py +163 -1143
- rapidata/api_client/api/participant_api.py +28 -74
- rapidata/api_client/api/validation_set_api.py +283 -0
- rapidata/api_client/models/__init__.py +13 -14
- rapidata/api_client/models/add_validation_rapid_model.py +3 -3
- rapidata/api_client/models/add_validation_rapid_new_model.py +152 -0
- rapidata/api_client/models/add_validation_rapid_new_model_asset.py +182 -0
- rapidata/api_client/models/compare_workflow_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_files_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_text_sources_model.py +3 -3
- rapidata/api_client/models/create_datapoint_from_urls_model.py +3 -3
- rapidata/api_client/models/create_datapoint_model.py +108 -0
- rapidata/api_client/models/create_datapoint_model_asset.py +182 -0
- rapidata/api_client/models/create_demographic_rapid_model.py +13 -2
- rapidata/api_client/models/create_demographic_rapid_model_asset.py +188 -0
- rapidata/api_client/models/create_demographic_rapid_model_new.py +119 -0
- rapidata/api_client/models/create_sample_model.py +8 -2
- rapidata/api_client/models/create_sample_model_asset.py +182 -0
- rapidata/api_client/models/create_sample_model_obsolete.py +87 -0
- rapidata/api_client/models/file_asset_input_file.py +8 -22
- rapidata/api_client/models/fork_benchmark_result.py +87 -0
- rapidata/api_client/models/form_file_wrapper.py +17 -2
- rapidata/api_client/models/get_asset_metadata_result.py +100 -0
- rapidata/api_client/models/multi_asset_input_assets_inner.py +10 -24
- rapidata/api_client/models/prompt_asset_metadata_input.py +3 -3
- rapidata/api_client/models/proxy_file_wrapper.py +17 -2
- rapidata/api_client/models/stream_file_wrapper.py +25 -3
- rapidata/api_client/models/submit_prompt_model.py +3 -3
- rapidata/api_client/models/text_metadata.py +6 -1
- rapidata/api_client/models/text_metadata_model.py +7 -2
- rapidata/api_client/models/upload_file_from_url_result.py +87 -0
- rapidata/api_client/models/upload_file_result.py +87 -0
- rapidata/api_client/models/zip_entry_file_wrapper.py +33 -2
- rapidata/api_client_README.md +28 -25
- rapidata/rapidata_client/__init__.py +0 -1
- rapidata/rapidata_client/benchmark/participant/_participant.py +24 -22
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +89 -102
- rapidata/rapidata_client/datapoints/__init__.py +0 -1
- rapidata/rapidata_client/datapoints/_asset_uploader.py +71 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +58 -171
- rapidata/rapidata_client/datapoints/_datapoint_uploader.py +95 -0
- rapidata/rapidata_client/datapoints/assets/__init__.py +0 -11
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +10 -7
- rapidata/rapidata_client/demographic/demographic_manager.py +21 -8
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +0 -62
- rapidata/rapidata_client/order/_rapidata_order_builder.py +0 -10
- rapidata/rapidata_client/order/dataset/_rapidata_dataset.py +67 -187
- rapidata/rapidata_client/order/rapidata_order_manager.py +58 -124
- rapidata/rapidata_client/validation/rapidata_validation_set.py +9 -5
- rapidata/rapidata_client/validation/rapids/_validation_rapid_uploader.py +101 -0
- rapidata/rapidata_client/validation/rapids/box.py +35 -11
- rapidata/rapidata_client/validation/rapids/rapids.py +26 -128
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +123 -104
- rapidata/rapidata_client/validation/validation_set_manager.py +25 -34
- rapidata/rapidata_client/workflow/_ranking_workflow.py +14 -17
- rapidata/rapidata_client/workflow/_select_words_workflow.py +3 -16
- rapidata/service/openapi_service.py +8 -3
- {rapidata-2.41.3.dist-info → rapidata-2.42.0.dist-info}/METADATA +1 -1
- {rapidata-2.41.3.dist-info → rapidata-2.42.0.dist-info}/RECORD +67 -58
- {rapidata-2.41.3.dist-info → rapidata-2.42.0.dist-info}/WHEEL +1 -1
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +0 -13
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +0 -318
- rapidata/rapidata_client/datapoints/assets/_multi_asset.py +0 -61
- rapidata/rapidata_client/datapoints/assets/_sessions.py +0 -40
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +0 -34
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +0 -8
- rapidata/rapidata_client/order/dataset/_progress_tracker.py +0 -100
- {rapidata-2.41.3.dist-info → rapidata-2.42.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
"""Base Asset Module
|
|
2
|
-
|
|
3
|
-
Defines the BaseAsset class, which serves as the abstract base class for all asset types.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class BaseAsset:
|
|
8
|
-
"""BaseAsset Class
|
|
9
|
-
|
|
10
|
-
An abstract base class for different types of assets. This class is intended to be subclassed by specific asset types.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
pass
|
|
@@ -1,318 +0,0 @@
|
|
|
1
|
-
"""Media Asset Module with Lazy Loading
|
|
2
|
-
|
|
3
|
-
Defines the MediaAsset class for handling media file paths within assets.
|
|
4
|
-
Implements lazy loading for URL-based media to prevent unnecessary downloads.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from typing import Optional, cast
|
|
8
|
-
import os
|
|
9
|
-
from io import BytesIO
|
|
10
|
-
from rapidata.rapidata_client.datapoints.assets._base_asset import BaseAsset
|
|
11
|
-
import requests
|
|
12
|
-
import re
|
|
13
|
-
from PIL import Image
|
|
14
|
-
from tinytag import TinyTag
|
|
15
|
-
import tempfile
|
|
16
|
-
from pydantic import StrictStr, StrictBytes
|
|
17
|
-
import logging
|
|
18
|
-
from functools import cached_property
|
|
19
|
-
from rapidata.rapidata_client.datapoints.assets._sessions import SessionManager
|
|
20
|
-
from rapidata.rapidata_client.config import logger
|
|
21
|
-
from rapidata.rapidata_client.datapoints.assets.constants import (
|
|
22
|
-
ALLOWED_IMAGE_EXTENSIONS,
|
|
23
|
-
ALLOWED_MEDIA_EXTENSIONS,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class MediaAsset(BaseAsset):
|
|
28
|
-
"""MediaAsset Class with Lazy Loading
|
|
29
|
-
|
|
30
|
-
Represents a media asset by storing the file path or URL.
|
|
31
|
-
Only downloads URL content when needed.
|
|
32
|
-
Supports local files and URLs for images, MP3, and MP4.
|
|
33
|
-
|
|
34
|
-
Args:
|
|
35
|
-
path (str): The file system path to the media asset or URL.
|
|
36
|
-
|
|
37
|
-
Raises:
|
|
38
|
-
FileNotFoundError: If the provided file path does not exist.
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
_logger = logging.getLogger(__name__ + ".MediaAsset")
|
|
42
|
-
|
|
43
|
-
ALLOWED_TYPES = [
|
|
44
|
-
"image/",
|
|
45
|
-
"audio/mp3", # MP3
|
|
46
|
-
"video/mp4", # MP4
|
|
47
|
-
]
|
|
48
|
-
|
|
49
|
-
MIME_TYPES = {
|
|
50
|
-
"jpg": "image/jpeg",
|
|
51
|
-
"jpeg": "image/jpeg",
|
|
52
|
-
"png": "image/png",
|
|
53
|
-
"gif": "image/gif",
|
|
54
|
-
"webp": "image/webp",
|
|
55
|
-
"mp3": "audio/mp3",
|
|
56
|
-
"mp4": "video/mp4",
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
FILE_SIGNATURES = {
|
|
60
|
-
b"\xFF\xD8\xFF": "image/jpeg",
|
|
61
|
-
b"\x89PNG\r\n\x1a\n": "image/png",
|
|
62
|
-
b"GIF87a": "image/gif",
|
|
63
|
-
b"GIF89a": "image/gif",
|
|
64
|
-
b"RIFF": "image/webp",
|
|
65
|
-
b"ID3": "audio/mp3",
|
|
66
|
-
b"\xFF\xFB": "audio/mp3",
|
|
67
|
-
b"\xFF\xF3": "audio/mp3",
|
|
68
|
-
b"ftyp": "video/mp4",
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
def __init__(self, path: str):
|
|
72
|
-
"""
|
|
73
|
-
Initialize a MediaAsset instance.
|
|
74
|
-
|
|
75
|
-
Args:
|
|
76
|
-
path (str): The file system path to the media asset or a URL.
|
|
77
|
-
|
|
78
|
-
Raises:
|
|
79
|
-
FileNotFoundError: If the provided file path does not exist.
|
|
80
|
-
ValueError: If path is not a string.
|
|
81
|
-
"""
|
|
82
|
-
if not isinstance(path, str):
|
|
83
|
-
raise ValueError(
|
|
84
|
-
f"Media must be a string, either a local file path or a URL, got {type(path)}"
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
self._url = None
|
|
88
|
-
self._content = None
|
|
89
|
-
self.session: requests.Session = SessionManager.get_session()
|
|
90
|
-
|
|
91
|
-
if re.match(r"^https?://", path):
|
|
92
|
-
self._url = path
|
|
93
|
-
self.name = path.split("/")[-1]
|
|
94
|
-
self.name = self.__check_name_ending(self.name)
|
|
95
|
-
self.path = path
|
|
96
|
-
return
|
|
97
|
-
|
|
98
|
-
if not os.path.exists(path):
|
|
99
|
-
raise FileNotFoundError(f"File not found: {path}")
|
|
100
|
-
|
|
101
|
-
self.path = path
|
|
102
|
-
self.name = path
|
|
103
|
-
|
|
104
|
-
@cached_property
|
|
105
|
-
def content(self) -> bytes:
|
|
106
|
-
"""
|
|
107
|
-
Lazy loader for URL content. Only downloads when first accessed.
|
|
108
|
-
Uses cached_property to store the result after first download.
|
|
109
|
-
"""
|
|
110
|
-
if self._url is None:
|
|
111
|
-
self.path = cast(str, self.path)
|
|
112
|
-
with open(self.path, "rb") as f:
|
|
113
|
-
return f.read()
|
|
114
|
-
|
|
115
|
-
return self.__get_media_bytes(self._url)
|
|
116
|
-
|
|
117
|
-
def get_duration(self) -> int:
|
|
118
|
-
"""
|
|
119
|
-
Get the duration of audio/video files in milliseconds.
|
|
120
|
-
Returns 0 for static images.
|
|
121
|
-
|
|
122
|
-
Returns:
|
|
123
|
-
int: Duration in milliseconds for audio/video, 0 for static images
|
|
124
|
-
|
|
125
|
-
Raises:
|
|
126
|
-
ValueError: If the duration cannot be determined
|
|
127
|
-
"""
|
|
128
|
-
path_to_check = self.name.lower()
|
|
129
|
-
|
|
130
|
-
# Return 0 for static images
|
|
131
|
-
if any(path_to_check.endswith(ext) for ext in ALLOWED_IMAGE_EXTENSIONS):
|
|
132
|
-
return 0
|
|
133
|
-
|
|
134
|
-
try:
|
|
135
|
-
# Create temporary file from content
|
|
136
|
-
with tempfile.NamedTemporaryFile(
|
|
137
|
-
suffix=os.path.splitext(self.name)[1], delete=False
|
|
138
|
-
) as tmp:
|
|
139
|
-
tmp.write(self.content)
|
|
140
|
-
tmp.flush()
|
|
141
|
-
tmp_path = tmp.name
|
|
142
|
-
|
|
143
|
-
try:
|
|
144
|
-
tag = TinyTag.get(tmp_path)
|
|
145
|
-
finally:
|
|
146
|
-
# Clean up the temporary file
|
|
147
|
-
os.unlink(tmp_path)
|
|
148
|
-
|
|
149
|
-
if tag.duration is None:
|
|
150
|
-
raise ValueError("Could not read duration from file")
|
|
151
|
-
|
|
152
|
-
return int(tag.duration * 1000) # Convert to milliseconds
|
|
153
|
-
|
|
154
|
-
except Exception as e:
|
|
155
|
-
raise ValueError(f"Could not determine media duration: {str(e)}")
|
|
156
|
-
|
|
157
|
-
def get_image_dimension(self) -> tuple[int, int] | None:
|
|
158
|
-
"""
|
|
159
|
-
Get the dimensions (width, height) of an image file.
|
|
160
|
-
Returns None for non-image files or if dimensions can't be determined.
|
|
161
|
-
"""
|
|
162
|
-
if not any(self.name.lower().endswith(ext) for ext in ALLOWED_IMAGE_EXTENSIONS):
|
|
163
|
-
return None
|
|
164
|
-
|
|
165
|
-
try:
|
|
166
|
-
img = Image.open(BytesIO(self.content))
|
|
167
|
-
return img.size
|
|
168
|
-
except Exception:
|
|
169
|
-
return None
|
|
170
|
-
|
|
171
|
-
def set_custom_name(self, name: str) -> "MediaAsset":
|
|
172
|
-
"""Set a custom name for the media asset (only works with URLs)."""
|
|
173
|
-
if self._url is not None:
|
|
174
|
-
self.name = self.__check_name_ending(name)
|
|
175
|
-
else:
|
|
176
|
-
raise ValueError("Custom name can only be set for URLs.")
|
|
177
|
-
return self
|
|
178
|
-
|
|
179
|
-
def __check_name_ending(self, name: str) -> str:
|
|
180
|
-
"""Check if the media path is valid."""
|
|
181
|
-
if not any(name.endswith(ext) for ext in ALLOWED_MEDIA_EXTENSIONS):
|
|
182
|
-
logger.warning(
|
|
183
|
-
f"Warning: Supported file types: {ALLOWED_MEDIA_EXTENSIONS}. Image might not be displayed correctly."
|
|
184
|
-
)
|
|
185
|
-
name = name + ".jpg"
|
|
186
|
-
return name
|
|
187
|
-
|
|
188
|
-
def __get_media_type_from_extension(self, url: str) -> Optional[str]:
|
|
189
|
-
"""
|
|
190
|
-
Determine media type from URL file extension.
|
|
191
|
-
|
|
192
|
-
Args:
|
|
193
|
-
url: The URL to check
|
|
194
|
-
|
|
195
|
-
Returns:
|
|
196
|
-
Optional[str]: MIME type if valid extension found, None otherwise
|
|
197
|
-
"""
|
|
198
|
-
try:
|
|
199
|
-
ext = url.lower().split("?")[0].split(".")[-1]
|
|
200
|
-
return self.MIME_TYPES.get(ext)
|
|
201
|
-
except IndexError:
|
|
202
|
-
return None
|
|
203
|
-
|
|
204
|
-
def __validate_image_content(self, content: bytes) -> bool:
|
|
205
|
-
"""
|
|
206
|
-
Validate image content using PIL.
|
|
207
|
-
|
|
208
|
-
Args:
|
|
209
|
-
content: Image bytes to validate
|
|
210
|
-
|
|
211
|
-
Returns:
|
|
212
|
-
bool: True if valid image, False otherwise
|
|
213
|
-
"""
|
|
214
|
-
try:
|
|
215
|
-
img = Image.open(BytesIO(content))
|
|
216
|
-
img.verify()
|
|
217
|
-
return True
|
|
218
|
-
except Exception as e:
|
|
219
|
-
self._logger.debug(f"Image validation failed: {str(e)}")
|
|
220
|
-
return False
|
|
221
|
-
|
|
222
|
-
def __get_media_type_from_signature(self, content: bytes) -> Optional[str]:
|
|
223
|
-
"""
|
|
224
|
-
Determine media type from file signature.
|
|
225
|
-
|
|
226
|
-
Args:
|
|
227
|
-
content: File content bytes
|
|
228
|
-
|
|
229
|
-
Returns:
|
|
230
|
-
Optional[str]: MIME type if valid signature found, None otherwise
|
|
231
|
-
"""
|
|
232
|
-
file_start = content[:32]
|
|
233
|
-
for signature, mime_type in self.FILE_SIGNATURES.items():
|
|
234
|
-
if file_start.startswith(signature) or (signature in file_start[:10]):
|
|
235
|
-
return mime_type
|
|
236
|
-
return None
|
|
237
|
-
|
|
238
|
-
def __get_media_bytes(self, url: str) -> bytes:
|
|
239
|
-
"""
|
|
240
|
-
Downloads and validates media files from URL with retry logic and session reuse.
|
|
241
|
-
|
|
242
|
-
Args:
|
|
243
|
-
url: URL of the media file
|
|
244
|
-
|
|
245
|
-
Returns:
|
|
246
|
-
bytes: Validated media content
|
|
247
|
-
|
|
248
|
-
Raises:
|
|
249
|
-
ValueError: If media type is unsupported or content validation fails
|
|
250
|
-
requests.exceptions.RequestException: If download fails after all retries
|
|
251
|
-
"""
|
|
252
|
-
# Use existing session or throw error if not set
|
|
253
|
-
if self.session is None:
|
|
254
|
-
raise RuntimeError("HTTP session not configured")
|
|
255
|
-
|
|
256
|
-
try:
|
|
257
|
-
response = self.session.get(
|
|
258
|
-
url, stream=False, timeout=(5, 30) # (connect timeout, read timeout)
|
|
259
|
-
)
|
|
260
|
-
response.raise_for_status()
|
|
261
|
-
except requests.exceptions.RequestException as e:
|
|
262
|
-
self._logger.error(
|
|
263
|
-
f"Failed to download media from {url} after retries: {str(e)}"
|
|
264
|
-
)
|
|
265
|
-
raise
|
|
266
|
-
|
|
267
|
-
content = response.content
|
|
268
|
-
content_type = response.headers.get("content-type", "").lower()
|
|
269
|
-
|
|
270
|
-
# Case 1: Content-type is already allowed
|
|
271
|
-
if any(content_type.startswith(t) for t in self.ALLOWED_TYPES):
|
|
272
|
-
self._logger.debug(f"Content-type {content_type} is allowed")
|
|
273
|
-
return content
|
|
274
|
-
|
|
275
|
-
# Case 2: Try to validate based on extension
|
|
276
|
-
mime_type = self.__get_media_type_from_extension(url)
|
|
277
|
-
if mime_type and mime_type.startswith(tuple(self.ALLOWED_TYPES)):
|
|
278
|
-
self._logger.debug(f"Found valid mime type from extension: {mime_type}")
|
|
279
|
-
return content
|
|
280
|
-
|
|
281
|
-
# Case 3: Try to validate based on file signature
|
|
282
|
-
mime_type = self.__get_media_type_from_signature(content)
|
|
283
|
-
if mime_type and mime_type.startswith(tuple(self.ALLOWED_TYPES)):
|
|
284
|
-
self._logger.debug(f"Found valid mime type from signature: {mime_type}")
|
|
285
|
-
return content
|
|
286
|
-
|
|
287
|
-
# Case 4: Last resort - try direct image validation
|
|
288
|
-
if self.__validate_image_content(content):
|
|
289
|
-
self._logger.debug("Content validated as image through direct validation")
|
|
290
|
-
return content
|
|
291
|
-
|
|
292
|
-
# If we get here, validation failed
|
|
293
|
-
error_msg = (
|
|
294
|
-
f"Could not validate media type from content.\n"
|
|
295
|
-
f"Content-Type: {content_type}\n"
|
|
296
|
-
f'URL extension: {url.split("?")[0].split(".")[-1]}\n'
|
|
297
|
-
f"Allowed types: {self.ALLOWED_TYPES}"
|
|
298
|
-
)
|
|
299
|
-
self._logger.error(error_msg)
|
|
300
|
-
raise ValueError(error_msg)
|
|
301
|
-
|
|
302
|
-
def is_local(self) -> bool:
|
|
303
|
-
"""Check if the media asset is a local file."""
|
|
304
|
-
return self._url is None
|
|
305
|
-
|
|
306
|
-
def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes:
|
|
307
|
-
"""Convert the media asset to a file representation."""
|
|
308
|
-
if self._url is None:
|
|
309
|
-
self.path = cast(str, self.path)
|
|
310
|
-
return self.path
|
|
311
|
-
else:
|
|
312
|
-
return (self.name, self.content)
|
|
313
|
-
|
|
314
|
-
def __str__(self) -> str:
|
|
315
|
-
return f"MediaAsset(path={self.path})"
|
|
316
|
-
|
|
317
|
-
def __repr__(self) -> str:
|
|
318
|
-
return f"MediaAsset(path={self.path})"
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
"""Multi Asset Module
|
|
2
|
-
|
|
3
|
-
Defines the MultiAsset class for handling multiple BaseAsset instances.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from rapidata.rapidata_client.datapoints.assets._base_asset import BaseAsset
|
|
7
|
-
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset
|
|
8
|
-
from typing import Iterator, Sequence
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class MultiAsset(BaseAsset):
|
|
12
|
-
"""MultiAsset Class
|
|
13
|
-
|
|
14
|
-
Represents a collection of multiple BaseAsset instances.
|
|
15
|
-
|
|
16
|
-
Args:
|
|
17
|
-
assets (List[BaseAsset]): A list of BaseAsset instances to be managed together.
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
def __init__(self, assets: Sequence[BaseAsset]) -> None:
|
|
21
|
-
"""
|
|
22
|
-
Initialize a MultiAsset instance.
|
|
23
|
-
|
|
24
|
-
Args:
|
|
25
|
-
assets (List[BaseAsset]): A list of BaseAsset instances to be managed together.
|
|
26
|
-
"""
|
|
27
|
-
if len(assets) != 2:
|
|
28
|
-
raise ValueError("Assets must come in pairs for comparison tasks.")
|
|
29
|
-
|
|
30
|
-
for asset in assets:
|
|
31
|
-
if not isinstance(asset, (TextAsset, MediaAsset)):
|
|
32
|
-
raise TypeError("All assets must be a TextAsset or MediaAsset.")
|
|
33
|
-
|
|
34
|
-
if not all(isinstance(asset, type(assets[0])) for asset in assets):
|
|
35
|
-
raise ValueError("All assets must be of the same type.")
|
|
36
|
-
|
|
37
|
-
self.assets = assets
|
|
38
|
-
|
|
39
|
-
def __len__(self) -> int:
|
|
40
|
-
"""
|
|
41
|
-
Get the number of assets in the MultiAsset.
|
|
42
|
-
|
|
43
|
-
Returns:
|
|
44
|
-
int: The number of assets.
|
|
45
|
-
"""
|
|
46
|
-
return len(self.assets)
|
|
47
|
-
|
|
48
|
-
def __iter__(self) -> Iterator[BaseAsset]:
|
|
49
|
-
"""
|
|
50
|
-
Return an iterator over the assets in the MultiAsset.
|
|
51
|
-
|
|
52
|
-
Returns:
|
|
53
|
-
Iterator[BaseAsset]: An iterator over the assets.
|
|
54
|
-
"""
|
|
55
|
-
return iter(self.assets)
|
|
56
|
-
|
|
57
|
-
def __str__(self) -> str:
|
|
58
|
-
return f"MultiAsset(assets={self.assets})"
|
|
59
|
-
|
|
60
|
-
def __repr__(self) -> str:
|
|
61
|
-
return f"MultiAsset(assets={self.assets})"
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import requests
|
|
2
|
-
from requests.adapters import HTTPAdapter
|
|
3
|
-
from urllib3.util.retry import Retry
|
|
4
|
-
|
|
5
|
-
from rapidata.rapidata_client.config.rapidata_config import rapidata_config
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class SessionManager:
|
|
9
|
-
_session = None
|
|
10
|
-
|
|
11
|
-
@classmethod
|
|
12
|
-
def get_session(
|
|
13
|
-
cls,
|
|
14
|
-
) -> requests.Session:
|
|
15
|
-
"""Get a singleton requests session with retry logic.
|
|
16
|
-
|
|
17
|
-
Returns:
|
|
18
|
-
requests.Session: A singleton requests session with retry logic.
|
|
19
|
-
"""
|
|
20
|
-
if cls._session is None:
|
|
21
|
-
max_retries: int = rapidata_config.upload.maxRetries
|
|
22
|
-
max_workers: int = rapidata_config.upload.maxWorkers
|
|
23
|
-
cls._session = requests.Session()
|
|
24
|
-
retries = Retry(
|
|
25
|
-
total=max_retries,
|
|
26
|
-
backoff_factor=1,
|
|
27
|
-
status_forcelist=[500, 502, 503, 504],
|
|
28
|
-
allowed_methods=["GET"],
|
|
29
|
-
respect_retry_after_header=True,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
adapter = HTTPAdapter(
|
|
33
|
-
pool_connections=max_workers * 2,
|
|
34
|
-
pool_maxsize=max_workers * 4,
|
|
35
|
-
max_retries=retries,
|
|
36
|
-
)
|
|
37
|
-
cls._session.mount("http://", adapter)
|
|
38
|
-
cls._session.mount("https://", adapter)
|
|
39
|
-
|
|
40
|
-
return cls._session
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
"""Text Asset Module
|
|
2
|
-
|
|
3
|
-
Defines the TextAsset class for handling textual data within assets.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from rapidata.rapidata_client.datapoints.assets._base_asset import BaseAsset
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TextAsset(BaseAsset):
|
|
10
|
-
"""TextAsset Class
|
|
11
|
-
|
|
12
|
-
Represents a textual asset.
|
|
13
|
-
|
|
14
|
-
Args:
|
|
15
|
-
text (str): The text content of the asset.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
def __init__(self, text: str):
|
|
19
|
-
"""
|
|
20
|
-
Initialize a TextAsset instance.
|
|
21
|
-
|
|
22
|
-
Args:
|
|
23
|
-
text (str): The textual content of the asset.
|
|
24
|
-
"""
|
|
25
|
-
if not isinstance(text, str):
|
|
26
|
-
raise ValueError(f"Text must be a string, got {type(text)}")
|
|
27
|
-
|
|
28
|
-
self.text = text
|
|
29
|
-
|
|
30
|
-
def __str__(self) -> str:
|
|
31
|
-
return f"TextAsset(text={self.text})"
|
|
32
|
-
|
|
33
|
-
def __repr__(self) -> str:
|
|
34
|
-
return f"TextAsset(text={self.text})"
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
import threading
|
|
2
|
-
import time
|
|
3
|
-
from tqdm import tqdm
|
|
4
|
-
|
|
5
|
-
from rapidata.service.openapi_service import OpenAPIService
|
|
6
|
-
from rapidata.rapidata_client.config import logger, rapidata_config
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class ProgressTracker:
|
|
10
|
-
"""
|
|
11
|
-
Track dataset upload progress in a background thread with shallow indentation.
|
|
12
|
-
|
|
13
|
-
This class encapsulates the progress polling loop to keep methods in
|
|
14
|
-
`RapidataDataset` simpler and below the maximum indentation depth.
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
def __init__(
|
|
18
|
-
self,
|
|
19
|
-
dataset_id: str,
|
|
20
|
-
openapi_service: OpenAPIService,
|
|
21
|
-
total_uploads: int,
|
|
22
|
-
progress_poll_interval: float,
|
|
23
|
-
) -> None:
|
|
24
|
-
self.dataset_id = dataset_id
|
|
25
|
-
self.openapi_service = openapi_service
|
|
26
|
-
self.total_uploads = total_uploads
|
|
27
|
-
self.progress_poll_interval = progress_poll_interval
|
|
28
|
-
self.upload_complete = False
|
|
29
|
-
|
|
30
|
-
def _get_progress_or_none(self):
|
|
31
|
-
try:
|
|
32
|
-
return self.openapi_service.dataset_api.dataset_dataset_id_progress_get(
|
|
33
|
-
self.dataset_id
|
|
34
|
-
)
|
|
35
|
-
except Exception:
|
|
36
|
-
return None
|
|
37
|
-
|
|
38
|
-
def complete(self) -> None:
|
|
39
|
-
logger.debug("Upload complete, setting upload_complete to True")
|
|
40
|
-
self.upload_complete = True
|
|
41
|
-
|
|
42
|
-
def run(self) -> None:
|
|
43
|
-
try:
|
|
44
|
-
with tqdm(
|
|
45
|
-
total=self.total_uploads,
|
|
46
|
-
desc="Uploading datapoints",
|
|
47
|
-
disable=rapidata_config.logging.silent_mode,
|
|
48
|
-
) as pbar:
|
|
49
|
-
final_pass = False
|
|
50
|
-
while True:
|
|
51
|
-
current_progress = self._get_progress_or_none()
|
|
52
|
-
if current_progress is None:
|
|
53
|
-
time.sleep(self.progress_poll_interval)
|
|
54
|
-
logger.debug(
|
|
55
|
-
"No progress yet, sleeping for %s seconds",
|
|
56
|
-
self.progress_poll_interval,
|
|
57
|
-
)
|
|
58
|
-
continue
|
|
59
|
-
|
|
60
|
-
total_completed = current_progress.ready + current_progress.failed
|
|
61
|
-
|
|
62
|
-
pbar.n = total_completed
|
|
63
|
-
pbar.refresh()
|
|
64
|
-
|
|
65
|
-
time.sleep(self.progress_poll_interval)
|
|
66
|
-
if total_completed >= self.total_uploads:
|
|
67
|
-
break
|
|
68
|
-
|
|
69
|
-
if self.upload_complete and current_progress.pending == 0:
|
|
70
|
-
if not final_pass:
|
|
71
|
-
logger.debug("Final pass")
|
|
72
|
-
time.sleep(self.progress_poll_interval)
|
|
73
|
-
final_pass = True
|
|
74
|
-
continue
|
|
75
|
-
logger.debug("Final pass done, breaking out of loop")
|
|
76
|
-
break
|
|
77
|
-
|
|
78
|
-
pbar.close()
|
|
79
|
-
|
|
80
|
-
success_rate = (
|
|
81
|
-
round((current_progress.ready / self.total_uploads * 100), 2)
|
|
82
|
-
if self.total_uploads > 0
|
|
83
|
-
else 0
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
logger.info(
|
|
87
|
-
"Upload complete: %s ready, %s failed, %s pending (%s%% success rate)",
|
|
88
|
-
current_progress.ready,
|
|
89
|
-
current_progress.failed,
|
|
90
|
-
current_progress.pending,
|
|
91
|
-
success_rate,
|
|
92
|
-
)
|
|
93
|
-
except Exception as e:
|
|
94
|
-
logger.error("Progress tracking thread error: %s", str(e))
|
|
95
|
-
raise RuntimeError("Progress tracking failed, aborting uploads")
|
|
96
|
-
|
|
97
|
-
def create_thread(self) -> threading.Thread:
|
|
98
|
-
thread = threading.Thread(target=self.run)
|
|
99
|
-
thread.daemon = True
|
|
100
|
-
return thread
|
|
File without changes
|