dtlpy 1.116.6__py3-none-any.whl → 1.118.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dtlpy/__init__.py +1 -1
- dtlpy/__version__.py +1 -1
- dtlpy/entities/__init__.py +1 -1
- dtlpy/entities/annotation.py +1 -1
- dtlpy/entities/app.py +1 -1
- dtlpy/entities/compute.py +1 -0
- dtlpy/entities/dataset.py +17 -2
- dtlpy/entities/feature_set.py +7 -0
- dtlpy/entities/item.py +16 -0
- dtlpy/entities/model.py +1 -1
- dtlpy/entities/ontology.py +1 -1
- dtlpy/entities/paged_entities.py +7 -3
- dtlpy/entities/service.py +11 -0
- dtlpy/ml/base_model_adapter.py +68 -37
- dtlpy/repositories/apps.py +12 -13
- dtlpy/repositories/datasets.py +165 -84
- dtlpy/repositories/downloader.py +299 -118
- dtlpy/repositories/feature_sets.py +159 -70
- dtlpy/repositories/recipes.py +15 -5
- dtlpy/services/api_client.py +5 -4
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/METADATA +14 -15
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/RECORD +29 -31
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/WHEEL +1 -1
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/top_level.txt +0 -1
- tests/features/__init__.py +0 -0
- tests/features/environment.py +0 -551
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp +0 -0
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp.bat +0 -0
- {dtlpy-1.116.6.data → dtlpy-1.118.12.data}/scripts/dlp.py +0 -0
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/entry_points.txt +0 -0
- {dtlpy-1.116.6.dist-info → dtlpy-1.118.12.dist-info}/licenses/LICENSE +0 -0
dtlpy/repositories/datasets.py
CHANGED
@@ -2,23 +2,28 @@
 Datasets Repository
 """

+import copy
+import json
+import logging
 import os
 import sys
+import tempfile
 import time
-import copy
-import tqdm
-import logging
 import zipfile
-import
-from typing import
+from pathlib import Path
+from typing import Generator, Optional, Union
+
+import tqdm

-from .. import entities,
+from .. import _api_reference, entities, exceptions, miscellaneous, PlatformException, repositories, services
+from ..entities.dataset import ExportType, OutputExportType
+from ..services import service_defaults
 from ..services.api_client import ApiClient
-from ..entities.dataset import OutputExportType, ExportType

 logger = logging.getLogger(name='dtlpy')

 MAX_ITEMS_PER_SUBSET = 50000
+DOWNLOAD_ANNOTATIONS_MAX_ITEMS_PER_SUBSET = 1000

 class Datasets:
     """
@@ -129,6 +134,54 @@ class Datasets:
         dataset_id = dataset.id
         return dataset_id

+    @staticmethod
+    def _save_item_json_file(item_data, base_path: Path, export_version=None):
+        """
+        Save a single item's JSON data to a file, creating the directory structure as needed.
+
+        :param dict item_data: The item data dictionary (must have 'filename' key)
+        :param Path base_path: Base directory path where JSON files should be saved
+        :param entities.ExportVersion export_version: Optional export version (V1 or V2) affecting filename handling
+        :return: Path to the saved JSON file
+        :rtype: Path
+        """
+        # Get filename and remove leading slash
+        filename = item_data.get('filename', '')
+        if not filename:
+            raise ValueError("item_data must have a 'filename' key")
+        filename = filename.lstrip('/')
+
+        # Determine relative JSON path based on export version
+        if export_version == entities.ExportVersion.V1:
+            # V1: Replace extension with .json (e.g., "file.jpg" -> "file.json")
+            rel_json_path = str(Path(filename).with_suffix('.json'))
+        elif export_version == entities.ExportVersion.V2:
+            # V2: Append .json (e.g., "file.jpg" -> "file.jpg.json")
+            rel_json_path = filename + '.json'
+        else:
+            # Default/None: Replace extension with .json (backward compatible with section 1)
+            rel_json_path = os.path.splitext(filename)[0] + '.json'
+
+        # Remove leading slash if present
+        if rel_json_path.startswith('/'):
+            rel_json_path = rel_json_path[1:]
+
+        # Build output path
+        out_path = base_path / rel_json_path
+
+        # Create parent directories
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write JSON file
+        try:
+            with open(out_path, 'w') as outf:
+                json.dump(item_data, outf, indent=2)
+        except Exception:
+            logger.exception(f'Failed writing export item JSON to {out_path}')
+            raise
+
+        return out_path
+
     @staticmethod
     def _build_payload(filters, include_feature_vectors, include_annotations,
                        export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
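The new `_save_item_json_file` helper centralizes the per-item JSON writing that a later hunk removes from the inline export loop. A minimal usage sketch of the filename mapping it applies, assuming the 1.118.12 wheel is installed; the item payload and output directory are placeholders:

```python
from pathlib import Path

from dtlpy import entities
from dtlpy.repositories.datasets import Datasets

# Placeholder item payload; only the 'filename' key is required by the helper.
item_data = {'filename': '/folder/image.jpg', 'annotations': []}
base = Path('/tmp/export-demo')  # placeholder output directory

# V1 replaces the extension: /tmp/export-demo/folder/image.json
print(Datasets._save_item_json_file(item_data, base, export_version=entities.ExportVersion.V1))

# V2 appends .json: /tmp/export-demo/folder/image.jpg.json
print(Datasets._save_item_json_file(item_data, base, export_version=entities.ExportVersion.V2))
```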
@@ -902,17 +955,7 @@ class Datasets:
         logger.debug("start building per-item JSON files under local_path mirroring remote structure")
         # Build per-item JSON files under local_path mirroring remote structure
         for item in all_items:
-
-            # Remove leading slash to make it a relative path
-            if rel_json_path.startswith('/'):
-                rel_json_path = rel_json_path[1:]
-            out_path = os.path.join(base_dir, rel_json_path)
-            os.makedirs(os.path.dirname(out_path), exist_ok=True)
-            try:
-                with open(out_path, 'w') as outf:
-                    json.dump(item, outf)
-            except Exception:
-                logger.exception(f'Failed writing export item JSON to {out_path}')
+            self._save_item_json_file(item_data=item, base_path=Path(base_dir), export_version=None)
         logger.debug("end building per-item JSON files under local_path mirroring remote structure")
         return base_dir

@@ -1159,7 +1202,7 @@ class Datasets:
             include_annotations_in_output: bool = True,
             export_png_files: bool = False,
             filter_output_annotations: bool = False,
-            alpha: float =
+            alpha: float = 1,
             export_version=entities.ExportVersion.V1,
             dataset_lock: bool = False,
             lock_timeout_sec: int = None,
@@ -1216,33 +1259,26 @@
         elif not isinstance(annotation_options, list):
             annotation_options = [annotation_options]
         for ann_option in annotation_options:
-            if not
-
-
-
-
-                    ann_option, list(entities.ViewAnnotationOptions)))
-
+            if ann_option not in entities.ViewAnnotationOptions:
+                raise PlatformException(
+                    error='400',
+                    message=f'Unknown annotation download option: {ann_option}, please choose from: {list(entities.ViewAnnotationOptions)}',
+                )
         if remote_path is not None:
-            logger.warning(
-
+            logger.warning(f'"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={remote_path!r}"')
+        if filter_output_annotations is True:
+            logger.warning("'filter_output_annotations' is ignored but kept for legacy support")
+        if include_annotations_in_output is False:
+            logger.warning("include_annotations_in_output was False, but was set to True since this function downloads annotations.")
+            include_annotations_in_output = True
+
         if local_path is None:
             if dataset.project is None:
                 # by dataset name
-                local_path =
-                    services.service_defaults.DATALOOP_PATH,
-                    "datasets",
-                    "{}_{}".format(dataset.name, dataset.id),
-                )
+                local_path = str(Path(service_defaults.DATALOOP_PATH) / "datasets" / f"{dataset.name}_{dataset.id}")
             else:
                 # by dataset and project name
-                local_path =
-                    services.service_defaults.DATALOOP_PATH,
-                    "projects",
-                    dataset.project.name,
-                    "datasets",
-                    dataset.name,
-                )
+                local_path = str(Path(service_defaults.DATALOOP_PATH) / "projects" / dataset.project.name / "datasets" / dataset.name)

         if filters is None:
             filters = entities.Filters()
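The default download location is now built with pathlib instead of chained `os.path.join` calls. A small sketch of the resulting layout, assuming `service_defaults.DATALOOP_PATH` keeps its usual `~/.dataloop` default; project/dataset names and the id are placeholders:

```python
from pathlib import Path

# Assumption: service_defaults.DATALOOP_PATH resolves to ~/.dataloop
DATALOOP_PATH = str(Path.home() / '.dataloop')

# With a project context: <DATALOOP_PATH>/projects/<project>/datasets/<dataset>
with_project = str(Path(DATALOOP_PATH) / "projects" / "my-project" / "datasets" / "my-dataset")

# Without a project context: <DATALOOP_PATH>/datasets/<dataset name>_<dataset id>
without_project = str(Path(DATALOOP_PATH) / "datasets" / "my-dataset_0123456789abcdef")

print(with_project)
print(without_project)
```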
@@ -1260,53 +1296,98 @@
                             method=entities.FiltersMethod.OR)

         downloader = repositories.Downloader(items_repository=dataset.items)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        pages = dataset.items.list(filters=filters)
-        if not isinstance(annotation_options, list):
-            annotation_options = [annotation_options]
-        # convert all annotations to annotation_options
+
+        # Setup for incremental processing
+        if len(annotation_options) == 0 :
+            pool = None
+            progress = None
+            jobs = []
+        else:
+            # Get total count for progress bar
+            filter_copy = copy.deepcopy(filters)
+            filter_copy.page_size = 0
+            pages = dataset.items.list(filters=filter_copy)
+            total_items = pages.items_count
+
+            # Setup thread pool and progress bar
             pool = dataset._client_api.thread_pools(pool_name='dataset.download')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            progress = tqdm.tqdm(
+                total=total_items,
+                disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
+                file=sys.stdout,
+                desc='Download Annotations'
+            )
+            jobs = []
+
+
+        # Call _export_recursive as generator
+        export_generator = dataset.project.datasets._export_recursive(
+            dataset=dataset,
+            local_path=tempfile.mkdtemp(prefix='annotations_jsons_'),
+            filters=filters,
+            annotation_filters=annotation_filters,
+            include_annotations=True,
+            export_type=ExportType.JSON,
+            dataset_lock=dataset_lock,
+            lock_timeout_sec=lock_timeout_sec,
+            export_summary=export_summary,
+            timeout=0,
+            max_items_per_subset=DOWNLOAD_ANNOTATIONS_MAX_ITEMS_PER_SUBSET
+        )
+
+        # Process each subset JSON file incrementally
+        for subset_json_file in export_generator:
+            if subset_json_file is None or not Path(subset_json_file).is_file():
+                continue
+
+            try:
+                # Open and load the items array
+                with open(subset_json_file, 'r') as f:
+                    items_data = json.load(f)
+
+                # Process each item immediately
+                for item_data in items_data:
+                    # Split and save individual JSON file
+                    Datasets._save_item_json_file(item_data=item_data, base_path=Path(local_path) / 'json', export_version=export_version)
+
+                    # If annotation_options are provided, submit to thread pool immediately
+                    if annotation_options:
+                        # Create Item entity from item_data
+                        item = entities.Item.from_json(
+                            _json=item_data,
+                            client_api=dataset._client_api,
+                            dataset=dataset
+                        )
+
+                        job = pool.submit(
+                            Datasets._convert_single,
+                            **{
+                                'downloader': downloader,
+                                'item': item,
+                                'img_filepath': None,
+                                'local_path': local_path,
+                                'overwrite': overwrite,
+                                'annotation_options': annotation_options,
+                                'annotation_filters': annotation_filters,
+                                'thickness': thickness,
+                                'with_text': with_text,
+                                'progress': progress,
+                                'alpha': alpha,
+                                'export_version': export_version
+                            }
+                        )
+                        jobs.append(job)
+
+                # Clean up temporary subset JSON file
+                os.remove(subset_json_file)
+            except Exception as e:
+                logger.exception(f'Failed processing subset JSON file {subset_json_file}: {e}')
+
+        # Wait for all thread pool jobs to complete
+        if annotation_options:
             _ = [j.result() for j in jobs]
             progress.close()
+
         return local_path

     def _upload_single_item_annotation(self, item, file, pbar):