dtlpy 1.116.6__py3-none-any.whl → 1.118.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,23 +2,28 @@
  Datasets Repository
  """

+ import copy
+ import json
+ import logging
  import os
  import sys
+ import tempfile
  import time
- import copy
- import tqdm
- import logging
  import zipfile
- import json
- from typing import Union, Generator, Optional
+ from pathlib import Path
+ from typing import Generator, Optional, Union
+
+ import tqdm

- from .. import entities, repositories, miscellaneous, exceptions, services, PlatformException, _api_reference
+ from .. import _api_reference, entities, exceptions, miscellaneous, PlatformException, repositories, services
+ from ..entities.dataset import ExportType, OutputExportType
+ from ..services import service_defaults
  from ..services.api_client import ApiClient
- from ..entities.dataset import OutputExportType, ExportType

  logger = logging.getLogger(name='dtlpy')

  MAX_ITEMS_PER_SUBSET = 50000
+ DOWNLOAD_ANNOTATIONS_MAX_ITEMS_PER_SUBSET = 1000

  class Datasets:
      """
@@ -129,6 +134,54 @@ class Datasets:
              dataset_id = dataset.id
          return dataset_id

+     @staticmethod
+     def _save_item_json_file(item_data, base_path: Path, export_version=None):
+         """
+         Save a single item's JSON data to a file, creating the directory structure as needed.
+
+         :param dict item_data: The item data dictionary (must have a 'filename' key)
+         :param Path base_path: Base directory path where JSON files should be saved
+         :param entities.ExportVersion export_version: Optional export version (V1 or V2) affecting filename handling
+         :return: Path to the saved JSON file
+         :rtype: Path
+         """
+         # Get the filename and strip the leading slash
+         filename = item_data.get('filename', '')
+         if not filename:
+             raise ValueError("item_data must have a 'filename' key")
+         filename = filename.lstrip('/')
+
+         # Determine the relative JSON path based on the export version
+         if export_version == entities.ExportVersion.V1:
+             # V1: replace the extension with .json (e.g. "file.jpg" -> "file.json")
+             rel_json_path = str(Path(filename).with_suffix('.json'))
+         elif export_version == entities.ExportVersion.V2:
+             # V2: append .json (e.g. "file.jpg" -> "file.jpg.json")
+             rel_json_path = filename + '.json'
+         else:
+             # Default/None: replace the extension with .json (backward-compatible default)
+             rel_json_path = os.path.splitext(filename)[0] + '.json'
+
+         # Remove a leading slash if present
+         if rel_json_path.startswith('/'):
+             rel_json_path = rel_json_path[1:]
+
+         # Build the output path
+         out_path = base_path / rel_json_path
+
+         # Create parent directories
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Write the JSON file
+         try:
+             with open(out_path, 'w') as outf:
+                 json.dump(item_data, outf, indent=2)
+         except Exception:
+             logger.exception(f'Failed writing export item JSON to {out_path}')
+             raise
+
+         return out_path
+
      @staticmethod
      def _build_payload(filters, include_feature_vectors, include_annotations,
                         export_type, annotation_filters, feature_vector_filters, dataset_lock, lock_timeout_sec, export_summary):
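
The V1/V2 handling in the new _save_item_json_file helper reduces to two filename mappings. A minimal standalone sketch of that mapping, using a hypothetical item payload rather than the dtlpy API:

    from pathlib import Path

    # Hypothetical item payload; only the 'filename' key matters for path mapping.
    item_data = {'filename': '/folder/image.jpg'}
    filename = item_data['filename'].lstrip('/')

    # ExportVersion.V1 (and the None default): replace the extension with .json
    v1_path = str(Path(filename).with_suffix('.json'))  # 'folder/image.json'

    # ExportVersion.V2: append .json to the full filename
    v2_path = filename + '.json'  # 'folder/image.jpg.json'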
@@ -902,17 +955,7 @@ class Datasets:
          logger.debug("start building per-item JSON files under local_path mirroring remote structure")
          # Build per-item JSON files under local_path mirroring remote structure
          for item in all_items:
-             rel_json_path = os.path.splitext(item.get('filename'))[0] + '.json'
-             # Remove leading slash to make it a relative path
-             if rel_json_path.startswith('/'):
-                 rel_json_path = rel_json_path[1:]
-             out_path = os.path.join(base_dir, rel_json_path)
-             os.makedirs(os.path.dirname(out_path), exist_ok=True)
-             try:
-                 with open(out_path, 'w') as outf:
-                     json.dump(item, outf)
-             except Exception:
-                 logger.exception(f'Failed writing export item JSON to {out_path}')
+             self._save_item_json_file(item_data=item, base_path=Path(base_dir), export_version=None)
          logger.debug("end building per-item JSON files under local_path mirroring remote structure")
          return base_dir

@@ -1159,7 +1202,7 @@ class Datasets:
                               include_annotations_in_output: bool = True,
                               export_png_files: bool = False,
                               filter_output_annotations: bool = False,
-                              alpha: float = None,
+                              alpha: float = 1,
                               export_version=entities.ExportVersion.V1,
                               dataset_lock: bool = False,
                               lock_timeout_sec: int = None,
@@ -1216,33 +1259,26 @@ class Datasets:
          elif not isinstance(annotation_options, list):
              annotation_options = [annotation_options]
          for ann_option in annotation_options:
-             if not isinstance(ann_option, entities.ViewAnnotationOptions):
-                 if ann_option not in list(entities.ViewAnnotationOptions):
-                     raise PlatformException(
-                         error='400',
-                         message='Unknown annotation download option: {}, please choose from: {}'.format(
-                             ann_option, list(entities.ViewAnnotationOptions)))
-
+             if ann_option not in entities.ViewAnnotationOptions:
+                 raise PlatformException(
+                     error='400',
+                     message=f'Unknown annotation download option: {ann_option}, please choose from: {list(entities.ViewAnnotationOptions)}',
+                 )
          if remote_path is not None:
-             logger.warning(
-                 '"remote_path" is ignored. Use "filters=dl.Filters(field="dir, values={!r}"'.format(remote_path))
+             logger.warning(f'"remote_path" is ignored. Use filters=dl.Filters(field="dir", values={remote_path!r}) instead')
+         if filter_output_annotations is True:
+             logger.warning("'filter_output_annotations' is ignored but kept for legacy support")
+         if include_annotations_in_output is False:
+             logger.warning("'include_annotations_in_output' was False, but was set to True since this function downloads annotations")
+             include_annotations_in_output = True
+
          if local_path is None:
              if dataset.project is None:
                  # by dataset name
-                 local_path = os.path.join(
-                     services.service_defaults.DATALOOP_PATH,
-                     "datasets",
-                     "{}_{}".format(dataset.name, dataset.id),
-                 )
+                 local_path = str(Path(service_defaults.DATALOOP_PATH) / "datasets" / f"{dataset.name}_{dataset.id}")
              else:
                  # by dataset and project name
-                 local_path = os.path.join(
-                     services.service_defaults.DATALOOP_PATH,
-                     "projects",
-                     dataset.project.name,
-                     "datasets",
-                     dataset.name,
-                 )
+                 local_path = str(Path(service_defaults.DATALOOP_PATH) / "projects" / dataset.project.name / "datasets" / dataset.name)

          if filters is None:
              filters = entities.Filters()
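
For orientation, a sketch of where the new Path-based defaults point, assuming service_defaults.DATALOOP_PATH resolves to ~/.dataloop (the customary default) and using hypothetical project and dataset names:

    from pathlib import Path

    DATALOOP_PATH = Path.home() / '.dataloop'  # assumed root; the real value comes from dtlpy's service_defaults

    # Dataset without a project: <root>/datasets/<name>_<id>
    print(DATALOOP_PATH / 'datasets' / 'my-dataset_5f1a0b')  # hypothetical name and id

    # Dataset with a project: <root>/projects/<project>/datasets/<name>
    print(DATALOOP_PATH / 'projects' / 'my-project' / 'datasets' / 'my-dataset')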
@@ -1260,53 +1296,98 @@ class Datasets:
                           method=entities.FiltersMethod.OR)

          downloader = repositories.Downloader(items_repository=dataset.items)
-         downloader.download_annotations(dataset=dataset,
-                                         filters=filters,
-                                         annotation_filters=annotation_filters,
-                                         local_path=local_path,
-                                         overwrite=overwrite,
-                                         include_annotations_in_output=include_annotations_in_output,
-                                         export_png_files=export_png_files,
-                                         filter_output_annotations=filter_output_annotations,
-                                         export_version=export_version,
-                                         dataset_lock=dataset_lock,
-                                         lock_timeout_sec=lock_timeout_sec,
-                                         export_summary=export_summary
-                                         )
-         if annotation_options:
-             pages = dataset.items.list(filters=filters)
-             if not isinstance(annotation_options, list):
-                 annotation_options = [annotation_options]
-             # convert all annotations to annotation_options
+
+         # Setup for incremental processing
+         if len(annotation_options) == 0:
+             pool = None
+             progress = None
+             jobs = []
+         else:
+             # Get the total item count for the progress bar
+             filter_copy = copy.deepcopy(filters)
+             filter_copy.page_size = 0
+             pages = dataset.items.list(filters=filter_copy)
+             total_items = pages.items_count
+
+             # Set up the thread pool and progress bar
              pool = dataset._client_api.thread_pools(pool_name='dataset.download')
-             jobs = [None for _ in range(pages.items_count)]
-             progress = tqdm.tqdm(total=pages.items_count,
-                                  disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
-                                  file=sys.stdout, desc='Download Annotations')
-             i_item = 0
-             for page in pages:
-                 for item in page:
-                     jobs[i_item] = pool.submit(
-                         Datasets._convert_single,
-                         **{
-                             'downloader': downloader,
-                             'item': item,
-                             'img_filepath': None,
-                             'local_path': local_path,
-                             'overwrite': overwrite,
-                             'annotation_options': annotation_options,
-                             'annotation_filters': annotation_filters,
-                             'thickness': thickness,
-                             'with_text': with_text,
-                             'progress': progress,
-                             'alpha': alpha,
-                             'export_version': export_version
-                         }
-                     )
-                     i_item += 1
-             # get all results
+             progress = tqdm.tqdm(
+                 total=total_items,
+                 disable=dataset._client_api.verbose.disable_progress_bar_download_annotations,
+                 file=sys.stdout,
+                 desc='Download Annotations'
+             )
+             jobs = []
+
+
+         # Call _export_recursive as a generator
+         export_generator = dataset.project.datasets._export_recursive(
+             dataset=dataset,
+             local_path=tempfile.mkdtemp(prefix='annotations_jsons_'),
+             filters=filters,
+             annotation_filters=annotation_filters,
+             include_annotations=True,
+             export_type=ExportType.JSON,
+             dataset_lock=dataset_lock,
+             lock_timeout_sec=lock_timeout_sec,
+             export_summary=export_summary,
+             timeout=0,
+             max_items_per_subset=DOWNLOAD_ANNOTATIONS_MAX_ITEMS_PER_SUBSET
+         )
+
+         # Process each subset JSON file incrementally
+         for subset_json_file in export_generator:
+             if subset_json_file is None or not Path(subset_json_file).is_file():
+                 continue
+
+             try:
+                 # Open the subset file and load its items array
+                 with open(subset_json_file, 'r') as f:
+                     items_data = json.load(f)
+
+                 # Process each item immediately
+                 for item_data in items_data:
+                     # Split out and save the item's individual JSON file
+                     Datasets._save_item_json_file(item_data=item_data, base_path=Path(local_path) / 'json', export_version=export_version)
+
+                     # If annotation_options are provided, submit to the thread pool immediately
+                     if annotation_options:
+                         # Create an Item entity from item_data
+                         item = entities.Item.from_json(
+                             _json=item_data,
+                             client_api=dataset._client_api,
+                             dataset=dataset
+                         )
+
+                         job = pool.submit(
+                             Datasets._convert_single,
+                             **{
+                                 'downloader': downloader,
+                                 'item': item,
+                                 'img_filepath': None,
+                                 'local_path': local_path,
+                                 'overwrite': overwrite,
+                                 'annotation_options': annotation_options,
+                                 'annotation_filters': annotation_filters,
+                                 'thickness': thickness,
+                                 'with_text': with_text,
+                                 'progress': progress,
+                                 'alpha': alpha,
+                                 'export_version': export_version
+                             }
+                         )
+                         jobs.append(job)
+
+                 # Clean up the temporary subset JSON file
+                 os.remove(subset_json_file)
+             except Exception as e:
+                 logger.exception(f'Failed processing subset JSON file {subset_json_file}: {e}')
+
+         # Wait for all thread pool jobs to complete
+         if annotation_options:
              _ = [j.result() for j in jobs]
              progress.close()
+
          return local_path

      def _upload_single_item_annotation(self, item, file, pbar):
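
Taken together, download_annotations now streams export subsets from _export_recursive, writes per-item JSON files under <local_path>/json, and fans out to the conversion thread pool only when annotation_options are requested. A hedged usage sketch of the public entry point, with hypothetical project and dataset names and only the keyword arguments this diff touches:

    import dtlpy as dl

    project = dl.projects.get(project_name='my-project')        # hypothetical
    dataset = project.datasets.get(dataset_name='my-dataset')   # hypothetical

    # Per-item JSONs land under <local_path>/json; MASK images are rendered by the thread pool.
    local_path = dataset.download_annotations(
        local_path='/tmp/my-dataset-annotations',
        annotation_options=dl.ViewAnnotationOptions.MASK,
        alpha=1,  # new default in this diff (opaque overlay)
        export_version=dl.ExportVersion.V1,
    )
    print(local_path)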