deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (74)
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/dataset/history.py
@@ -1,15 +1,25 @@
 import base64
 import struct
 from datetime import datetime
+from typing import Any
 
 from dateutil.parser import isoparse
 from deriva.core import urlquote
+from deriva.core.deriva_server import DerivaServer
 
 
 # -- ==============================================================================================
-def get_record_history(server, cid, sname, tname, kvals, kcols=["RID"], snap=None):
+def get_record_history(
+    server: DerivaServer,
+    cid: str | int,
+    sname: str,
+    tname: str,
+    kvals: list[str],
+    kcols: list[str] | None = None,
+    snap: str | None = None,
+) -> dict[str, dict[str, Any]]:
     """Get the history of a record from the catalog.
-
+
     Args:
         server: The server instance.
         cid: The catalog ID.
@@ -18,13 +28,16 @@ def get_record_history(server, cid, sname, tname, kvals, kcols=["RID"], snap=None):
         kvals: The key values to look up.
         kcols: The key columns. Defaults to ["RID"].
         snap: Optional snapshot ID.
-
+
     Returns:
         The history data for the record.
-
+
     Raises:
         ValueError: If more than one row is returned.
     """
+    if kcols is None:
+        kcols = ["RID"]
+
     parts = {
         "cid": urlquote(cid),
         "sname": urlquote(sname),
@@ -46,7 +59,7 @@ def get_record_history(server, cid, sname, tname, kvals, kcols=["RID"], snap=None):
     path = "/ermrest/catalog/%(cid)s@%(snap)s/entity/%(sname)s:%(tname)s/%(filter)s"
 
     rows_found = []
-    snap2rows = {}
+    snap2rows: dict[str, dict[str, Any]] = {}
     while True:
         url = path % parts
         # sys.stderr.write("%s\n" % url)
@@ -67,12 +80,12 @@ def get_record_history(server, cid, sname, tname, kvals, kcols=["RID"], snap=None):
 
 
 # -- --------------------------------------------------------------------------------------
-def datetime_epoch_us(dt):
+def datetime_epoch_us(dt: datetime) -> int:
     """Convert datetime to epoch microseconds.
-
+
     Args:
         dt: The datetime object to convert.
-
+
     Returns:
         The epoch time in microseconds.
     """
@@ -84,12 +97,12 @@ def datetime_epoch_us(dt):
     #
 
 
-def iso_to_snap(iso_datetime):
+def iso_to_snap(iso_datetime: str) -> int:
     """Convert ISO datetime string to snapshot format.
-
+
     Args:
         iso_datetime: The ISO datetime string.
-
+
     Returns:
         The snapshot timestamp.
     """
@@ -97,12 +110,12 @@ def iso_to_snap(iso_datetime):
 
 
 # -- --------------------------------------------------------------------------------------
-def urlb32_encode(i):
+def urlb32_encode(i: int) -> str:
     """Encode an integer to URL-safe base32.
-
+
     Args:
         i: The integer to encode.
-
+
     Returns:
         The URL-safe base32 encoded string.
     """
deriva_ml/dataset/upload.py
@@ -39,20 +39,32 @@ import json
 import os
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 import regex as re
-from deriva.core import urlquote
-from deriva.core.ermrest_model import Table
-from deriva.core.hatrac_store import HatracStore
-from deriva.core.utils import hash_utils, mime_utils
-from deriva.transfer.upload.deriva_upload import GenericUploader
+
+# Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+import importlib
+_deriva_core = importlib.import_module("deriva.core")
+_ermrest_model = importlib.import_module("deriva.core.ermrest_model")
+_hatrac_store = importlib.import_module("deriva.core.hatrac_store")
+_hash_utils = importlib.import_module("deriva.core.utils.hash_utils")
+_mime_utils = importlib.import_module("deriva.core.utils.mime_utils")
+_deriva_upload = importlib.import_module("deriva.transfer.upload.deriva_upload")
+
+urlquote = _deriva_core.urlquote
+Table = _ermrest_model.Table
+HatracStore = _hatrac_store.HatracStore
+hash_utils = _hash_utils
+mime_utils = _mime_utils
+GenericUploader = _deriva_upload.GenericUploader
 from pydantic import ConfigDict, validate_call
 
 from deriva_ml.core.definitions import (
     RID,
     DerivaSystemColumns,
     FileUploadState,
+    UploadProgress,
     UploadState,
 )
 from deriva_ml.core.exceptions import DerivaMLException
@@ -89,7 +101,7 @@ def is_feature_dir(path: Path) -> Optional[re.Match]:
     return re.match(feature_table_dir_regex + "$", path.as_posix())
 
 
-def normalize_asset_dir(path: str) -> Optional[tuple[str, str]]:
+def normalize_asset_dir(path: str | Path) -> Optional[tuple[str, str]]:
     """Parse a path to an asset file and return the asset table name and file name.
 
     Args:
@@ -177,12 +189,16 @@ def table_path(prefix: Path | str, schema: str, table: str) -> Path:
     return path / f"{table}.csv"
 
 
-def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
+def asset_table_upload_spec(
+    model: DerivaModel, asset_table: str | Table, chunk_size: int | None = None
+):
     """Generate upload specification for an asset table.
 
     Args:
         model: The DerivaModel instance.
         asset_table: The asset table name or Table object.
+        chunk_size: Optional chunk size in bytes for hatrac uploads. If provided,
+            large files will be uploaded in chunks of this size.
 
     Returns:
         A dictionary containing the upload specification for the asset table.
@@ -197,6 +213,11 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
     asset_table = model.name_to_table(asset_table)
     schema = model.name_to_table(asset_table).schema.name
 
+    # Build hatrac_options with optional chunk_size
+    hatrac_options = {"versioned_urls": True}
+    if chunk_size is not None:
+        hatrac_options["chunk_size"] = chunk_size
+
     # Create upload specification
     spec = {
         # Upload assets into an asset table of an asset table.
@@ -211,7 +232,7 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
         "asset_type": "file",
         "target_table": [schema, asset_table.name],
         "checksum_types": ["sha256", "md5"],
-        "hatrac_options": {"versioned_urls": True},
+        "hatrac_options": hatrac_options,
         "hatrac_templates": {
             "hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}",
            "content-disposition": "filename*=UTF-8''{file_name}",
@@ -221,14 +242,27 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
     return spec
 
 
-def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
+def bulk_upload_configuration(
+    model: DerivaModel, chunk_size: int | None = None
+) -> dict[str, Any]:
     """Return an upload specification for deriva-ml
-    Arguments:
-        model: Model from which to generate the upload configuration
+
+    Args:
+        model: Model from which to generate the upload configuration.
+        chunk_size: Optional chunk size in bytes for hatrac uploads. If provided,
+            large files will be uploaded in chunks of this size.
     """
     asset_tables_with_metadata = [
-        asset_table_upload_spec(model=model, asset_table=t) for t in model.find_assets() if model.asset_metadata(t)
+        asset_table_upload_spec(model=model, asset_table=t, chunk_size=chunk_size)
+        for t in model.find_assets()
+        if model.asset_metadata(t)
     ]
+
+    # Build hatrac_options with optional chunk_size for non-metadata assets
+    hatrac_options = {"versioned_urls": True}
+    if chunk_size is not None:
+        hatrac_options["chunk_size"] = chunk_size
+
     return {
         "asset_mappings": asset_tables_with_metadata
         + [
@@ -244,7 +278,7 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
             "target_table": ["{schema}", "{asset_table}"],
             "file_pattern": asset_path_regex + "/" + asset_file_regex,  # Sets schema, asset_table, name, ext
             "checksum_types": ["sha256", "md5"],
-            "hatrac_options": {"versioned_urls": True},
+            "hatrac_options": hatrac_options,
             "hatrac_templates": {
                 "hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}",
                 "content-disposition": "filename*=UTF-8''{file_name}",
@@ -273,14 +307,42 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
     }
 
 
+# Default timeout for large file uploads in seconds
+# The requests timeout tuple is (connect_timeout, read_timeout), but this doesn't
+# cover write operations. We also need to set socket.setdefaulttimeout() for writes.
+DEFAULT_UPLOAD_TIMEOUT = (6, 600)
+
+# Socket timeout for write operations (in seconds)
+# This is needed because requests timeout only covers connect and read, not write.
+# For large chunk uploads, the socket write can take significant time.
+DEFAULT_SOCKET_TIMEOUT = 600.0
+
+
 @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
-def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, FileUploadState] | None:
+def upload_directory(
+    model: DerivaModel,
+    directory: Path | str,
+    progress_callback: Callable[[UploadProgress], None] | None = None,
+    max_retries: int = 3,
+    retry_delay: float = 5.0,
+    timeout: tuple[int, int] | None = None,
+    chunk_size: int | None = None,
+) -> dict[Any, FileUploadState] | None:
     """Upload assets from a directory. This routine assumes that the current upload specification includes a
     configuration for the specified directory. Every asset in the specified directory is uploaded
 
     Args:
         model: Model to upload assets to.
         directory: Directory containing the assets and tables to upload.
+        progress_callback: Optional callback function to receive upload progress updates.
+            Called with UploadProgress objects containing file information and progress.
+        max_retries: Maximum number of retry attempts for failed uploads (default: 3).
+        retry_delay: Initial delay in seconds between retries, doubles with each attempt (default: 5.0).
+        timeout: Tuple of (connect_timeout, read_timeout) in seconds. Default is (6, 600)
+            which allows up to 10 minutes for each chunk upload. Increase read_timeout for
+            very large files on slow connections.
+        chunk_size: Optional chunk size in bytes for hatrac uploads. If provided,
+            large files will be uploaded in chunks of this size.
 
     Returns:
         Results of the upload operation.
@@ -288,37 +350,162 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, FileUploadState] | None:
     Raises:
         DerivaMLException: If there is an issue with uploading the assets.
     """
+    import logging
+    import time
+
+    from deriva.core import DEFAULT_SESSION_CONFIG
+
+    logger = logging.getLogger("deriva_ml")
+
     directory = Path(directory)
     if not directory.is_dir():
         raise DerivaMLException("Directory does not exist")
 
+    # Track upload progress across files
+    # status_callback is called twice per file: once before upload starts, once after it completes
+    upload_state = {"completed_files": 0, "total_files": 0, "status_calls": 0}
+
+    # Count total files to upload
+    for root, dirs, files in os.walk(directory):
+        upload_state["total_files"] += len(files)
+
+    # Create wrapper callbacks for GenericUploader if a progress callback was provided
+    def file_callback(**kwargs) -> bool:
+        """Callback for per-chunk progress updates from GenericUploader.
+
+        The deriva GenericUploader passes kwargs with: completed, total, file_path, host, job_info.
+        Note: This callback is only invoked for large files (> 25MB) that use chunked uploads.
+        Small files are uploaded in a single request and this callback won't be called.
+        """
+        if progress_callback is not None:
+            file_path = kwargs.get("file_path", "")
+            completed_chunks = kwargs.get("completed", 0)
+            total_chunks = kwargs.get("total", 0)
+
+            progress = UploadProgress(
+                file_path=file_path,
+                file_name=Path(file_path).name if file_path else "",
+                bytes_completed=completed_chunks,
+                bytes_total=total_chunks,
+                percent_complete=(completed_chunks / total_chunks * 100) if total_chunks > 0 else 0,
+                phase="uploading_chunks",
+                message=f"Uploading large file: chunk {completed_chunks} of {total_chunks}",
+            )
+            progress_callback(progress)
+        return True  # Continue upload
+
+    def status_callback() -> None:
+        """Callback for per-file status updates from GenericUploader.
+
+        GenericUploader calls this twice per file: once before upload starts (odd calls)
+        and once after upload completes (even calls). We use even calls to track completed files.
+        """
+        if progress_callback is not None:
+            upload_state["status_calls"] += 1
+
+            # Even calls indicate file completion (after upload)
+            if upload_state["status_calls"] % 2 == 0:
+                upload_state["completed_files"] += 1
+
+            # Report progress with current file count
+            current_file = (upload_state["status_calls"] + 1) // 2  # 1-indexed current file
+            progress = UploadProgress(
+                phase="uploading",
+                message=f"Uploading file {current_file} of {upload_state['total_files']}",
+                percent_complete=(upload_state["completed_files"] / upload_state["total_files"] * 100)
+                if upload_state["total_files"] > 0
+                else 0,
+            )
+            progress_callback(progress)
+
+    def do_upload(uploader) -> dict[str, dict]:
+        """Perform the upload and return raw results."""
+        uploader.getUpdatedConfig()
+        uploader.scanDirectory(directory, purge_state=True)
+        return uploader.uploadFiles(
+            file_callback=file_callback if progress_callback else None,
+            status_callback=status_callback if progress_callback else None,
+        )
+
+    # Use provided timeout or default
+    upload_timeout = timeout if timeout is not None else DEFAULT_UPLOAD_TIMEOUT
+
     # Now upload the files by creating an upload spec and then calling the uploader.
     with TemporaryDirectory() as temp_dir:
         spec_file = Path(temp_dir) / "config.json"
         with spec_file.open("w+") as cfile:
-            json.dump(bulk_upload_configuration(model), cfile)
-        uploader = GenericUploader(
-            server={
-                "host": model.hostname,
-                "protocol": "https",
-                "catalog_id": model.catalog.catalog_id,
-            },
-            config_file=spec_file,
-        )
-        try:
-            uploader.getUpdatedConfig()
-            uploader.scanDirectory(directory, purge_state=True)
-            results = {
-                path: FileUploadState(
-                    state=UploadState(result["State"]),
-                    status=result["Status"],
-                    result=result["Result"],
+            json.dump(bulk_upload_configuration(model, chunk_size=chunk_size), cfile)
+
+        # Create session config with longer timeout for large file uploads
+        session_config = DEFAULT_SESSION_CONFIG.copy()
+        session_config["timeout"] = upload_timeout
+        logger.debug(f"Upload session config timeout: {session_config['timeout']}")
+
+        all_results = {}
+        attempt = 0
+        current_delay = retry_delay
+
+        while attempt <= max_retries:
+            uploader = GenericUploader(
+                server={
+                    "host": model.hostname,
+                    "protocol": "https",
+                    "catalog_id": model.catalog.catalog_id,
+                    "session": session_config,
+                },
+                config_file=spec_file,
+            )
+            try:
+                raw_results = do_upload(uploader)
+
+                # Process results and check for failures
+                failed_files = []
+                for path, result in raw_results.items():
+                    state = UploadState(result["State"])
+                    if state == UploadState.failed or result["Result"] is None:
+                        failed_files.append((path, result["Status"]))
+                    else:
+                        # Store successful results
+                        all_results[path] = FileUploadState(
+                            state=state,
+                            status=result["Status"],
+                            result=result["Result"],
+                        )
+
+                if not failed_files:
+                    # All uploads successful
+                    break
+
+                attempt += 1
+                if attempt > max_retries:
+                    # Final attempt failed, raise error with details
+                    error_details = "; ".join([f"{path}: {msg}" for path, msg in failed_files])
+                    raise DerivaMLException(
+                        f"Failed to upload {len(failed_files)} file(s) after {max_retries} retries: {error_details}"
+                    )
+
+                # Log retry attempt and wait before retrying
+                logger.warning(
+                    f"Upload failed for {len(failed_files)} file(s), retrying in {current_delay:.1f}s "
+                    f"(attempt {attempt}/{max_retries}): {[p for p, _ in failed_files]}"
                 )
-                for path, result in uploader.uploadFiles().items()
-            }
-        finally:
-            uploader.cleanup()
-    return results
+                if progress_callback:
+                    progress_callback(UploadProgress(
+                        phase="retrying",
+                        message=f"Retrying {len(failed_files)} failed upload(s) in {current_delay:.1f}s (attempt {attempt}/{max_retries})",
+                        percent_complete=0,
+                    ))
+
+                time.sleep(current_delay)
+                current_delay *= 2  # Exponential backoff
+
+                # Reset upload state for retry
+                upload_state["status_calls"] = 0
+
+            finally:
+                uploader.cleanup()
+
+    return all_results
 
 
 @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
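
Taken together, the new parameters make retry behavior and progress reporting configurable at the call site. A hypothetical usage sketch, assuming a connected DerivaModel instance named model and a prepared upload directory; the module path follows the file list above:

    from deriva_ml.core.definitions import UploadProgress
    from deriva_ml.dataset.upload import upload_directory

    def report(p: UploadProgress) -> None:
        # phase is "uploading", "uploading_chunks", or "retrying" per the callbacks above
        print(f"[{p.phase}] {p.message} ({p.percent_complete:.0f}%)")

    results = upload_directory(
        model,
        "assets/",                    # hypothetical directory
        progress_callback=report,
        max_retries=3,                # with retry_delay=5.0, waits 5s, 10s, 20s between attempts
        retry_delay=5.0,
        timeout=(6, 600),             # (connect, read) seconds per request
        chunk_size=8 * 1024 * 1024,   # 8 MiB hatrac chunks
    )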
@@ -347,7 +534,7 @@ def upload_asset(model: DerivaModel, file: Path | str, table: Table, **kwargs: Any
         server=model.catalog.deriva_server.server,
         credentials=model.catalog.deriva_server.credentials,
     )
-    md5_hashes = hash_utils.compute_file_hashes(file, ["md5"])["md5"]
+    md5_hashes = hash_utils.compute_file_hashes(file, frozenset(["md5"]))["md5"]
     sanitized_filename = urlquote(re.sub("[^a-zA-Z0-9_.-]", "_", md5_hashes[0] + "." + file_name))
     hatrac_path = f"{hatrac_path}{sanitized_filename}"