deriva-ml 1.17.11__py3-none-any.whl → 1.17.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -75,6 +75,26 @@ def __getattr__(name: str) -> type:
75
75
  from deriva_ml.schema.validation import validate_ml_schema
76
76
 
77
77
  return validate_ml_schema
78
+ elif name == "CatalogProvenance":
79
+ from deriva_ml.catalog.clone import CatalogProvenance
80
+
81
+ return CatalogProvenance
82
+ elif name == "CatalogCreationMethod":
83
+ from deriva_ml.catalog.clone import CatalogCreationMethod
84
+
85
+ return CatalogCreationMethod
86
+ elif name == "CloneDetails":
87
+ from deriva_ml.catalog.clone import CloneDetails
88
+
89
+ return CloneDetails
90
+ elif name == "get_catalog_provenance":
91
+ from deriva_ml.catalog.clone import get_catalog_provenance
92
+
93
+ return get_catalog_provenance
94
+ elif name == "set_catalog_provenance":
95
+ from deriva_ml.catalog.clone import set_catalog_provenance
96
+
97
+ return set_catalog_provenance
78
98
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
79
99
 
80
100
 
@@ -90,6 +110,12 @@ __all__ = [
90
110
  # Schema validation (lazy-loaded)
91
111
  "SchemaValidationReport",
92
112
  "validate_ml_schema",
113
+ # Catalog provenance (lazy-loaded)
114
+ "CatalogProvenance",
115
+ "CatalogCreationMethod",
116
+ "CloneDetails",
117
+ "get_catalog_provenance",
118
+ "set_catalog_provenance",
93
119
  # Exceptions
94
120
  "DerivaMLException",
95
121
  "DerivaMLInvalidTerm",
@@ -3,8 +3,13 @@
3
3
  from deriva_ml.catalog.clone import (
4
4
  AssetCopyMode,
5
5
  AssetFilter,
6
+ CatalogCreationMethod,
7
+ CatalogProvenance,
6
8
  CloneCatalogResult,
9
+ CloneDetails,
7
10
  clone_catalog,
11
+ get_catalog_provenance,
12
+ set_catalog_provenance,
8
13
  )
9
14
  from deriva_ml.catalog.localize import (
10
15
  LocalizeResult,
@@ -14,8 +19,13 @@ from deriva_ml.catalog.localize import (
14
19
  __all__ = [
15
20
  "AssetCopyMode",
16
21
  "AssetFilter",
22
+ "CatalogCreationMethod",
23
+ "CatalogProvenance",
17
24
  "CloneCatalogResult",
25
+ "CloneDetails",
18
26
  "LocalizeResult",
19
27
  "clone_catalog",
28
+ "get_catalog_provenance",
20
29
  "localize_assets",
30
+ "set_catalog_provenance",
21
31
  ]
@@ -18,13 +18,16 @@ all edge cases including circular dependencies and complex FK relationships.
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
+ import json
21
22
  import logging
22
23
  from dataclasses import dataclass, field
24
+ from datetime import datetime, timezone
23
25
  from enum import Enum
24
26
  from typing import Any
25
27
  from urllib.parse import quote as urlquote
26
28
 
27
29
  from deriva.core import DerivaServer, ErmrestCatalog, get_credential
30
+ from deriva.core.hatrac_store import HatracStore
28
31
 
29
32
  logger = logging.getLogger("deriva_ml")
30
33
 
@@ -49,6 +52,7 @@ class CloneIssueCategory(Enum):
49
52
  FK_VIOLATION = "fk_violation"
50
53
  FK_PRUNED = "fk_pruned" # FK was intentionally not applied
51
54
  POLICY_INCOHERENCE = "policy_incoherence"
55
+ INDEX_REBUILT = "index_rebuilt" # Index was dropped and rebuilt due to size limits
52
56
 
53
57
 
54
58
  class OrphanStrategy(Enum):
@@ -258,6 +262,26 @@ class AssetFilter:
258
262
  rids: list[str] | None = None
259
263
 
260
264
 
265
@dataclass
class TruncatedValue:
    """Audit entry for a single cell whose text was shortened while cloning.

    Attributes:
        table: Identifier of the table that holds the value.
        rid: RID of the affected row.
        column: Name of the column whose value was shortened.
        original_bytes: Size of the value before truncation, in bytes.
        truncated_bytes: Size of the value after truncation, in bytes.
    """

    table: str
    rid: str
    column: str
    original_bytes: int
    truncated_bytes: int

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dictionary keyed by field name."""
        keys = ("table", "rid", "column", "original_bytes", "truncated_bytes")
        return {key: getattr(self, key) for key in keys}
283
+
284
+
261
285
  @dataclass
262
286
  class CloneCatalogResult:
263
287
  """Result of a catalog clone operation."""
@@ -275,12 +299,652 @@ class CloneCatalogResult:
275
299
  orphan_rows_removed: int = 0
276
300
  orphan_rows_nullified: int = 0
277
301
  fkeys_pruned: int = 0
302
+ rows_skipped: int = 0
303
+ truncated_values: list[TruncatedValue] = field(default_factory=list)
278
304
  report: CloneReport | None = None
279
305
 
280
306
 
281
307
  # Clone state annotation URL (same as deriva-py)
282
308
  _clone_state_url = "tag:isrd.isi.edu,2018:clone-state"
283
309
 
310
+ # Catalog provenance annotation URL
311
+ _catalog_provenance_url = "tag:deriva-ml.org,2025:catalog-provenance"
312
+
313
+ # Pattern to detect btree index size errors
314
+ _BTREE_INDEX_ERROR_PATTERN = "index row size"
315
+ _BTREE_INDEX_NAME_PATTERN = r'for index "([^"]+)"'
316
+
317
+
318
class CatalogCreationMethod(Enum):
    """Enumerates the ways a catalog can have come into existence.

    The member values are the strings written into the provenance
    annotation's ``creation_method`` entry.
    """

    # Copied from an existing catalog
    CLONE = "clone"
    # Built programmatically (e.g., create_catalog)
    CREATE = "create"
    # Built from a schema definition
    SCHEMA = "schema"
    # Origin not recorded, or the catalog pre-dates provenance tracking
    UNKNOWN = "unknown"
325
+
326
+
327
@dataclass
class CloneDetails:
    """Details specific to cloned catalogs.

    Holds where the clone came from, which clone options were used, and the
    counters produced by the clone run. Instances round-trip through
    ``to_dict`` / ``from_dict`` as plain dictionaries.

    Attributes:
        source_hostname: Hostname of the catalog that was cloned.
        source_catalog_id: ID of the catalog that was cloned.
        source_snapshot: Snapshot identifier of the source, if any.
        source_schema_url: Hatrac URL to the source schema JSON, if uploaded.
        orphan_strategy: Orphan-handling strategy, stored as a string.
        truncate_oversized: Whether oversized values were truncated.
        prune_hidden_fkeys: Whether hidden foreign keys were pruned.
        schema_only: Whether only the schema (no data) was cloned.
        asset_mode: Asset copy mode, stored as a string.
        exclude_schemas: Schemas excluded from the clone.
        exclude_objects: Objects excluded from the clone.
        rows_copied: Number of rows copied.
        rows_skipped: Number of rows skipped.
        truncated_count: Number of truncated values.
        orphan_rows_removed: Number of orphan rows removed.
        orphan_rows_nullified: Number of orphan rows nullified.
        fkeys_pruned: Number of foreign keys pruned.
    """

    source_hostname: str
    source_catalog_id: str
    source_snapshot: str | None = None
    source_schema_url: str | None = None  # Hatrac URL to source schema JSON
    orphan_strategy: str = "fail"
    truncate_oversized: bool = False
    prune_hidden_fkeys: bool = False
    schema_only: bool = False
    asset_mode: str = "refs"
    exclude_schemas: list[str] = field(default_factory=list)
    exclude_objects: list[str] = field(default_factory=list)
    rows_copied: int = 0
    rows_skipped: int = 0
    truncated_count: int = 0
    orphan_rows_removed: int = 0
    orphan_rows_nullified: int = 0
    fkeys_pruned: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dictionary.

        The list-valued entries are copies, so mutating the returned
        dictionary never alters this instance.
        """
        return {
            "source_hostname": self.source_hostname,
            "source_catalog_id": self.source_catalog_id,
            "source_snapshot": self.source_snapshot,
            "source_schema_url": self.source_schema_url,
            "orphan_strategy": self.orphan_strategy,
            "truncate_oversized": self.truncate_oversized,
            "prune_hidden_fkeys": self.prune_hidden_fkeys,
            "schema_only": self.schema_only,
            "asset_mode": self.asset_mode,
            # Copy the lists (and tolerate None) to avoid aliasing instance state.
            "exclude_schemas": list(self.exclude_schemas or []),
            "exclude_objects": list(self.exclude_objects or []),
            "rows_copied": self.rows_copied,
            "rows_skipped": self.rows_skipped,
            "truncated_count": self.truncated_count,
            "orphan_rows_removed": self.orphan_rows_removed,
            "orphan_rows_nullified": self.orphan_rows_nullified,
            "fkeys_pruned": self.fkeys_pruned,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CloneDetails":
        """Build an instance from a dictionary such as ``to_dict`` produces.

        Missing keys fall back to the field defaults (empty string for the
        two required source fields). A missing or ``None`` exclusion list
        becomes an empty list, and present lists are copied so the new
        instance does not share them with ``data``.
        """
        return cls(
            source_hostname=data.get("source_hostname", ""),
            source_catalog_id=data.get("source_catalog_id", ""),
            source_snapshot=data.get("source_snapshot"),
            source_schema_url=data.get("source_schema_url"),
            orphan_strategy=data.get("orphan_strategy", "fail"),
            truncate_oversized=data.get("truncate_oversized", False),
            prune_hidden_fkeys=data.get("prune_hidden_fkeys", False),
            schema_only=data.get("schema_only", False),
            asset_mode=data.get("asset_mode", "refs"),
            exclude_schemas=list(data.get("exclude_schemas") or []),
            exclude_objects=list(data.get("exclude_objects") or []),
            rows_copied=data.get("rows_copied", 0),
            rows_skipped=data.get("rows_skipped", 0),
            truncated_count=data.get("truncated_count", 0),
            orphan_rows_removed=data.get("orphan_rows_removed", 0),
            orphan_rows_nullified=data.get("orphan_rows_nullified", 0),
            fkeys_pruned=data.get("fkeys_pruned", 0),
        )
391
+
392
+
393
@dataclass
class CatalogProvenance:
    """Record of how a catalog came to exist.

    The record is kept as a catalog-level annotation and describes how the
    catalog was created, by whom, and with which parameters. It covers
    cloned catalogs as well as catalogs created by other means.

    Attributes:
        creation_method: How the catalog was created (clone, create, schema, unknown).
        created_at: ISO timestamp when the catalog was created.
        created_by: User or system that created the catalog (Globus identity or description).
        hostname: Hostname where the catalog resides.
        catalog_id: Catalog ID.
        name: Human-readable name for the catalog.
        description: Description of the catalog's purpose.
        workflow_url: URL to the workflow/script that created the catalog (e.g., GitHub URL).
        workflow_version: Version of the workflow (e.g., git commit hash, package version).
        clone_details: If cloned, detailed information about the clone operation.
    """

    creation_method: CatalogCreationMethod
    created_at: str
    hostname: str
    catalog_id: str
    created_by: str | None = None
    name: str | None = None
    description: str | None = None
    workflow_url: str | None = None
    workflow_version: str | None = None
    clone_details: CloneDetails | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dictionary.

        The creation method is stored by its enum value. ``clone_details``
        is included only when it is set.
        """
        payload: dict[str, Any] = {"creation_method": self.creation_method.value}
        plain_fields = (
            "created_at",
            "hostname",
            "catalog_id",
            "created_by",
            "name",
            "description",
            "workflow_url",
            "workflow_version",
        )
        for key in plain_fields:
            payload[key] = getattr(self, key)
        if self.clone_details:
            payload["clone_details"] = self.clone_details.to_dict()
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CatalogProvenance":
        """Rebuild a provenance record from its dictionary form.

        A missing or unrecognized ``creation_method`` maps to ``UNKNOWN``,
        which also covers legacy records that lack the key.
        """
        raw_details = data.get("clone_details")
        details = CloneDetails.from_dict(raw_details) if raw_details else None

        try:
            method = CatalogCreationMethod(data.get("creation_method", "unknown"))
        except ValueError:
            method = CatalogCreationMethod.UNKNOWN

        return cls(
            creation_method=method,
            created_at=data.get("created_at", ""),
            hostname=data.get("hostname", ""),
            catalog_id=data.get("catalog_id", ""),
            created_by=data.get("created_by"),
            name=data.get("name"),
            description=data.get("description"),
            workflow_url=data.get("workflow_url"),
            workflow_version=data.get("workflow_version"),
            clone_details=details,
        )

    @property
    def is_clone(self) -> bool:
        """Return True if this catalog was cloned from another catalog."""
        if self.creation_method != CatalogCreationMethod.CLONE:
            return False
        return self.clone_details is not None
471
+
472
+
473
def _upload_source_schema(
    hostname: str,
    catalog_id: str,
    schema_json: dict[str, Any],
    credential: dict | None,
) -> str | None:
    """Upload source schema JSON to Hatrac.

    The schema is stored as ``source-schema.json`` under
    ``/hatrac/catalog/<catalog_id>/provenance``. The upload is best effort:
    any failure is logged as a warning and reported as ``None``.

    Args:
        hostname: Destination catalog hostname.
        catalog_id: Destination catalog ID.
        schema_json: The source schema as a dictionary.
        credential: Credential for Hatrac access. When falsy, the credential
            is looked up with ``get_credential(hostname)``.

    Returns:
        Hatrac URL for the uploaded schema, or None if upload failed.
    """
    import io  # local import: `io` is not among this module's top-level imports

    try:
        cred = credential or get_credential(hostname)
        hatrac = HatracStore("https", hostname, credentials=cred)

        # Create namespace for catalog provenance metadata if it doesn't exist
        namespace = f"/hatrac/catalog/{catalog_id}/provenance"
        try:
            hatrac.create_namespace(namespace, parents=True)
        except Exception as ns_error:
            # Namespace may already exist; keep going and let the upload
            # itself surface any real problem.
            logger.debug(f"Could not create Hatrac namespace {namespace}: {ns_error}")

        # Upload schema JSON
        schema_bytes = json.dumps(schema_json, indent=2).encode("utf-8")
        object_path = f"{namespace}/source-schema.json"

        # put_obj takes a filename or a seekable file-like object as its data
        # argument, so the in-memory bytes are wrapped in a BytesIO instead of
        # being passed directly (raw bytes would be treated as a file path).
        url = hatrac.put_obj(
            object_path,
            io.BytesIO(schema_bytes),
            content_type="application/json",
        )

        logger.info(f"Uploaded source schema to {url}")
        return url

    except Exception as e:
        logger.warning(f"Failed to upload source schema to Hatrac: {e}")
        return None
517
+
518
+
519
def _set_catalog_provenance(
    dst_catalog: ErmrestCatalog,
    provenance: CatalogProvenance,
) -> None:
    """Write the provenance record to the catalog as an annotation.

    Best effort: a failure is logged as a warning and not raised.

    Args:
        dst_catalog: Catalog connection.
        provenance: Catalog provenance information.
    """
    annotation_path = f"/annotation/{urlquote(_catalog_provenance_url)}"
    try:
        dst_catalog.put(annotation_path, json=provenance.to_dict())
        logger.info("Set catalog provenance annotation")
    except Exception as e:
        logger.warning(f"Failed to set catalog provenance annotation: {e}")
537
+
538
+
539
def set_catalog_provenance(
    catalog: ErmrestCatalog,
    name: str | None = None,
    description: str | None = None,
    workflow_url: str | None = None,
    workflow_version: str | None = None,
    creation_method: CatalogCreationMethod = CatalogCreationMethod.CREATE,
) -> CatalogProvenance:
    """Record provenance for a catalog that was just created.

    Call this after creating a catalog programmatically to note how and why
    it was created; it plays the role of workflow metadata at the catalog
    level. The record is stamped with the current UTC time. The creating
    user and the hostname are looked up from the catalog on a best-effort
    basis: if either lookup fails, ``created_by`` stays ``None`` and
    ``hostname`` stays empty.

    Args:
        catalog: The catalog to annotate.
        name: Human-readable name for the catalog.
        description: Description of the catalog's purpose.
        workflow_url: URL to the workflow/script that created the catalog
            (e.g., GitHub URL, notebook URL).
        workflow_version: Version of the workflow (e.g., git commit hash,
            package version, or semantic version).
        creation_method: How the catalog was created. Defaults to CREATE.

    Returns:
        The CatalogProvenance object that was set.

    Example:
        >>> from deriva_ml.catalog import set_catalog_provenance, CatalogCreationMethod
        >>> provenance = set_catalog_provenance(
        ...     catalog,
        ...     name="CIFAR-10 Training Catalog",
        ...     description="Catalog for CIFAR-10 image classification experiments",
        ...     workflow_url="https://github.com/org/repo/blob/main/setup_catalog.py",
        ...     workflow_version="v1.2.0",
        ... )
    """
    # Who is creating the catalog, taken from the session's client entry.
    creator = None
    try:
        session = catalog.get("/authn/session").json()
        if session and "client" in session:
            who = session["client"]
            creator = who.get("display_name") or who.get("id")
    except Exception:
        pass

    # Where the catalog lives; only assigned when the lookup succeeds.
    host = ""
    try:
        host = catalog.get("/").json().get("meta", {}).get("host", "")
    except Exception:
        pass
    cat_id = str(catalog.catalog_id)

    record = CatalogProvenance(
        creation_method=creation_method,
        created_at=datetime.now(timezone.utc).isoformat(),
        hostname=host,
        catalog_id=cat_id,
        created_by=creator,
        name=name,
        description=description,
        workflow_url=workflow_url,
        workflow_version=workflow_version,
    )

    _set_catalog_provenance(catalog, record)
    return record
610
+
611
+
612
def get_catalog_provenance(catalog: ErmrestCatalog) -> CatalogProvenance | None:
    """Read the provenance record stored on a catalog, if there is one.

    The record covers how the catalog was created (clone, create, schema),
    who created it, and any workflow information. Errors while reading the
    catalog model or parsing the record are logged at debug level and
    reported as ``None``.

    Args:
        catalog: The catalog to check.

    Returns:
        CatalogProvenance if available, None otherwise.
    """
    try:
        annotations = catalog.getCatalogModel().annotations
        stored = annotations.get(_catalog_provenance_url)
        if not stored:
            return None
        return CatalogProvenance.from_dict(stored)
    except Exception as e:
        logger.debug(f"Could not get catalog provenance: {e}")
        return None
634
+
635
+
636
def _parse_index_error(error_msg: str) -> tuple[str | None, str | None]:
    """Parse a btree index size error to extract index name and column.

    Args:
        error_msg: The error message from ERMrest/PostgreSQL.

    Returns:
        Tuple of (index_name, column_name) if this is an index size error,
        (None, None) otherwise. ``column_name`` is None when the index name
        does not follow the ``table__column_idx`` pattern.
    """
    import re

    if _BTREE_INDEX_ERROR_PATTERN not in error_msg:
        return None, None

    # Extract index name from error message
    match = re.search(_BTREE_INDEX_NAME_PATTERN, error_msg)
    if match is None:
        return None, None

    index_name = match.group(1)

    # Try to extract column name from index name (common pattern: table__column_idx)
    # e.g., "dataset__keywords_idx" -> "keywords"
    suffix = "_idx"
    if "__" in index_name and index_name.endswith(suffix):
        column_name = index_name.rsplit("__", 1)[1]
        # Strip only the trailing suffix; a column name that itself contains
        # "_idx" (e.g. "a_idx_b") must stay intact.
        if column_name.endswith(suffix):
            column_name = column_name[: -len(suffix)]
        return index_name, column_name

    return index_name, None
667
+
668
+
669
+
670
+
671
def _copy_table_data_with_retry(
    src_catalog: ErmrestCatalog,
    dst_catalog: ErmrestCatalog,
    sname: str,
    tname: str,
    page_size: int,
    report: "CloneReport",
    deferred_indexes: dict[str, list[dict]],
    truncate_oversized: bool = False,
) -> tuple[int, int, list[TruncatedValue]]:
    """Copy data for a single table with retry logic for index errors.

    Rows are read from the source in RID order, one page at a time, and
    posted to the destination as a batch. If a batch fails with a btree
    index size error, this function will:
    1. Detect the problematic index and column
    2. Switch to row-by-row insertion for that page and all later pages
    3. Either truncate oversized values (if truncate_oversized=True) or skip rows
    4. Record skipped/truncated rows in the report

    In row-by-row mode a row that fails for any other reason is also skipped
    and counted. A truncation is recorded only for a row that was actually
    inserted, and only for the values that were inserted.

    Args:
        src_catalog: Source catalog connection.
        dst_catalog: Destination catalog connection.
        sname: Schema name.
        tname: Table name.
        page_size: Number of rows per page.
        report: Clone report for recording issues.
        deferred_indexes: Accepted for interface compatibility; this
            function neither reads nor modifies it.
        truncate_oversized: If True, truncate oversized values instead of skipping rows.

    Returns:
        Tuple of (rows_copied, rows_skipped, truncated_values).
        rows_copied is -1 if the copy failed entirely (a page could not be
        read, or a batch write failed for a reason other than index size).
    """
    tname_uri = f"{urlquote(sname)}:{urlquote(tname)}"
    table_key = f"{sname}:{tname}"
    insert_path = f"/entity/{tname_uri}?nondefaults=RID,RCT,RCB"

    # Maximum safe size for btree index values (with margin below 2704 limit)
    MAX_INDEX_VALUE_BYTES = 2600
    TRUNCATE_SUFFIX = "...[TRUNCATED]"
    # Bytes of original text that fit once the suffix is appended.
    payload_budget = MAX_INDEX_VALUE_BYTES - len(TRUNCATE_SUFFIX.encode("utf-8"))

    last = None
    table_rows = 0
    rows_skipped = 0
    truncated_values: list[TruncatedValue] = []
    row_by_row_mode = False
    problematic_index = None
    problematic_column = None

    def truncate_row_values(row: dict, column: str | None) -> tuple[dict, list[TruncatedValue]]:
        """Return a copy of ``row`` with oversized text shortened, plus the records.

        Only ``column`` is examined when it is given; otherwise every column is.
        """
        truncations: list[TruncatedValue] = []
        modified_row = row.copy()
        rid = row.get("RID", "unknown")

        columns_to_check = [column] if column else list(row.keys())
        for col in columns_to_check:
            value = modified_row.get(col)
            if not isinstance(value, str):
                continue
            encoded = value.encode("utf-8")
            if len(encoded) <= MAX_INDEX_VALUE_BYTES:
                continue

            # Cut on the byte budget; errors="ignore" drops a multi-byte
            # character that the cut would otherwise split in half.
            kept = encoded[:payload_budget].decode("utf-8", errors="ignore")
            truncated = kept + TRUNCATE_SUFFIX
            truncated_size = len(truncated.encode("utf-8"))

            modified_row[col] = truncated
            truncations.append(TruncatedValue(
                table=table_key,
                rid=str(rid),
                column=col,
                original_bytes=len(encoded),
                truncated_bytes=truncated_size,
            ))
            logger.debug(
                f"Truncated {table_key}.{col} for RID {rid}: "
                f"{len(encoded)} -> {truncated_size} bytes"
            )

        return modified_row, truncations

    def insert_single_row(row: dict) -> bool:
        """Insert one row, truncating if allowed; return True when it was stored."""
        candidate = row
        pending: list[TruncatedValue] = []

        # If truncation is enabled, shorten the known problematic column first
        if truncate_oversized and problematic_column:
            candidate, pending = truncate_row_values(row, problematic_column)

        try:
            dst_catalog.post(insert_path, json=[candidate])
        except Exception as row_error:
            if _BTREE_INDEX_ERROR_PATTERN not in str(row_error):
                # Different error - log and skip
                logger.debug(f"Skipping row in {table_key}: {row_error}")
                return False

            if truncate_oversized:
                # Second attempt: shorten every text column of the original row
                candidate, pending = truncate_row_values(row, None)
                try:
                    dst_catalog.post(insert_path, json=[candidate])
                except Exception:
                    pass  # Fall through to skip
                else:
                    truncated_values.extend(pending)
                    return True

            rid = row.get("RID", "unknown")
            logger.debug(f"Skipping row {rid} in {table_key} due to index size limit")
            return False

        truncated_values.extend(pending)
        return True

    while True:
        after_clause = f"@after({urlquote(last)})" if last else ""
        try:
            page = src_catalog.get(
                f"/entity/{tname_uri}@sort(RID){after_clause}?limit={page_size}"
            ).json()
        except Exception as e:
            logger.warning(f"Failed to read from {sname}:{tname}: {e}")
            return -1, rows_skipped, truncated_values

        if not page:
            break

        if not row_by_row_mode:
            # Normal batch mode
            try:
                dst_catalog.post(insert_path, json=page)
            except Exception as e:
                # Check if this is a btree index size error
                index_name, column_name = _parse_index_error(str(e))
                if not index_name:
                    logger.warning(f"Failed to write to {sname}:{tname}: {e}")
                    return -1, rows_skipped, truncated_values

                action_desc = (
                    "Values will be truncated"
                    if truncate_oversized
                    else "Rows with oversized values will be skipped"
                )
                logger.info(
                    f"Detected btree index size error for '{index_name}' on {table_key}. "
                    f"Switching to row-by-row mode. {action_desc}."
                )
                problematic_index = index_name
                problematic_column = column_name
                row_by_row_mode = True

                # Record the issue
                report.add_issue(CloneIssue(
                    severity=CloneIssueSeverity.WARNING,
                    category=CloneIssueCategory.INDEX_REBUILT,
                    message=f"Index '{index_name}' has oversized values, using row-by-row mode",
                    table=table_key,
                    details=f"Column '{column_name}' has values exceeding btree 2704 byte limit",
                    action=action_desc,
                ))
                # Fall through: retry this same page row by row below.
            else:
                table_rows += len(page)
                last = page[-1]["RID"]
                continue

        # Row-by-row mode: insert rows one at a time, handling oversized values
        for row in page:
            if insert_single_row(row):
                table_rows += 1
            else:
                rows_skipped += 1
        last = page[-1]["RID"]

    # Report skipped rows
    if rows_skipped > 0:
        report.add_issue(CloneIssue(
            severity=CloneIssueSeverity.WARNING,
            category=CloneIssueCategory.DATA_INTEGRITY,
            message=f"Skipped {rows_skipped} rows due to index size limits",
            table=table_key,
            details=f"Index '{problematic_index}' on column '{problematic_column}'",
            action="These rows have values too large for btree index (>2704 bytes)",
            row_count=rows_skipped,
        ))
        logger.warning(f"Skipped {rows_skipped} rows in {table_key} due to index size limits")

    # Report truncated values
    if truncated_values:
        report.add_issue(CloneIssue(
            severity=CloneIssueSeverity.INFO,
            category=CloneIssueCategory.DATA_INTEGRITY,
            message=f"Truncated {len(truncated_values)} values to fit index size limits",
            table=table_key,
            details=f"Values in column '{problematic_column}' were truncated to <{MAX_INDEX_VALUE_BYTES} bytes",
            action="Original data was preserved with '[TRUNCATED]' suffix",
            row_count=len(truncated_values),
        ))
        logger.info(f"Truncated {len(truncated_values)} values in {table_key}")

    return table_rows, rows_skipped, truncated_values
923
+
924
+
925
+
926
+
927
def _rebuild_deferred_indexes(
    dst_catalog: ErmrestCatalog,
    deferred_indexes: dict[str, list[dict]],
    report: "CloneReport",
) -> None:
    """Log how many index issues were collected during the data copy.

    Despite its name, this function rebuilds nothing: ERMrest doesn't provide
    direct index management, so the issues can only be surfaced for manual
    follow-up. It returns immediately when nothing was collected.

    Args:
        dst_catalog: Destination catalog.
        deferred_indexes: Dict of table -> list of index definitions with issues.
        report: Clone report.
    """
    if not deferred_indexes:
        return

    issue_count = sum(len(entries) for entries in deferred_indexes.values())
    logger.info(f"Reporting {issue_count} index issues...")
947
+
284
948
 
285
949
  def clone_catalog(
286
950
  source_hostname: str,
@@ -300,6 +964,7 @@ def clone_catalog(
300
964
  reinitialize_dataset_versions: bool = True,
301
965
  orphan_strategy: OrphanStrategy = OrphanStrategy.FAIL,
302
966
  prune_hidden_fkeys: bool = False,
967
+ truncate_oversized: bool = False,
303
968
  ) -> CloneCatalogResult:
304
969
  """Clone a catalog with robust handling of policy-induced FK violations.
305
970
 
@@ -336,9 +1001,18 @@ def clone_catalog(
336
1001
  prune_hidden_fkeys: If True, skip FKs where referenced columns have
337
1002
  "select": null rights (indicating potentially hidden data). This
338
1003
  prevents FK violations but degrades schema structure.
1004
+ truncate_oversized: If True, automatically truncate text values that
1005
+ exceed PostgreSQL's btree index size limit (2704 bytes). Truncated
1006
+ values will have "...[TRUNCATED]" appended. If False (default),
1007
+ rows with oversized values are skipped. All truncations are recorded
1008
+ in the result's truncated_values list.
339
1009
 
340
1010
  Returns:
341
- CloneCatalogResult with details of the cloned catalog.
1011
+ CloneCatalogResult with details of the cloned catalog, including:
1012
+ - truncated_values: List of TruncatedValue records for any values
1013
+ that were truncated due to index size limits.
1014
+ - rows_skipped: Count of rows skipped due to index size limits
1015
+ (when truncate_oversized=False).
342
1016
 
343
1017
  Raises:
344
1018
  ValueError: If invalid parameters or FK violations with FAIL strategy.
@@ -372,6 +1046,9 @@ def clone_catalog(
372
1046
  src_server = DerivaServer("https", source_hostname, credentials=src_cred)
373
1047
  src_catalog = src_server.connect_ermrest(source_catalog_id)
374
1048
 
1049
+ # Capture source schema for provenance before any modifications
1050
+ source_schema_json = src_catalog.get("/schema").json()
1051
+
375
1052
  # Connect to destination and create new catalog
376
1053
  if is_same_server:
377
1054
  dst_cred = src_cred
@@ -387,8 +1064,15 @@ def clone_catalog(
387
1064
 
388
1065
  report = CloneReport()
389
1066
 
1067
+ # Track truncated values
1068
+ truncated_values: list[TruncatedValue] = []
1069
+ rows_skipped = 0
1070
+
1071
+ # Record clone timestamp
1072
+ clone_timestamp = datetime.now(timezone.utc).isoformat()
1073
+
390
1074
  # Perform the three-stage clone
391
- orphan_rows_removed, orphan_rows_nullified, fkeys_pruned = _clone_three_stage(
1075
+ orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, truncated_values = _clone_three_stage(
392
1076
  src_catalog=src_catalog,
393
1077
  dst_catalog=dst_catalog,
394
1078
  copy_data=not schema_only,
@@ -398,6 +1082,7 @@ def clone_catalog(
398
1082
  exclude_objects=exclude_objects or [],
399
1083
  orphan_strategy=orphan_strategy,
400
1084
  prune_hidden_fkeys=prune_hidden_fkeys,
1085
+ truncate_oversized=truncate_oversized,
401
1086
  report=report,
402
1087
  )
403
1088
 
@@ -412,9 +1097,66 @@ def clone_catalog(
412
1097
  orphan_rows_removed=orphan_rows_removed,
413
1098
  orphan_rows_nullified=orphan_rows_nullified,
414
1099
  fkeys_pruned=fkeys_pruned,
1100
+ rows_skipped=rows_skipped,
1101
+ truncated_values=truncated_values,
415
1102
  report=report,
416
1103
  )
417
1104
 
1105
+ # Upload source schema to Hatrac and set catalog provenance
1106
+ source_schema_url = _upload_source_schema(
1107
+ hostname=effective_dest_hostname,
1108
+ catalog_id=result.catalog_id,
1109
+ schema_json=source_schema_json,
1110
+ credential=dst_cred,
1111
+ )
1112
+
1113
+ # Calculate total rows copied from report
1114
+ total_rows_copied = sum(report.tables_restored.values())
1115
+
1116
+ # Try to get current user identity
1117
+ created_by = None
1118
+ try:
1119
+ session_info = dst_catalog.get("/authn/session").json()
1120
+ if session_info and "client" in session_info:
1121
+ client = session_info["client"]
1122
+ created_by = client.get("display_name") or client.get("id")
1123
+ except Exception:
1124
+ pass
1125
+
1126
+ # Create clone details
1127
+ clone_details = CloneDetails(
1128
+ source_hostname=source_hostname,
1129
+ source_catalog_id=source_catalog_id,
1130
+ source_snapshot=source_snapshot,
1131
+ source_schema_url=source_schema_url,
1132
+ orphan_strategy=orphan_strategy.value,
1133
+ truncate_oversized=truncate_oversized,
1134
+ prune_hidden_fkeys=prune_hidden_fkeys,
1135
+ schema_only=schema_only,
1136
+ asset_mode=asset_mode.value,
1137
+ exclude_schemas=exclude_schemas or [],
1138
+ exclude_objects=exclude_objects or [],
1139
+ rows_copied=total_rows_copied,
1140
+ rows_skipped=rows_skipped,
1141
+ truncated_count=len(truncated_values),
1142
+ orphan_rows_removed=orphan_rows_removed,
1143
+ orphan_rows_nullified=orphan_rows_nullified,
1144
+ fkeys_pruned=fkeys_pruned,
1145
+ )
1146
+
1147
+ # Create and set catalog provenance annotation
1148
+ provenance = CatalogProvenance(
1149
+ creation_method=CatalogCreationMethod.CLONE,
1150
+ created_at=clone_timestamp,
1151
+ hostname=effective_dest_hostname,
1152
+ catalog_id=result.catalog_id,
1153
+ created_by=created_by,
1154
+ name=alias or f"Clone of {source_catalog_id}",
1155
+ description=f"Cloned from {source_hostname}:{source_catalog_id}",
1156
+ clone_details=clone_details,
1157
+ )
1158
+ _set_catalog_provenance(dst_catalog, provenance)
1159
+
418
1160
  # Post-clone operations
419
1161
  result = _post_clone_operations(
420
1162
  result=result,
@@ -442,11 +1184,12 @@ def _clone_three_stage(
442
1184
  exclude_objects: list[str],
443
1185
  orphan_strategy: OrphanStrategy,
444
1186
  prune_hidden_fkeys: bool,
1187
+ truncate_oversized: bool,
445
1188
  report: CloneReport,
446
- ) -> tuple[int, int, int]:
1189
+ ) -> tuple[int, int, int, int, list[TruncatedValue]]:
447
1190
  """Perform three-stage catalog cloning.
448
1191
 
449
- Returns: (orphan_rows_removed, orphan_rows_nullified, fkeys_pruned)
1192
+ Returns: (orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, truncated_values)
450
1193
  """
451
1194
  src_model = src_catalog.getCatalogModel()
452
1195
 
@@ -584,6 +1327,10 @@ def _clone_three_stage(
584
1327
 
585
1328
  # Stage 2: Copy data
586
1329
  total_rows = 0
1330
+ total_rows_skipped = 0
1331
+ all_truncated_values: list[TruncatedValue] = []
1332
+ deferred_indexes: dict[str, list[dict]] = {} # Track indexes dropped for later rebuild
1333
+
587
1334
  if copy_data:
588
1335
  logger.info("Stage 2: Copying data...")
589
1336
  page_size = 10000
@@ -592,40 +1339,29 @@ def _clone_three_stage(
592
1339
  if state != 1:
593
1340
  continue
594
1341
 
595
- tname_uri = f"{urlquote(sname)}:{urlquote(tname)}"
596
- logger.debug(f"Copying data for {sname}:{tname}")
597
-
598
- last = None
599
- table_rows = 0
600
-
601
- while True:
602
- after_clause = f"@after({urlquote(last)})" if last else ""
603
- try:
604
- page = src_catalog.get(
605
- f"/entity/{tname_uri}@sort(RID){after_clause}?limit={page_size}"
606
- ).json()
607
- except Exception as e:
608
- logger.warning(f"Failed to read from {sname}:{tname}: {e}")
609
- report.tables_failed.append(f"{sname}:{tname}")
610
- break
1342
+ table_key = f"{sname}:{tname}"
1343
+ logger.debug(f"Copying data for {table_key}")
1344
+
1345
+ # Use the new copy function with index error handling
1346
+ table_rows, rows_skipped, truncated = _copy_table_data_with_retry(
1347
+ src_catalog=src_catalog,
1348
+ dst_catalog=dst_catalog,
1349
+ sname=sname,
1350
+ tname=tname,
1351
+ page_size=page_size,
1352
+ report=report,
1353
+ deferred_indexes=deferred_indexes,
1354
+ truncate_oversized=truncate_oversized,
1355
+ )
611
1356
 
612
- if page:
613
- try:
614
- dst_catalog.post(
615
- f"/entity/{tname_uri}?nondefaults=RID,RCT,RCB",
616
- json=page
617
- )
618
- last = page[-1]['RID']
619
- table_rows += len(page)
620
- except Exception as e:
621
- logger.warning(f"Failed to write to {sname}:{tname}: {e}")
622
- report.tables_failed.append(f"{sname}:{tname}")
623
- break
624
- else:
625
- break
1357
+ total_rows_skipped += rows_skipped
1358
+ all_truncated_values.extend(truncated)
626
1359
 
627
- if f"{sname}:{tname}" not in report.tables_failed:
628
- report.tables_restored[f"{sname}:{tname}"] = table_rows
1360
+ if table_rows < 0:
1361
+ # Copy failed
1362
+ report.tables_failed.append(table_key)
1363
+ else:
1364
+ report.tables_restored[table_key] = table_rows
629
1365
  total_rows += table_rows
630
1366
 
631
1367
  # Mark complete
@@ -639,6 +1375,10 @@ def _clone_three_stage(
639
1375
 
640
1376
  logger.info(f"Stage 2 complete: {total_rows} rows copied")
641
1377
 
1378
+ # Rebuild any indexes that were dropped during data copy
1379
+ if deferred_indexes:
1380
+ _rebuild_deferred_indexes(dst_catalog, deferred_indexes, report)
1381
+
642
1382
  # Stage 3: Apply foreign keys
643
1383
  logger.info("Stage 3: Applying foreign keys...")
644
1384
  orphan_rows_removed = 0
@@ -841,7 +1581,7 @@ def _clone_three_stage(
841
1581
  if copy_annotations or copy_policy:
842
1582
  _copy_configuration(src_model, dst_catalog, copy_annotations, copy_policy, exclude_schemas, excluded_tables)
843
1583
 
844
- return orphan_rows_removed, orphan_rows_nullified, fkeys_pruned
1584
+ return orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, total_rows_skipped, all_truncated_values
845
1585
 
846
1586
 
847
1587
  def _identify_orphan_values(
deriva_ml/core/base.py CHANGED
@@ -71,6 +71,7 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
71
71
  ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
72
72
 
73
73
  if TYPE_CHECKING:
74
+ from deriva_ml.catalog.clone import CatalogProvenance
74
75
  from deriva_ml.execution.execution import Execution
75
76
  from deriva_ml.model.catalog import DerivaModel
76
77
 
@@ -479,6 +480,33 @@ class DerivaML(
479
480
  except DerivaMLException as _e:
480
481
  raise DerivaMLException("Entity RID does not exist")
481
482
 
483
@property
def catalog_provenance(self) -> "CatalogProvenance | None":
    """Provenance record attached to this catalog, if one has been set.

    Read-only accessor: passes ``self.catalog`` to
    ``deriva_ml.catalog.clone.get_catalog_provenance`` and returns the
    result unchanged.

    The provenance describes how the catalog was created (clone, create,
    schema), who created it, when, and any workflow information. When the
    catalog was produced by a clone operation, the specifics of that
    operation are exposed through the ``clone_details`` attribute.

    Returns:
        CatalogProvenance if available, None otherwise.

    Example:
        >>> ml = DerivaML('localhost', '45')
        >>> prov = ml.catalog_provenance
        >>> if prov:
        ...     print(f"Created: {prov.created_at} by {prov.created_by}")
        ...     print(f"Method: {prov.creation_method.value}")
        ...     if prov.is_clone:
        ...         print(f"Cloned from: {prov.clone_details.source_hostname}")
    """
    # Resolved at call time: the module header imports CatalogProvenance
    # only under TYPE_CHECKING, so nothing from deriva_ml.catalog.clone is
    # loaded when this module is imported.
    # NOTE(review): presumably this avoids an import cycle -- confirm.
    from deriva_ml.catalog.clone import get_catalog_provenance

    provenance = get_catalog_provenance(self.catalog)
    return provenance
509
+
482
510
  def user_list(self) -> List[Dict[str, str]]:
483
511
  """Returns catalog user list.
484
512
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.17.11
3
+ Version: 1.17.12
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.12
@@ -1,5 +1,5 @@
1
1
  deriva_ml/.DS_Store,sha256=gb-f5IXVed_gS5Be1Z6WxCYjrI_r5SdblvfFpIOY4ro,8196
2
- deriva_ml/__init__.py,sha256=TYQpDOVErAx5JX7XybhYuDCfcLm3aYZoNTLhW3lCUIs,3282
2
+ deriva_ml/__init__.py,sha256=a7mk8HCe7i3SUGPRPm5ECvZwAYHswAEwgeGD6CBIAEk,4152
3
3
  deriva_ml/bump_version.py,sha256=DrVJA8AKqvwQ8Cc-omBLOjfDcKdBxUXm_XNj11SIJZo,11905
4
4
  deriva_ml/demo_catalog.py,sha256=_gQVeZm38dHmd2EThhfvCWRPt1meSuNXerkRULRO87U,17760
5
5
  deriva_ml/feature.py,sha256=Ap0cIK0kElAEfvlbfYtrWB23NJgy8St6Okhz-nDEZqY,8661
@@ -10,11 +10,11 @@ deriva_ml/run_notebook.py,sha256=BUShaMlzExfcjerm54en_zow2rcQFK6L0eHX-wwt_cg,277
10
10
  deriva_ml/asset/__init__.py,sha256=YuV0rFEL0kMDzB8W-qWiUs6HahEadiaYWuS-d3OcoMw,445
11
11
  deriva_ml/asset/asset.py,sha256=A8938V8iVufOzk5HdDxm5If1OkaLX1YJqQw-K-Um2rI,13489
12
12
  deriva_ml/asset/aux_classes.py,sha256=QIH_pd3koIG04fb-gzHVgdKtykfVgDGJH3F7RN3-dwg,3486
13
- deriva_ml/catalog/__init__.py,sha256=ilOOvLBlMImcWHW7RqCm0U2skvK6AhWEAu4OnY13c18,400
14
- deriva_ml/catalog/clone.py,sha256=_QV9RfBXStzQjCe6-3YrqEum2hg7H-vfpKkYXo92iKs,45558
13
+ deriva_ml/catalog/__init__.py,sha256=WzAPL8EGtdVRliIsRe0RyTIivkDvlwpcn718liKlpsU,658
14
+ deriva_ml/catalog/clone.py,sha256=PQpxtampevAo7xIF1MMORWMT-QEG6X_ubJ0VdpU0rSY,74577
15
15
  deriva_ml/catalog/localize.py,sha256=-YNvB_dYo0RjoI-VDj2Yu_qFB8TeAFPHfOJTYMTMYF8,14981
16
16
  deriva_ml/core/__init__.py,sha256=oqWgo4ckyAfebeXBQXJ9O8ans81tbmzPRnsVHLeVXT8,2000
17
- deriva_ml/core/base.py,sha256=F8Fq-jDaypxDe_onAmhZ8mwPaZPOUyvak2VEusBLYUM,55973
17
+ deriva_ml/core/base.py,sha256=THdHOrTp7Rk0DxyzHW4PildQixn8Z-mqP1jCWMMgtxY,57135
18
18
  deriva_ml/core/config.py,sha256=2RjpJrzdXC1JlrDGozWbtW_0YAbOf7eyHHr-E0xTozw,9681
19
19
  deriva_ml/core/constants.py,sha256=dlS3Wa7Tmmh2JVhhCJjN5Wltu0bJB5rMOSChJ1bdhRA,5300
20
20
  deriva_ml/core/definitions.py,sha256=EPGTtUT0cBuss4sZRY-0mQHab9GqBZYdc4ozbxFqC4o,5578
@@ -69,9 +69,9 @@ deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbU
69
69
  deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
70
70
  deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
71
71
  deriva_ml/schema/validation.py,sha256=C0TvWj2kjOj40w1N5FIWp55DWPdLPN8tk3JJfN5ezW4,19912
72
- deriva_ml-1.17.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
73
- deriva_ml-1.17.11.dist-info/METADATA,sha256=yL1ErE2xUh12Do3nHb0GMOO7KctAYIWMNzUn1g3qy_A,1216
74
- deriva_ml-1.17.11.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
75
- deriva_ml-1.17.11.dist-info/entry_points.txt,sha256=nwRBpDI6yGUMhvEJG__O0LHz6JovazaVXhykvSNF4og,554
76
- deriva_ml-1.17.11.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
77
- deriva_ml-1.17.11.dist-info/RECORD,,
72
+ deriva_ml-1.17.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
73
+ deriva_ml-1.17.12.dist-info/METADATA,sha256=pVWfVxpnSawna5gOvGBni0SgWD7oAUKzJsh1APsDba0,1216
74
+ deriva_ml-1.17.12.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
75
+ deriva_ml-1.17.12.dist-info/entry_points.txt,sha256=nwRBpDI6yGUMhvEJG__O0LHz6JovazaVXhykvSNF4og,554
76
+ deriva_ml-1.17.12.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
77
+ deriva_ml-1.17.12.dist-info/RECORD,,