deriva-ml 1.17.13__tar.gz → 1.17.15__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.
Files changed (179)
  1. {deriva_ml-1.17.13/src/deriva_ml.egg-info → deriva_ml-1.17.15}/PKG-INFO +1 -1
  2. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/catalog/clone.py +88 -18
  3. {deriva_ml-1.17.13 → deriva_ml-1.17.15/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  4. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.DS_Store +0 -0
  5. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.cursor.config +0 -0
  6. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.github/release-drafter.yml +0 -0
  7. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.github/workflows/publish-docs.yml +0 -0
  8. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.github/workflows/release.yml +0 -0
  9. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.gitignore +0 -0
  10. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/.vscode/settings.json +0 -0
  11. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/CLAUDE.md +0 -0
  12. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/LICENSE +0 -0
  13. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/README.md +0 -0
  14. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/.DS_Store +0 -0
  15. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  16. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  17. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  18. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  19. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  20. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  21. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/architecture.md +0 -0
  22. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/ERD.png +0 -0
  23. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/Launcher.png +0 -0
  24. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/copy_minid.png +0 -0
  25. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/deriva-logo.png +0 -0
  26. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/deriva-ml.pdf +0 -0
  27. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/assets/sharing-at-home.pdf +0 -0
  28. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/dataset.md +0 -0
  29. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/dataset_aux_classes.md +0 -0
  30. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/dataset_bag.md +0 -0
  31. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/deriva_definitions.md +0 -0
  32. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/deriva_ml_base.md +0 -0
  33. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/deriva_model.md +0 -0
  34. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/exceptions.md +0 -0
  35. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/execution.md +0 -0
  36. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/execution_configuration.md +0 -0
  37. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/feature.md +0 -0
  38. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/upload.md +0 -0
  39. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/code-docs/workflow.md +0 -0
  40. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/index.md +0 -0
  41. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/release-notes.md +0 -0
  42. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/annotations.md +0 -0
  43. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/datasets.md +0 -0
  44. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/deriva_ml_structure.md +0 -0
  45. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/execution-configuration.md +0 -0
  46. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/features.md +0 -0
  47. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/file-assets.md +0 -0
  48. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/hydra-zen-configuration.md +0 -0
  49. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/identifiers.md +0 -0
  50. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/install.md +0 -0
  51. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/notebooks.md +0 -0
  52. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/docs/user-guide/overview.md +0 -0
  53. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/mkdocs.yml +0 -0
  54. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/pyproject.toml +0 -0
  55. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/setup.cfg +0 -0
  56. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/.DS_Store +0 -0
  57. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/.DS_Store +0 -0
  58. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/__init__.py +0 -0
  59. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/asset/__init__.py +0 -0
  60. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/asset/asset.py +0 -0
  61. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/asset/aux_classes.py +0 -0
  62. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/bump_version.py +0 -0
  63. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/catalog/__init__.py +0 -0
  64. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/catalog/localize.py +0 -0
  65. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/__init__.py +0 -0
  66. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/base.py +0 -0
  67. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/config.py +0 -0
  68. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/constants.py +0 -0
  69. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/definitions.py +0 -0
  70. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/enums.py +0 -0
  71. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/ermrest.py +0 -0
  72. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/exceptions.py +0 -0
  73. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/filespec.py +0 -0
  74. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/logging_config.py +0 -0
  75. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/__init__.py +0 -0
  76. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/annotation.py +0 -0
  77. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/asset.py +0 -0
  78. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/dataset.py +0 -0
  79. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/execution.py +0 -0
  80. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/feature.py +0 -0
  81. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/file.py +0 -0
  82. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/path_builder.py +0 -0
  83. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/rid_resolution.py +0 -0
  84. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/vocabulary.py +0 -0
  85. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/mixins/workflow.py +0 -0
  86. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/core/validation.py +0 -0
  87. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/__init__.py +0 -0
  88. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/aux_classes.py +0 -0
  89. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/catalog_graph.py +0 -0
  90. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/dataset.py +0 -0
  91. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  92. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/history.py +0 -0
  93. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/dataset/upload.py +0 -0
  94. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/demo_catalog.py +0 -0
  95. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/__init__.py +0 -0
  96. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/base_config.py +0 -0
  97. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/environment.py +0 -0
  98. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/execution.py +0 -0
  99. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/execution_configuration.py +0 -0
  100. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/execution_record.py +0 -0
  101. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/find_caller.py +0 -0
  102. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/model_protocol.py +0 -0
  103. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/multirun_config.py +0 -0
  104. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/runner.py +0 -0
  105. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/execution/workflow.py +0 -0
  106. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/experiment/__init__.py +0 -0
  107. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/experiment/experiment.py +0 -0
  108. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/feature.py +0 -0
  109. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/install_kernel.py +0 -0
  110. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/interfaces.py +0 -0
  111. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/__init__.py +0 -0
  112. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/annotations.py +0 -0
  113. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/catalog.py +0 -0
  114. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/database.py +0 -0
  115. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/deriva_ml_database.py +0 -0
  116. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/model/handles.py +0 -0
  117. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/run_model.py +0 -0
  118. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/run_notebook.py +0 -0
  119. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/__init__.py +0 -0
  120. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/annotations.py +0 -0
  121. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/check_schema.py +0 -0
  122. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/create_schema.py +0 -0
  123. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  124. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/policy.json +0 -0
  125. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  126. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml/schema/validation.py +0 -0
  127. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  128. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  129. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  130. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml.egg-info/requires.txt +0 -0
  131. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/src/deriva_ml.egg-info/top_level.txt +0 -0
  132. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/test_output.txt +0 -0
  133. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/__init__.py +0 -0
  134. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/asset/__init__.py +0 -0
  135. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/asset/test_asset.py +0 -0
  136. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/catalog/__init__.py +0 -0
  137. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/catalog/test_clone_catalog.py +0 -0
  138. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/catalog_manager.py +0 -0
  139. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/conftest.py +0 -0
  140. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/__init__.py +0 -0
  141. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_basic_tables.py +0 -0
  142. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_catalog_annotations.py +0 -0
  143. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_file.py +0 -0
  144. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_hydra_zen_config.py +0 -0
  145. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_rid_resolution.py +0 -0
  146. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/core/test_vocabulary.py +0 -0
  147. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/__init__.py +0 -0
  148. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/demo-catalog-schema.json +0 -0
  149. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/deriva-ml-reference.json +0 -0
  150. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  151. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_catalog_dataset_functions.py +0 -0
  152. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_dataset_version.py +0 -0
  153. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_datasets.py +0 -0
  154. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_denormalize.py +0 -0
  155. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_download.py +0 -0
  156. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/dataset/test_restructure.py +0 -0
  157. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/__init__.py +0 -0
  158. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/test_execution.py +0 -0
  159. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/test_find_caller.py +0 -0
  160. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/test_runner.py +0 -0
  161. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/test_storage.py +0 -0
  162. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/workflow-test.ipynb +0 -0
  163. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/execution/workflow-test.py +0 -0
  164. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/experiment/__init__.py +0 -0
  165. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/experiment/test_experiment.py +0 -0
  166. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/factories.py +0 -0
  167. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/feature/test_features.py +0 -0
  168. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/model/__init__.py +0 -0
  169. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/model/test_annotations.py +0 -0
  170. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/model/test_database.py +0 -0
  171. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/model/test_handles.py +0 -0
  172. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/model/test_models.py +0 -0
  173. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/schema/__init__.py +0 -0
  174. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/schema/test_validation.py +0 -0
  175. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/test-files/execution-parameters.json +0 -0
  176. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/test-files/notebook-parameters.json +0 -0
  177. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/test_factories.py +0 -0
  178. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/tests/test_utils.py +0 -0
  179. {deriva_ml-1.17.13 → deriva_ml-1.17.15}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.17.13
3
+ Version: 1.17.15
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.12
@@ -87,9 +87,10 @@ class CloneIssue:
87
87
  details: str | None = None
88
88
  action: str | None = None
89
89
  row_count: int = 0
90
+ skipped_rids: list[str] | None = None # RIDs of rows that were skipped
90
91
 
91
92
  def to_dict(self) -> dict[str, Any]:
92
- return {
93
+ result = {
93
94
  "severity": self.severity.value,
94
95
  "category": self.category.value,
95
96
  "message": self.message,
@@ -98,6 +99,9 @@ class CloneIssue:
98
99
  "action": self.action,
99
100
  "row_count": self.row_count,
100
101
  }
102
+ if self.skipped_rids:
103
+ result["skipped_rids"] = self.skipped_rids
104
+ return result
101
105
 
102
106
  def __str__(self) -> str:
103
107
  parts = [f"[{self.severity.value.upper()}]"]
@@ -106,7 +110,14 @@ class CloneIssue:
106
110
  parts.append(self.message)
107
111
  if self.row_count > 0:
108
112
  parts.append(f"({self.row_count} rows)")
109
- return " ".join(parts)
113
+ result = " ".join(parts)
114
+ if self.skipped_rids:
115
+ # For small numbers, list the RIDs; for large numbers, just show count
116
+ if len(self.skipped_rids) <= 5:
117
+ result += f"\n Skipped RIDs: {', '.join(self.skipped_rids)}"
118
+ else:
119
+ result += f"\n Skipped RIDs: {len(self.skipped_rids)} rows (see JSON for full list)"
120
+ return result
110
121
 
111
122
 
112
123
  @dataclass
@@ -332,6 +343,7 @@ class CloneDetails:
332
343
  source_catalog_id: str
333
344
  source_snapshot: str | None = None
334
345
  source_schema_url: str | None = None # Hatrac URL to source schema JSON
346
+ # Clone parameters
335
347
  orphan_strategy: str = "fail"
336
348
  truncate_oversized: bool = False
337
349
  prune_hidden_fkeys: bool = False
@@ -339,15 +351,21 @@ class CloneDetails:
339
351
  asset_mode: str = "refs"
340
352
  exclude_schemas: list[str] = field(default_factory=list)
341
353
  exclude_objects: list[str] = field(default_factory=list)
354
+ add_ml_schema: bool = False
355
+ copy_annotations: bool = True
356
+ copy_policy: bool = True
357
+ reinitialize_dataset_versions: bool = True
358
+ # Statistics
342
359
  rows_copied: int = 0
343
360
  rows_skipped: int = 0
361
+ skipped_rids: list[str] = field(default_factory=list) # RIDs of skipped rows
344
362
  truncated_count: int = 0
345
363
  orphan_rows_removed: int = 0
346
364
  orphan_rows_nullified: int = 0
347
365
  fkeys_pruned: int = 0
348
366
 
349
367
  def to_dict(self) -> dict[str, Any]:
350
- return {
368
+ result = {
351
369
  "source_hostname": self.source_hostname,
352
370
  "source_catalog_id": self.source_catalog_id,
353
371
  "source_snapshot": self.source_snapshot,
@@ -359,6 +377,10 @@ class CloneDetails:
359
377
  "asset_mode": self.asset_mode,
360
378
  "exclude_schemas": self.exclude_schemas,
361
379
  "exclude_objects": self.exclude_objects,
380
+ "add_ml_schema": self.add_ml_schema,
381
+ "copy_annotations": self.copy_annotations,
382
+ "copy_policy": self.copy_policy,
383
+ "reinitialize_dataset_versions": self.reinitialize_dataset_versions,
362
384
  "rows_copied": self.rows_copied,
363
385
  "rows_skipped": self.rows_skipped,
364
386
  "truncated_count": self.truncated_count,
@@ -366,6 +388,9 @@ class CloneDetails:
366
388
  "orphan_rows_nullified": self.orphan_rows_nullified,
367
389
  "fkeys_pruned": self.fkeys_pruned,
368
390
  }
391
+ if self.skipped_rids:
392
+ result["skipped_rids"] = self.skipped_rids
393
+ return result
369
394
 
370
395
  @classmethod
371
396
  def from_dict(cls, data: dict[str, Any]) -> "CloneDetails":
@@ -381,8 +406,13 @@ class CloneDetails:
381
406
  asset_mode=data.get("asset_mode", "refs"),
382
407
  exclude_schemas=data.get("exclude_schemas", []),
383
408
  exclude_objects=data.get("exclude_objects", []),
409
+ add_ml_schema=data.get("add_ml_schema", False),
410
+ copy_annotations=data.get("copy_annotations", True),
411
+ copy_policy=data.get("copy_policy", True),
412
+ reinitialize_dataset_versions=data.get("reinitialize_dataset_versions", True),
384
413
  rows_copied=data.get("rows_copied", 0),
385
414
  rows_skipped=data.get("rows_skipped", 0),
415
+ skipped_rids=data.get("skipped_rids", []),
386
416
  truncated_count=data.get("truncated_count", 0),
387
417
  orphan_rows_removed=data.get("orphan_rows_removed", 0),
388
418
  orphan_rows_nullified=data.get("orphan_rows_nullified", 0),
@@ -677,7 +707,7 @@ def _copy_table_data_with_retry(
677
707
  report: "CloneReport",
678
708
  deferred_indexes: dict[str, list[dict]],
679
709
  truncate_oversized: bool = False,
680
- ) -> tuple[int, int, list[TruncatedValue]]:
710
+ ) -> tuple[int, int, list[str], list[TruncatedValue]]:
681
711
  """Copy data for a single table with retry logic for index errors.
682
712
 
683
713
  If a btree index size error occurs, this function will:
@@ -698,7 +728,7 @@ def _copy_table_data_with_retry(
698
728
  truncate_oversized: If True, truncate oversized values instead of skipping rows.
699
729
 
700
730
  Returns:
701
- Tuple of (rows_copied, rows_skipped, truncated_values).
731
+ Tuple of (rows_copied, rows_skipped, skipped_rids, truncated_values).
702
732
  rows_copied is -1 if the copy failed entirely.
703
733
  """
704
734
  tname_uri = f"{urlquote(sname)}:{urlquote(tname)}"
@@ -711,6 +741,7 @@ def _copy_table_data_with_retry(
711
741
  last = None
712
742
  table_rows = 0
713
743
  rows_skipped = 0
744
+ skipped_rids: list[str] = [] # Track RIDs of skipped rows
714
745
  truncated_values: list[TruncatedValue] = []
715
746
  row_by_row_mode = False
716
747
  problematic_index = None
@@ -768,7 +799,7 @@ def _copy_table_data_with_retry(
768
799
  ).json()
769
800
  except Exception as e:
770
801
  logger.warning(f"Failed to read from {sname}:{tname}: {e}")
771
- return -1, rows_skipped, truncated_values
802
+ return -1, rows_skipped, skipped_rids, truncated_values
772
803
 
773
804
  if not page:
774
805
  break
@@ -809,11 +840,14 @@ def _copy_table_data_with_retry(
809
840
 
810
841
  rows_skipped += 1
811
842
  rid = row.get('RID', 'unknown')
843
+ skipped_rids.append(rid)
812
844
  logger.debug(f"Skipping row {rid} in {table_key} due to index size limit")
813
845
  else:
814
846
  # Different error - log and skip
815
847
  rows_skipped += 1
816
- logger.debug(f"Skipping row in {table_key}: {row_error}")
848
+ rid = row.get('RID', 'unknown')
849
+ skipped_rids.append(rid)
850
+ logger.debug(f"Skipping row {rid} in {table_key}: {row_error}")
817
851
  last = page[-1]['RID']
818
852
  else:
819
853
  # Normal batch mode
@@ -884,14 +918,17 @@ def _copy_table_data_with_retry(
884
918
 
885
919
  rows_skipped += 1
886
920
  rid = row.get('RID', 'unknown')
921
+ skipped_rids.append(rid)
887
922
  logger.debug(f"Skipping row {rid} due to index size limit")
888
923
  else:
889
924
  rows_skipped += 1
890
- logger.debug(f"Skipping row: {row_error}")
925
+ rid = row.get('RID', 'unknown')
926
+ skipped_rids.append(rid)
927
+ logger.debug(f"Skipping row {rid}: {row_error}")
891
928
  last = page[-1]['RID']
892
929
  else:
893
930
  logger.warning(f"Failed to write to {sname}:{tname}: {e}")
894
- return -1, rows_skipped, truncated_values
931
+ return -1, rows_skipped, skipped_rids, truncated_values
895
932
 
896
933
  # Report skipped rows
897
934
  if rows_skipped > 0:
@@ -903,8 +940,9 @@ def _copy_table_data_with_retry(
903
940
  details=f"Index '{problematic_index}' on column '{problematic_column}'",
904
941
  action="These rows have values too large for btree index (>2704 bytes)",
905
942
  row_count=rows_skipped,
943
+ skipped_rids=skipped_rids if skipped_rids else None,
906
944
  ))
907
- logger.warning(f"Skipped {rows_skipped} rows in {table_key} due to index size limits")
945
+ logger.warning(f"Skipped {rows_skipped} rows in {table_key} due to index size limits: RIDs={skipped_rids}")
908
946
 
909
947
  # Report truncated values
910
948
  if truncated_values:
@@ -919,7 +957,7 @@ def _copy_table_data_with_retry(
919
957
  ))
920
958
  logger.info(f"Truncated {len(truncated_values)} values in {table_key}")
921
959
 
922
- return table_rows, rows_skipped, truncated_values
960
+ return table_rows, rows_skipped, skipped_rids, truncated_values
923
961
 
924
962
 
925
963
 
@@ -1072,7 +1110,7 @@ def clone_catalog(
1072
1110
  clone_timestamp = datetime.now(timezone.utc).isoformat()
1073
1111
 
1074
1112
  # Perform the three-stage clone
1075
- orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, truncated_values = _clone_three_stage(
1113
+ orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, skipped_rids, truncated_values = _clone_three_stage(
1076
1114
  src_catalog=src_catalog,
1077
1115
  dst_catalog=dst_catalog,
1078
1116
  copy_data=not schema_only,
@@ -1136,8 +1174,13 @@ def clone_catalog(
1136
1174
  asset_mode=asset_mode.value,
1137
1175
  exclude_schemas=exclude_schemas or [],
1138
1176
  exclude_objects=exclude_objects or [],
1177
+ add_ml_schema=add_ml_schema,
1178
+ copy_annotations=copy_annotations,
1179
+ copy_policy=copy_policy,
1180
+ reinitialize_dataset_versions=reinitialize_dataset_versions,
1139
1181
  rows_copied=total_rows_copied,
1140
1182
  rows_skipped=rows_skipped,
1183
+ skipped_rids=skipped_rids,
1141
1184
  truncated_count=len(truncated_values),
1142
1185
  orphan_rows_removed=orphan_rows_removed,
1143
1186
  orphan_rows_nullified=orphan_rows_nullified,
@@ -1186,10 +1229,10 @@ def _clone_three_stage(
1186
1229
  prune_hidden_fkeys: bool,
1187
1230
  truncate_oversized: bool,
1188
1231
  report: CloneReport,
1189
- ) -> tuple[int, int, int, int, list[TruncatedValue]]:
1232
+ ) -> tuple[int, int, int, int, list[str], list[TruncatedValue]]:
1190
1233
  """Perform three-stage catalog cloning.
1191
1234
 
1192
- Returns: (orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, truncated_values)
1235
+ Returns: (orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, rows_skipped, skipped_rids, truncated_values)
1193
1236
  """
1194
1237
  src_model = src_catalog.getCatalogModel()
1195
1238
 
@@ -1328,6 +1371,7 @@ def _clone_three_stage(
1328
1371
  # Stage 2: Copy data
1329
1372
  total_rows = 0
1330
1373
  total_rows_skipped = 0
1374
+ all_skipped_rids: list[str] = []
1331
1375
  all_truncated_values: list[TruncatedValue] = []
1332
1376
  deferred_indexes: dict[str, list[dict]] = {} # Track indexes dropped for later rebuild
1333
1377
 
@@ -1343,7 +1387,7 @@ def _clone_three_stage(
1343
1387
  logger.debug(f"Copying data for {table_key}")
1344
1388
 
1345
1389
  # Use the new copy function with index error handling
1346
- table_rows, rows_skipped, truncated = _copy_table_data_with_retry(
1390
+ table_rows, rows_skipped, skipped_rids, truncated = _copy_table_data_with_retry(
1347
1391
  src_catalog=src_catalog,
1348
1392
  dst_catalog=dst_catalog,
1349
1393
  sname=sname,
@@ -1355,6 +1399,7 @@ def _clone_three_stage(
1355
1399
  )
1356
1400
 
1357
1401
  total_rows_skipped += rows_skipped
1402
+ all_skipped_rids.extend(skipped_rids)
1358
1403
  all_truncated_values.extend(truncated)
1359
1404
 
1360
1405
  if table_rows < 0:
@@ -1581,7 +1626,7 @@ def _clone_three_stage(
1581
1626
  if copy_annotations or copy_policy:
1582
1627
  _copy_configuration(src_model, dst_catalog, copy_annotations, copy_policy, exclude_schemas, excluded_tables)
1583
1628
 
1584
- return orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, total_rows_skipped, all_truncated_values
1629
+ return orphan_rows_removed, orphan_rows_nullified, fkeys_pruned, total_rows_skipped, all_skipped_rids, all_truncated_values
1585
1630
 
1586
1631
 
1587
1632
  def _identify_orphan_values(
@@ -1892,12 +1937,37 @@ def _post_clone_operations(
1892
1937
 
1893
1938
  if add_ml_schema:
1894
1939
  try:
1895
- from deriva_ml.schema import add_ml_schema as add_schema
1940
+ from deriva_ml.schema import create_ml_schema
1896
1941
  catalog = server.connect_ermrest(result.catalog_id)
1897
- add_schema(catalog)
1942
+ create_ml_schema(catalog)
1898
1943
  result.ml_schema_added = True
1944
+
1945
+ # Apply catalog annotations (chaise-config, navbar, etc.)
1946
+ try:
1947
+ from deriva_ml import DerivaML
1948
+ ml = DerivaML(result.hostname, result.catalog_id, check_auth=False)
1949
+ ml.apply_catalog_annotations()
1950
+ logger.info("Applied catalog annotations (chaise-config, navbar)")
1951
+ except Exception as e:
1952
+ logger.warning(f"Failed to apply catalog annotations: {e}")
1953
+ if result.report:
1954
+ result.report.add_issue(CloneIssue(
1955
+ severity=CloneIssueSeverity.WARNING,
1956
+ category=CloneIssueCategory.SCHEMA_ISSUE,
1957
+ message="Failed to apply catalog annotations",
1958
+ details=str(e),
1959
+ action="Manually call apply_catalog_annotations() after clone",
1960
+ ))
1899
1961
  except Exception as e:
1900
1962
  logger.warning(f"Failed to add ML schema: {e}")
1963
+ if result.report:
1964
+ result.report.add_issue(CloneIssue(
1965
+ severity=CloneIssueSeverity.ERROR,
1966
+ category=CloneIssueCategory.SCHEMA_ISSUE,
1967
+ message="Failed to add DerivaML schema",
1968
+ details=str(e),
1969
+ action="ML schema was not added to the clone",
1970
+ ))
1901
1971
 
1902
1972
  return result
1903
1973
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.17.13
3
+ Version: 1.17.15
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.12
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes