PyPI - deriva-ml - Versions diffs - 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl - Mend

deriva-ml 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

deriva_ml/__init__.py +69 -1
deriva_ml/asset/__init__.py +17 -0
deriva_ml/asset/asset.py +357 -0
deriva_ml/asset/aux_classes.py +100 -0
deriva_ml/bump_version.py +254 -11
deriva_ml/catalog/__init__.py +31 -0
deriva_ml/catalog/clone.py +1939 -0
deriva_ml/catalog/localize.py +426 -0
deriva_ml/core/__init__.py +29 -0
deriva_ml/core/base.py +845 -1067
deriva_ml/core/config.py +169 -21
deriva_ml/core/constants.py +120 -19
deriva_ml/core/definitions.py +123 -13
deriva_ml/core/enums.py +47 -73
deriva_ml/core/ermrest.py +226 -193
deriva_ml/core/exceptions.py +297 -14
deriva_ml/core/filespec.py +99 -28
deriva_ml/core/logging_config.py +225 -0
deriva_ml/core/mixins/__init__.py +42 -0
deriva_ml/core/mixins/annotation.py +915 -0
deriva_ml/core/mixins/asset.py +384 -0
deriva_ml/core/mixins/dataset.py +237 -0
deriva_ml/core/mixins/execution.py +408 -0
deriva_ml/core/mixins/feature.py +365 -0
deriva_ml/core/mixins/file.py +263 -0
deriva_ml/core/mixins/path_builder.py +145 -0
deriva_ml/core/mixins/rid_resolution.py +204 -0
deriva_ml/core/mixins/vocabulary.py +400 -0
deriva_ml/core/mixins/workflow.py +322 -0
deriva_ml/core/validation.py +389 -0
deriva_ml/dataset/__init__.py +2 -1
deriva_ml/dataset/aux_classes.py +20 -4
deriva_ml/dataset/catalog_graph.py +575 -0
deriva_ml/dataset/dataset.py +1242 -1008
deriva_ml/dataset/dataset_bag.py +1311 -182
deriva_ml/dataset/history.py +27 -14
deriva_ml/dataset/upload.py +225 -38
deriva_ml/demo_catalog.py +126 -110
deriva_ml/execution/__init__.py +46 -2
deriva_ml/execution/base_config.py +639 -0
deriva_ml/execution/execution.py +543 -242
deriva_ml/execution/execution_configuration.py +26 -11
deriva_ml/execution/execution_record.py +592 -0
deriva_ml/execution/find_caller.py +298 -0
deriva_ml/execution/model_protocol.py +175 -0
deriva_ml/execution/multirun_config.py +153 -0
deriva_ml/execution/runner.py +595 -0
deriva_ml/execution/workflow.py +223 -34
deriva_ml/experiment/__init__.py +8 -0
deriva_ml/experiment/experiment.py +411 -0
deriva_ml/feature.py +6 -1
deriva_ml/install_kernel.py +143 -6
deriva_ml/interfaces.py +862 -0
deriva_ml/model/__init__.py +99 -0
deriva_ml/model/annotations.py +1278 -0
deriva_ml/model/catalog.py +286 -60
deriva_ml/model/database.py +144 -649
deriva_ml/model/deriva_ml_database.py +308 -0
deriva_ml/model/handles.py +14 -0
deriva_ml/run_model.py +319 -0
deriva_ml/run_notebook.py +507 -38
deriva_ml/schema/__init__.py +18 -2
deriva_ml/schema/annotations.py +62 -33
deriva_ml/schema/create_schema.py +169 -69
deriva_ml/schema/validation.py +601 -0
{deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/METADATA +4 -4
deriva_ml-1.17.12.dist-info/RECORD +77 -0
{deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/WHEEL +1 -1
{deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/entry_points.txt +1 -0
deriva_ml/protocols/dataset.py +0 -19
deriva_ml/test.py +0 -94
deriva_ml-1.17.10.dist-info/RECORD +0 -45
{deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/licenses/LICENSE +0 -0
{deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/top_level.txt +0 -0

deriva_ml/schema/__init__.py CHANGED Viewed

@@ -1,3 +1,19 @@
-from deriva_ml.schema.create_schema import create_ml_catalog, reset_ml_schema
+from deriva_ml.schema.create_schema import create_ml_catalog, create_ml_schema, reset_ml_schema
+from deriva_ml.schema.validation import (
+    SchemaValidationReport,
+    SchemaValidator,
+    ValidationIssue,
+    ValidationSeverity,
+    validate_ml_schema,
+)
-__all__ = ["create_ml_catalog", "reset_ml_schema"]
+__all__ = [
+    "create_ml_catalog",
+    "create_ml_schema",
+    "reset_ml_schema",
+    "SchemaValidationReport",
+    "SchemaValidator",
+    "ValidationIssue",
+    "ValidationSeverity",
+    "validate_ml_schema",
+]

deriva_ml/schema/annotations.py CHANGED Viewed

@@ -1,8 +1,14 @@
 import argparse
 import sys
-from deriva.core.ermrest_model import Model, Table
-from deriva.core.utils.core_utils import tag as deriva_tags
+# Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+import importlib
+_ermrest_model = importlib.import_module("deriva.core.ermrest_model")
+_core_utils = importlib.import_module("deriva.core.utils.core_utils")
+Model = _ermrest_model.Model
+Table = _ermrest_model.Table
+deriva_tags = _core_utils.tag
 from deriva_ml.core.constants import DerivaAssetColumns
 from deriva_ml.dataset.upload import bulk_upload_configuration
@@ -86,7 +92,7 @@ def catalog_annotation(model: DerivaModel) -> None:
                             },
                         ],
                     },
-                    {  # All the primary tables in deriva-ml schema.
+                    {  # WWW schema tables.
                         "name": "WWW",
                         "children": [
                             {
@@ -99,19 +105,24 @@ def catalog_annotation(model: DerivaModel) -> None:
                             },
                         ],
                     },
-                    {
-                        "name": model.domain_schema,
-                        "children": [
-                            {
-                                "name": tname,
-                                "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
-                            }
-                            for tname in model.schemas[model.domain_schema].tables
-                            # Don't include controlled vocabularies, association tables, or feature tables.
-                            if not (model.is_vocabulary(tname) or model.is_association(tname, pure=False, max_arity=3))
-                        ],
-                    },
-                    {  # Vocabulary menu which will list all the controlled vocabularies in deriva-ml and domain.
+                    # One menu per domain schema
+                    *[
+                        {
+                            "name": domain_schema,
+                            "children": [
+                                {
+                                    "name": tname,
+                                    "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
+                                }
+                                for tname in model.schemas[domain_schema].tables
+                                # Don't include controlled vocabularies, association tables, or feature tables.
+                                if not (model.is_vocabulary(tname) or model.is_association(tname, pure=False, max_arity=3))
+                            ],
+                        }
+                        for domain_schema in sorted(model.domain_schemas)
+                        if domain_schema in model.schemas
+                    ],
+                    {  # Vocabulary menu with all controlled vocabularies.
                         "name": "Vocabulary",
                         "children": [{"name": f"{ml_schema} Vocabularies", "header": True}]
                         + [
@@ -123,21 +134,22 @@ def catalog_annotation(model: DerivaModel) -> None:
                             if model.is_vocabulary(tname)
                         ]
                         + [
-                            {
-                                "name": f"{model.domain_schema} Vocabularies",
-                                "header": True,
-                            }
-                        ]
-                        + [
-                            {
-                                "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
-                                "name": tname,
-                            }
-                            for tname in model.schemas[model.domain_schema].tables
-                            if model.is_vocabulary(tname)
+                            item
+                            for domain_schema in sorted(model.domain_schemas)
+                            if domain_schema in model.schemas
+                            for item in [
+                                {"name": f"{domain_schema} Vocabularies", "header": True}
+                            ] + [
+                                {
+                                    "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
+                                    "name": tname,
+                                }
+                                for tname in model.schemas[domain_schema].tables
+                                if model.is_vocabulary(tname)
+                            ]
                         ],
                     },
-                    {  # List of all of the asset tables in deriva-ml and domain schemas.
+                    {  # List of all asset tables.
                         "name": "Assets",
                         "children": [
                             {
@@ -149,10 +161,12 @@ def catalog_annotation(model: DerivaModel) -> None:
                         ]
                         + [
                             {
-                                "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
+                                "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
                                 "name": tname,
                             }
-                            for tname in model.schemas[model.domain_schema].tables
+                            for domain_schema in sorted(model.domain_schemas)
+                            if domain_schema in model.schemas
+                            for tname in model.schemas[domain_schema].tables
                             if model.is_asset(tname)
                         ],
                     },
@@ -248,7 +262,6 @@ def asset_annotation(asset_table: Table):
 def generate_annotation(model: Model, schema: str) -> dict:
-    catalog_id = model.catalog.catalog_id
     workflow_annotation = {
         deriva_tags.visible_columns: {
             "*": [
@@ -287,8 +300,24 @@ def generate_annotation(model: Model, schema: str) -> dict:
                 "Status_Detail",
             ]
         },
-        "tag:isrd.isi.edu,2016:visible-foreign-keys": {
+        deriva_tags.visible_foreign_keys: {
             "detailed": [
+                {
+                    "source": [
+                        {"inbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
+                        {"outbound": [schema, "Execution_Execution_Execution_fkey"]},
+                        "RID",
+                    ],
+                    "markdown_name": "Parent Executions",
+                },
+                {
+                    "source": [
+                        {"inbound": [schema, "Execution_Execution_Execution_fkey"]},
+                        {"outbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
+                        "RID",
+                    ],
+                    "markdown_name": "Child Executions",
+                },
                 {
                     "source": [
                         {"inbound": [schema, "Dataset_Execution_Execution_fkey"]},

deriva_ml/schema/create_schema.py CHANGED Viewed

@@ -5,14 +5,16 @@ from importlib.resources import files
 from typing import Any, Optional
 from deriva.core import DerivaServer, ErmrestCatalog, get_credential
-from deriva.core.ermrest_model import (
-    Column,
-    ForeignKey,
-    Key,
-    Model,
-    Schema,
-    Table,
-    builtin_types,
+from deriva.core.ermrest_model import Model, Schema, Table
+from deriva.core.typed import (
+    BuiltinType,
+    ColumnDef,
+    ForeignKeyDef,
+    KeyDef,
+    SchemaDef,
+    TableDef,
+    VocabularyTableDef,
+    AssetTableDef,
 )
 from deriva_ml.core.definitions import ML_SCHEMA, MLTable, MLVocab
@@ -32,18 +34,21 @@ def create_dataset_table(
     version_annotation: Optional[dict] = None,
 ) -> Table:
     dataset_table = schema.create_table(
-        Table.define(
-            tname=MLTable.dataset,
-            column_defs=[
-                Column.define("Description", builtin_types.markdown),
-                Column.define("Deleted", builtin_types.boolean),
+        TableDef(
+            name=MLTable.dataset,
+            columns=[
+                ColumnDef("Description", BuiltinType.markdown),
+                ColumnDef("Deleted", BuiltinType.boolean),
             ],
             annotations=dataset_annotation if dataset_annotation is not None else {},
         )
     )
-    dataset_type = schema.create_table(Table.define_vocabulary(MLVocab.dataset_type, f"{project_name}:{{RID}}"))
+    dataset_type = schema.create_table(
+        VocabularyTableDef(name=MLVocab.dataset_type, curie_template=f"{project_name}:{{RID}}")
+    )
+    # Association table for Dataset <-> Dataset_Type
     schema.create_table(
         Table.define_association(
             associates=[
@@ -66,7 +71,7 @@ def create_dataset_table(
     return dataset_table
-def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
+def define_table_dataset_version(sname: str, annotation: Optional[dict] = None) -> TableDef:
     """Define the dataset version table in the specified schema.
     Args:
@@ -74,38 +79,47 @@ def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
         annotation: Optional annotation dictionary for the table.
     Returns:
-        The created Table object.
+        A TableDef for the dataset version table.
     """
-    table = Table.define(
-        tname=MLTable.dataset_version,
-        column_defs=[
-            Column.define(
-                "Version",
-                builtin_types.text,
+    return TableDef(
+        name=MLTable.dataset_version,
+        columns=[
+            ColumnDef(
+                name="Version",
+                type=BuiltinType.text,
                 default="0.1.0",
                 comment="Semantic version of dataset",
             ),
-            Column.define("Description", builtin_types.markdown),
-            Column.define("Dataset", builtin_types.text, comment="RID of dataset"),
-            Column.define("Execution", builtin_types.text, comment="RID of execution"),
-            Column.define("Minid", builtin_types.text, comment="URL to MINID for dataset"),
-            Column.define(
-                "Snapshot",
-                builtin_types.text,
+            ColumnDef("Description", BuiltinType.markdown),
+            ColumnDef("Dataset", BuiltinType.text, comment="RID of dataset"),
+            ColumnDef("Execution", BuiltinType.text, comment="RID of execution"),
+            ColumnDef("Minid", BuiltinType.text, comment="URL to MINID for dataset"),
+            ColumnDef(
+                name="Snapshot",
+                type=BuiltinType.text,
                 comment="Catalog Snapshot ID for dataset",
             ),
         ],
-        annotations=annotation,
-        key_defs=[Key.define(["Dataset", "Version"])],
-        fkey_defs=[
-            ForeignKey.define(["Dataset"], sname, "Dataset", ["RID"]),
-            ForeignKey.define(["Execution"], sname, "Execution", ["RID"]),
+        annotations=annotation if annotation else {},
+        keys=[KeyDef(columns=["Dataset", "Version"])],
+        foreign_keys=[
+            ForeignKeyDef(
+                columns=["Dataset"],
+                referenced_schema=sname,
+                referenced_table="Dataset",
+                referenced_columns=["RID"],
+            ),
+            ForeignKeyDef(
+                columns=["Execution"],
+                referenced_schema=sname,
+                referenced_table="Execution",
+                referenced_columns=["RID"],
+            ),
         ],
     )
-    return table
-def create_execution_table(schema, annotation: Optional[dict] = None):
+def create_execution_table(schema: Schema, annotation: Optional[dict] = None) -> Table:
     """Create the execution table in the specified schema.
     Args:
@@ -117,34 +131,71 @@ def create_execution_table(schema, annotation: Optional[dict] = None):
     """
     annotation = annotation if annotation is not None else {}
     execution = schema.create_table(
-        Table.define(
-            MLTable.execution,
-            column_defs=[
-                Column.define("Workflow", builtin_types.text),
-                Column.define("Description", builtin_types.markdown),
-                Column.define("Duration", builtin_types.text),
-                Column.define("Status", builtin_types.text),
-                Column.define("Status_Detail", builtin_types.text),
+        TableDef(
+            name=MLTable.execution,
+            columns=[
+                ColumnDef("Workflow", BuiltinType.text),
+                ColumnDef("Description", BuiltinType.markdown),
+                ColumnDef("Duration", BuiltinType.text),
+                ColumnDef("Status", BuiltinType.text),
+                ColumnDef("Status_Detail", BuiltinType.text),
+            ],
+            foreign_keys=[
+                ForeignKeyDef(
+                    columns=["Workflow"],
+                    referenced_schema=schema.name,
+                    referenced_table="Workflow",
+                    referenced_columns=["RID"],
+                )
             ],
-            fkey_defs=[ForeignKey.define(["Workflow"], schema.name, "Workflow", ["RID"])],
             annotations=annotation,
         )
     )
+    # Nested executions - allows grouping executions hierarchically
+    # (e.g., a sweep/multirun as parent with individual runs as children)
+    schema.create_table(
+        Table.define_association(
+            associates=[("Execution", execution), ("Nested_Execution", execution)],
+            comment="Association table for hierarchical execution nesting (parent-child relationships)",
+            metadata=[
+                ColumnDef(
+                    name="Sequence",
+                    type=BuiltinType.int4,
+                    nullok=True,
+                    comment="Order of nested execution (null if parallel)",
+                ).to_dict()  # Convert to dict for Table.define_association()
+            ],
+        )
+    )
     return execution
 def create_asset_table(
-    schema,
+    schema: Schema,
     asset_name: str,
-    execution_table,
-    asset_type_table,
-    asset_role_table,
+    execution_table: Table,
+    asset_type_table: Table,
+    asset_role_table: Table,
     use_hatrac: bool = True,
-):
+) -> Table:
+    """Create an asset table with associated type and execution associations.
+    Args:
+        schema: The schema where the table should be created.
+        asset_name: Name for the asset table.
+        execution_table: The execution table for association.
+        asset_type_table: The asset type vocabulary table.
+        asset_role_table: The asset role vocabulary table.
+        use_hatrac: Whether to use Hatrac for file storage (default True).
+    Returns:
+        The created asset Table object.
+    """
     asset_table = schema.create_table(
-        Table.define_asset(
-            sname=schema.name,
-            tname=asset_name,
+        AssetTableDef(
+            schema_name=schema.name,
+            name=asset_name,
             hatrac_template="/hatrac/metadata/{{MD5}}.{{Filename}}",
         )
     )
@@ -170,7 +221,7 @@ def create_asset_table(
     return asset_table
-def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None):
+def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None) -> Table:
     """Create the workflow table in the specified schema.
     Args:
@@ -181,20 +232,22 @@ def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]]
         The created Table object.
     """
     workflow_table = schema.create_table(
-        Table.define(
-            tname=MLTable.workflow,
-            column_defs=[
-                Column.define("Name", builtin_types.text),
-                Column.define("Description", builtin_types.markdown),
-                Column.define("URL", builtin_types.ermrest_uri),
-                Column.define("Checksum", builtin_types.text),
-                Column.define("Version", builtin_types.text),
+        TableDef(
+            name=MLTable.workflow,
+            columns=[
+                ColumnDef("Name", BuiltinType.text),
+                ColumnDef("Description", BuiltinType.markdown),
+                ColumnDef("URL", BuiltinType.ermrest_uri),
+                ColumnDef("Checksum", BuiltinType.text),
+                ColumnDef("Version", BuiltinType.text),
             ],
-            annotations=annotations,
+            annotations=annotations if annotations else {},
         )
     )
     workflow_table.create_reference(
-        schema.create_table(Table.define_vocabulary(MLVocab.workflow_type, f"{schema.name}:{{RID}}"))
+        schema.create_table(
+            VocabularyTableDef(name=MLVocab.workflow_type, curie_template=f"{schema.name}:{{RID}}")
+        )
     )
     return workflow_table
@@ -221,13 +274,21 @@ def create_ml_schema(
     model.schemas["public"].tables["ERMrest_Client"].annotations.update(client_annotation)
     model.apply()
-    schema = model.create_schema(Schema.define(schema_name, annotations=annotations["schema_annotation"]))
+    schema = model.create_schema(
+        SchemaDef(name=schema_name, annotations=annotations["schema_annotation"])
+    )
     # Create workflow and execution table.
-    schema.create_table(Table.define_vocabulary(MLVocab.feature_name, f"{project_name}:{{RID}}"))
-    asset_type_table = schema.create_table(Table.define_vocabulary(MLVocab.asset_type, f"{project_name}:{{RID}}"))
-    asset_role_table = schema.create_table(Table.define_vocabulary(MLVocab.asset_role, f"{project_name}:{{RID}}"))
+    schema.create_table(
+        VocabularyTableDef(name=MLVocab.feature_name, curie_template=f"{project_name}:{{RID}}")
+    )
+    asset_type_table = schema.create_table(
+        VocabularyTableDef(name=MLVocab.asset_type, curie_template=f"{project_name}:{{RID}}")
+    )
+    asset_role_table = schema.create_table(
+        VocabularyTableDef(name=MLVocab.asset_role, curie_template=f"{project_name}:{{RID}}")
+    )
     create_workflow_table(schema, annotations["workflow_annotation"])
     execution_table = create_execution_table(schema, annotations["execution_annotation"])
@@ -300,6 +361,14 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
                 "Name": "Runtime_Env",
                 "Description": "Information about the runtime environment",
             },
+            {
+                "Name": "Hydra_Config",
+                "Description": "Hydra YAML configuration file (config.yaml, overrides.yaml, hydra.yaml)",
+            },
+            {
+                "Name": "Deriva_Config",
+                "Description": "DerivaML execution configuration (configuration.json with datasets, assets, workflow)",
+            },
             {
                 "Name": "Execution_Metadata",
                 "Description": "Information about the execution environment",
@@ -335,7 +404,28 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
     )
-def create_ml_catalog(hostname: str, project_name: str) -> ErmrestCatalog:
+def create_ml_catalog(
+    hostname: str,
+    project_name: str,
+    catalog_alias: str | None = None,
+) -> ErmrestCatalog:
+    """Create a new DerivaML catalog with all ML schema tables.
+    Args:
+        hostname: Server hostname (e.g., "localhost", "www.eye-ai.org").
+        project_name: Name for the project, becomes the domain schema name.
+        catalog_alias: Optional alias name for the catalog. If provided, creates
+            an alias that points to the new catalog, allowing access via the
+            alias name instead of the numeric catalog ID.
+    Returns:
+        The created ErmrestCatalog instance.
+    Example:
+        # Create catalog with alias
+        catalog = create_ml_catalog("localhost", "my_project", catalog_alias="my-project")
+        # Now accessible as both /ermrest/catalog/<id> and /ermrest/catalog/my-project
+    """
     server = DerivaServer("https", hostname, credentials=get_credential(hostname))
     catalog = server.create_ermrest_catalog()
     model = catalog.getCatalogModel()
@@ -352,6 +442,16 @@ def create_ml_catalog(hostname: str, project_name: str) -> ErmrestCatalog:
         ]
     )
     create_ml_schema(catalog, project_name=project_name)
+    # Create alias if requested
+    if catalog_alias:
+        server.create_ermrest_alias(
+            id=catalog_alias,
+            alias_target=catalog.catalog_id,
+            name=project_name,
+            description=f"Alias for {project_name} catalog (ID: {catalog.catalog_id})",
+        )
     return catalog

deriva-ml 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl

deriva-ml 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl