albert 1.10.0rc2__py3-none-any.whl → 1.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. albert/__init__.py +1 -1
  2. albert/client.py +5 -0
  3. albert/collections/custom_templates.py +3 -0
  4. albert/collections/data_templates.py +118 -264
  5. albert/collections/entity_types.py +19 -3
  6. albert/collections/inventory.py +1 -1
  7. albert/collections/notebooks.py +154 -26
  8. albert/collections/parameters.py +1 -0
  9. albert/collections/property_data.py +384 -280
  10. albert/collections/reports.py +4 -0
  11. albert/collections/synthesis.py +292 -0
  12. albert/collections/tasks.py +2 -1
  13. albert/collections/worksheets.py +3 -0
  14. albert/core/shared/models/base.py +3 -1
  15. albert/core/shared/models/patch.py +1 -1
  16. albert/resources/batch_data.py +4 -2
  17. albert/resources/cas.py +3 -1
  18. albert/resources/custom_fields.py +3 -1
  19. albert/resources/data_templates.py +60 -12
  20. albert/resources/entity_types.py +15 -4
  21. albert/resources/inventory.py +6 -4
  22. albert/resources/lists.py +3 -1
  23. albert/resources/notebooks.py +12 -7
  24. albert/resources/parameter_groups.py +3 -1
  25. albert/resources/property_data.py +64 -5
  26. albert/resources/sheets.py +16 -14
  27. albert/resources/synthesis.py +61 -0
  28. albert/resources/tags.py +3 -1
  29. albert/resources/tasks.py +4 -7
  30. albert/resources/workflows.py +4 -2
  31. albert/utils/data_template.py +392 -37
  32. albert/utils/property_data.py +638 -0
  33. albert/utils/tasks.py +3 -3
  34. {albert-1.10.0rc2.dist-info → albert-1.11.1.dist-info}/METADATA +1 -1
  35. {albert-1.10.0rc2.dist-info → albert-1.11.1.dist-info}/RECORD +37 -34
  36. {albert-1.10.0rc2.dist-info → albert-1.11.1.dist-info}/WHEEL +0 -0
  37. {albert-1.10.0rc2.dist-info → albert-1.11.1.dist-info}/licenses/LICENSE +0 -0
--- a/albert/utils/data_template.py
+++ b/albert/utils/data_template.py
@@ -1,24 +1,33 @@
 """Utilities for working with data templates."""
 
+from __future__ import annotations
+
 import uuid
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from tenacity import retry, stop_after_attempt, wait_exponential
 
+from albert.collections.attachments import AttachmentCollection
+from albert.collections.files import FileCollection
 from albert.core.logging import logger
 from albert.core.shared.identifiers import AttachmentId, DataColumnId, DataTemplateId
 from albert.core.shared.models.patch import (
     GeneralPatchDatum,
     GeneralPatchPayload,
+    PatchDatum,
     PatchOperation,
-    PGPatchDatum,
 )
 from albert.exceptions import AlbertHTTPError
 from albert.resources.attachments import Attachment, AttachmentCategory
-from albert.resources.data_templates import DataColumnValue, DataTemplate
+from albert.resources.data_templates import DataColumnValue, DataTemplate, ImportMode
 from albert.resources.files import FileNamespace
-from albert.resources.parameter_groups import DataType, ValueValidation
+from albert.resources.parameter_groups import (
+    DataType,
+    EnumValidationValue,
+    ParameterValue,
+    ValueValidation,
+)
 from albert.resources.tasks import CsvCurveInput, CsvCurveResponse, TaskMetadata
 from albert.resources.worker_jobs import (
     WORKER_JOB_PENDING_STATES,
@@ -28,21 +37,41 @@ from albert.resources.worker_jobs import (
     WorkerJobState,
 )
 from albert.utils.tasks import (
+    CSV_EXTENSIONS,
     determine_extension,
     extract_extensions_from_attachment,
+    fetch_csv_table_rows,
     map_csv_headers_to_columns,
     resolve_attachment,
 )
 
 if TYPE_CHECKING:
-    from albert.collections.attachments import AttachmentCollection
-    from albert.collections.files import FileCollection
     from albert.core.session import AlbertSession
+    from albert.resources.data_templates import CurveExample, ImageExample
 
 
 _CURVE_JOB_POLL_INTERVAL = 2.0
 _CURVE_JOB_MAX_ATTEMPTS = 20
 _CURVE_JOB_MAX_WAIT = 10.0
+SUPPORTED_IMAGE_EXTENSIONS = [
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".jfif",
+    ".pjpeg",
+    ".pjp",
+    ".svg",
+    ".gif",
+    ".apng",
+    ".avif",
+    ".webp",
+    ".bmp",
+    ".ico",
+    ".cur",
+    ".tif",
+    ".tiff",
+    ".heic",
+]
 
 
 def get_target_data_column(
@@ -104,7 +133,7 @@ def validate_data_column_type(*, target_column: DataColumnValue) -> None:
 
 def get_script_attachment(
     *,
-    attachment_collection: "AttachmentCollection",
+    attachment_collection: AttachmentCollection,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
 ) -> tuple[Attachment, set[str]]:
@@ -144,15 +173,22 @@
 
 def prepare_curve_input_attachment(
     *,
-    attachment_collection: "AttachmentCollection",
+    attachment_collection: AttachmentCollection,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     allowed_extensions: set[str] | None,
     file_path: str | Path | None,
     attachment_id: AttachmentId | None,
     require_signed_url: bool,
+    parent_id: str | None = None,
+    upload_key: str | None = None,
+    auto_upload_key: bool = True,
 ) -> Attachment:
-    """Resolve the input attachment, uploading a file when required, and validate it."""
+    """Resolve the input attachment, uploading a file when required, and validate it.
+
+    When ``parent_id`` is provided, the attachment is created under that parent.
+    Set ``auto_upload_key=False`` to skip curve-input key generation.
+    """
 
     if (attachment_id is None) == (file_path is None):
         raise ValueError("Provide exactly one of 'attachment_id' or 'file_path'.")
@@ -161,7 +197,6 @@ def prepare_curve_input_attachment(
     normalized_extensions = {ext.lower().lstrip(".") for ext in allowed_extensions if ext}
     display_extensions = sorted(allowed_extensions) if allowed_extensions else []
 
-    upload_key: str | None = None
     resolved_path: Path | None = None
     if file_path is not None:
         resolved_path = Path(file_path)
@@ -169,12 +204,15 @@ def prepare_curve_input_attachment(
         if not suffix:
             derived_extension = determine_extension(filename=resolved_path.name)
             suffix = f".{derived_extension}" if derived_extension else ""
-        upload_key = f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex[:10]}{suffix}"
+        if auto_upload_key and upload_key is None:
+            upload_key = (
+                f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex[:10]}{suffix}"
+            )
 
     resolved_attachment_id = AttachmentId(
         resolve_attachment(
             attachment_collection=attachment_collection,
-            task_id=data_template_id,
+            task_id=parent_id or data_template_id,
             file_path=resolved_path or file_path,
             attachment_id=str(attachment_id) if attachment_id else None,
             allowed_extensions=normalized_extensions,
@@ -206,13 +244,14 @@
 
 
 def exec_curve_script(
     *,
-    session: "AlbertSession",
-    api_version: str,
+    session: AlbertSession,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     raw_attachment: Attachment,
-    file_collection: "FileCollection",
+    file_collection: FileCollection,
     script_attachment_signed_url: str,
+    task_id: str | None = None,
+    block_id: str | None = None,
 ) -> tuple[str, dict[str, str]]:
     """Execute the curve preprocessing script and return the processed key and column headers."""
 
@@ -220,14 +259,24 @@ def exec_curve_script(
     if not raw_signed_url:
         raise ValueError("Curve input attachment does not include a signed URL.")
 
-    processed_input_key = f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex}.csv"
+    if task_id and block_id:
+        processed_input_key = (
+            f"curve-input/{task_id}/{block_id}/{data_template_id}/"
+            f"{column_id}/{uuid.uuid4().hex[:10]}.csv"
+        )
+    else:
+        processed_input_key = f"curve-input/{data_template_id}/{column_id}/{uuid.uuid4().hex}.csv"
     content_type = raw_attachment.mime_type or "text/csv"
     upload_url = file_collection.get_signed_upload_url(
         name=processed_input_key,
         namespace=FileNamespace.RESULT,
         content_type=content_type,
     )
-    metadata_payload = TaskMetadata(filename=raw_attachment.name or "", task_id=data_template_id)
+    metadata_payload = TaskMetadata(
+        filename=raw_attachment.name or "",
+        task_id=task_id or data_template_id,
+        block_id=block_id,
+    )
     csv_payload = CsvCurveInput(
         script_s3_url=script_attachment_signed_url,
         data_s3_url=raw_signed_url,
@@ -235,7 +284,7 @@ def exec_curve_script(
         task_metadata=metadata_payload,
     )
     response = session.post(
-        f"/api/{api_version}/proxy/csvtable/curve",
+        "/api/v3/proxy/csvtable/curve",
         json=csv_payload.model_dump(by_alias=True, mode="json", exclude_none=True),
     )
     curve_response = CsvCurveResponse.model_validate(response.json())
@@ -294,19 +343,30 @@ def derive_curve_csv_mapping(
 
 def create_curve_import_job(
     *,
-    session: "AlbertSession",
+    session: AlbertSession,
     data_template_id: DataTemplateId,
     column_id: DataColumnId,
     csv_mapping: dict[str, str],
     raw_attachment: Attachment,
     processed_input_key: str,
+    task_id: str | None = None,
+    block_id: str | None = None,
 ) -> tuple[str, str, str]:
     """Create the curve import job and wait for completion."""
     partition_uuid = str(uuid.uuid4())
-    s3_output_key = (
-        f"curve-output/{data_template_id}/{column_id}/"
-        f"parentid=null/blockid=null/datatemplateid={data_template_id}/uuid={partition_uuid}"
-    )
+    if (task_id is None) != (block_id is None):
+        raise ValueError("task_id and block_id must be provided together for curve imports.")
+    if task_id and block_id:
+        s3_output_key = (
+            f"curve-output/{data_template_id}/{column_id}/"
+            f"parentid={task_id}/blockid={block_id}/"
+            f"datatemplateid={data_template_id}/uuid={partition_uuid}"
+        )
+    else:
+        s3_output_key = (
+            f"curve-output/{data_template_id}/{column_id}/"
+            f"parentid=null/blockid=null/datatemplateid={data_template_id}/uuid={partition_uuid}"
+        )
    namespace = raw_attachment.namespace or "result"
     worker_metadata = WorkerJobMetadata(
         parent_type="DAT",
@@ -339,12 +399,13 @@ def create_curve_import_job(
         reraise=True,
     )
     def _poll_worker_job() -> WorkerJob:
+        """Poll a worker job status for completion."""
         status_response = session.get(f"/api/v3/worker-jobs/{job_id}")
         current_job = WorkerJob.model_validate(status_response.json())
         state = current_job.state
 
         if state in WORKER_JOB_PENDING_STATES:
-            logger.warning(
+            logger.info(
                 "Curve data import in progress for template %s column %s",
                 data_template_id,
                 column_id,
@@ -392,22 +453,22 @@ def build_curve_import_patch_payload(
         },
     }
     actions = [
-        PGPatchDatum(
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="jobId",
             new_value=job_id,
         ),
-        PGPatchDatum(
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="csvMapping",
             new_value=csv_mapping,
         ),
-        PGPatchDatum(
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="value",
             new_value=value_payload,
         ),
-        PGPatchDatum(
+        PatchDatum(
             operation=PatchOperation.ADD.value,
             attribute="athenaPartitionKey",
             new_value=partition_uuid,
@@ -424,13 +485,307 @@
     )
 
 
-def _validation_is_curve(validation: ValueValidation | dict | None) -> bool:
-    if isinstance(validation, ValueValidation):
-        return validation.datatype == DataType.CURVE
-    if isinstance(validation, dict):
-        datatype = validation.get("datatype")
-        if isinstance(datatype, DataType):
-            return datatype == DataType.CURVE
-        if isinstance(datatype, str):
-            return datatype.lower() == DataType.CURVE.value
-    return False
+def add_parameter_enums(
+    *,
+    session: AlbertSession,
+    base_path: str,
+    data_template_id: DataTemplateId,
+    new_parameters: list[ParameterValue],
+) -> dict[str, list[EnumValidationValue]]:
+    """Add enum values to newly created parameters and return updated enum sequences."""
+
+    data_template = DataTemplate(**session.get(f"{base_path}/{data_template_id}").json())
+    existing_parameters = data_template.parameter_values or []
+    enums_by_sequence: dict[str, list[EnumValidationValue]] = {}
+    for parameter in new_parameters:
+        this_sequence = next(
+            (
+                p.sequence
+                for p in existing_parameters
+                if p.id == parameter.id and p.short_name == parameter.short_name
+            ),
+            None,
+        )
+        enum_patches: list[dict[str, str]] = []
+        if (
+            parameter.validation
+            and len(parameter.validation) > 0
+            and isinstance(parameter.validation[0].value, list)
+        ):
+            existing_validation = (
+                [x for x in existing_parameters if x.sequence == parameter.sequence]
+                if existing_parameters
+                else []
+            )
+            existing_enums = (
+                [
+                    x
+                    for x in existing_validation[0].validation[0].value
+                    if isinstance(x, EnumValidationValue) and x.id is not None
+                ]
+                if (
+                    existing_validation
+                    and len(existing_validation) > 0
+                    and existing_validation[0].validation
+                    and len(existing_validation[0].validation) > 0
+                    and existing_validation[0].validation[0].value
+                    and isinstance(existing_validation[0].validation[0].value, list)
+                )
+                else []
+            )
+            updated_enums = (
+                [x for x in parameter.validation[0].value if isinstance(x, EnumValidationValue)]
+                if parameter.validation[0].value
+                else []
+            )
+
+            deleted_enums = [
+                x for x in existing_enums if x.id not in [y.id for y in updated_enums]
+            ]
+
+            new_enums = [x for x in updated_enums if x.id not in [y.id for y in existing_enums]]
+
+            matching_enums = [x for x in updated_enums if x.id in [y.id for y in existing_enums]]
+
+            for new_enum in new_enums:
+                enum_patches.append({"operation": "add", "text": new_enum.text})
+            for deleted_enum in deleted_enums:
+                enum_patches.append({"operation": "delete", "id": deleted_enum.id})
+            for matching_enum in matching_enums:
+                if (
+                    matching_enum.text
+                    != [x for x in existing_enums if x.id == matching_enum.id][0].text
+                ):
+                    enum_patches.append(
+                        {
+                            "operation": "update",
+                            "id": matching_enum.id,
+                            "text": matching_enum.text,
+                        }
+                    )
+
+        if enum_patches and this_sequence:
+            enum_response = session.put(
+                f"{base_path}/{data_template_id}/parameters/{this_sequence}/enums",
+                json=enum_patches,
+            )
+            enums_by_sequence[this_sequence] = [
+                EnumValidationValue(**x) for x in enum_response.json()
+            ]
+
+    return enums_by_sequence
+
+
+def upload_image_example_attachment(
+    *,
+    attachment_collection: AttachmentCollection,
+    data_template_id: DataTemplateId,
+    file_path: str | Path | None,
+    attachment_id: AttachmentId | None,
+    upload_key: str | None = None,
+) -> Attachment:
+    """Upload or resolve an image attachment for a data template example."""
+
+    supported_extensions = {ext.lstrip(".").lower() for ext in SUPPORTED_IMAGE_EXTENSIONS}
+    resolved_attachment_id = AttachmentId(
+        resolve_attachment(
+            attachment_collection=attachment_collection,
+            task_id=data_template_id,
+            file_path=file_path,
+            attachment_id=str(attachment_id) if attachment_id else None,
+            allowed_extensions=supported_extensions,
+            note_text=None,
+            upload_key=upload_key,
+        )
+    )
+    attachment = attachment_collection.get_by_id(id=resolved_attachment_id)
+    if supported_extensions:
+        attachment_ext = determine_extension(filename=attachment.name)
+        if attachment_ext and attachment_ext not in supported_extensions:
+            raise ValueError(
+                f"Attachment '{attachment.name}' is not a supported image type "
+                f"({sorted(supported_extensions)})."
+            )
+    return attachment
+
+
+def build_data_column_image_example_payload(
+    *,
+    target_column: DataColumnValue,
+    attachment: Attachment,
+) -> GeneralPatchPayload:
+    """Construct the patch payload to set an image example on a data column."""
+
+    key = attachment.key
+    file_name = attachment.name
+    if not key:
+        raise ValueError("Image attachment is missing an S3 key.")
+    if target_column.sequence is None:
+        raise ValueError("Data column sequence is required to patch image examples.")
+
+    value_payload = {
+        "fileName": file_name,
+        "s3Key": {
+            "original": key,
+            "thumb": key,
+            "preview": key,
+        },
+    }
+    action = PatchDatum(
+        operation=PatchOperation.ADD.value,
+        attribute="value",
+        new_value=value_payload,
+    )
+    return GeneralPatchPayload(
+        data=[
+            GeneralPatchDatum(
+                attribute="datacolumn",
+                colId=target_column.sequence,
+                actions=[action],
+            )
+        ]
+    )
+
+
+def ensure_data_column_accepts_images(*, target_column: DataColumnValue) -> None:
+    """Ensure the resolved data column is configured for image data."""
+
+    validations = target_column.validation or []
+    if not any(_validation_is_image(validation) for validation in validations):
+        raise ValueError(
+            f"Data column '{target_column.name}' must be an image-type column to add image examples."
+        )
+
+
+def _validation_is_curve(validation: ValueValidation | None) -> bool:
+    """Return True when validation indicates curve data."""
+    return isinstance(validation, ValueValidation) and validation.datatype == DataType.CURVE
+
+
+def _validation_is_image(validation: ValueValidation | None) -> bool:
+    """Return True when validation indicates image data."""
+    return isinstance(validation, ValueValidation) and validation.datatype == DataType.IMAGE
+
+
+def build_curve_example(
+    *,
+    session: AlbertSession,
+    data_template_id: DataTemplateId,
+    example: CurveExample,
+    target_column: DataColumnValue,
+) -> GeneralPatchPayload:
+    """Construct the patch payload for a curve example on a data template."""
+
+    validate_data_column_type(target_column=target_column)
+    column_id = target_column.data_column_id
+    if column_id is None:
+        raise ValueError("Target data column is missing an identifier.")
+    attachment_collection = AttachmentCollection(session=session)
+    file_collection = FileCollection(session=session)
+
+    script_attachment_signed_url: str | None = None
+
+    if example.mode is ImportMode.SCRIPT:
+        script_attachment, script_extensions = get_script_attachment(
+            attachment_collection=attachment_collection,
+            data_template_id=data_template_id,
+            column_id=column_id,
+        )
+        if not script_extensions:
+            raise ValueError("Script attachment must define allowed extensions.")
+        script_attachment_signed_url = script_attachment.signed_url
+        allowed_extensions = set(script_extensions)
+    else:
+        allowed_extensions = set(CSV_EXTENSIONS)
+    raw_attachment = prepare_curve_input_attachment(
+        attachment_collection=attachment_collection,
+        data_template_id=data_template_id,
+        column_id=column_id,
+        allowed_extensions=allowed_extensions,
+        file_path=example.file_path,
+        attachment_id=example.attachment_id,
+        require_signed_url=example.mode is ImportMode.SCRIPT,
+    )
+    raw_key = raw_attachment.key
+    if raw_attachment.id is None:
+        raise ValueError("Curve input attachment did not return an identifier.")
+    resolved_attachment_id = AttachmentId(raw_attachment.id)
+
+    processed_input_key: str = raw_key
+    column_headers: dict[str, str] = {}
+
+    if example.mode is ImportMode.SCRIPT:
+        processed_input_key, column_headers = exec_curve_script(
+            session=session,
+            data_template_id=data_template_id,
+            column_id=column_id,
+            raw_attachment=raw_attachment,
+            file_collection=file_collection,
+            script_attachment_signed_url=script_attachment_signed_url,
+        )
+    else:
+        table_rows = fetch_csv_table_rows(
+            session=session,
+            attachment_id=resolved_attachment_id,
+            headers_only=True,
+        )
+        header_row = table_rows[0]
+        if not isinstance(header_row, dict):
+            raise ValueError("Unexpected CSV header format returned by preview endpoint.")
+        column_headers = {
+            key: value
+            for key, value in header_row.items()
+            if isinstance(key, str) and isinstance(value, str) and value
+        }
+
+    csv_mapping = derive_curve_csv_mapping(
+        target_column=target_column,
+        column_headers=column_headers,
+        field_mapping=example.field_mapping,
+    )
+
+    job_id, partition_uuid, s3_output_key = create_curve_import_job(
+        session=session,
+        data_template_id=data_template_id,
+        column_id=column_id,
+        csv_mapping=csv_mapping,
+        raw_attachment=raw_attachment,
+        processed_input_key=processed_input_key,
+    )
+
+    return build_curve_import_patch_payload(
+        target_column=target_column,
+        job_id=job_id,
+        csv_mapping=csv_mapping,
+        raw_attachment=raw_attachment,
+        partition_uuid=partition_uuid,
+        s3_output_key=s3_output_key,
+    )
+
+
+def build_image_example(
+    *,
+    session: AlbertSession,
+    data_template_id: DataTemplateId,
+    example: ImageExample,
+    target_column: DataColumnValue,
+) -> GeneralPatchPayload:
+    """Construct the patch payload for an image example on a data template."""
+
+    ensure_data_column_accepts_images(target_column=target_column)
+    resolved_path = Path(example.file_path)
+    upload_ext = resolved_path.suffix.lower()
+    if not upload_ext:
+        raise ValueError("File extension is required for image examples.")
+    upload_key = f"imagedata/original/{data_template_id}/{uuid.uuid4().hex[:10]}{upload_ext}"
+    attachment_collection = AttachmentCollection(session=session)
+    attachment = upload_image_example_attachment(
+        attachment_collection=attachment_collection,
+        data_template_id=data_template_id,
+        file_path=example.file_path,
+        attachment_id=None,
+        upload_key=upload_key,
+    )
+    return build_data_column_image_example_payload(
+        target_column=target_column, attachment=attachment
+    )
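
For orientation, a minimal usage sketch of the two new example builders added in this release. This is an assumption-laden sketch, not a documented API: the CurveExample and ImageExample constructor keywords are inferred from the attribute accesses visible in this diff (mode, file_path, attachment_id, field_mapping), the ImportMode.CSV member is assumed (only ImportMode.SCRIPT appears above), and acquiring the session and the target DataColumnValue is left to the caller.

    from albert.resources.data_templates import CurveExample, ImageExample, ImportMode
    from albert.utils.data_template import build_curve_example, build_image_example


    def add_examples(session, data_template_id, target_column):
        """Sketch: build patch payloads for a curve example and an image example."""
        # Plain-CSV path: headers are read via the csvtable preview endpoint,
        # then the worker import job is created and polled to completion.
        curve_payload = build_curve_example(
            session=session,
            data_template_id=data_template_id,
            example=CurveExample(  # hypothetical constructor keywords
                mode=ImportMode.CSV,  # assumed member; only SCRIPT appears in the diff
                file_path="results.csv",
                attachment_id=None,
                field_mapping=None,
            ),
            target_column=target_column,
        )
        # Image path: the helper mints an imagedata/original/... upload key and
        # validates the file suffix against SUPPORTED_IMAGE_EXTENSIONS.
        image_payload = build_image_example(
            session=session,
            data_template_id=data_template_id,
            example=ImageExample(file_path="spectrum.png"),  # hypothetical keyword
            target_column=target_column,
        )
        return curve_payload, image_payload  # GeneralPatchPayload objects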
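
The enum reconciliation inside add_parameter_enums reduces to a three-way diff between the enums already stored on the template parameter and the enums on the incoming parameter. Below is a standalone sketch of that core logic, using plain dicts as stand-ins for EnumValidationValue; the add/delete/update patch shapes mirror the payloads the diff sends to PUT {base_path}/{data_template_id}/parameters/{sequence}/enums.

    def diff_enum_patches(existing: list[dict], updated: list[dict]) -> list[dict]:
        """Compute add/delete/update enum patches, mirroring add_parameter_enums."""
        existing_ids = {e["id"] for e in existing}
        updated_ids = {u["id"] for u in updated if u.get("id") is not None}
        by_id = {e["id"]: e for e in existing}
        patches: list[dict] = []
        # Enums without a known id are new; the server assigns ids on PUT.
        for u in updated:
            if u.get("id") is None or u["id"] not in existing_ids:
                patches.append({"operation": "add", "text": u["text"]})
        # Enums that vanished from the updated list are deleted by id.
        for e in existing:
            if e["id"] not in updated_ids:
                patches.append({"operation": "delete", "id": e["id"]})
        # Surviving enums whose text changed are updated in place.
        for u in updated:
            if u.get("id") in by_id and u["text"] != by_id[u["id"]]["text"]:
                patches.append({"operation": "update", "id": u["id"], "text": u["text"]})
        return patches


    assert diff_enum_patches(
        existing=[{"id": "E1", "text": "Pass"}, {"id": "E2", "text": "Fail"}],
        updated=[{"id": "E1", "text": "Passed"}, {"id": None, "text": "Inconclusive"}],
    ) == [
        {"operation": "add", "text": "Inconclusive"},
        {"operation": "delete", "id": "E2"},
        {"operation": "update", "id": "E1", "text": "Passed"},
    ]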