flowyml 1.2.0-py3-none-any.whl → 1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. flowyml/__init__.py +3 -0
  2. flowyml/assets/base.py +10 -0
  3. flowyml/assets/metrics.py +6 -0
  4. flowyml/cli/main.py +108 -2
  5. flowyml/cli/run.py +9 -2
  6. flowyml/core/execution_status.py +52 -0
  7. flowyml/core/hooks.py +106 -0
  8. flowyml/core/observability.py +210 -0
  9. flowyml/core/orchestrator.py +274 -0
  10. flowyml/core/pipeline.py +193 -231
  11. flowyml/core/project.py +34 -2
  12. flowyml/core/remote_orchestrator.py +109 -0
  13. flowyml/core/resources.py +34 -17
  14. flowyml/core/retry_policy.py +80 -0
  15. flowyml/core/scheduler.py +9 -9
  16. flowyml/core/scheduler_config.py +2 -3
  17. flowyml/core/step.py +18 -1
  18. flowyml/core/submission_result.py +53 -0
  19. flowyml/integrations/keras.py +95 -22
  20. flowyml/monitoring/alerts.py +2 -2
  21. flowyml/stacks/__init__.py +15 -0
  22. flowyml/stacks/aws.py +599 -0
  23. flowyml/stacks/azure.py +295 -0
  24. flowyml/stacks/bridge.py +9 -9
  25. flowyml/stacks/components.py +24 -2
  26. flowyml/stacks/gcp.py +158 -11
  27. flowyml/stacks/local.py +5 -0
  28. flowyml/stacks/plugins.py +2 -2
  29. flowyml/stacks/registry.py +21 -0
  30. flowyml/storage/artifacts.py +15 -5
  31. flowyml/storage/materializers/__init__.py +2 -0
  32. flowyml/storage/materializers/base.py +33 -0
  33. flowyml/storage/materializers/cloudpickle.py +74 -0
  34. flowyml/storage/metadata.py +3 -881
  35. flowyml/storage/remote.py +590 -0
  36. flowyml/storage/sql.py +911 -0
  37. flowyml/ui/backend/dependencies.py +28 -0
  38. flowyml/ui/backend/main.py +43 -80
  39. flowyml/ui/backend/routers/assets.py +483 -17
  40. flowyml/ui/backend/routers/client.py +46 -0
  41. flowyml/ui/backend/routers/execution.py +13 -2
  42. flowyml/ui/backend/routers/experiments.py +97 -14
  43. flowyml/ui/backend/routers/metrics.py +168 -0
  44. flowyml/ui/backend/routers/pipelines.py +77 -12
  45. flowyml/ui/backend/routers/projects.py +33 -7
  46. flowyml/ui/backend/routers/runs.py +221 -12
  47. flowyml/ui/backend/routers/schedules.py +5 -21
  48. flowyml/ui/backend/routers/stats.py +14 -0
  49. flowyml/ui/backend/routers/traces.py +37 -53
  50. flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
  51. flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
  52. flowyml/ui/frontend/dist/index.html +2 -2
  53. flowyml/ui/frontend/src/App.jsx +4 -1
  54. flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
  55. flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
  56. flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
  57. flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
  58. flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
  59. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
  60. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
  61. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
  62. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
  63. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
  64. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
  65. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
  66. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
  67. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
  68. flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
  69. flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
  70. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
  71. flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
  72. flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
  73. flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
  74. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
  75. flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
  76. flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
  77. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
  78. flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
  79. flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
  80. flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
  81. flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
  82. flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
  83. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
  84. flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
  85. flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
  86. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
  87. flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
  88. flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
  89. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
  90. flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
  91. flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
  92. flowyml/ui/frontend/src/router/index.jsx +4 -0
  93. flowyml/ui/frontend/src/utils/date.js +10 -0
  94. flowyml/ui/frontend/src/utils/downloads.js +11 -0
  95. flowyml/utils/config.py +6 -0
  96. flowyml/utils/stack_config.py +45 -3
  97. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/METADATA +44 -4
  98. flowyml-1.4.0.dist-info/RECORD +200 -0
  99. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/licenses/LICENSE +1 -1
  100. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
  101. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
  102. flowyml-1.2.0.dist-info/RECORD +0 -159
  103. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/WHEEL +0 -0
  104. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/entry_points.txt +0 -0
flowyml/core/remote_orchestrator.py ADDED
@@ -0,0 +1,109 @@
+ """Remote Orchestrator - Executes pipelines on remote infrastructure."""
+
+ from typing import Any, TYPE_CHECKING
+
+ from flowyml.stacks.components import Orchestrator, ComponentType, ResourceConfig, DockerConfig
+ from flowyml.core.execution_status import ExecutionStatus
+ from flowyml.core.submission_result import SubmissionResult
+
+ if TYPE_CHECKING:
+     from flowyml.core.pipeline import Pipeline
+
+
+ class RemoteOrchestrator(Orchestrator):
+     """Base orchestrator for remote execution.
+
+     This orchestrator submits jobs to remote infrastructure and returns job IDs.
+     Cloud-specific orchestrators (AWS, GCP, Azure) inherit from this.
+     """
+
+     def __init__(self, name: str = "remote"):
+         super().__init__(name)
+
+     @property
+     def component_type(self) -> ComponentType:
+         return ComponentType.ORCHESTRATOR
+
+     def validate(self) -> bool:
+         """Validate remote orchestrator configuration."""
+         return True
+
+     def to_dict(self) -> dict[str, Any]:
+         return {
+             "name": self.name,
+             "type": "remote",
+         }
+
+     def get_run_status(self, job_id: str) -> ExecutionStatus:
+         """Get status of a remote pipeline run.
+
+         This should be overridden by cloud-specific orchestrators to query
+         the actual remote execution status.
+
+         Args:
+             job_id: The remote job identifier.
+
+         Returns:
+             The current execution status.
+         """
+         return ExecutionStatus.RUNNING
+
+     def fetch_step_statuses(self, job_id: str) -> dict[str, ExecutionStatus]:
+         """Get status of individual steps in a remote run.
+
+         Args:
+             job_id: The remote job identifier.
+
+         Returns:
+             Dictionary mapping step names to their execution status.
+         """
+         # Default implementation - override in subclasses
+         return {}
+
+     def stop_run(self, job_id: str, graceful: bool = True) -> None:
+         """Stop a remote pipeline run.
+
+         Args:
+             job_id: The remote job identifier.
+             graceful: If True, attempt graceful shutdown. If False, force kill.
+
+         Raises:
+             NotImplementedError: If stopping is not supported.
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support stopping runs",
+         )
+
+     def run_pipeline(
+         self,
+         pipeline: "Pipeline",
+         run_id: str,
+         resources: ResourceConfig | None = None,
+         docker_config: DockerConfig | None = None,
+         inputs: dict[str, Any] | None = None,
+         context: dict[str, Any] | None = None,
+         **kwargs,
+     ) -> SubmissionResult:
+         """Submit pipeline to remote infrastructure.
+
+         This base implementation should be overridden by cloud-specific orchestrators
+         to submit to their respective services (AWS Batch, Vertex AI, Azure ML, etc.).
+
+         Args:
+             pipeline: The pipeline to run.
+             run_id: The unique run identifier.
+             resources: Resource configuration.
+             docker_config: Docker configuration.
+             inputs: Input data.
+             context: Context variables.
+             **kwargs: Additional arguments.
+
+         Returns:
+             SubmissionResult with remote job ID and optional wait function.
+
+         Raises:
+             NotImplementedError: Must be implemented by cloud-specific orchestrators.
+         """
+         raise NotImplementedError(
+             "RemoteOrchestrator.run_pipeline must be implemented by cloud-specific orchestrators",
+         )
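The base class above defines the contract that this release's cloud backends (aws.py, gcp.py, azure.py) fill in: submit, report status, stop. A minimal sketch of a provider-specific subclass, where the `client` object and its `submit`/`wait`/`cancel` calls are placeholders for a real cloud SDK, not flowyml API:

```python
from flowyml.core.remote_orchestrator import RemoteOrchestrator
from flowyml.core.submission_result import SubmissionResult


class MyBatchOrchestrator(RemoteOrchestrator):
    """Illustrative subclass; not part of the package."""

    def __init__(self, client):
        super().__init__(name="my-batch")
        self.client = client  # placeholder for a cloud SDK client

    def run_pipeline(self, pipeline, run_id, **kwargs) -> SubmissionResult:
        # Submit the job and hand back a handle the caller can wait on.
        job_id = self.client.submit(pipeline.name, run_id)
        return SubmissionResult(
            job_id=job_id,
            wait_for_completion=lambda: self.client.wait(job_id),
            metadata={"run_id": run_id},
        )

    def stop_run(self, job_id: str, graceful: bool = True) -> None:
        self.client.cancel(job_id, force=not graceful)
```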
flowyml/core/resources.py CHANGED
@@ -5,7 +5,7 @@ including CPU, memory, GPU, storage, and node affinity requirements.
  """
 
  from dataclasses import dataclass, field
- from typing import Any, Optional
+ from typing import Any
  import re
 
 
@@ -25,7 +25,7 @@ class GPUConfig:
 
      gpu_type: str
      count: int = 1
-     memory: Optional[str] = None
+     memory: str | None = None
 
      def __post_init__(self):
          """Validate GPU configuration."""
@@ -39,7 +39,7 @@ class GPUConfig:
      @staticmethod
      def _is_valid_memory(memory: str) -> bool:
          """Check if memory string is valid (e.g., '16Gi', '32768Mi')."""
-         return bool(re.match(r"^\d+(\.\d+)?(Ki|Mi|Gi|Ti|K|M|G|T)$", memory))
+         return bool(re.match(r"^\d+(\.\d+)?(Ki|Mi|Gi|Ti|KB|MB|GB|TB|K|M|G|T)$", memory))
 
      def to_dict(self) -> dict[str, Any]:
          """Convert to dictionary representation."""
@@ -105,7 +105,7 @@ class GPUConfig:
          def to_bytes(mem: str) -> int:
              import re
 
-             match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|K|M|G|T)?$", mem)
+             match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|KB|MB|GB|TB|K|M|G|T)?$", mem)
              if not match:
                  return 0
              value, unit = float(match.group(1)), match.group(2) or ""
@@ -114,6 +114,10 @@ class GPUConfig:
                  "Mi": 1024**2,
                  "Gi": 1024**3,
                  "Ti": 1024**4,
+                 "KB": 1000,
+                 "MB": 1000**2,
+                 "GB": 1000**3,
+                 "TB": 1000**4,
                  "K": 1000,
                  "M": 1000**2,
                  "G": 1000**3,
@@ -211,11 +215,11 @@ class ResourceRequirements:
      ... )
      """
 
-     cpu: Optional[str] = None
-     memory: Optional[str] = None
-     storage: Optional[str] = None
-     gpu: Optional[GPUConfig] = None
-     node_affinity: Optional[NodeAffinity] = None
+     cpu: str | None = None
+     memory: str | None = None
+     storage: str | None = None
+     gpu: GPUConfig | None = None
+     node_affinity: NodeAffinity | None = None
 
      def __post_init__(self):
          """Validate resource specifications."""
@@ -236,8 +240,8 @@
 
      @staticmethod
      def _is_valid_memory(memory: str) -> bool:
-         """Check if memory string is valid (e.g., '16Gi', '32768Mi')."""
-         return bool(re.match(r"^\d+(\.\d+)?(Ki|Mi|Gi|Ti|K|M|G|T|B)?$", memory))
+         """Check if memory string is valid (e.g., '16Gi', '32768Mi', '4GB')."""
+         return bool(re.match(r"^\d+(\.\d+)?(Ki|Mi|Gi|Ti|KB|MB|GB|TB|K|M|G|T|B)?$", memory))
 
      def to_dict(self) -> dict[str, Any]:
          """Convert to dictionary representation."""
@@ -258,6 +262,15 @@
          """Check if GPU resources are requested."""
          return self.gpu is not None
 
+     def __getitem__(self, key: str) -> Any:
+         """Provide dict-style access for backwards compatibility."""
+         if not hasattr(self, key):
+             raise KeyError(key)
+         value = getattr(self, key)
+         if key == "gpu" and isinstance(value, GPUConfig):
+             return value.count
+         return value
+
      def get_gpu_count(self) -> int:
          """Get total number of GPUs requested."""
          return self.gpu.count if self.gpu else 0
@@ -295,7 +308,7 @@
          import re
 
          def to_bytes(mem: str) -> int:
-             match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|K|M|G|T|B)?$", mem)
+             match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|KB|MB|GB|TB|K|M|G|T|B)?$", mem)
              if not match:
                  return 0
              value, unit = float(match.group(1)), match.group(2) or "B"
@@ -304,6 +317,10 @@
                  "Mi": 1024**2,
                  "Gi": 1024**3,
                  "Ti": 1024**4,
+                 "KB": 1000,
+                 "MB": 1000**2,
+                 "GB": 1000**3,
+                 "TB": 1000**4,
                  "K": 1000,
                  "M": 1000**2,
                  "G": 1000**3,
@@ -391,11 +408,11 @@
 
 
  def resources(
-     cpu: Optional[str] = None,
-     memory: Optional[str] = None,
-     storage: Optional[str] = None,
-     gpu: Optional[GPUConfig] = None,
-     node_affinity: Optional[NodeAffinity] = None,
+     cpu: str | None = None,
+     memory: str | None = None,
+     storage: str | None = None,
+     gpu: GPUConfig | None = None,
+     node_affinity: NodeAffinity | None = None,
  ) -> ResourceRequirements:
      """Create a ResourceRequirements object with validation.
 
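The widened regexes accept decimal suffixes (KB, MB, GB, TB, powers of 1000) alongside the binary ones (Ki, Mi, Gi, Ti, powers of 1024), and the new `__getitem__` restores dict-style reads for callers written against the old dict API. A quick sketch, assuming the `cpu` validator accepts a plain digit string:

```python
from flowyml.core.resources import GPUConfig, resources

# "4GB" (decimal) and "4Gi" (binary) both validate now, but differ in size:
# 4GB -> 4 * 1000**3 = 4_000_000_000 bytes; 4Gi -> 4 * 1024**3 = 4_294_967_296.
req = resources(cpu="4", memory="4GB", gpu=GPUConfig(gpu_type="nvidia-t4", count=2))

# Dict-style access for backwards compatibility; note that req["gpu"] returns
# the GPU count (an int), not the GPUConfig object itself.
assert req["memory"] == "4GB"
assert req["gpu"] == 2
```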
flowyml/core/retry_policy.py ADDED
@@ -0,0 +1,80 @@
+ """Retry policies for orchestrators."""
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+ from flowyml.core.error_handling import RetryConfig, ExponentialBackoff, execute_with_retry
+
+ if TYPE_CHECKING:
+     from flowyml.core.pipeline import Pipeline
+
+
+ @dataclass
+ class OrchestratorRetryPolicy:
+     """Retry policy for orchestrator-level failures.
+
+     This handles retries at the orchestrator level (entire pipeline runs),
+     distinct from step-level retries.
+     """
+
+     max_attempts: int = 3
+     """Maximum number of pipeline retry attempts"""
+
+     initial_delay: float = 60.0
+     """Initial delay between retries in seconds"""
+
+     max_delay: float = 600.0
+     """Maximum delay between retries in seconds"""
+
+     multiplier: float = 2.0
+     """Backoff multiplier for exponential backoff"""
+
+     retry_on_status: list[str] | None = None
+     """Retry on specific execution statuses (e.g., ['FAILED', 'STOPPED'])"""
+
+     def __post_init__(self):
+         if self.retry_on_status is None:
+             self.retry_on_status = ["FAILED"]
+
+     def to_retry_config(self) -> RetryConfig:
+         """Convert to RetryConfig for execute_with_retry."""
+         backoff = ExponentialBackoff(
+             initial=self.initial_delay,
+             max_delay=self.max_delay,
+             multiplier=self.multiplier,
+             jitter=True,
+         )
+
+         return RetryConfig(
+             max_attempts=self.max_attempts,
+             backoff=backoff,
+             retry_on=[Exception],  # Catch all exceptions
+             not_retry_on=[KeyboardInterrupt],  # Don't retry on manual interruption
+         )
+
+
+ def with_retry(orchestrator_method):
+     """Decorator to add retry logic to orchestrator methods.
+
+     Usage:
+         @with_retry
+         def run_pipeline(self, pipeline, ...):
+             ...
+     """
+
+     def wrapper(self, pipeline: "Pipeline", *args, retry_policy: OrchestratorRetryPolicy | None = None, **kwargs):
+         if retry_policy is None:
+             # No retry policy, execute normally
+             return orchestrator_method(self, pipeline, *args, **kwargs)
+
+         # Execute with retry
+         retry_config = retry_policy.to_retry_config()
+         return execute_with_retry(
+             orchestrator_method,
+             retry_config,
+             self,
+             pipeline,
+             *args,
+             **kwargs,
+         )
+
+     return wrapper
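`with_retry` peels the `retry_policy` keyword off the call and, when one is given, routes the wrapped method through `execute_with_retry` with exponential backoff. A self-contained sketch with an invented `FlakyOrchestrator` and delays shortened for illustration:

```python
from flowyml.core.retry_policy import OrchestratorRetryPolicy, with_retry


class FlakyOrchestrator:
    """Toy class used only to demonstrate the decorator."""

    def __init__(self):
        self.attempts = 0

    @with_retry
    def run_pipeline(self, pipeline, *args, **kwargs):
        self.attempts += 1
        if self.attempts < 3:
            raise RuntimeError("transient submission failure")
        return f"submitted after {self.attempts} attempts"


policy = OrchestratorRetryPolicy(max_attempts=3, initial_delay=0.1, max_delay=1.0)
orchestrator = FlakyOrchestrator()
print(orchestrator.run_pipeline("my-pipeline", retry_policy=policy))
# Without retry_policy=, the first RuntimeError would propagate unchanged.
```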
flowyml/core/scheduler.py CHANGED
@@ -10,7 +10,7 @@ from collections.abc import Callable
  from dataclasses import dataclass
  from datetime import datetime, timedelta
  from pathlib import Path
- from typing import Any, Optional
+ from typing import Any
 
  from flowyml.core.scheduler_config import SchedulerConfig
 
@@ -77,10 +77,10 @@ class ScheduleExecution:
 
      schedule_name: str
      started_at: datetime
-     completed_at: Optional[datetime] = None
+     completed_at: datetime | None = None
      success: bool = False
-     error: Optional[str] = None
-     duration_seconds: Optional[float] = None
+     error: str | None = None
+     duration_seconds: float | None = None
 
 
  class SchedulerMetrics:
@@ -121,7 +121,7 @@
  class SchedulerPersistence:
      """Persist schedules to SQLite database."""
 
-     def __init__(self, db_path: Optional[str] = None):
+     def __init__(self, db_path: str | None = None):
          self.db_path = db_path or str(Path.cwd() / ".flowyml_scheduler.db")
          self._init_db()
 
@@ -230,7 +230,7 @@
  class DistributedLock:
      """Distributed lock for coordinating multiple scheduler instances."""
 
-     def __init__(self, backend: str = "file", redis_url: Optional[str] = None):
+     def __init__(self, backend: str = "file", redis_url: str | None = None):
          self.backend = backend
          self.redis_url = redis_url
          self._redis = None
@@ -286,9 +286,9 @@ class PipelineScheduler:
 
      def __init__(
          self,
-         config: Optional[SchedulerConfig] = None,
-         on_success: Optional[Callable] = None,
-         on_failure: Optional[Callable] = None,
+         config: SchedulerConfig | None = None,
+         on_success: Callable | None = None,
+         on_failure: Callable | None = None,
      ):
          self.config = config or SchedulerConfig.from_env()
          self.schedules: dict[str, Schedule] = {}
flowyml/core/scheduler_config.py CHANGED
@@ -1,7 +1,6 @@
  """Scheduler configuration."""
 
  import os
- from typing import Optional
  from pydantic import BaseModel
 
 
@@ -9,10 +8,10 @@ class SchedulerConfig(BaseModel):
      """Scheduler configuration."""
 
      persist_schedules: bool = True
-     db_path: Optional[str] = None
+     db_path: str | None = None
      distributed: bool = False
      lock_backend: str = "file"  # "file", "redis"
-     redis_url: Optional[str] = None
+     redis_url: str | None = None
      check_interval_seconds: int = 10
      max_concurrent_runs: int = 5
      timezone: str = "UTC"
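`PipelineScheduler` above builds this via `SchedulerConfig.from_env()` when no config is passed; it can also be constructed explicitly. Every field below is taken from the model as diffed:

```python
from flowyml.core.scheduler_config import SchedulerConfig

config = SchedulerConfig(
    persist_schedules=True,
    db_path=".flowyml_scheduler.db",
    distributed=False,
    lock_backend="file",       # or "redis", paired with redis_url
    check_interval_seconds=30,
)
print(config.timezone)  # "UTC" by default
```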
flowyml/core/step.py CHANGED
@@ -1,5 +1,6 @@
  """Step Decorator - Define pipeline steps with automatic context injection."""
 
+ import contextlib
  import hashlib
  import inspect
  import json
@@ -9,9 +10,10 @@ from dataclasses import dataclass, field
 
  # Import resource types
  try:
-     from flowyml.core.resources import ResourceRequirements
+     from flowyml.core.resources import ResourceRequirements, GPUConfig
  except ImportError:
      ResourceRequirements = None  # type: ignore
+     GPUConfig = None  # type: ignore
 
 
  @dataclass
@@ -62,6 +64,21 @@
 
          # Store resources (accept both dict for backward compatibility and ResourceRequirements)
          self.resources = resources
+         if self.resources and ResourceRequirements and not isinstance(self.resources, ResourceRequirements):
+             if isinstance(self.resources, dict):
+                 resource_kwargs = dict(self.resources)
+                 gpu_value = resource_kwargs.get("gpu")
+                 if GPUConfig and gpu_value is not None:
+                     if isinstance(gpu_value, dict):
+                         resource_kwargs["gpu"] = GPUConfig(
+                             gpu_type=gpu_value.get("gpu_type") or gpu_value.get("type") or "generic",
+                             count=int(gpu_value.get("count", 1)),
+                             memory=gpu_value.get("memory"),
+                         )
+                     elif isinstance(gpu_value, (int, float)):
+                         resource_kwargs["gpu"] = GPUConfig(gpu_type="generic", count=int(gpu_value))
+                 with contextlib.suppress(TypeError):
+                     self.resources = ResourceRequirements(**resource_kwargs)
 
          self.tags = tags or {}
          self.condition = condition
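The net effect: a legacy dict passed as `resources` is normalized into `ResourceRequirements`, with a bare GPU number promoted to a generic `GPUConfig`. The same coercion, sketched by hand outside the constructor:

```python
from flowyml.core.resources import GPUConfig, ResourceRequirements

legacy = {"cpu": "2", "memory": "8Gi", "gpu": 2}  # old dict-style spec

kwargs = dict(legacy)
if isinstance(kwargs.get("gpu"), (int, float)):
    # A bare count becomes a generic GPUConfig, as in Step.__init__ above.
    kwargs["gpu"] = GPUConfig(gpu_type="generic", count=int(kwargs["gpu"]))

normalized = ResourceRequirements(**kwargs)
assert isinstance(normalized.gpu, GPUConfig)
assert normalized.get_gpu_count() == 2
```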
flowyml/core/submission_result.py ADDED
@@ -0,0 +1,53 @@
+ """Submission result for async pipeline execution."""
+
+ from typing import Any
+ from collections.abc import Callable
+
+
+ class SubmissionResult:
+     """Result of submitting a pipeline run to an orchestrator.
+
+     This class enables async execution patterns where the orchestrator
+     submits the pipeline and returns immediately, optionally providing
+     a way to wait for completion.
+     """
+
+     def __init__(
+         self,
+         job_id: str,
+         wait_for_completion: Callable[[], None] | None = None,
+         metadata: dict[str, Any] | None = None,
+     ):
+         """Initialize a submission result.
+
+         Args:
+             job_id: The remote job/run identifier.
+             wait_for_completion: Optional function to block until pipeline completes.
+             metadata: Optional metadata about the submission.
+         """
+         self.job_id = job_id
+         self.wait_for_completion = wait_for_completion
+         self.metadata = metadata or {}
+
+     def wait(self, timeout: int | None = None) -> None:
+         """Wait for the pipeline run to complete.
+
+         Args:
+             timeout: Optional timeout in seconds. If None, waits indefinitely.
+
+         Raises:
+             RuntimeError: If no wait_for_completion function was provided.
+             TimeoutError: If timeout is exceeded.
+         """
+         if not self.wait_for_completion:
+             raise RuntimeError(
+                 f"Cannot wait for job {self.job_id}: no wait function provided",
+             )
+
+         # TODO: Add timeout support
+         if timeout:
+             import warnings
+
+             warnings.warn("Timeout parameter not yet implemented", UserWarning, stacklevel=2)
+
+         self.wait_for_completion()
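Typical use, with a toy wait hook standing in for a real remote polling loop:

```python
import time

from flowyml.core.submission_result import SubmissionResult

deadline = time.monotonic() + 1.0  # pretend the remote job takes ~1s

result = SubmissionResult(
    job_id="job-123",
    wait_for_completion=lambda: time.sleep(max(0.0, deadline - time.monotonic())),
    metadata={"region": "us-east-1"},
)
result.wait()  # blocks via the hook; timeout= currently only emits a UserWarning

try:
    SubmissionResult(job_id="job-456").wait()
except RuntimeError:
    pass  # fire-and-forget submissions have no wait hook to call
```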
flowyml/integrations/keras.py CHANGED
@@ -2,6 +2,7 @@
 
  from pathlib import Path
  from datetime import datetime
+ import uuid
 
  try:
      from tensorflow import keras
@@ -16,28 +17,38 @@ from flowyml.storage.metadata import SQLiteMetadataStore
 
 
  class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
-     """Keras callback for flowyml tracking.
+     """Keras callback for flowyml tracking with automatic training history collection.
 
      Automatically logs:
-     - Training metrics (loss, accuracy, etc.)
-     - Model checkpoints (optional)
+     - Training metrics (loss, accuracy, etc.) per epoch
+     - Complete training history for visualization
+     - Model checkpoints with training history attached
      - Model architecture
      - Training parameters
+
+     Example:
+         >>> from flowyml.integrations.keras import FlowymlKerasCallback
+         >>> callback = FlowymlKerasCallback(experiment_name="my-experiment", project="my-project", auto_log_history=True)
+         >>> model.fit(x_train, y_train, epochs=50, callbacks=[callback])
      """
 
      def __init__(
          self,
          experiment_name: str,
          run_name: str | None = None,
+         project: str | None = None,
          log_model: bool = True,
          log_every_epoch: bool = True,
+         auto_log_history: bool = True,
          metadata_store: SQLiteMetadataStore | None = None,
      ):
          """Args:
              experiment_name: Name of the experiment
              run_name: Optional run name (defaults to timestamp)
+             project: Project name for organizing runs
              log_model: Whether to save the model as an artifact
              log_every_epoch: Whether to log metrics every epoch
+             auto_log_history: Whether to automatically collect training history
              metadata_store: Optional metadata store override.
          """
          if keras is None:
@@ -46,8 +57,10 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
          super().__init__()
          self.experiment_name = experiment_name
          self.run_name = run_name or datetime.now().strftime("run_%Y%m%d_%H%M%S")
+         self.project = project
          self.log_model = log_model
          self.log_every_epoch = log_every_epoch
+         self.auto_log_history = auto_log_history
 
          self.metadata_store = metadata_store or SQLiteMetadataStore()
 
@@ -57,6 +70,16 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
          # Track params
          self.params_logged = False
 
+         # Training history accumulator
+         self.training_history = {
+             "epochs": [],
+             "train_loss": [],
+             "train_accuracy": [],
+             "val_loss": [],
+             "val_accuracy": [],
+         }
+         self.custom_metrics = set()
+
      def on_train_begin(self, logs=None) -> None:
          """Log initial parameters."""
          if not self.params_logged:
@@ -85,6 +108,7 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
                      "name": "model_architecture",
                      "type": "json",
                      "run_id": self.run_name,
+                     "project": self.project,
                      "value": model_json,
                      "created_at": datetime.now().isoformat(),
                  },
@@ -93,28 +117,52 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
              )
          self.params_logged = True
      def on_epoch_end(self, epoch, logs=None) -> None:
-         """Log metrics at the end of each epoch."""
-         if self.log_every_epoch and logs:
-             # Log metrics to DB
-             for k, v in logs.items():
-                 self.metadata_store.save_metric(
-                     run_id=self.run_name,
-                     name=k,
-                     value=float(v),
-                     step=epoch,
-                 )
-
-             # Update experiment run
-             self.metadata_store.log_experiment_run(
-                 experiment_id=self.experiment_name,
-                 run_id=self.run_name,
-                 metrics=logs,
-             )
+         """Log metrics at the end of each epoch and accumulate training history."""
+         if logs:
+             # Log metrics to DB (existing behavior)
+             if self.log_every_epoch:
+                 for k, v in logs.items():
+                     self.metadata_store.save_metric(
+                         run_id=self.run_name,
+                         name=k,
+                         value=float(v),
+                         step=epoch,
+                     )
+
+                 # Update experiment run
+                 self.metadata_store.log_experiment_run(
+                     experiment_id=self.experiment_name,
+                     run_id=self.run_name,
+                     metrics=logs,
+                 )
+
+             # Accumulate training history (NEW)
+             if self.auto_log_history:
+                 self.training_history["epochs"].append(epoch + 1)  # 1-indexed
+
+                 # Standard metrics
+                 if "loss" in logs:
+                     self.training_history["train_loss"].append(float(logs["loss"]))
+                 if "accuracy" in logs or "acc" in logs:
+                     acc_key = "accuracy" if "accuracy" in logs else "acc"
+                     self.training_history["train_accuracy"].append(float(logs[acc_key]))
+                 if "val_loss" in logs:
+                     self.training_history["val_loss"].append(float(logs["val_loss"]))
+                 if "val_accuracy" in logs or "val_acc" in logs:
+                     val_acc_key = "val_accuracy" if "val_accuracy" in logs else "val_acc"
+                     self.training_history["val_accuracy"].append(float(logs[val_acc_key]))
+
+                 # Custom metrics
+                 for metric_name, value in logs.items():
+                     if metric_name not in ["loss", "accuracy", "acc", "val_loss", "val_accuracy", "val_acc"]:
+                         if metric_name not in self.custom_metrics:
+                             self.custom_metrics.add(metric_name)
+                             self.training_history[metric_name] = []
+                         self.training_history[metric_name].append(float(value))
 
      def on_train_end(self, logs=None) -> None:
-         """Save model at the end of training."""
+         """Save model at the end of training with complete training history."""
          if self.log_model:
-             # Create artifacts directory
              # Create artifacts directory
              artifact_dir = Path(f".flowyml/artifacts/{self.run_name}")
              artifact_dir.mkdir(parents=True, exist_ok=True)
@@ -122,13 +170,38 @@ class FlowymlKerasCallback(keras.callbacks.Callback if keras else object):
              model_path = artifact_dir / "model.keras"
              self.model.save(model_path)
 
+             # Clean up empty history lists
+             cleaned_history = {
+                 k: v
+                 for k, v in self.training_history.items()
+                 if v  # Only include non-empty lists
+             }
+
+             # Calculate final metrics
+             final_metrics = {}
+             if "train_loss" in cleaned_history and cleaned_history["train_loss"]:
+                 final_metrics["loss"] = cleaned_history["train_loss"][-1]
+             if "train_accuracy" in cleaned_history and cleaned_history["train_accuracy"]:
+                 final_metrics["accuracy"] = cleaned_history["train_accuracy"][-1]
+
+             # Save model artifact with training history
+             artifact_id = str(uuid.uuid4())
              self.metadata_store.save_artifact(
-                 artifact_id=f"{self.run_name}_model",
+                 artifact_id=artifact_id,
                  metadata={
-                     "name": "trained_model",
-                     "type": "keras_model",
+                     "artifact_id": artifact_id,
+                     "name": f"model-{self.run_name}",
+                     "type": "model",
                      "run_id": self.run_name,
+                     "project": self.project,
                      "path": str(model_path.resolve()),
+                     "properties": {
+                         "framework": "keras",
+                         "epochs_trained": len(cleaned_history.get("epochs", [])),
+                         "optimizer": str(self.model.optimizer.__class__.__name__),
+                         **final_metrics,
+                     },
+                     "training_history": cleaned_history,  # NEW: UI will display this!
                      "created_at": datetime.now().isoformat(),
                  },
              )
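End-to-end, the callback now threads the project name through both artifacts and attaches the accumulated `training_history` to the saved model. A minimal sketch with arbitrary toy data and names:

```python
import numpy as np
from tensorflow import keras

from flowyml.integrations.keras import FlowymlKerasCallback

# Tiny throwaway regression model; architecture and data are arbitrary.
model = keras.Sequential([keras.layers.Input(shape=(4,)), keras.layers.Dense(1)])
model.compile(optimizer="adam", loss="mse")

callback = FlowymlKerasCallback(
    experiment_name="demo-experiment",
    project="demo-project",    # groups the run and artifacts by project
    auto_log_history=True,     # accumulate per-epoch history for the UI
)

x, y = np.random.rand(64, 4), np.random.rand(64, 1)
model.fit(x, y, epochs=3, callbacks=[callback], verbose=0)
# Afterwards the metadata store holds per-epoch metrics, and the saved model
# artifact carries "training_history" plus final loss under "properties".
```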