scientiflow-cli 0.4.15__tar.gz → 0.4.17__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (30)
  1. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/PKG-INFO +1 -1
  2. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/pyproject.toml +1 -1
  3. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/main.py +4 -1
  4. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/decode_and_execute.py +106 -43
  5. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/get_jobs.py +6 -2
  6. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/executor.py +7 -4
  7. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/request_handler.py +0 -3
  8. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/LICENSE.md +0 -0
  9. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/README.md +0 -0
  10. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/__init__.py +0 -0
  11. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/__main__.py +0 -0
  12. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/__init__.py +0 -0
  13. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/auth_utils.py +0 -0
  14. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/login.py +0 -0
  15. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/logout.py +0 -0
  16. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/__init__.py +0 -0
  17. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/container_manager.py +0 -0
  18. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/__init__.py +0 -0
  19. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/auth_service.py +0 -0
  20. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/base_directory.py +0 -0
  21. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/modes.py +0 -0
  22. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/rich_printer.py +0 -0
  23. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/status_updater.py +0 -0
  24. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/__init__.py +0 -0
  25. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/config.py +0 -0
  26. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/encryption.py +0 -0
  27. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/file_manager.py +0 -0
  28. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/logger.py +0 -0
  29. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/mock.py +0 -0
  30. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/singularity.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scientiflow-cli
-Version: 0.4.15
+Version: 0.4.17
 Summary: CLI tool for scientiflow. This application runs on the client side, decodes pipelines, and executes them in the configured order!
 License: Proprietary
 Author: ScientiFlow
pyproject.toml
@@ -3,7 +3,7 @@ mode = "prod"

 [tool.poetry]
 name = "scientiflow-cli"
-version = "0.4.15"
+version = "0.4.17"
 description = "CLI tool for scientiflow. This application runs on the client side, decodes pipelines, and executes them in the configured order!"
 authors = ["ScientiFlow <scientiflow@gmail.com>"]
 license = "Proprietary"
scientiflow_cli/main.py
@@ -117,7 +117,10 @@ def main():
     elif args.logout:
         logout_user()
     elif args.list_jobs:
-        get_jobs()
+        if args.cloud_job:
+            get_jobs(is_cloud=args.cloud_job)
+        else:
+            get_jobs()
     elif args.set_base_directory:
         set_base_directory(hostname=args.hostname)
     elif args.execute_jobs is not None:
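For orientation, here is a minimal sketch of the dispatch this hunk extends. The argparse wiring is not part of the diff, so the flag names and store_true actions below are assumptions inferred from the attributes args.list_jobs and args.cloud_job:

    import argparse

    # Hypothetical reconstruction; the real parser in scientiflow_cli/main.py
    # defines many more options than the two shown here.
    parser = argparse.ArgumentParser(prog="scientiflow-cli")
    parser.add_argument("--list-jobs", action="store_true")
    parser.add_argument("--cloud-job", action="store_true")
    args = parser.parse_args(["--list-jobs", "--cloud-job"])

    if args.list_jobs:
        # Mirrors the new branch: the flag value is forwarded verbatim.
        print(f"would call get_jobs(is_cloud={args.cloud_job})")

Note that if cloud_job defaults to False, the new if/else is equivalent to calling get_jobs(is_cloud=args.cloud_job) unconditionally.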
scientiflow_cli/pipeline/decode_and_execute.py
@@ -12,6 +12,85 @@ from scientiflow_cli.services.rich_printer import RichPrinter

 printer = RichPrinter()

+# Global background job tracker
+class GlobalBackgroundJobTracker:
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+                    cls._instance._initialize()
+        return cls._instance
+
+    def _initialize(self):
+        self.background_executors = []
+        self.background_jobs_count = 0
+        self.background_jobs_completed = 0
+        self.background_jobs_lock = threading.Lock()
+
+    def register_background_job(self, executor, futures, node_label, log_file_path):
+        """Register a background job for global tracking."""
+        with self.background_jobs_lock:
+            self.background_jobs_count += 1
+            self.background_executors.append(executor)
+
+        # Start monitoring in a separate thread
+        monitor_thread = threading.Thread(
+            target=self._monitor_job,
+            args=(futures, node_label, executor, log_file_path),
+            daemon=True
+        )
+        monitor_thread.start()
+
+    def _monitor_job(self, futures, node_label, executor, log_file_path):
+        """Monitor background job completion."""
+        all_successful = True
+        for future in as_completed(futures):
+            success = future.result()
+            if not success:
+                all_successful = False
+
+        if not all_successful:
+            with open(log_file_path, 'a') as f:
+                f.write(f"[ERROR] Background job {node_label} failed\n")
+            printer.print_message(f"[BACKGROUND JOB] {node_label} Failed - some commands in background job failed", style="bold red")
+        else:
+            printer.print_message(f"[BACKGROUND JOB] {node_label} Execution completed in the background", style="bold green")
+
+        # Clean up executor
+        executor.shutdown(wait=False)
+        with self.background_jobs_lock:
+            if executor in self.background_executors:
+                self.background_executors.remove(executor)
+            self.background_jobs_completed += 1
+
+    def wait_for_all_jobs(self):
+        """Wait for all background jobs to complete."""
+        import time
+        if self.background_jobs_count > 0:
+            printer.print_message(f"[INFO] Waiting for {self.background_jobs_count} background job(s) to complete...", style="bold yellow")

+            while True:
+                with self.background_jobs_lock:
+                    if self.background_jobs_completed >= self.background_jobs_count:
+                        break
+                time.sleep(0.5)  # Check every 500ms
+
+            printer.print_message("[INFO] All background jobs completed.", style="bold green")
+
+    def reset(self):
+        """Reset the tracker for a new execution cycle."""
+        with self.background_jobs_lock:
+            self.background_executors = []
+            self.background_jobs_count = 0
+            self.background_jobs_completed = 0
+
+# Global tracker instance
+global_bg_tracker = GlobalBackgroundJobTracker()
+
 def execute_background_command_standalone(command: str, log_file_path: str):
     """Execute a command in background without real-time output display - standalone function for multiprocessing."""
     try:
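The new tracker is a double-checked-locking singleton, a pattern worth isolating because it is easy to get subtly wrong. A minimal standalone demonstration of the same construction:

    import threading

    class Singleton:
        _instance = None
        _lock = threading.Lock()

        def __new__(cls):
            # Fast path: skip the lock entirely once the instance exists.
            if cls._instance is None:
                with cls._lock:
                    # Re-check under the lock so two racing threads cannot
                    # both construct an instance.
                    if cls._instance is None:
                        cls._instance = super().__new__(cls)
            return cls._instance

    assert Singleton() is Singleton()  # every call yields the same object

Because __new__ always returns the same object, every module that constructs GlobalBackgroundJobTracker() shares one set of counters and executors, which is what lets jobs registered inside decode_and_execute.py be awaited later from executor.py.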
@@ -37,7 +116,7 @@ def execute_background_command_standalone(command: str, log_file_path: str):
         return False

 class PipelineExecutor:
-    def __init__(self, base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None):
+    def __init__(self, base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None, is_cloud: bool = False):
         self.base_dir = base_dir
         self.project_id = project_id
         self.project_job_id = project_job_id
@@ -51,6 +130,7 @@
         self.current_node = None
         self.job_status = job_status
         self.current_node_from_config = current_node_from_config
+        self.is_cloud = is_cloud
         self.background_executors = []  # Keep track of background executors
         self.background_jobs_count = 0  # Track number of active background jobs
         self.background_jobs_completed = 0  # Track completed background jobs
@@ -78,6 +158,18 @@ class PipelineExecutor:
         # Initialize log file
         self.init_log()

+    # ✅ Helper method to conditionally notify cloud manager
+    def _notify_cloud(self, completion_type: str):
+        """Triggers scientiflow-cloud commands if in cloud mode."""
+        if not self.is_cloud:
+            return
+        try:
+            cmd = ["scientiflow-cloud", f"--completed-job-{completion_type}", str(self.project_job_id)]
+            subprocess.run(cmd, check=False)
+            logger.info(f"Sent {completion_type} signal to cloud manager.")
+        except Exception as e:
+            print(f"[ERROR] Cloud notification failed: {e}")
+
     def init_log(self):
         """Initialize the log file."""
         try:
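The helper shells out to an external scientiflow-cloud binary rather than calling a library. For completion_type "partially" and an example project_job_id of 42, the assembled command looks like this:

    import subprocess

    completion_type = "partially"  # or "fully"
    cmd = ["scientiflow-cloud", f"--completed-job-{completion_type}", "42"]
    print(" ".join(cmd))  # scientiflow-cloud --completed-job-partially 42

    # check=False ignores a non-zero exit status; a missing binary still
    # raises FileNotFoundError, which the broad except above catches.
    # subprocess.run(cmd, check=False)

One caveat worth flagging: no logger definition or import is visible in this diff, so if the module does not provide one, logger.info raises NameError inside the try block and the notification is reported as failed even though subprocess.run has already completed.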
@@ -110,6 +202,8 @@ class PipelineExecutor:
             body = {"project_job_id": self.project_job_id, "terminal_output": terminal_output}
             make_auth_request(endpoint="/agent-application/update-terminal-output", method="POST", data=body, error_message="Unable to update terminal output!")
             printer.print_message("[+] Terminal output updated successfully.", style="bold green")
+            # ✅ TRIGGER: FULL COMPLETION
+            self._notify_cloud("fully")
         except Exception as e:
             print(f"[ERROR] Failed to update terminal output: {e}")

@@ -165,40 +259,14 @@ class PipelineExecutor:
             self.update_terminal_output()
             raise SystemExit("[ERROR] Pipeline execution terminated due to an unexpected error.")

-    def monitor_background_job(self, futures, node_label, executor):
-        """Monitor background job completion in a separate thread."""
-        def monitor():
-            all_successful = True
-            for future in as_completed(futures):
-                success = future.result()
-                if not success:
-                    all_successful = False
-
-            if not all_successful:
-                self.log_output(f"[ERROR] Background job {node_label} failed")
-                printer.print_message(f"[BACKGROUND JOB] {node_label} Failed - some commands in background job failed", style="bold red")
-            else:
-                printer.print_message(f"[BACKGROUND JOB] {node_label} Execution completed in the background", style="bold green")
-
-            # Clean up executor
-            executor.shutdown(wait=False)
-            if executor in self.background_executors:
-                self.background_executors.remove(executor)
-
-            # Update background job completion count
-            with self.background_jobs_lock:
-                self.background_jobs_completed += 1
-
-        # Start monitoring thread
-        monitor_thread = threading.Thread(target=monitor, daemon=True)
-        monitor_thread.start()

     def wait_for_background_jobs(self):
         """Wait for all background jobs to complete."""
         import time
         if self.background_jobs_count > 0:
             printer.print_message(f"[INFO] Waiting for {self.background_jobs_count} background job(s) to complete...", style="bold yellow")
-
+            # ✅ TRIGGER: PARTIAL COMPLETION (Frees GPU/Primary CPU while waiting for cleanup)
+            self._notify_cloud("partially")
         while True:
             with self.background_jobs_lock:
                 if self.background_jobs_completed >= self.background_jobs_count:
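Both wait methods rely on the same lock-guarded polling barrier (and note that, with the dfs-level call removed later in this diff, wait_for_background_jobs appears to have no remaining caller here; the tracker's wait_for_all_jobs takes over that role). A toy reproduction of the barrier, independent of the ScientiFlow code; threading.Event or Condition would avoid polling, but this mirrors the shipped approach:

    import threading
    import time

    lock = threading.Lock()
    completed = 0
    TOTAL = 3

    def worker():
        global completed
        time.sleep(0.2)  # stand-in for real background work
        with lock:
            completed += 1

    for _ in range(TOTAL):
        threading.Thread(target=worker, daemon=True).start()

    while True:  # poll-until-done, as in the tracker
        with lock:
            if completed >= TOTAL:
                break
        time.sleep(0.5)
    print("all background work finished")

The loop terminates because completed only increases: once it reaches TOTAL, the next 0.5 s poll observes the condition and breaks.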
@@ -284,19 +352,14 @@ class PipelineExecutor:

         # Execute commands in background using ProcessPoolExecutor (non-blocking)
         if command_list:
-            # Increment background jobs counter
-            with self.background_jobs_lock:
-                self.background_jobs_count += 1
-
             executor = ProcessPoolExecutor(max_workers=numberOfThreads)
-            self.background_executors.append(executor)  # Keep reference to prevent garbage collection
             futures = []
             for cmd in command_list:
                 future = executor.submit(execute_background_command_standalone, cmd, self.log_file_path)
                 futures.append(future)

-            # Start monitoring in a separate thread (non-blocking)
-            self.monitor_background_job(futures, node_label, executor)
+            # Register with global tracker (non-blocking)
+            global_bg_tracker.register_background_job(executor, futures, node_label, self.log_file_path)

             # Don't wait for completion, immediately continue to next node
         else:
@@ -323,7 +386,6 @@ class PipelineExecutor:
         """Start executing the pipeline."""
         # Use job status from configuration instead of API call
         current_status = self.job_status
-
         if current_status == "running":
             # Job is already running, resume from start but skip until current node
             current_node_id = self.current_node_from_config
@@ -344,9 +406,9 @@ class PipelineExecutor:
         if starting_node:
             self.dfs(starting_node)

-        # Wait for all background jobs to complete before marking pipeline as completed
-        self.wait_for_background_jobs()
-
+        # Don't wait for background jobs here - let them continue across multiple jobs
+        # Background jobs will be waited for at the end of all job executions
+
         update_job_status(self.project_job_id, "completed")
         update_stopped_at_node(self.project_id, self.project_job_id, self.current_node)

 
@@ -354,7 +416,8 @@ class PipelineExecutor:
354
416
  self.update_terminal_output()
355
417
 
356
418
  # External function to initiate the pipeline execution
357
- def decode_and_execute_pipeline(base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None):
419
+ def decode_and_execute_pipeline(base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None, is_cloud: bool = False):
358
420
  """Initialize and execute the pipeline."""
359
- executor = PipelineExecutor(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node, end_node, job_status, current_node_from_config)
360
- executor.decode_and_execute_pipeline()
421
+ executor = PipelineExecutor(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node, end_node, job_status, current_node_from_config, is_cloud)
422
+ executor.decode_and_execute_pipeline()
423
+
scientiflow_cli/pipeline/get_jobs.py
@@ -4,8 +4,12 @@ from scientiflow_cli.services.rich_printer import RichPrinter

 printer = RichPrinter()

-def get_jobs() -> list[dict]:
-    response = make_auth_request(endpoint="/agent-application/check-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
+def get_jobs(is_cloud: bool = False) -> list[dict]:
+    if is_cloud:
+        response = make_auth_request(endpoint="/agent-application/check-cloud-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
+        printer.print_message("Fetching cloud jobs to execute...", style="bold blue")
+    else:
+        response = make_auth_request(endpoint="/agent-application/check-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
     try:
         jobs = response.json()
         if len(jobs) == 0:
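The functional change is endpoint selection (plus an extra status message on the cloud path). Isolated for clarity, under the assumption that both endpoints return the same JSON job-list shape:

    def jobs_endpoint(is_cloud: bool = False) -> str:
        # Helper extracted purely for illustration; get_jobs inlines this choice.
        if is_cloud:
            return "/agent-application/check-cloud-jobs-to-execute"
        return "/agent-application/check-jobs-to-execute"

    assert jobs_endpoint() == "/agent-application/check-jobs-to-execute"
    assert jobs_endpoint(True) == "/agent-application/check-cloud-jobs-to-execute"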
scientiflow_cli/services/executor.py
@@ -1,7 +1,7 @@
 from concurrent.futures import ThreadPoolExecutor
 import asyncio
 from scientiflow_cli.pipeline.get_jobs import get_jobs
-from scientiflow_cli.pipeline.decode_and_execute import decode_and_execute_pipeline
+from scientiflow_cli.pipeline.decode_and_execute import decode_and_execute_pipeline, global_bg_tracker
 from scientiflow_cli.pipeline.container_manager import get_job_containers
 from scientiflow_cli.utils.file_manager import create_job_dirs, get_job_files
 from scientiflow_cli.services.rich_printer import RichPrinter
@@ -37,7 +37,7 @@ def execute_jobs(job_ids: list[int] = None, parallel: bool = False, is_cloud: bo
     if matching_jobs:
         if is_cloud:
             for job in matching_jobs:
-                job['project_title'] = str(job['project']['id']) + '_' + job['project']['project_title']
+                job['project']['project_title'] = str(job['project']['id']) + '_' + job['project']['project_title']
                 if 'server' not in job or job['server'] is None:
                     job['server'] = {'base_directory':None}
                 job['server']['base_directory'] = get_base_directory()
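This one-line change is a genuine bug fix: 0.4.15 wrote the prefixed title to a new top-level 'project_title' key, leaving the nested job['project']['project_title'] (which downstream code presumably reads) untouched. A minimal demonstration with made-up values:

    job = {"project": {"id": 7, "project_title": "demo"}}

    # 0.4.15 (buggy): creates a sibling key; the nested title is unchanged
    job["project_title"] = str(job["project"]["id"]) + "_" + job["project"]["project_title"]
    assert job["project"]["project_title"] == "demo"

    # 0.4.17 (fixed): prefixes the nested title in place
    job["project"]["project_title"] = str(job["project"]["id"]) + "_" + job["project"]["project_title"]
    assert job["project"]["project_title"] == "7_demo"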
@@ -117,6 +117,9 @@ async def execute_async(jobs: list[dict]) -> None:

     await asyncio.gather(*running_jobs)  # Wait for all jobs to complete
     printer.print_success("[ASYNC COMPLETE] All jobs finished!")
+
+    # Wait for all background jobs from all executed jobs to complete
+    global_bg_tracker.wait_for_all_jobs()


 def execute_single_job(job: dict, is_cloud: bool = False) -> None:
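Placement matters here: wait_for_all_jobs blocks with time.sleep, but it runs only after asyncio.gather has drained every foreground pipeline, so it acts as a single end-of-run barrier rather than a per-job one. A shape-only sketch (the job callables are placeholders, not real pipelines):

    import asyncio

    async def run_all(jobs):
        # Foreground pipelines first; each may hand work to the tracker.
        await asyncio.gather(*(asyncio.to_thread(job) for job in jobs))
        # Then one blocking barrier for everything the tracker accumulated:
        # global_bg_tracker.wait_for_all_jobs()

    asyncio.run(run_all([lambda: None, lambda: None]))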
@@ -169,8 +172,8 @@ def execute_single_job(job: dict, is_cloud: bool = False) -> None:
             printer.print_success(f"[+] Resuming execution for job ID: {project_job_id}")
         else:
             printer.print_success(f"[+] Starting execution for job ID: {project_job_id}")
-
-        decode_and_execute_pipeline(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node=start_node, end_node=end_node, job_status=job_status, current_node_from_config=current_node_from_config)
+
+        decode_and_execute_pipeline(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node=start_node, end_node=end_node, job_status=job_status, current_node_from_config=current_node_from_config, is_cloud=is_cloud)
         printer.print_success(f"[+] Execution completed for job ID: {project_job_id}")

     except ValueError as value_err:
scientiflow_cli/services/request_handler.py
@@ -21,7 +21,6 @@ def make_auth_request(endpoint, method, data=None, params=None, error_message=No
         return handle_response(response, error_message)

     except requests.RequestException as e:
-        print(e)
         return "Request failed"


@@ -33,12 +32,10 @@ def make_no_auth_request(endpoint, method, data=None, error_message=None):
             response = requests.get(base_url + endpoint)
         elif method == 'POST':
             response = requests.post(base_url + endpoint, json=data)
-            print(response)
         else:
             raise ValueError("Unsupported HTTP method")

         return handle_response(response, error_message)

     except requests.RequestException as e:
-        print(e)
         return "Request failed"