scientiflow-cli 0.4.15__tar.gz → 0.4.17__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (30)
  1. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/PKG-INFO +1 -1
  2. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/pyproject.toml +1 -1
  3. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/main.py +4 -1
  4. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/decode_and_execute.py +106 -43
  5. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/get_jobs.py +6 -2
  6. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/executor.py +7 -4
  7. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/request_handler.py +0 -3
  8. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/LICENSE.md +0 -0
  9. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/README.md +0 -0
  10. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/__init__.py +0 -0
  11. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/__main__.py +0 -0
  12. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/__init__.py +0 -0
  13. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/auth_utils.py +0 -0
  14. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/login.py +0 -0
  15. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/cli/logout.py +0 -0
  16. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/__init__.py +0 -0
  17. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/pipeline/container_manager.py +0 -0
  18. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/__init__.py +0 -0
  19. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/auth_service.py +0 -0
  20. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/base_directory.py +0 -0
  21. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/modes.py +0 -0
  22. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/rich_printer.py +0 -0
  23. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/services/status_updater.py +0 -0
  24. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/__init__.py +0 -0
  25. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/config.py +0 -0
  26. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/encryption.py +0 -0
  27. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/file_manager.py +0 -0
  28. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/logger.py +0 -0
  29. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/mock.py +0 -0
  30. {scientiflow_cli-0.4.15 → scientiflow_cli-0.4.17}/scientiflow_cli/utils/singularity.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scientiflow-cli
-Version: 0.4.15
+Version: 0.4.17
 Summary: CLI tool for scientiflow. This application runs on the client side, decodes pipelines, and executes them in the configured order!
 License: Proprietary
 Author: ScientiFlow
pyproject.toml
@@ -3,7 +3,7 @@ mode = "prod"

 [tool.poetry]
 name = "scientiflow-cli"
-version = "0.4.15"
+version = "0.4.17"
 description = "CLI tool for scientiflow. This application runs on the client side, decodes pipelines, and executes them in the configured order!"
 authors = ["ScientiFlow <scientiflow@gmail.com>"]
 license = "Proprietary"
scientiflow_cli/main.py
@@ -117,7 +117,10 @@ def main():
     elif args.logout:
         logout_user()
     elif args.list_jobs:
-        get_jobs()
+        if args.cloud_job:
+            get_jobs(is_cloud=args.cloud_job)
+        else:
+            get_jobs()
     elif args.set_base_directory:
         set_base_directory(hostname=args.hostname)
     elif args.execute_jobs is not None:
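For orientation, here is a minimal sketch of the dispatch this hunk extends. The argparse wiring is not part of the diff, so the flag names and store_true actions below are assumptions inferred from the attributes args.list_jobs and args.cloud_job:

    import argparse

    # Hypothetical reconstruction; the real parser in scientiflow_cli/main.py
    # defines many more options than the two shown here.
    parser = argparse.ArgumentParser(prog="scientiflow-cli")
    parser.add_argument("--list-jobs", action="store_true")
    parser.add_argument("--cloud-job", action="store_true")
    args = parser.parse_args(["--list-jobs", "--cloud-job"])

    if args.list_jobs:
        # Mirrors the new branch: the flag value is forwarded verbatim.
        print(f"would call get_jobs(is_cloud={args.cloud_job})")

Note that if cloud_job defaults to False, the new if/else is equivalent to calling get_jobs(is_cloud=args.cloud_job) unconditionally.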
scientiflow_cli/pipeline/decode_and_execute.py
@@ -12,6 +12,85 @@ from scientiflow_cli.services.rich_printer import RichPrinter

 printer = RichPrinter()

+# Global background job tracker
+class GlobalBackgroundJobTracker:
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+                    cls._instance._initialize()
+        return cls._instance
+
+    def _initialize(self):
+        self.background_executors = []
+        self.background_jobs_count = 0
+        self.background_jobs_completed = 0
+        self.background_jobs_lock = threading.Lock()
+
+    def register_background_job(self, executor, futures, node_label, log_file_path):
+        """Register a background job for global tracking."""
+        with self.background_jobs_lock:
+            self.background_jobs_count += 1
+            self.background_executors.append(executor)
+
+        # Start monitoring in a separate thread
+        monitor_thread = threading.Thread(
+            target=self._monitor_job,
+            args=(futures, node_label, executor, log_file_path),
+            daemon=True
+        )
+        monitor_thread.start()
+
+    def _monitor_job(self, futures, node_label, executor, log_file_path):
+        """Monitor background job completion."""
+        all_successful = True
+        for future in as_completed(futures):
+            success = future.result()
+            if not success:
+                all_successful = False
+
+        if not all_successful:
+            with open(log_file_path, 'a') as f:
+                f.write(f"[ERROR] Background job {node_label} failed\n")
+            printer.print_message(f"[BACKGROUND JOB] {node_label} Failed - some commands in background job failed", style="bold red")
+        else:
+            printer.print_message(f"[BACKGROUND JOB] {node_label} Execution completed in the background", style="bold green")
+
+        # Clean up executor
+        executor.shutdown(wait=False)
+        with self.background_jobs_lock:
+            if executor in self.background_executors:
+                self.background_executors.remove(executor)
+            self.background_jobs_completed += 1
+
+    def wait_for_all_jobs(self):
+        """Wait for all background jobs to complete."""
+        import time
+        if self.background_jobs_count > 0:
+            printer.print_message(f"[INFO] Waiting for {self.background_jobs_count} background job(s) to complete...", style="bold yellow")

+            while True:
+                with self.background_jobs_lock:
+                    if self.background_jobs_completed >= self.background_jobs_count:
+                        break
+                time.sleep(0.5)  # Check every 500ms
+
+            printer.print_message("[INFO] All background jobs completed.", style="bold green")
+
+    def reset(self):
+        """Reset the tracker for a new execution cycle."""
+        with self.background_jobs_lock:
+            self.background_executors = []
+            self.background_jobs_count = 0
+            self.background_jobs_completed = 0
+
+# Global tracker instance
+global_bg_tracker = GlobalBackgroundJobTracker()
+
 def execute_background_command_standalone(command: str, log_file_path: str):
     """Execute a command in background without real-time output display - standalone function for multiprocessing."""
     try:
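The new tracker is a double-checked-locking singleton, a pattern worth isolating because it is easy to get subtly wrong. A minimal standalone demonstration of the same construction:

    import threading

    class Singleton:
        _instance = None
        _lock = threading.Lock()

        def __new__(cls):
            # Fast path: skip the lock entirely once the instance exists.
            if cls._instance is None:
                with cls._lock:
                    # Re-check under the lock so two racing threads cannot
                    # both construct an instance.
                    if cls._instance is None:
                        cls._instance = super().__new__(cls)
            return cls._instance

    assert Singleton() is Singleton()  # every call yields the same object

Because __new__ always returns the same object, every module that constructs GlobalBackgroundJobTracker() shares one set of counters and executors, which is what lets jobs registered inside decode_and_execute.py be awaited later from executor.py.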
@@ -37,7 +116,7 @@ def execute_background_command_standalone(command: str, log_file_path: str):
         return False

 class PipelineExecutor:
-    def __init__(self, base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None):
+    def __init__(self, base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None, is_cloud: bool = False):
         self.base_dir = base_dir
         self.project_id = project_id
         self.project_job_id = project_job_id
@@ -51,6 +130,7 @@
         self.current_node = None
         self.job_status = job_status
         self.current_node_from_config = current_node_from_config
+        self.is_cloud = is_cloud
         self.background_executors = []  # Keep track of background executors
         self.background_jobs_count = 0  # Track number of active background jobs
         self.background_jobs_completed = 0  # Track completed background jobs
@@ -78,6 +158,18 @@ class PipelineExecutor:
         # Initialize log file
         self.init_log()

+    # ✅ Helper method to conditionally notify cloud manager
+    def _notify_cloud(self, completion_type: str):
+        """Triggers scientiflow-cloud commands if in cloud mode."""
+        if not self.is_cloud:
+            return
+        try:
+            cmd = ["scientiflow-cloud", f"--completed-job-{completion_type}", str(self.project_job_id)]
+            subprocess.run(cmd, check=False)
+            logger.info(f"Sent {completion_type} signal to cloud manager.")
+        except Exception as e:
+            print(f"[ERROR] Cloud notification failed: {e}")
+
     def init_log(self):
         """Initialize the log file."""
         try:
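The helper shells out to an external scientiflow-cloud binary rather than calling a library. For completion_type "partially" and an example project_job_id of 42, the assembled command looks like this:

    import subprocess

    completion_type = "partially"  # or "fully"
    cmd = ["scientiflow-cloud", f"--completed-job-{completion_type}", "42"]
    print(" ".join(cmd))  # scientiflow-cloud --completed-job-partially 42

    # check=False ignores a non-zero exit status; a missing binary still
    # raises FileNotFoundError, which the broad except above catches.
    # subprocess.run(cmd, check=False)

One caveat worth flagging: no logger definition or import is visible in this diff, so if the module does not provide one, logger.info raises NameError inside the try block and the notification is reported as failed even though subprocess.run has already completed.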
@@ -110,6 +202,8 @@ class PipelineExecutor:
             body = {"project_job_id": self.project_job_id, "terminal_output": terminal_output}
             make_auth_request(endpoint="/agent-application/update-terminal-output", method="POST", data=body, error_message="Unable to update terminal output!")
             printer.print_message("[+] Terminal output updated successfully.", style="bold green")
+            # ✅ TRIGGER: FULL COMPLETION
+            self._notify_cloud("fully")
         except Exception as e:
             print(f"[ERROR] Failed to update terminal output: {e}")

@@ -165,40 +259,14 @@ class PipelineExecutor:
             self.update_terminal_output()
             raise SystemExit("[ERROR] Pipeline execution terminated due to an unexpected error.")

-    def monitor_background_job(self, futures, node_label, executor):
-        """Monitor background job completion in a separate thread."""
-        def monitor():
-            all_successful = True
-            for future in as_completed(futures):
-                success = future.result()
-                if not success:
-                    all_successful = False
-
-            if not all_successful:
-                self.log_output(f"[ERROR] Background job {node_label} failed")
-                printer.print_message(f"[BACKGROUND JOB] {node_label} Failed - some commands in background job failed", style="bold red")
-            else:
-                printer.print_message(f"[BACKGROUND JOB] {node_label} Execution completed in the background", style="bold green")
-
-            # Clean up executor
-            executor.shutdown(wait=False)
-            if executor in self.background_executors:
-                self.background_executors.remove(executor)
-
-            # Update background job completion count
-            with self.background_jobs_lock:
-                self.background_jobs_completed += 1
-
-        # Start monitoring thread
-        monitor_thread = threading.Thread(target=monitor, daemon=True)
-        monitor_thread.start()

     def wait_for_background_jobs(self):
         """Wait for all background jobs to complete."""
         import time
         if self.background_jobs_count > 0:
             printer.print_message(f"[INFO] Waiting for {self.background_jobs_count} background job(s) to complete...", style="bold yellow")
-
+            # ✅ TRIGGER: PARTIAL COMPLETION (Frees GPU/Primary CPU while waiting for cleanup)
+            self._notify_cloud("partially")
         while True:
             with self.background_jobs_lock:
                 if self.background_jobs_completed >= self.background_jobs_count:
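Both wait methods rely on the same lock-guarded polling barrier (and note that, with the dfs-level call removed later in this diff, wait_for_background_jobs appears to have no remaining caller here; the tracker's wait_for_all_jobs takes over that role). A toy reproduction of the barrier, independent of the ScientiFlow code; threading.Event or Condition would avoid polling, but this mirrors the shipped approach:

    import threading
    import time

    lock = threading.Lock()
    completed = 0
    TOTAL = 3

    def worker():
        global completed
        time.sleep(0.2)  # stand-in for real background work
        with lock:
            completed += 1

    for _ in range(TOTAL):
        threading.Thread(target=worker, daemon=True).start()

    while True:  # poll-until-done, as in the tracker
        with lock:
            if completed >= TOTAL:
                break
        time.sleep(0.5)
    print("all background work finished")

The loop terminates because completed only increases: once it reaches TOTAL, the next 0.5 s poll observes the condition and breaks.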
@@ -284,19 +352,14 @@ class PipelineExecutor:

         # Execute commands in background using ProcessPoolExecutor (non-blocking)
         if command_list:
-            # Increment background jobs counter
-            with self.background_jobs_lock:
-                self.background_jobs_count += 1
-
             executor = ProcessPoolExecutor(max_workers=numberOfThreads)
-            self.background_executors.append(executor)  # Keep reference to prevent garbage collection
             futures = []
             for cmd in command_list:
                 future = executor.submit(execute_background_command_standalone, cmd, self.log_file_path)
                 futures.append(future)

-            # Start monitoring in a separate thread (non-blocking)
-            self.monitor_background_job(futures, node_label, executor)
+            # Register with global tracker (non-blocking)
+            global_bg_tracker.register_background_job(executor, futures, node_label, self.log_file_path)

             # Don't wait for completion, immediately continue to next node
         else:
@@ -323,7 +386,6 @@ class PipelineExecutor:
         """Start executing the pipeline."""
         # Use job status from configuration instead of API call
         current_status = self.job_status
-
         if current_status == "running":
             # Job is already running, resume from start but skip until current node
             current_node_id = self.current_node_from_config
@@ -344,9 +406,9 @@ class PipelineExecutor:
         if starting_node:
             self.dfs(starting_node)

-        # Wait for all background jobs to complete before marking pipeline as completed
-        self.wait_for_background_jobs()
-
+        # Don't wait for background jobs here - let them continue across multiple jobs
+        # Background jobs will be waited for at the end of all job executions
+
         update_job_status(self.project_job_id, "completed")
         update_stopped_at_node(self.project_id, self.project_job_id, self.current_node)

 
@@ -354,7 +416,8 @@ class PipelineExecutor:
354
416
  self.update_terminal_output()
355
417
 
356
418
  # External function to initiate the pipeline execution
357
- def decode_and_execute_pipeline(base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None):
419
+ def decode_and_execute_pipeline(base_dir: str, project_id: int, project_job_id: int, project_title: str, job_dir_name: str, nodes: List[Dict[str, Any]], edges: List[Dict[str, str]], environment_variables: Dict[str, str], start_node: str = None, end_node: str = None, job_status: str = None, current_node_from_config: str = None, is_cloud: bool = False):
358
420
  """Initialize and execute the pipeline."""
359
- executor = PipelineExecutor(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node, end_node, job_status, current_node_from_config)
360
- executor.decode_and_execute_pipeline()
421
+ executor = PipelineExecutor(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node, end_node, job_status, current_node_from_config, is_cloud)
422
+ executor.decode_and_execute_pipeline()
423
+
scientiflow_cli/pipeline/get_jobs.py
@@ -4,8 +4,12 @@ from scientiflow_cli.services.rich_printer import RichPrinter

 printer = RichPrinter()

-def get_jobs() -> list[dict]:
-    response = make_auth_request(endpoint="/agent-application/check-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
+def get_jobs(is_cloud: bool = False) -> list[dict]:
+    if is_cloud:
+        response = make_auth_request(endpoint="/agent-application/check-cloud-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
+        printer.print_message("Fetching cloud jobs to execute...", style="bold blue")
+    else:
+        response = make_auth_request(endpoint="/agent-application/check-jobs-to-execute", method="GET", error_message="Unable to fetch jobs!")
     try:
         jobs = response.json()
         if len(jobs) == 0:
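The functional change is endpoint selection (plus an extra status message on the cloud path). Isolated for clarity, under the assumption that both endpoints return the same JSON job-list shape:

    def jobs_endpoint(is_cloud: bool = False) -> str:
        # Helper extracted purely for illustration; get_jobs inlines this choice.
        if is_cloud:
            return "/agent-application/check-cloud-jobs-to-execute"
        return "/agent-application/check-jobs-to-execute"

    assert jobs_endpoint() == "/agent-application/check-jobs-to-execute"
    assert jobs_endpoint(True) == "/agent-application/check-cloud-jobs-to-execute"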
scientiflow_cli/services/executor.py
@@ -1,7 +1,7 @@
 from concurrent.futures import ThreadPoolExecutor
 import asyncio
 from scientiflow_cli.pipeline.get_jobs import get_jobs
-from scientiflow_cli.pipeline.decode_and_execute import decode_and_execute_pipeline
+from scientiflow_cli.pipeline.decode_and_execute import decode_and_execute_pipeline, global_bg_tracker
 from scientiflow_cli.pipeline.container_manager import get_job_containers
 from scientiflow_cli.utils.file_manager import create_job_dirs, get_job_files
 from scientiflow_cli.services.rich_printer import RichPrinter
@@ -37,7 +37,7 @@ def execute_jobs(job_ids: list[int] = None, parallel: bool = False, is_cloud: bo
     if matching_jobs:
         if is_cloud:
             for job in matching_jobs:
-                job['project_title'] = str(job['project']['id']) + '_' + job['project']['project_title']
+                job['project']['project_title'] = str(job['project']['id']) + '_' + job['project']['project_title']
                 if 'server' not in job or job['server'] is None:
                     job['server'] = {'base_directory':None}
                 job['server']['base_directory'] = get_base_directory()
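This one-line change is a genuine bug fix: 0.4.15 wrote the prefixed title to a new top-level 'project_title' key, leaving the nested job['project']['project_title'] (which downstream code presumably reads) untouched. A minimal demonstration with made-up values:

    job = {"project": {"id": 7, "project_title": "demo"}}

    # 0.4.15 (buggy): creates a sibling key; the nested title is unchanged
    job["project_title"] = str(job["project"]["id"]) + "_" + job["project"]["project_title"]
    assert job["project"]["project_title"] == "demo"

    # 0.4.17 (fixed): prefixes the nested title in place
    job["project"]["project_title"] = str(job["project"]["id"]) + "_" + job["project"]["project_title"]
    assert job["project"]["project_title"] == "7_demo"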
@@ -117,6 +117,9 @@ async def execute_async(jobs: list[dict]) -> None:

     await asyncio.gather(*running_jobs)  # Wait for all jobs to complete
     printer.print_success("[ASYNC COMPLETE] All jobs finished!")
+
+    # Wait for all background jobs from all executed jobs to complete
+    global_bg_tracker.wait_for_all_jobs()


 def execute_single_job(job: dict, is_cloud: bool = False) -> None:
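Placement matters here: wait_for_all_jobs blocks with time.sleep, but it runs only after asyncio.gather has drained every foreground pipeline, so it acts as a single end-of-run barrier rather than a per-job one. A shape-only sketch (the job callables are placeholders, not real pipelines):

    import asyncio

    async def run_all(jobs):
        # Foreground pipelines first; each may hand work to the tracker.
        await asyncio.gather(*(asyncio.to_thread(job) for job in jobs))
        # Then one blocking barrier for everything the tracker accumulated:
        # global_bg_tracker.wait_for_all_jobs()

    asyncio.run(run_all([lambda: None, lambda: None]))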
@@ -169,8 +172,8 @@ def execute_single_job(job: dict, is_cloud: bool = False) -> None:
             printer.print_success(f"[+] Resuming execution for job ID: {project_job_id}")
         else:
             printer.print_success(f"[+] Starting execution for job ID: {project_job_id}")
-
-        decode_and_execute_pipeline(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node=start_node, end_node=end_node, job_status=job_status, current_node_from_config=current_node_from_config)
+
+        decode_and_execute_pipeline(base_dir, project_id, project_job_id, project_title, job_dir_name, nodes, edges, environment_variables, start_node=start_node, end_node=end_node, job_status=job_status, current_node_from_config=current_node_from_config, is_cloud=is_cloud)
         printer.print_success(f"[+] Execution completed for job ID: {project_job_id}")

     except ValueError as value_err:
scientiflow_cli/services/request_handler.py
@@ -21,7 +21,6 @@ def make_auth_request(endpoint, method, data=None, params=None, error_message=No
         return handle_response(response, error_message)

     except requests.RequestException as e:
-        print(e)
         return "Request failed"


@@ -33,12 +32,10 @@ def make_no_auth_request(endpoint, method, data=None, error_message=None):
             response = requests.get(base_url + endpoint)
         elif method == 'POST':
             response = requests.post(base_url + endpoint, json=data)
-            print(response)
         else:
             raise ValueError("Unsupported HTTP method")

         return handle_response(response, error_message)

     except requests.RequestException as e:
-        print(e)
         return "Request failed"