pipen-cli-gbatch 0.0.0__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pipen-cli-gbatch might be problematic. See the release's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pipen-cli-gbatch
3
- Version: 0.0.0
3
+ Version: 0.0.2
4
4
  Summary: A pipen cli plugin to run command via Google Cloud Batch
5
5
  License: MIT
6
6
  Author: pwwang
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: google-cloud-storage (>=3.0.0,<4.0.0)
17
- Requires-Dist: pipen (>=0.17.17,<0.18.0)
17
+ Requires-Dist: pipen (>=0.17.19,<0.18.0)
18
18
  Requires-Dist: pipen-poplog (>=0.3.6,<0.4.0)
19
19
  Description-Content-Type: text/markdown
20
20
 
@@ -207,6 +207,11 @@ Examples:
207
207
  # Run a command and wait for it to complete
208
208
  > pipen gbatch --workdir gs://my-bucket/workdir -- \
209
209
  python myscript.py --input input.txt --output output.txt
210
+
211
+ # Use named mounts
212
+ > pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \
213
+ --mount OUTDIR=gs://bucket/path/to/outdir -- \
214
+ cat $INFILE > $OUTDIR/output.txt
210
215
 
211
216
  # Run a command in a detached mode
212
217
  > pipen gbatch --nowait --project $PROJECT --location $LOCATION \
@@ -187,6 +187,11 @@ Examples:
187
187
  # Run a command and wait for it to complete
188
188
  > pipen gbatch --workdir gs://my-bucket/workdir -- \
189
189
  python myscript.py --input input.txt --output output.txt
190
+
191
+ # Use named mounts
192
+ > pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \
193
+ --mount OUTDIR=gs://bucket/path/to/outdir -- \
194
+ cat $INFILE > $OUTDIR/output.txt
190
195
 
191
196
  # Run a command in a detached mode
192
197
  > pipen gbatch --nowait --project $PROJECT --location $LOCATION \
@@ -63,8 +63,10 @@ from __future__ import annotations
63
63
 
64
64
  import sys
65
65
  import asyncio
66
+ from contextlib import suppress
66
67
  from pathlib import Path
67
68
  from time import sleep
69
+ from typing import Any
68
70
  from diot import Diot
69
71
  from argx import Namespace
70
72
  from yunpath import AnyPath, GSPath
@@ -77,75 +79,58 @@ from pipen.cli import CLIPlugin
77
79
  from pipen.scheduler import GbatchScheduler
78
80
  from pipen_poplog import LogsPopulator
79
81
 
80
- __version__ = "0.0.0"
82
+ __version__ = "0.0.2"
81
83
  __all__ = ("CliGbatchPlugin", "CliGbatchDaemon")
82
84
 
83
85
 
84
- class XquteCliGbatchPlugin:
85
- """The plugin used to pull logs for the real pipeline."""
86
-
87
- def __init__(self, name: str = "logging", log_start: bool = True):
88
- self.name = name
89
- self.log_start = log_start
90
- self.stdout_populator = LogsPopulator()
91
- self.stderr_populator = LogsPopulator()
92
-
93
- @plugin.impl
94
- async def on_job_started(self, scheduler, job):
95
- if not self.log_start:
96
- return
97
-
98
- self.stdout_populator.logfile = scheduler.workdir.joinpath("0", "job.stdout")
99
- self.stderr_populator.logfile = scheduler.workdir.joinpath("0", "job.stderr")
100
- logger.info("Job is picked up by Google Batch, pulling stdout/stderr...")
101
-
102
- @plugin.impl
103
- async def on_job_polling(self, scheduler, job, counter):
104
- if counter % 5 != 0:
105
- # Make it less frequent
106
- return
107
-
108
- stdout_lines = self.stdout_populator.populate()
109
- self.stdout_populator.increment_counter(len(stdout_lines))
110
- for line in stdout_lines:
111
- logger.info(f"/STDOUT {line}")
112
-
113
- stderr_lines = self.stderr_populator.populate()
114
- self.stderr_populator.increment_counter(len(stderr_lines))
115
- for line in stderr_lines:
116
- logger.error(f"/STDERR {line}")
117
-
118
- @plugin.impl
119
- async def on_job_killed(self, scheduler, job):
120
- await self.on_job_polling.impl(self, scheduler, job, 0)
121
-
122
- @plugin.impl
123
- async def on_job_failed(self, scheduler, job):
124
- await self.on_job_polling.impl(self, scheduler, job, 0)
125
-
126
- @plugin.impl
127
- async def on_job_succeeded(self, scheduler, job):
128
- await self.on_job_polling.impl(self, scheduler, job, 0)
86
+ class CliGbatchDaemon:
87
+ """A daemon pipeline wrapper for running commands via Google Cloud Batch.
129
88
 
130
- @plugin.impl
131
- def on_shutdown(self, xqute, sig):
132
- del self.stdout_populator
133
- self.stdout_populator = None
134
- del self.stderr_populator
135
- self.stderr_populator = None
89
+ This class wraps arbitrary commands as single-process pipen pipelines and executes
90
+ them using the Google Cloud Batch scheduler. It handles configuration management,
91
+ path mounting, and provides both synchronous and asynchronous execution modes.
136
92
 
93
+ Attributes:
94
+ config (Diot): Configuration dictionary containing all daemon settings.
95
+ command (list[str]): The command to be executed as a list of arguments.
137
96
 
138
- class CliGbatchDaemon:
97
+ Example:
98
+ >>> daemon = CliGbatchDaemon(
99
+ ... {"workdir": "gs://my-bucket/workdir", "project": "my-project"},
100
+ ... ["python", "script.py", "--input", "data.txt"]
101
+ ... )
102
+ >>> await daemon.run()
103
+ """
139
104
 
140
105
  def __init__(self, config: dict | Namespace, command: list[str]):
106
+ """Initialize the CliGbatchDaemon.
107
+
108
+ Args:
109
+ config: Configuration dictionary or Namespace containing daemon settings.
110
+ Must include 'workdir' pointing to a Google Storage bucket path.
111
+ command: List of command arguments to execute.
112
+ """
141
113
  if isinstance(config, Namespace):
142
114
  self.config = Diot(vars(config))
143
115
  else:
144
116
  self.config = Diot(config)
117
+
118
+ self.config.prescript = self.config.get("prescript", None) or ""
119
+ self.config.postscript = self.config.get("postscript", None) or ""
145
120
  self.command = command
146
121
 
147
122
  def _get_arg_from_command(self, arg: str) -> str | None:
148
- """Get the value of the given argument from the command line."""
123
+ """Get the value of the given argument from the command line.
124
+
125
+ Args:
126
+ arg: The argument name to search for (without '--' prefix).
127
+
128
+ Returns:
129
+ The value of the argument if found, None otherwise.
130
+
131
+ Raises:
132
+ FileNotFoundError: If a config file is specified but doesn't exist.
133
+ """
149
134
  cmd_equal = [cmd.startswith(f"--{arg}=") for cmd in self.command]
150
135
  cmd_space = [cmd == f"--{arg}" for cmd in self.command]
151
136
  cmd_at = [cmd.startswith("@") for cmd in self.command]
@@ -163,56 +148,131 @@ class CliGbatchDaemon:
163
148
  raise FileNotFoundError(f"Config file not found: {config_file}")
164
149
 
165
150
  conf = Config.load_one(config_file)
166
- value = conf.get("workdir", None)
151
+ value = conf.get(arg, None)
167
152
  else:
168
153
  value = None
169
154
 
170
155
  return value
171
156
 
172
- def _check_workdir(self):
173
- workdir = self.config.get("workdir", self._get_arg_from_command("workdir"))
157
+ def _replace_arg_in_command(self, arg: str, value: Any) -> None:
158
+ """Replace the value of the given argument in the command line.
159
+
160
+ Args:
161
+ arg: The argument name to replace (without '--' prefix).
162
+ value: The new value to set for the argument.
163
+ """
164
+ cmd_equal = [cmd.startswith(f"--{arg}=") for cmd in self.command]
165
+ cmd_space = [cmd == f"--{arg}" for cmd in self.command]
166
+ value = str(value)
167
+
168
+ if any(cmd_equal):
169
+ index = cmd_equal.index(True)
170
+ self.command[index] = f"--{arg}={value}"
171
+ elif any(cmd_space) and len(cmd_space) > cmd_space.index(True) + 1:
172
+ index = cmd_space.index(True)
173
+ self.command[index + 1] = value
174
+ else:
175
+ self.command.extend([f"--{arg}", value])
176
+
177
+ def _add_mount(self, source: str | GSPath, target: str) -> None:
178
+ """Add a mount point to the configuration.
179
+
180
+ Args:
181
+ source: The source path (local or GCS path).
182
+ target: The target mount path inside the container.
183
+ """
184
+ mount = self.config.get("mount", [])
185
+ # mount the workdir
186
+ mount.append(f'{source}:{target}')
187
+
188
+ self.config["mount"] = mount
189
+
190
+ def _handle_workdir(self):
191
+ """Handle workdir configuration and mounting.
192
+
193
+ Validates that workdir is a Google Storage bucket path and sets up
194
+ the appropriate mount configuration for the container.
195
+
196
+ Raises:
197
+ SystemExit: If workdir is not a valid Google Storage bucket path.
198
+ """
199
+ command_workdir = self._get_arg_from_command("workdir")
200
+ workdir = self.config.get("workdir", command_workdir)
174
201
 
175
202
  if not workdir or not isinstance(AnyPath(workdir), GSPath):
176
203
  print(
177
- "\033[1;4mError\033[0m: A Google Storage Bucket path is required for "
178
- "--workdir.\n"
204
+ "\033[1;4mError\033[0m: An existing Google Storage Bucket path is "
205
+ "required for --workdir.\n"
179
206
  )
180
207
  sys.exit(1)
181
208
 
182
209
  self.config["workdir"] = workdir
210
+ # If command workdir is different from config workdir, we need to mount it
211
+ self._add_mount(workdir, GbatchScheduler.MOUNTED_METADIR)
212
+
213
+ # replace --workdir value with the mounted workdir in the command
214
+ self._replace_arg_in_command("workdir", GbatchScheduler.MOUNTED_METADIR)
215
+
216
+ def _handle_outdir(self):
217
+ """Handle output directory configuration and mounting.
218
+
219
+ If an output directory is specified in the command, mounts it to the
220
+ container and updates the command to use the mounted path.
221
+ """
222
+ command_outdir = self._get_arg_from_command("outdir")
223
+
224
+ if command_outdir:
225
+ self._add_mount(command_outdir, GbatchScheduler.MOUNTED_OUTDIR)
226
+ self._replace_arg_in_command("outdir", GbatchScheduler.MOUNTED_OUTDIR)
183
227
 
184
228
  def _infer_name(self):
229
+ """Infer the daemon name from configuration or command arguments.
230
+
231
+ Priority order:
232
+ 1. config.name
233
+ 2. --name from command + "GbatchDaemon" suffix
234
+ 3. Default "PipenCliGbatchDaemon"
235
+ """
185
236
  name = self.config.get("name", None)
186
237
  if not name:
187
238
  command_name = self._get_arg_from_command("name")
188
239
  if not command_name:
189
240
  name = "PipenCliGbatchDaemon"
190
241
  else:
191
- name = f"{name}GbatchDaemon"
242
+ name = f"{command_name}GbatchDaemon"
192
243
 
193
244
  self.config["name"] = name
194
245
 
195
246
  def _infer_jobname_prefix(self):
247
+ """Infer the job name prefix for the Google Cloud Batch scheduler.
248
+
249
+ Priority order:
250
+ 1. config.jobname_prefix
251
+ 2. --name from command + "-gbatch-daemon" suffix (lowercase)
252
+ 3. Default "pipen-cli-gbatch-daemon"
253
+ """
196
254
  prefix = self.config.get("jobname_prefix", None)
197
255
  if not prefix:
198
256
  command_name = self._get_arg_from_command("name")
199
257
  if not command_name:
200
- prefix = "pipen-gbatch-daemon"
258
+ prefix = "pipen-cli-gbatch-daemon"
201
259
  else:
202
260
  prefix = f"{command_name.lower()}-gbatch-daemon"
203
261
 
204
262
  self.config["jobname_prefix"] = prefix
205
263
 
206
- def _setup_mount(self):
207
- mount = self.config.get("mount", [])
208
- # mount the workdir
209
- mount.append(f'{self.config["workdir"]}:{GbatchScheduler.MOUNTED_METADIR}')
210
-
211
- self.config["mount"] = mount
212
-
213
264
  def _get_xqute(self) -> Xqute:
265
+ """Create and configure an Xqute instance for job execution.
266
+
267
+ Returns:
268
+ Configured Xqute instance with appropriate plugins and scheduler options.
269
+ """
214
270
  plugins = ["-xqute.pipen"]
215
- if not self.config.nowait and not self.config.view_logs:
271
+ if (
272
+ not self.config.nowait
273
+ and not self.config.view_logs
274
+ and "logging" not in plugin.get_all_plugin_names()
275
+ ):
216
276
  plugins.append(XquteCliGbatchPlugin())
217
277
 
218
278
  return Xqute(
@@ -238,6 +298,7 @@ class CliGbatchDaemon:
238
298
  "version",
239
299
  "loglevel",
240
300
  "mounts",
301
+ "plain",
241
302
  )
242
303
  },
243
304
  workdir=(f'{self.config.workdir}/{self.config["name"]}'),
@@ -245,10 +306,12 @@ class CliGbatchDaemon:
245
306
  )
246
307
 
247
308
  def _run_version(self):
309
+ """Print version information for pipen-cli-gbatch and pipen."""
248
310
  print(f"pipen-cli-gbatch version: v{__version__}")
249
311
  print(f"pipen version: v{pipen_version}")
250
312
 
251
313
  def _show_scheduler_opts(self):
314
+ """Log the scheduler options for debugging purposes."""
252
315
  logger.debug("Scheduler Options:")
253
316
  for key, val in self.config.items():
254
317
  if key in (
@@ -265,12 +328,18 @@ class CliGbatchDaemon:
265
328
  "version",
266
329
  "loglevel",
267
330
  "mounts",
331
+ "plain",
268
332
  ):
269
333
  continue
270
334
 
271
335
  logger.debug(f"- {key}: {val}")
272
336
 
273
- async def _run_wait(self):
337
+ async def _run_wait(self): # pragma: no cover
338
+ """Run the pipeline and wait for completion.
339
+
340
+ Raises:
341
+ SystemExit: If no command is provided.
342
+ """
274
343
  if not self.command:
275
344
  print("\033[1;4mError\033[0m: No command to run is provided.\n")
276
345
  sys.exit(1)
@@ -281,6 +350,14 @@ class CliGbatchDaemon:
281
350
  await xqute.run_until_complete()
282
351
 
283
352
  async def _run_nowait(self):
353
+ """Run the pipeline without waiting for completion.
354
+
355
+ Submits the job to Google Cloud Batch and prints information about
356
+ how to monitor the job status and retrieve logs.
357
+
358
+ Raises:
359
+ SystemExit: If no command is provided.
360
+ """
284
361
  """Run the pipeline without waiting for completion."""
285
362
  if not self.command:
286
363
  print("\033[1;4mError\033[0m: No command to run is provided.\n")
@@ -332,10 +409,18 @@ class CliGbatchDaemon:
332
409
  logger.info(f'📁 {self.config["workdir"]}/{self.config["name"]}/0/')
333
410
  logger.info("")
334
411
  finally:
335
- if xqute.plugin_context:
412
+ if xqute.plugin_context: # pragma: no cover
336
413
  xqute.plugin_context.__exit__()
337
414
 
338
- def _run_view_logs(self):
415
+ def _run_view_logs(self): # pragma: no cover
416
+ """Pull and display logs from the Google Cloud Batch job.
417
+
418
+ Continuously monitors and displays stdout/stderr logs based on the
419
+ view_logs configuration. Supports viewing 'stdout', 'stderr', or 'all'.
420
+
421
+ Raises:
422
+ SystemExit: If workdir is not found or when interrupted by user.
423
+ """
339
424
  log_source = {}
340
425
  workdir = AnyPath(self.config["workdir"]) / self.config["name"] / "0"
341
426
  if not workdir.exists():
@@ -353,30 +438,73 @@ class CliGbatchDaemon:
353
438
  poplulators = {
354
439
  key: LogsPopulator(logfile=val) for key, val in log_source.items()
355
440
  }
441
+
356
442
  logger.info(f"Pulling logs from: {', '.join(log_source.keys())}")
357
- logger.info("Press Ctrl-C (twice) to stop.")
443
+ logger.info("Press Ctrl-C (twice if needed) to stop.")
358
444
  print("")
359
- while True:
445
+
446
+ try:
447
+ while True:
448
+ for key, populator in poplulators.items():
449
+ lines = populator.populate()
450
+ for line in lines:
451
+ if len(log_source) > 1:
452
+ print(f"/{key} {line}")
453
+ else:
454
+ print(line)
455
+ sleep(5)
456
+ except KeyboardInterrupt:
360
457
  for key, populator in poplulators.items():
361
- lines = populator.populate()
362
- for line in lines:
458
+ if populator.residue:
363
459
  if len(log_source) > 1:
364
- print(f"/{key} {line}")
460
+ print(f"/{key} {populator.residue}")
365
461
  else:
366
- print(line)
367
- sleep(5)
462
+ print(populator.residue)
463
+ print("")
464
+ logger.info("Stopped pulling logs.")
465
+ sys.exit(0)
368
466
 
369
467
  def setup(self):
468
+ """Set up logging and configuration for the daemon.
469
+
470
+ Configures logging handlers and filters, validates workdir requirements,
471
+ and initializes daemon name and job name prefix.
472
+
473
+ Raises:
474
+ SystemExit: If workdir is not a valid Google Storage bucket path.
475
+ """
370
476
  logger.addHandler(RichHandler(show_path=False, show_time=False))
371
477
  logger.addFilter(DuplicateFilter())
372
478
  logger.setLevel(self.config.loglevel.upper())
373
479
 
374
- self._check_workdir()
375
- self._infer_name()
376
- self._infer_jobname_prefix()
377
- self._setup_mount()
480
+ if not self.config.plain:
481
+ self._handle_workdir()
482
+ self._handle_outdir()
483
+ self._infer_name()
484
+ self._infer_jobname_prefix()
485
+ else:
486
+ if not self.config.workdir or not isinstance(
487
+ AnyPath(self.config.workdir),
488
+ GSPath,
489
+ ):
490
+ print(
491
+ "\033[1;4mError\033[0m: An existing Google Storage Bucket path is "
492
+ "required for --workdir.\n"
493
+ )
494
+ sys.exit(1)
495
+
496
+ if 'name' not in self.config:
497
+ self.config["name"] = "PipenCliGbatchDaemon"
378
498
 
379
- async def run(self):
499
+ async def run(self): # pragma: no cover
500
+ """Execute the daemon pipeline based on configuration.
501
+
502
+ Determines the execution mode based on configuration flags:
503
+ - version: Print version information
504
+ - nowait: Run in detached mode
505
+ - view_logs: Display logs from existing job
506
+ - default: Run and wait for completion
507
+ """
380
508
  if self.config.version:
381
509
  self._run_version()
382
510
  return
@@ -391,8 +519,139 @@ class CliGbatchDaemon:
391
519
  await self._run_wait()
392
520
 
393
521
 
394
- class CliGbatchPlugin(CLIPlugin):
395
- """Simplify running commands via Google Cloud Batch."""
522
+ class XquteCliGbatchPlugin: # pragma: no cover
523
+ """Plugin for pulling logs during pipeline execution.
524
+
525
+ This plugin monitors job execution and continuously pulls stdout/stderr logs
526
+ from the Google Cloud Batch job, displaying them in real-time during execution.
527
+
528
+ Attributes:
529
+ name (str): The plugin name.
530
+ log_start (bool): Whether to start logging when job starts.
531
+ stdout_populator (LogsPopulator): Handles stdout log population.
532
+ stderr_populator (LogsPopulator): Handles stderr log population.
533
+ """
534
+
535
+ def __init__(self, name: str = "logging", log_start: bool = True):
536
+ """Initialize the logging plugin.
537
+
538
+ Args:
539
+ name: The plugin name.
540
+ log_start: Whether to start logging when job starts.
541
+ """
542
+ self.name = name
543
+ self.log_start = log_start
544
+ self.stdout_populator = LogsPopulator()
545
+ self.stderr_populator = LogsPopulator()
546
+
547
+ def _clear_residues(self):
548
+ """Clear any remaining log residues and display them."""
549
+ if self.stdout_populator.residue:
550
+ logger.info(f"/STDOUT {self.stdout_populator.residue}")
551
+ self.stdout_populator.residue = ""
552
+ if self.stderr_populator.residue:
553
+ logger.error(f"/STDERR {self.stderr_populator.residue}")
554
+ self.stderr_populator.residue = ""
555
+
556
+ @plugin.impl
557
+ async def on_job_started(self, scheduler, job):
558
+ """Handle job start event by setting up log file paths.
559
+
560
+ Args:
561
+ scheduler: The scheduler instance.
562
+ job: The job that started.
563
+ """
564
+ if not self.log_start:
565
+ return
566
+
567
+ self.stdout_populator.logfile = scheduler.workdir.joinpath("0", "job.stdout")
568
+ self.stderr_populator.logfile = scheduler.workdir.joinpath("0", "job.stderr")
569
+ logger.info("Job is picked up by Google Batch, pulling stdout/stderr...")
570
+
571
+ @plugin.impl
572
+ async def on_job_polling(self, scheduler, job, counter):
573
+ """Handle job polling event by pulling and displaying logs.
574
+
575
+ Args:
576
+ scheduler: The scheduler instance.
577
+ job: The job being polled.
578
+ counter: The polling counter.
579
+ """
580
+ if counter % 5 != 0:
581
+ # Make it less frequent
582
+ return
583
+
584
+ stdout_lines = self.stdout_populator.populate()
585
+ self.stdout_populator.increment_counter(len(stdout_lines))
586
+ for line in stdout_lines:
587
+ logger.info(f"/STDOUT {line}")
588
+
589
+ stderr_lines = self.stderr_populator.populate()
590
+ self.stderr_populator.increment_counter(len(stderr_lines))
591
+ for line in stderr_lines:
592
+ logger.error(f"/STDERR {line}")
593
+
594
+ @plugin.impl
595
+ async def on_job_killed(self, scheduler, job):
596
+ """Handle job killed event by pulling final logs.
597
+
598
+ Args:
599
+ scheduler: The scheduler instance.
600
+ job: The job that was killed.
601
+ """
602
+ await self.on_job_polling(scheduler, job, 0)
603
+ self._clear_residues()
604
+
605
+ @plugin.impl
606
+ async def on_job_failed(self, scheduler, job):
607
+ """Handle job failed event by pulling final logs.
608
+
609
+ Args:
610
+ scheduler: The scheduler instance.
611
+ job: The job that failed.
612
+ """
613
+ with suppress(AttributeError, FileNotFoundError):
614
+ # in case the job failed before started
615
+ await self.on_job_polling(scheduler, job, 0)
616
+ self._clear_residues()
617
+
618
+ @plugin.impl
619
+ async def on_job_succeeded(self, scheduler, job):
620
+ """Handle job succeeded event by pulling final logs.
621
+
622
+ Args:
623
+ scheduler: The scheduler instance.
624
+ job: The job that succeeded.
625
+ """
626
+ with suppress(AttributeError, FileNotFoundError):
627
+ await self.on_job_polling(scheduler, job, 0)
628
+ self._clear_residues()
629
+
630
+ @plugin.impl
631
+ def on_shutdown(self, xqute, sig):
632
+ """Handle shutdown event by cleaning up resources.
633
+
634
+ Args:
635
+ xqute: The Xqute instance.
636
+ sig: The shutdown signal.
637
+ """
638
+ del self.stdout_populator
639
+ self.stdout_populator = None
640
+ del self.stderr_populator
641
+ self.stderr_populator = None
642
+
643
+
644
+ class CliGbatchPlugin(CLIPlugin): # pragma: no cover
645
+ """Simplify running commands via Google Cloud Batch.
646
+
647
+ This CLI plugin provides a command-line interface for executing arbitrary
648
+ commands on Google Cloud Batch through the pipen framework. It wraps
649
+ commands as single-process pipelines and provides various execution modes.
650
+
651
+ Attributes:
652
+ __version__ (str): The version of the plugin.
653
+ name (str): The CLI command name.
654
+ """
396
655
 
397
656
  __version__ = __version__
398
657
  name = "gbatch"
@@ -402,6 +661,15 @@ class CliGbatchPlugin(CLIPlugin):
402
661
  config_files: list[str],
403
662
  profile: str | None,
404
663
  ) -> dict:
664
+ """Get the default configurations from the given config files and profile.
665
+
666
+ Args:
667
+ config_files: List of configuration file paths to load.
668
+ profile: The profile name to use for configuration.
669
+
670
+ Returns:
671
+ Dictionary containing scheduler options from the configuration.
672
+ """
405
673
  """Get the default configurations from the given config files and profile."""
406
674
  if not profile:
407
675
  return {}
@@ -416,6 +684,12 @@ class CliGbatchPlugin(CLIPlugin):
416
684
  return conf.get("scheduler_opts", {})
417
685
 
418
686
  def __init__(self, parser, subparser):
687
+ """Initialize the CLI plugin with argument parsing configuration.
688
+
689
+ Args:
690
+ parser: The main argument parser.
691
+ subparser: The subparser for this specific command.
692
+ """
419
693
  super().__init__(parser, subparser)
420
694
  subparser.epilog = """\033[1;4mExamples\033[0m:
421
695
 
@@ -424,6 +698,12 @@ class CliGbatchPlugin(CLIPlugin):
424
698
  > pipen gbatch --workdir gs://my-bucket/workdir -- \\
425
699
  python myscript.py --input input.txt --output output.txt
426
700
 
701
+ \u200B
702
+ # Use named mounts
703
+ > pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \\
704
+ --mount OUTDIR=gs://bucket/path/to/outdir -- \\
705
+ bash -c 'cat $INFILE > $OUTDIR/output.txt'
706
+
427
707
  \u200B
428
708
  # Run a command in a detached mode
429
709
  > pipen gbatch --nowait --project $PROJECT --location $LOCATION \\
@@ -439,7 +719,7 @@ class CliGbatchPlugin(CLIPlugin):
439
719
  # View the logs of a previously run command
440
720
  > pipen gbatch --view-logs all --name my-daemon-name \\
441
721
  --workdir gs://my-bucket/workdir
442
- """
722
+ """ # noqa: E501
443
723
  argfile = Path(__file__).parent / "daemon_args.toml"
444
724
  args_def = Config.load(argfile, loader="toml")
445
725
  mutually_exclusive_groups = args_def.get("mutually_exclusive_groups", [])
@@ -448,7 +728,18 @@ class CliGbatchPlugin(CLIPlugin):
448
728
  subparser._add_decedents(mutually_exclusive_groups, groups, [], arguments, [])
449
729
 
450
730
  def parse_args(self, known_parsed, unparsed_argv: list[str]) -> Namespace:
451
- """Define arguments for the command"""
731
+ """Parse command-line arguments and apply configuration defaults.
732
+
733
+ Args:
734
+ known_parsed: Previously parsed arguments.
735
+ unparsed_argv: List of unparsed command-line arguments.
736
+
737
+ Returns:
738
+ Namespace containing parsed arguments with applied defaults.
739
+
740
+ Raises:
741
+ SystemExit: If command arguments are not properly formatted.
742
+ """
452
743
  # Check if there is any unknown args
453
744
  known_parsed = super().parse_args(known_parsed, unparsed_argv)
454
745
  if known_parsed.command:
@@ -476,5 +767,9 @@ class CliGbatchPlugin(CLIPlugin):
476
767
  return known_parsed
477
768
 
478
769
  def exec_command(self, args: Namespace) -> None:
479
- """Execute the command"""
770
+ """Execute the gbatch command with the provided arguments.
771
+
772
+ Args:
773
+ args: Parsed command-line arguments containing configuration and command.
774
+ """
480
775
  asyncio.run(CliGbatchDaemon(args, args.command).run())
@@ -11,6 +11,12 @@ flags = ["--view-logs"]
11
11
  choices = ["all", "stdout", "stderr"]
12
12
  help = "View the logs of a job."
13
13
 
14
+ [[mutually_exclusive_groups.arguments]]
15
+ flags = ["--version"]
16
+ action = "store_true"
17
+ default = false
18
+ help = "Show the version of the pipen-cli-gbatch package."
19
+
14
20
  [[arguments]]
15
21
  flags = ["--name"]
16
22
  type = "str"
@@ -26,18 +32,18 @@ including ~/.pipen.toml and ./pipen.toml.
26
32
  Note that if not provided, nothing will be loaded from the configuration files.
27
33
  """
28
34
 
29
- [[arguments]]
30
- flags = ["--version"]
31
- action = "store_true"
32
- default = false
33
- help = "Show the version of the pipen-cli-gbatch package."
34
-
35
35
  [[arguments]]
36
36
  flags = ["--loglevel"]
37
37
  choices = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "debug", "info", "warning", "error", "critical"]
38
38
  default = "INFO"
39
39
  help = "Set the logging level for the daemon process."
40
40
 
41
+ [[arguments]]
42
+ flags = ["--plain"]
43
+ action = "store_true"
44
+ default = false
45
+ help = "Treat the command as a plain command, not a pipen pipeline, so we don't grab workdir/outdir and replace them with mounted paths from the command."
46
+
41
47
  [[groups]]
42
48
  title = "Key Options"
43
49
  description = "The key options to run the command."
@@ -112,10 +118,15 @@ help = "The location to run the job."
112
118
 
113
119
  [[groups.arguments]]
114
120
  flags = ["--mount"]
115
- type = "list"
121
+ # type = "list"
116
122
  default = []
117
- action = "clear_extend"
118
- help = """The list of mounts to mount to the VM, each in the format of SOURCE:TARGET, where SOURCE must be either a Google Storage Bucket path (gs://...)."""
123
+ action = "clear_append"
124
+ help = """The list of mounts to mount to the VM, each in the format of SOURCE:TARGET, where SOURCE must be either a Google Storage Bucket path (gs://...).
125
+ You can also use named mounts like `INDIR=gs://my-bucket/inputs` and the directory will be mounted to `/mnt/disks/INDIR` in the VM;
126
+ then you can use environment variable `$INDIR` in the command/script to refer to the mounted path.
127
+ You can also mount a file like `INFILE=gs://my-bucket/inputs/file.txt`. The parent directory will be mounted to `/mnt/disks/INFILE/inputs` in the VM,
128
+ and the file will be available at `/mnt/disks/INFILE/inputs/file.txt` in the VM. `$INFILE` can also be used in the command/script to refer to the mounted path.
129
+ """
119
130
 
120
131
  [[groups.arguments]]
121
132
  flags = ["--service-account"]
@@ -159,9 +170,9 @@ help = "The entry point of the container to run the command."
159
170
 
160
171
  [[groups.arguments]]
161
172
  flags = ["--commands"]
162
- type = "list"
173
+ # type = "list"
163
174
  default = []
164
- action = "clear_extend"
175
+ action = "clear_append"
165
176
  help = "The list of commands to run in the container, each as a separate string."
166
177
 
167
178
  [[groups.arguments]]
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "pipen-cli-gbatch"
3
+ version = "0.0.2"
4
+ description = "A pipen cli plugin to run command via Google Cloud Batch"
5
+ authors = [
6
+ {name = "pwwang",email = "pwwang@pwwang.com"}
7
+ ]
8
+ license = {text = "MIT"}
9
+ readme = "README.md"
10
+ requires-python = ">=3.9,<4.0"
11
+ dependencies = [
12
+ "pipen (>=0.17.19,<0.18.0)",
13
+ "pipen-poplog (>=0.3.6,<0.4.0)",
14
+ "google-cloud-storage (>=3.0.0,<4.0.0)"
15
+ ]
16
+
17
+ [tool.pytest.ini_options]
18
+ addopts = "-vv -n auto --dist loadgroup -p no:benchmark -W error::UserWarning --cov-config=.coveragerc --cov=pipen_cli_gbatch --cov-report xml:.coverage.xml --cov-report term-missing"
19
+ console_output_style = "progress"
20
+ junit_family = "xunit1"
21
+ asyncio_default_fixture_loop_scope = "function"
22
+ filterwarnings = [
23
+ "ignore:.+may lead to deadlocks in the child:DeprecationWarning",
24
+ ]
25
+
26
+ [tool.poetry.plugins.pipen_cli]
27
+ cli-gbatch = "pipen_cli_gbatch:CliGbatchPlugin"
28
+
29
+
30
+ [tool.poetry.group.dev.dependencies]
31
+ pytest = "^8.4.1"
32
+ pytest-cov = "^6.2.1"
33
+ pytest-asyncio = "^1.1.0"
34
+ pytest-xdist = "^3.8.0"
35
+ pytest-forked = "^1.6.0"
36
+
37
+ [build-system]
38
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
39
+ build-backend = "poetry.core.masonry.api"
@@ -1,23 +0,0 @@
1
- [project]
2
- name = "pipen-cli-gbatch"
3
- version = "0.0.0"
4
- description = "A pipen cli plugin to run command via Google Cloud Batch"
5
- authors = [
6
- {name = "pwwang",email = "pwwang@pwwang.com"}
7
- ]
8
- license = {text = "MIT"}
9
- readme = "README.md"
10
- requires-python = ">=3.9,<4.0"
11
- dependencies = [
12
- "pipen (>=0.17.17,<0.18.0)",
13
- "pipen-poplog (>=0.3.6,<0.4.0)",
14
- "google-cloud-storage (>=3.0.0,<4.0.0)"
15
- ]
16
-
17
-
18
- [tool.poetry.plugins.pipen_cli]
19
- cli-gbatch = "pipen_cli_gbatch:CliGbatchPlugin"
20
-
21
- [build-system]
22
- requires = ["poetry-core>=2.0.0,<3.0.0"]
23
- build-backend = "poetry.core.masonry.api"