pipen-cli-gbatch 0.0.0__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pipen-cli-gbatch might be problematic. Click here for more details.
- {pipen_cli_gbatch-0.0.0 → pipen_cli_gbatch-0.0.2}/PKG-INFO +7 -2
- {pipen_cli_gbatch-0.0.0 → pipen_cli_gbatch-0.0.2}/README.md +5 -0
- {pipen_cli_gbatch-0.0.0 → pipen_cli_gbatch-0.0.2}/pipen_cli_gbatch/__init__.py +384 -89
- {pipen_cli_gbatch-0.0.0 → pipen_cli_gbatch-0.0.2}/pipen_cli_gbatch/daemon_args.toml +22 -11
- pipen_cli_gbatch-0.0.2/pyproject.toml +39 -0
- pipen_cli_gbatch-0.0.0/pyproject.toml +0 -23
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pipen-cli-gbatch
|
|
3
|
-
Version: 0.0.0
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: A pipen cli plugin to run command via Google Cloud Batch
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Requires-Dist: google-cloud-storage (>=3.0.0,<4.0.0)
|
|
17
|
-
Requires-Dist: pipen (>=0.17.17,<0.18.0)
|
|
17
|
+
Requires-Dist: pipen (>=0.17.19,<0.18.0)
|
|
18
18
|
Requires-Dist: pipen-poplog (>=0.3.6,<0.4.0)
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
|
|
@@ -207,6 +207,11 @@ Examples:
|
|
|
207
207
|
# Run a command and wait for it to complete
|
|
208
208
|
> pipen gbatch --workdir gs://my-bucket/workdir -- \
|
|
209
209
|
python myscript.py --input input.txt --output output.txt
|
|
210
|
+
|
|
211
|
+
# Use named mounts
|
|
212
|
+
> pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \
|
|
213
|
+
--mount OUTDIR=gs://bucket/path/to/outdir -- \
|
|
214
|
+
bash -c 'cat $INFILE > $OUTDIR/output.txt'
|
|
210
215
|
|
|
211
216
|
# Run a command in a detached mode
|
|
212
217
|
> pipen gbatch --nowait --project $PROJECT --location $LOCATION \
|
|
@@ -187,6 +187,11 @@ Examples:
|
|
|
187
187
|
# Run a command and wait for it to complete
|
|
188
188
|
> pipen gbatch --workdir gs://my-bucket/workdir -- \
|
|
189
189
|
python myscript.py --input input.txt --output output.txt
|
|
190
|
+
|
|
191
|
+
# Use named mounts
|
|
192
|
+
> pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \
|
|
193
|
+
--mount OUTDIR=gs://bucket/path/to/outdir -- \
|
|
194
|
+
bash -c 'cat $INFILE > $OUTDIR/output.txt'
|
|
190
195
|
|
|
191
196
|
# Run a command in a detached mode
|
|
192
197
|
> pipen gbatch --nowait --project $PROJECT --location $LOCATION \
|
|
@@ -63,8 +63,10 @@ from __future__ import annotations
|
|
|
63
63
|
|
|
64
64
|
import sys
|
|
65
65
|
import asyncio
|
|
66
|
+
from contextlib import suppress
|
|
66
67
|
from pathlib import Path
|
|
67
68
|
from time import sleep
|
|
69
|
+
from typing import Any
|
|
68
70
|
from diot import Diot
|
|
69
71
|
from argx import Namespace
|
|
70
72
|
from yunpath import AnyPath, GSPath
|
|
@@ -77,75 +79,58 @@ from pipen.cli import CLIPlugin
|
|
|
77
79
|
from pipen.scheduler import GbatchScheduler
|
|
78
80
|
from pipen_poplog import LogsPopulator
|
|
79
81
|
|
|
80
|
-
__version__ = "0.0.0"
|
|
82
|
+
__version__ = "0.0.2"
|
|
81
83
|
__all__ = ("CliGbatchPlugin", "CliGbatchDaemon")
|
|
82
84
|
|
|
83
85
|
|
|
84
|
-
class
|
|
85
|
-
"""
|
|
86
|
-
|
|
87
|
-
def __init__(self, name: str = "logging", log_start: bool = True):
|
|
88
|
-
self.name = name
|
|
89
|
-
self.log_start = log_start
|
|
90
|
-
self.stdout_populator = LogsPopulator()
|
|
91
|
-
self.stderr_populator = LogsPopulator()
|
|
92
|
-
|
|
93
|
-
@plugin.impl
|
|
94
|
-
async def on_job_started(self, scheduler, job):
|
|
95
|
-
if not self.log_start:
|
|
96
|
-
return
|
|
97
|
-
|
|
98
|
-
self.stdout_populator.logfile = scheduler.workdir.joinpath("0", "job.stdout")
|
|
99
|
-
self.stderr_populator.logfile = scheduler.workdir.joinpath("0", "job.stderr")
|
|
100
|
-
logger.info("Job is picked up by Google Batch, pulling stdout/stderr...")
|
|
101
|
-
|
|
102
|
-
@plugin.impl
|
|
103
|
-
async def on_job_polling(self, scheduler, job, counter):
|
|
104
|
-
if counter % 5 != 0:
|
|
105
|
-
# Make it less frequent
|
|
106
|
-
return
|
|
107
|
-
|
|
108
|
-
stdout_lines = self.stdout_populator.populate()
|
|
109
|
-
self.stdout_populator.increment_counter(len(stdout_lines))
|
|
110
|
-
for line in stdout_lines:
|
|
111
|
-
logger.info(f"/STDOUT {line}")
|
|
112
|
-
|
|
113
|
-
stderr_lines = self.stderr_populator.populate()
|
|
114
|
-
self.stderr_populator.increment_counter(len(stderr_lines))
|
|
115
|
-
for line in stderr_lines:
|
|
116
|
-
logger.error(f"/STDERR {line}")
|
|
117
|
-
|
|
118
|
-
@plugin.impl
|
|
119
|
-
async def on_job_killed(self, scheduler, job):
|
|
120
|
-
await self.on_job_polling.impl(self, scheduler, job, 0)
|
|
121
|
-
|
|
122
|
-
@plugin.impl
|
|
123
|
-
async def on_job_failed(self, scheduler, job):
|
|
124
|
-
await self.on_job_polling.impl(self, scheduler, job, 0)
|
|
125
|
-
|
|
126
|
-
@plugin.impl
|
|
127
|
-
async def on_job_succeeded(self, scheduler, job):
|
|
128
|
-
await self.on_job_polling.impl(self, scheduler, job, 0)
|
|
86
|
+
class CliGbatchDaemon:
|
|
87
|
+
"""A daemon pipeline wrapper for running commands via Google Cloud Batch.
|
|
129
88
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
self.stdout_populator = None
|
|
134
|
-
del self.stderr_populator
|
|
135
|
-
self.stderr_populator = None
|
|
89
|
+
This class wraps arbitrary commands as single-process pipen pipelines and executes
|
|
90
|
+
them using the Google Cloud Batch scheduler. It handles configuration management,
|
|
91
|
+
path mounting, and provides both synchronous and asynchronous execution modes.
|
|
136
92
|
|
|
93
|
+
Attributes:
|
|
94
|
+
config (Diot): Configuration dictionary containing all daemon settings.
|
|
95
|
+
command (list[str]): The command to be executed as a list of arguments.
|
|
137
96
|
|
|
138
|
-
|
|
97
|
+
Example:
|
|
98
|
+
>>> daemon = CliGbatchDaemon(
|
|
99
|
+
... {"workdir": "gs://my-bucket/workdir", "project": "my-project"},
|
|
100
|
+
... ["python", "script.py", "--input", "data.txt"]
|
|
101
|
+
... )
|
|
102
|
+
>>> await daemon.run()
|
|
103
|
+
"""
|
|
139
104
|
|
|
140
105
|
def __init__(self, config: dict | Namespace, command: list[str]):
|
|
106
|
+
"""Initialize the CliGbatchDaemon.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
config: Configuration dictionary or Namespace containing daemon settings.
|
|
110
|
+
Must include 'workdir' pointing to a Google Storage bucket path.
|
|
111
|
+
command: List of command arguments to execute.
|
|
112
|
+
"""
|
|
141
113
|
if isinstance(config, Namespace):
|
|
142
114
|
self.config = Diot(vars(config))
|
|
143
115
|
else:
|
|
144
116
|
self.config = Diot(config)
|
|
117
|
+
|
|
118
|
+
self.config.prescript = self.config.get("prescript", None) or ""
|
|
119
|
+
self.config.postscript = self.config.get("postscript", None) or ""
|
|
145
120
|
self.command = command
|
|
146
121
|
|
|
147
122
|
def _get_arg_from_command(self, arg: str) -> str | None:
|
|
148
|
-
"""Get the value of the given argument from the command line.
|
|
123
|
+
"""Get the value of the given argument from the command line.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
arg: The argument name to search for (without '--' prefix).
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
The value of the argument if found, None otherwise.
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
FileNotFoundError: If a config file is specified but doesn't exist.
|
|
133
|
+
"""
|
|
149
134
|
cmd_equal = [cmd.startswith(f"--{arg}=") for cmd in self.command]
|
|
150
135
|
cmd_space = [cmd == f"--{arg}" for cmd in self.command]
|
|
151
136
|
cmd_at = [cmd.startswith("@") for cmd in self.command]
|
|
@@ -163,56 +148,131 @@ class CliGbatchDaemon:
|
|
|
163
148
|
raise FileNotFoundError(f"Config file not found: {config_file}")
|
|
164
149
|
|
|
165
150
|
conf = Config.load_one(config_file)
|
|
166
|
-
value = conf.get(
|
|
151
|
+
value = conf.get(arg, None)
|
|
167
152
|
else:
|
|
168
153
|
value = None
|
|
169
154
|
|
|
170
155
|
return value
|
|
171
156
|
|
|
172
|
-
def
|
|
173
|
-
|
|
157
|
+
def _replace_arg_in_command(self, arg: str, value: Any) -> None:
|
|
158
|
+
"""Replace the value of the given argument in the command line.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
arg: The argument name to replace (without '--' prefix).
|
|
162
|
+
value: The new value to set for the argument.
|
|
163
|
+
"""
|
|
164
|
+
cmd_equal = [cmd.startswith(f"--{arg}=") for cmd in self.command]
|
|
165
|
+
cmd_space = [cmd == f"--{arg}" for cmd in self.command]
|
|
166
|
+
value = str(value)
|
|
167
|
+
|
|
168
|
+
if any(cmd_equal):
|
|
169
|
+
index = cmd_equal.index(True)
|
|
170
|
+
self.command[index] = f"--{arg}={value}"
|
|
171
|
+
elif any(cmd_space) and len(cmd_space) > cmd_space.index(True) + 1:
|
|
172
|
+
index = cmd_space.index(True)
|
|
173
|
+
self.command[index + 1] = value
|
|
174
|
+
else:
|
|
175
|
+
self.command.extend([f"--{arg}", value])
|
|
176
|
+
|
|
177
|
+
def _add_mount(self, source: str | GSPath, target: str) -> None:
|
|
178
|
+
"""Add a mount point to the configuration.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
source: The source path (local or GCS path).
|
|
182
|
+
target: The target mount path inside the container.
|
|
183
|
+
"""
|
|
184
|
+
mount = self.config.get("mount", [])
|
|
185
|
+
# mount the workdir
|
|
186
|
+
mount.append(f'{source}:{target}')
|
|
187
|
+
|
|
188
|
+
self.config["mount"] = mount
|
|
189
|
+
|
|
190
|
+
def _handle_workdir(self):
|
|
191
|
+
"""Handle workdir configuration and mounting.
|
|
192
|
+
|
|
193
|
+
Validates that workdir is a Google Storage bucket path and sets up
|
|
194
|
+
the appropriate mount configuration for the container.
|
|
195
|
+
|
|
196
|
+
Raises:
|
|
197
|
+
SystemExit: If workdir is not a valid Google Storage bucket path.
|
|
198
|
+
"""
|
|
199
|
+
command_workdir = self._get_arg_from_command("workdir")
|
|
200
|
+
workdir = self.config.get("workdir", command_workdir)
|
|
174
201
|
|
|
175
202
|
if not workdir or not isinstance(AnyPath(workdir), GSPath):
|
|
176
203
|
print(
|
|
177
|
-
"\033[1;4mError\033[0m:
|
|
178
|
-
"--workdir.\n"
|
|
204
|
+
"\033[1;4mError\033[0m: An existing Google Storage Bucket path is "
|
|
205
|
+
"required for --workdir.\n"
|
|
179
206
|
)
|
|
180
207
|
sys.exit(1)
|
|
181
208
|
|
|
182
209
|
self.config["workdir"] = workdir
|
|
210
|
+
# If command workdir is different from config workdir, we need to mount it
|
|
211
|
+
self._add_mount(workdir, GbatchScheduler.MOUNTED_METADIR)
|
|
212
|
+
|
|
213
|
+
# replace --workdir value with the mounted workdir in the command
|
|
214
|
+
self._replace_arg_in_command("workdir", GbatchScheduler.MOUNTED_METADIR)
|
|
215
|
+
|
|
216
|
+
def _handle_outdir(self):
|
|
217
|
+
"""Handle output directory configuration and mounting.
|
|
218
|
+
|
|
219
|
+
If an output directory is specified in the command, mounts it to the
|
|
220
|
+
container and updates the command to use the mounted path.
|
|
221
|
+
"""
|
|
222
|
+
command_outdir = self._get_arg_from_command("outdir")
|
|
223
|
+
|
|
224
|
+
if command_outdir:
|
|
225
|
+
self._add_mount(command_outdir, GbatchScheduler.MOUNTED_OUTDIR)
|
|
226
|
+
self._replace_arg_in_command("outdir", GbatchScheduler.MOUNTED_OUTDIR)
|
|
183
227
|
|
|
184
228
|
def _infer_name(self):
|
|
229
|
+
"""Infer the daemon name from configuration or command arguments.
|
|
230
|
+
|
|
231
|
+
Priority order:
|
|
232
|
+
1. config.name
|
|
233
|
+
2. --name from command + "GbatchDaemon" suffix
|
|
234
|
+
3. Default "PipenCliGbatchDaemon"
|
|
235
|
+
"""
|
|
185
236
|
name = self.config.get("name", None)
|
|
186
237
|
if not name:
|
|
187
238
|
command_name = self._get_arg_from_command("name")
|
|
188
239
|
if not command_name:
|
|
189
240
|
name = "PipenCliGbatchDaemon"
|
|
190
241
|
else:
|
|
191
|
-
name = f"{
|
|
242
|
+
name = f"{command_name}GbatchDaemon"
|
|
192
243
|
|
|
193
244
|
self.config["name"] = name
|
|
194
245
|
|
|
195
246
|
def _infer_jobname_prefix(self):
|
|
247
|
+
"""Infer the job name prefix for the Google Cloud Batch scheduler.
|
|
248
|
+
|
|
249
|
+
Priority order:
|
|
250
|
+
1. config.jobname_prefix
|
|
251
|
+
2. --name from command + "-gbatch-daemon" suffix (lowercase)
|
|
252
|
+
3. Default "pipen-cli-gbatch-daemon"
|
|
253
|
+
"""
|
|
196
254
|
prefix = self.config.get("jobname_prefix", None)
|
|
197
255
|
if not prefix:
|
|
198
256
|
command_name = self._get_arg_from_command("name")
|
|
199
257
|
if not command_name:
|
|
200
|
-
prefix = "pipen-gbatch-daemon"
|
|
258
|
+
prefix = "pipen-cli-gbatch-daemon"
|
|
201
259
|
else:
|
|
202
260
|
prefix = f"{command_name.lower()}-gbatch-daemon"
|
|
203
261
|
|
|
204
262
|
self.config["jobname_prefix"] = prefix
|
|
205
263
|
|
|
206
|
-
def _setup_mount(self):
|
|
207
|
-
mount = self.config.get("mount", [])
|
|
208
|
-
# mount the workdir
|
|
209
|
-
mount.append(f'{self.config["workdir"]}:{GbatchScheduler.MOUNTED_METADIR}')
|
|
210
|
-
|
|
211
|
-
self.config["mount"] = mount
|
|
212
|
-
|
|
213
264
|
def _get_xqute(self) -> Xqute:
|
|
265
|
+
"""Create and configure an Xqute instance for job execution.
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
Configured Xqute instance with appropriate plugins and scheduler options.
|
|
269
|
+
"""
|
|
214
270
|
plugins = ["-xqute.pipen"]
|
|
215
|
-
if
|
|
271
|
+
if (
|
|
272
|
+
not self.config.nowait
|
|
273
|
+
and not self.config.view_logs
|
|
274
|
+
and "logging" not in plugin.get_all_plugin_names()
|
|
275
|
+
):
|
|
216
276
|
plugins.append(XquteCliGbatchPlugin())
|
|
217
277
|
|
|
218
278
|
return Xqute(
|
|
@@ -238,6 +298,7 @@ class CliGbatchDaemon:
|
|
|
238
298
|
"version",
|
|
239
299
|
"loglevel",
|
|
240
300
|
"mounts",
|
|
301
|
+
"plain",
|
|
241
302
|
)
|
|
242
303
|
},
|
|
243
304
|
workdir=(f'{self.config.workdir}/{self.config["name"]}'),
|
|
@@ -245,10 +306,12 @@ class CliGbatchDaemon:
|
|
|
245
306
|
)
|
|
246
307
|
|
|
247
308
|
def _run_version(self):
|
|
309
|
+
"""Print version information for pipen-cli-gbatch and pipen."""
|
|
248
310
|
print(f"pipen-cli-gbatch version: v{__version__}")
|
|
249
311
|
print(f"pipen version: v{pipen_version}")
|
|
250
312
|
|
|
251
313
|
def _show_scheduler_opts(self):
|
|
314
|
+
"""Log the scheduler options for debugging purposes."""
|
|
252
315
|
logger.debug("Scheduler Options:")
|
|
253
316
|
for key, val in self.config.items():
|
|
254
317
|
if key in (
|
|
@@ -265,12 +328,18 @@ class CliGbatchDaemon:
|
|
|
265
328
|
"version",
|
|
266
329
|
"loglevel",
|
|
267
330
|
"mounts",
|
|
331
|
+
"plain",
|
|
268
332
|
):
|
|
269
333
|
continue
|
|
270
334
|
|
|
271
335
|
logger.debug(f"- {key}: {val}")
|
|
272
336
|
|
|
273
|
-
async def _run_wait(self):
|
|
337
|
+
async def _run_wait(self): # pragma: no cover
|
|
338
|
+
"""Run the pipeline and wait for completion.
|
|
339
|
+
|
|
340
|
+
Raises:
|
|
341
|
+
SystemExit: If no command is provided.
|
|
342
|
+
"""
|
|
274
343
|
if not self.command:
|
|
275
344
|
print("\033[1;4mError\033[0m: No command to run is provided.\n")
|
|
276
345
|
sys.exit(1)
|
|
@@ -281,6 +350,14 @@ class CliGbatchDaemon:
|
|
|
281
350
|
await xqute.run_until_complete()
|
|
282
351
|
|
|
283
352
|
async def _run_nowait(self):
|
|
353
|
+
"""Run the pipeline without waiting for completion.
|
|
354
|
+
|
|
355
|
+
Submits the job to Google Cloud Batch and prints information about
|
|
356
|
+
how to monitor the job status and retrieve logs.
|
|
357
|
+
|
|
358
|
+
Raises:
|
|
359
|
+
SystemExit: If no command is provided.
|
|
360
|
+
"""
|
|
284
361
|
"""Run the pipeline without waiting for completion."""
|
|
285
362
|
if not self.command:
|
|
286
363
|
print("\033[1;4mError\033[0m: No command to run is provided.\n")
|
|
@@ -332,10 +409,18 @@ class CliGbatchDaemon:
|
|
|
332
409
|
logger.info(f'📁 {self.config["workdir"]}/{self.config["name"]}/0/')
|
|
333
410
|
logger.info("")
|
|
334
411
|
finally:
|
|
335
|
-
if xqute.plugin_context:
|
|
412
|
+
if xqute.plugin_context: # pragma: no cover
|
|
336
413
|
xqute.plugin_context.__exit__()
|
|
337
414
|
|
|
338
|
-
def _run_view_logs(self):
|
|
415
|
+
def _run_view_logs(self): # pragma: no cover
|
|
416
|
+
"""Pull and display logs from the Google Cloud Batch job.
|
|
417
|
+
|
|
418
|
+
Continuously monitors and displays stdout/stderr logs based on the
|
|
419
|
+
view_logs configuration. Supports viewing 'stdout', 'stderr', or 'all'.
|
|
420
|
+
|
|
421
|
+
Raises:
|
|
422
|
+
SystemExit: If workdir is not found or when interrupted by user.
|
|
423
|
+
"""
|
|
339
424
|
log_source = {}
|
|
340
425
|
workdir = AnyPath(self.config["workdir"]) / self.config["name"] / "0"
|
|
341
426
|
if not workdir.exists():
|
|
@@ -353,30 +438,73 @@ class CliGbatchDaemon:
|
|
|
353
438
|
poplulators = {
|
|
354
439
|
key: LogsPopulator(logfile=val) for key, val in log_source.items()
|
|
355
440
|
}
|
|
441
|
+
|
|
356
442
|
logger.info(f"Pulling logs from: {', '.join(log_source.keys())}")
|
|
357
|
-
logger.info("Press Ctrl-C (twice) to stop.")
|
|
443
|
+
logger.info("Press Ctrl-C (twice if needed) to stop.")
|
|
358
444
|
print("")
|
|
359
|
-
|
|
445
|
+
|
|
446
|
+
try:
|
|
447
|
+
while True:
|
|
448
|
+
for key, populator in poplulators.items():
|
|
449
|
+
lines = populator.populate()
|
|
450
|
+
for line in lines:
|
|
451
|
+
if len(log_source) > 1:
|
|
452
|
+
print(f"/{key} {line}")
|
|
453
|
+
else:
|
|
454
|
+
print(line)
|
|
455
|
+
sleep(5)
|
|
456
|
+
except KeyboardInterrupt:
|
|
360
457
|
for key, populator in poplulators.items():
|
|
361
|
-
|
|
362
|
-
for line in lines:
|
|
458
|
+
if populator.residue:
|
|
363
459
|
if len(log_source) > 1:
|
|
364
|
-
print(f"/{key} {
|
|
460
|
+
print(f"/{key} {populator.residue}")
|
|
365
461
|
else:
|
|
366
|
-
print(
|
|
367
|
-
|
|
462
|
+
print(populator.residue)
|
|
463
|
+
print("")
|
|
464
|
+
logger.info("Stopped pulling logs.")
|
|
465
|
+
sys.exit(0)
|
|
368
466
|
|
|
369
467
|
def setup(self):
|
|
468
|
+
"""Set up logging and configuration for the daemon.
|
|
469
|
+
|
|
470
|
+
Configures logging handlers and filters, validates workdir requirements,
|
|
471
|
+
and initializes daemon name and job name prefix.
|
|
472
|
+
|
|
473
|
+
Raises:
|
|
474
|
+
SystemExit: If workdir is not a valid Google Storage bucket path.
|
|
475
|
+
"""
|
|
370
476
|
logger.addHandler(RichHandler(show_path=False, show_time=False))
|
|
371
477
|
logger.addFilter(DuplicateFilter())
|
|
372
478
|
logger.setLevel(self.config.loglevel.upper())
|
|
373
479
|
|
|
374
|
-
self.
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
480
|
+
if not self.config.plain:
|
|
481
|
+
self._handle_workdir()
|
|
482
|
+
self._handle_outdir()
|
|
483
|
+
self._infer_name()
|
|
484
|
+
self._infer_jobname_prefix()
|
|
485
|
+
else:
|
|
486
|
+
if not self.config.workdir or not isinstance(
|
|
487
|
+
AnyPath(self.config.workdir),
|
|
488
|
+
GSPath,
|
|
489
|
+
):
|
|
490
|
+
print(
|
|
491
|
+
"\033[1;4mError\033[0m: An existing Google Storage Bucket path is "
|
|
492
|
+
"required for --workdir.\n"
|
|
493
|
+
)
|
|
494
|
+
sys.exit(1)
|
|
495
|
+
|
|
496
|
+
if 'name' not in self.config:
|
|
497
|
+
self.config["name"] = "PipenCliGbatchDaemon"
|
|
378
498
|
|
|
379
|
-
async def run(self):
|
|
499
|
+
async def run(self): # pragma: no cover
|
|
500
|
+
"""Execute the daemon pipeline based on configuration.
|
|
501
|
+
|
|
502
|
+
Determines the execution mode based on configuration flags:
|
|
503
|
+
- version: Print version information
|
|
504
|
+
- nowait: Run in detached mode
|
|
505
|
+
- view_logs: Display logs from existing job
|
|
506
|
+
- default: Run and wait for completion
|
|
507
|
+
"""
|
|
380
508
|
if self.config.version:
|
|
381
509
|
self._run_version()
|
|
382
510
|
return
|
|
@@ -391,8 +519,139 @@ class CliGbatchDaemon:
|
|
|
391
519
|
await self._run_wait()
|
|
392
520
|
|
|
393
521
|
|
|
394
|
-
class
|
|
395
|
-
"""
|
|
522
|
+
class XquteCliGbatchPlugin: # pragma: no cover
|
|
523
|
+
"""Plugin for pulling logs during pipeline execution.
|
|
524
|
+
|
|
525
|
+
This plugin monitors job execution and continuously pulls stdout/stderr logs
|
|
526
|
+
from the Google Cloud Batch job, displaying them in real-time during execution.
|
|
527
|
+
|
|
528
|
+
Attributes:
|
|
529
|
+
name (str): The plugin name.
|
|
530
|
+
log_start (bool): Whether to start logging when job starts.
|
|
531
|
+
stdout_populator (LogsPopulator): Handles stdout log population.
|
|
532
|
+
stderr_populator (LogsPopulator): Handles stderr log population.
|
|
533
|
+
"""
|
|
534
|
+
|
|
535
|
+
def __init__(self, name: str = "logging", log_start: bool = True):
|
|
536
|
+
"""Initialize the logging plugin.
|
|
537
|
+
|
|
538
|
+
Args:
|
|
539
|
+
name: The plugin name.
|
|
540
|
+
log_start: Whether to start logging when job starts.
|
|
541
|
+
"""
|
|
542
|
+
self.name = name
|
|
543
|
+
self.log_start = log_start
|
|
544
|
+
self.stdout_populator = LogsPopulator()
|
|
545
|
+
self.stderr_populator = LogsPopulator()
|
|
546
|
+
|
|
547
|
+
def _clear_residues(self):
|
|
548
|
+
"""Clear any remaining log residues and display them."""
|
|
549
|
+
if self.stdout_populator.residue:
|
|
550
|
+
logger.info(f"/STDOUT {self.stdout_populator.residue}")
|
|
551
|
+
self.stdout_populator.residue = ""
|
|
552
|
+
if self.stderr_populator.residue:
|
|
553
|
+
logger.error(f"/STDERR {self.stderr_populator.residue}")
|
|
554
|
+
self.stderr_populator.residue = ""
|
|
555
|
+
|
|
556
|
+
@plugin.impl
|
|
557
|
+
async def on_job_started(self, scheduler, job):
|
|
558
|
+
"""Handle job start event by setting up log file paths.
|
|
559
|
+
|
|
560
|
+
Args:
|
|
561
|
+
scheduler: The scheduler instance.
|
|
562
|
+
job: The job that started.
|
|
563
|
+
"""
|
|
564
|
+
if not self.log_start:
|
|
565
|
+
return
|
|
566
|
+
|
|
567
|
+
self.stdout_populator.logfile = scheduler.workdir.joinpath("0", "job.stdout")
|
|
568
|
+
self.stderr_populator.logfile = scheduler.workdir.joinpath("0", "job.stderr")
|
|
569
|
+
logger.info("Job is picked up by Google Batch, pulling stdout/stderr...")
|
|
570
|
+
|
|
571
|
+
@plugin.impl
|
|
572
|
+
async def on_job_polling(self, scheduler, job, counter):
|
|
573
|
+
"""Handle job polling event by pulling and displaying logs.
|
|
574
|
+
|
|
575
|
+
Args:
|
|
576
|
+
scheduler: The scheduler instance.
|
|
577
|
+
job: The job being polled.
|
|
578
|
+
counter: The polling counter.
|
|
579
|
+
"""
|
|
580
|
+
if counter % 5 != 0:
|
|
581
|
+
# Make it less frequent
|
|
582
|
+
return
|
|
583
|
+
|
|
584
|
+
stdout_lines = self.stdout_populator.populate()
|
|
585
|
+
self.stdout_populator.increment_counter(len(stdout_lines))
|
|
586
|
+
for line in stdout_lines:
|
|
587
|
+
logger.info(f"/STDOUT {line}")
|
|
588
|
+
|
|
589
|
+
stderr_lines = self.stderr_populator.populate()
|
|
590
|
+
self.stderr_populator.increment_counter(len(stderr_lines))
|
|
591
|
+
for line in stderr_lines:
|
|
592
|
+
logger.error(f"/STDERR {line}")
|
|
593
|
+
|
|
594
|
+
@plugin.impl
|
|
595
|
+
async def on_job_killed(self, scheduler, job):
|
|
596
|
+
"""Handle job killed event by pulling final logs.
|
|
597
|
+
|
|
598
|
+
Args:
|
|
599
|
+
scheduler: The scheduler instance.
|
|
600
|
+
job: The job that was killed.
|
|
601
|
+
"""
|
|
602
|
+
await self.on_job_polling(scheduler, job, 0)
|
|
603
|
+
self._clear_residues()
|
|
604
|
+
|
|
605
|
+
@plugin.impl
|
|
606
|
+
async def on_job_failed(self, scheduler, job):
|
|
607
|
+
"""Handle job failed event by pulling final logs.
|
|
608
|
+
|
|
609
|
+
Args:
|
|
610
|
+
scheduler: The scheduler instance.
|
|
611
|
+
job: The job that failed.
|
|
612
|
+
"""
|
|
613
|
+
with suppress(AttributeError, FileNotFoundError):
|
|
614
|
+
# in case the job failed before started
|
|
615
|
+
await self.on_job_polling(scheduler, job, 0)
|
|
616
|
+
self._clear_residues()
|
|
617
|
+
|
|
618
|
+
@plugin.impl
|
|
619
|
+
async def on_job_succeeded(self, scheduler, job):
|
|
620
|
+
"""Handle job succeeded event by pulling final logs.
|
|
621
|
+
|
|
622
|
+
Args:
|
|
623
|
+
scheduler: The scheduler instance.
|
|
624
|
+
job: The job that succeeded.
|
|
625
|
+
"""
|
|
626
|
+
with suppress(AttributeError, FileNotFoundError):
|
|
627
|
+
await self.on_job_polling(scheduler, job, 0)
|
|
628
|
+
self._clear_residues()
|
|
629
|
+
|
|
630
|
+
@plugin.impl
|
|
631
|
+
def on_shutdown(self, xqute, sig):
|
|
632
|
+
"""Handle shutdown event by cleaning up resources.
|
|
633
|
+
|
|
634
|
+
Args:
|
|
635
|
+
xqute: The Xqute instance.
|
|
636
|
+
sig: The shutdown signal.
|
|
637
|
+
"""
|
|
638
|
+
del self.stdout_populator
|
|
639
|
+
self.stdout_populator = None
|
|
640
|
+
del self.stderr_populator
|
|
641
|
+
self.stderr_populator = None
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
class CliGbatchPlugin(CLIPlugin): # pragma: no cover
|
|
645
|
+
"""Simplify running commands via Google Cloud Batch.
|
|
646
|
+
|
|
647
|
+
This CLI plugin provides a command-line interface for executing arbitrary
|
|
648
|
+
commands on Google Cloud Batch through the pipen framework. It wraps
|
|
649
|
+
commands as single-process pipelines and provides various execution modes.
|
|
650
|
+
|
|
651
|
+
Attributes:
|
|
652
|
+
__version__ (str): The version of the plugin.
|
|
653
|
+
name (str): The CLI command name.
|
|
654
|
+
"""
|
|
396
655
|
|
|
397
656
|
__version__ = __version__
|
|
398
657
|
name = "gbatch"
|
|
@@ -402,6 +661,15 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
402
661
|
config_files: list[str],
|
|
403
662
|
profile: str | None,
|
|
404
663
|
) -> dict:
|
|
664
|
+
"""Get the default configurations from the given config files and profile.
|
|
665
|
+
|
|
666
|
+
Args:
|
|
667
|
+
config_files: List of configuration file paths to load.
|
|
668
|
+
profile: The profile name to use for configuration.
|
|
669
|
+
|
|
670
|
+
Returns:
|
|
671
|
+
Dictionary containing scheduler options from the configuration.
|
|
672
|
+
"""
|
|
405
673
|
"""Get the default configurations from the given config files and profile."""
|
|
406
674
|
if not profile:
|
|
407
675
|
return {}
|
|
@@ -416,6 +684,12 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
416
684
|
return conf.get("scheduler_opts", {})
|
|
417
685
|
|
|
418
686
|
def __init__(self, parser, subparser):
|
|
687
|
+
"""Initialize the CLI plugin with argument parsing configuration.
|
|
688
|
+
|
|
689
|
+
Args:
|
|
690
|
+
parser: The main argument parser.
|
|
691
|
+
subparser: The subparser for this specific command.
|
|
692
|
+
"""
|
|
419
693
|
super().__init__(parser, subparser)
|
|
420
694
|
subparser.epilog = """\033[1;4mExamples\033[0m:
|
|
421
695
|
|
|
@@ -424,6 +698,12 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
424
698
|
> pipen gbatch --workdir gs://my-bucket/workdir -- \\
|
|
425
699
|
python myscript.py --input input.txt --output output.txt
|
|
426
700
|
|
|
701
|
+
\u200B
|
|
702
|
+
# Use named mounts
|
|
703
|
+
> pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \\
|
|
704
|
+
--mount OUTDIR=gs://bucket/path/to/outdir -- \\
|
|
705
|
+
bash -c 'cat $INFILE > $OUTDIR/output.txt'
|
|
706
|
+
|
|
427
707
|
\u200B
|
|
428
708
|
# Run a command in a detached mode
|
|
429
709
|
> pipen gbatch --nowait --project $PROJECT --location $LOCATION \\
|
|
@@ -439,7 +719,7 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
439
719
|
# View the logs of a previously run command
|
|
440
720
|
> pipen gbatch --view-logs all --name my-daemon-name \\
|
|
441
721
|
--workdir gs://my-bucket/workdir
|
|
442
|
-
"""
|
|
722
|
+
""" # noqa: E501
|
|
443
723
|
argfile = Path(__file__).parent / "daemon_args.toml"
|
|
444
724
|
args_def = Config.load(argfile, loader="toml")
|
|
445
725
|
mutually_exclusive_groups = args_def.get("mutually_exclusive_groups", [])
|
|
@@ -448,7 +728,18 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
448
728
|
subparser._add_decedents(mutually_exclusive_groups, groups, [], arguments, [])
|
|
449
729
|
|
|
450
730
|
def parse_args(self, known_parsed, unparsed_argv: list[str]) -> Namespace:
|
|
451
|
-
"""
|
|
731
|
+
"""Parse command-line arguments and apply configuration defaults.
|
|
732
|
+
|
|
733
|
+
Args:
|
|
734
|
+
known_parsed: Previously parsed arguments.
|
|
735
|
+
unparsed_argv: List of unparsed command-line arguments.
|
|
736
|
+
|
|
737
|
+
Returns:
|
|
738
|
+
Namespace containing parsed arguments with applied defaults.
|
|
739
|
+
|
|
740
|
+
Raises:
|
|
741
|
+
SystemExit: If command arguments are not properly formatted.
|
|
742
|
+
"""
|
|
452
743
|
# Check if there is any unknown args
|
|
453
744
|
known_parsed = super().parse_args(known_parsed, unparsed_argv)
|
|
454
745
|
if known_parsed.command:
|
|
@@ -476,5 +767,9 @@ class CliGbatchPlugin(CLIPlugin):
|
|
|
476
767
|
return known_parsed
|
|
477
768
|
|
|
478
769
|
def exec_command(self, args: Namespace) -> None:
|
|
479
|
-
"""Execute the command
|
|
770
|
+
"""Execute the gbatch command with the provided arguments.
|
|
771
|
+
|
|
772
|
+
Args:
|
|
773
|
+
args: Parsed command-line arguments containing configuration and command.
|
|
774
|
+
"""
|
|
480
775
|
asyncio.run(CliGbatchDaemon(args, args.command).run())
|
|
@@ -11,6 +11,12 @@ flags = ["--view-logs"]
|
|
|
11
11
|
choices = ["all", "stdout", "stderr"]
|
|
12
12
|
help = "View the logs of a job."
|
|
13
13
|
|
|
14
|
+
[[mutually_exclusive_groups.arguments]]
|
|
15
|
+
flags = ["--version"]
|
|
16
|
+
action = "store_true"
|
|
17
|
+
default = false
|
|
18
|
+
help = "Show the version of the pipen-cli-gbatch package."
|
|
19
|
+
|
|
14
20
|
[[arguments]]
|
|
15
21
|
flags = ["--name"]
|
|
16
22
|
type = "str"
|
|
@@ -26,18 +32,18 @@ including ~/.pipen.toml and ./pipen.toml.
|
|
|
26
32
|
Note that if not provided, nothing will be loaded from the configuration files.
|
|
27
33
|
"""
|
|
28
34
|
|
|
29
|
-
[[arguments]]
|
|
30
|
-
flags = ["--version"]
|
|
31
|
-
action = "store_true"
|
|
32
|
-
default = false
|
|
33
|
-
help = "Show the version of the pipen-cli-gbatch package."
|
|
34
|
-
|
|
35
35
|
[[arguments]]
|
|
36
36
|
flags = ["--loglevel"]
|
|
37
37
|
choices = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "debug", "info", "warning", "error", "critical"]
|
|
38
38
|
default = "INFO"
|
|
39
39
|
help = "Set the logging level for the daemon process."
|
|
40
40
|
|
|
41
|
+
[[arguments]]
|
|
42
|
+
flags = ["--plain"]
|
|
43
|
+
action = "store_true"
|
|
44
|
+
default = false
|
|
45
|
+
help = "Treat the command as a plain command, not a pipen pipeline, so we don't grab workdir/outdir and replace them with mounted paths from the command."
|
|
46
|
+
|
|
41
47
|
[[groups]]
|
|
42
48
|
title = "Key Options"
|
|
43
49
|
description = "The key options to run the command."
|
|
@@ -112,10 +118,15 @@ help = "The location to run the job."
|
|
|
112
118
|
|
|
113
119
|
[[groups.arguments]]
|
|
114
120
|
flags = ["--mount"]
|
|
115
|
-
type = "list"
|
|
121
|
+
# type = "list"
|
|
116
122
|
default = []
|
|
117
|
-
action = "
|
|
118
|
-
help = """The list of mounts to mount to the VM, each in the format of SOURCE:TARGET, where SOURCE must be either a Google Storage Bucket path (gs://...).
|
|
123
|
+
action = "clear_append"
|
|
124
|
+
help = """The list of mounts to mount to the VM, each in the format of SOURCE:TARGET, where SOURCE must be either a Google Storage Bucket path (gs://...).
|
|
125
|
+
You can also use named mounts like `INDIR=gs://my-bucket/inputs` and the directory will be mounted to `/mnt/disks/INDIR` in the VM;
|
|
126
|
+
then you can use environment variable `$INDIR` in the command/script to refer to the mounted path.
|
|
127
|
+
You can also mount a file like `INFILE=gs://my-bucket/inputs/file.txt`. The parent directory will be mounted to `/mnt/disks/INFILE/inputs` in the VM,
|
|
128
|
+
and the file will be available at `/mnt/disks/INFILE/inputs/file.txt` in the VM. `$INFILE` can also be used in the command/script to refer to the mounted path.
|
|
129
|
+
"""
|
|
119
130
|
|
|
120
131
|
[[groups.arguments]]
|
|
121
132
|
flags = ["--service-account"]
|
|
@@ -159,9 +170,9 @@ help = "The entry point of the container to run the command."
|
|
|
159
170
|
|
|
160
171
|
[[groups.arguments]]
|
|
161
172
|
flags = ["--commands"]
|
|
162
|
-
type = "list"
|
|
173
|
+
# type = "list"
|
|
163
174
|
default = []
|
|
164
|
-
action = "
|
|
175
|
+
action = "clear_append"
|
|
165
176
|
help = "The list of commands to run in the container, each as a separate string."
|
|
166
177
|
|
|
167
178
|
[[groups.arguments]]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pipen-cli-gbatch"
|
|
3
|
+
version = "0.0.2"
|
|
4
|
+
description = "A pipen cli plugin to run command via Google Cloud Batch"
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "pwwang",email = "pwwang@pwwang.com"}
|
|
7
|
+
]
|
|
8
|
+
license = {text = "MIT"}
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9,<4.0"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"pipen (>=0.17.19,<0.18.0)",
|
|
13
|
+
"pipen-poplog (>=0.3.6,<0.4.0)",
|
|
14
|
+
"google-cloud-storage (>=3.0.0,<4.0.0)"
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.pytest.ini_options]
|
|
18
|
+
addopts = "-vv -n auto --dist loadgroup -p no:benchmark -W error::UserWarning --cov-config=.coveragerc --cov=pipen_cli_gbatch --cov-report xml:.coverage.xml --cov-report term-missing"
|
|
19
|
+
console_output_style = "progress"
|
|
20
|
+
junit_family = "xunit1"
|
|
21
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
22
|
+
filterwarnings = [
|
|
23
|
+
"ignore:.+may lead to deadlocks in the child:DeprecationWarning",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[tool.poetry.plugins.pipen_cli]
|
|
27
|
+
cli-gbatch = "pipen_cli_gbatch:CliGbatchPlugin"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
[tool.poetry.group.dev.dependencies]
|
|
31
|
+
pytest = "^8.4.1"
|
|
32
|
+
pytest-cov = "^6.2.1"
|
|
33
|
+
pytest-asyncio = "^1.1.0"
|
|
34
|
+
pytest-xdist = "^3.8.0"
|
|
35
|
+
pytest-forked = "^1.6.0"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
39
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
[project]
|
|
2
|
-
name = "pipen-cli-gbatch"
|
|
3
|
-
version = "0.0.0"
|
|
4
|
-
description = "A pipen cli plugin to run command via Google Cloud Batch"
|
|
5
|
-
authors = [
|
|
6
|
-
{name = "pwwang",email = "pwwang@pwwang.com"}
|
|
7
|
-
]
|
|
8
|
-
license = {text = "MIT"}
|
|
9
|
-
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.9,<4.0"
|
|
11
|
-
dependencies = [
|
|
12
|
-
"pipen (>=0.17.17,<0.18.0)",
|
|
13
|
-
"pipen-poplog (>=0.3.6,<0.4.0)",
|
|
14
|
-
"google-cloud-storage (>=3.0.0,<4.0.0)"
|
|
15
|
-
]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
[tool.poetry.plugins.pipen_cli]
|
|
19
|
-
cli-gbatch = "pipen_cli_gbatch:CliGbatchPlugin"
|
|
20
|
-
|
|
21
|
-
[build-system]
|
|
22
|
-
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
23
|
-
build-backend = "poetry.core.masonry.api"
|