pipen-cli-gbatch 0.0.5__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pipen-cli-gbatch might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pipen-cli-gbatch
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A pipen cli plugin to run command via Google Cloud Batch
5
5
  License: MIT
6
6
  Author: pwwang
@@ -79,8 +79,9 @@ from pipen.cli import CLIPlugin
79
79
  from pipen.scheduler import GbatchScheduler
80
80
  from pipen_poplog import LogsPopulator
81
81
 
82
- __version__ = "0.0.5"
82
+ __version__ = "0.0.7"
83
83
  __all__ = ("CliGbatchPlugin", "CliGbatchDaemon")
84
+ MOUNTED_CWD = "/mnt/disks/.cwd"
84
85
 
85
86
 
86
87
  class CliGbatchDaemon:
@@ -115,6 +116,17 @@ class CliGbatchDaemon:
115
116
  else:
116
117
  self.config = Diot(config)
117
118
 
119
+ self.mount_as_cwd = self.config.pop("mount_as_cwd", None)
120
+ if self.mount_as_cwd:
121
+ if self.config.cwd:
122
+ print(
123
+ "\033[1;4mError\033[0m: --mount-as-cwd cannot be used with "
124
+ "--cwd at the same time.\n"
125
+ )
126
+ sys.exit(1)
127
+ self.config.cwd = MOUNTED_CWD
128
+ self._add_mount(self.mount_as_cwd, MOUNTED_CWD)
129
+
118
130
  self.config.prescript = self.config.get("prescript", None) or ""
119
131
  self.config.postscript = self.config.get("postscript", None) or ""
120
132
  if "labels" in self.config and isinstance(self.config.labels, list):
@@ -201,6 +213,11 @@ class CliGbatchDaemon:
201
213
  Raises:
202
214
  SystemExit: If workdir is not a valid Google Storage bucket path.
203
215
  """
216
+ command_name = self._get_arg_from_command("name") or self.config["name"]
217
+ from_mount_as_cwd = self.mount_as_cwd and not self.config.workdir
218
+ if from_mount_as_cwd:
219
+ self.config.workdir = f"{self.mount_as_cwd}/.pipen/{command_name}"
220
+
204
221
  command_workdir = self._get_arg_from_command("workdir")
205
222
  workdir = self.config.get("workdir", None) or command_workdir
206
223
 
@@ -212,11 +229,14 @@ class CliGbatchDaemon:
212
229
  sys.exit(1)
213
230
 
214
231
  self.config["workdir"] = workdir
215
- # If command workdir is different from config workdir, we need to mount it
216
- self._add_mount(workdir, GbatchScheduler.MOUNTED_METADIR)
232
+ if from_mount_as_cwd: # already mounted
233
+ self._replace_arg_in_command("workdir", f"{MOUNTED_CWD}/.pipen")
234
+ else:
235
+ # If command workdir is different from config workdir, we need to mount it
236
+ self._add_mount(workdir, GbatchScheduler.MOUNTED_METADIR)
217
237
 
218
- # replace --workdir value with the mounted workdir in the command
219
- self._replace_arg_in_command("workdir", GbatchScheduler.MOUNTED_METADIR)
238
+ # replace --workdir value with the mounted workdir in the command
239
+ self._replace_arg_in_command("workdir", GbatchScheduler.MOUNTED_METADIR)
220
240
 
221
241
  def _handle_outdir(self):
222
242
  """Handle output directory configuration and mounting.
@@ -227,8 +247,22 @@ class CliGbatchDaemon:
227
247
  command_outdir = self._get_arg_from_command("outdir")
228
248
 
229
249
  if command_outdir:
230
- self._add_mount(command_outdir, GbatchScheduler.MOUNTED_OUTDIR)
231
- self._replace_arg_in_command("outdir", GbatchScheduler.MOUNTED_OUTDIR)
250
+ coudir = AnyPath(command_outdir)
251
+ if (
252
+ not isinstance(coudir, GSPath)
253
+ and not coudir.is_absolute()
254
+ and self.mount_as_cwd
255
+ ):
256
+ self._replace_arg_in_command("outdir", f"{MOUNTED_CWD}/{coudir}")
257
+ else:
258
+ self._add_mount(command_outdir, GbatchScheduler.MOUNTED_OUTDIR)
259
+ self._replace_arg_in_command("outdir", GbatchScheduler.MOUNTED_OUTDIR)
260
+ elif self.mount_as_cwd:
261
+ command_name = self._get_arg_from_command("name") or self.config.name
262
+ self._replace_arg_in_command(
263
+ "outdir",
264
+ f"{MOUNTED_CWD}/{command_name}-output",
265
+ )
232
266
 
233
267
  def _infer_name(self):
234
268
  """Infer the daemon name from configuration or command arguments.
@@ -303,6 +337,7 @@ class CliGbatchDaemon:
303
337
  "version",
304
338
  "loglevel",
305
339
  "mounts",
340
+ "mount_as_cwd",
306
341
  "plain",
307
342
  )
308
343
  },
@@ -333,6 +368,7 @@ class CliGbatchDaemon:
333
368
  "version",
334
369
  "loglevel",
335
370
  "mounts",
371
+ "mount_as_cwd",
336
372
  "plain",
337
373
  ):
338
374
  continue
@@ -483,11 +519,17 @@ class CliGbatchDaemon:
483
519
  logger.setLevel(self.config.loglevel.upper())
484
520
 
485
521
  if not self.config.plain:
522
+ self._infer_name()
486
523
  self._handle_workdir()
487
524
  self._handle_outdir()
488
- self._infer_name()
489
525
  self._infer_jobname_prefix()
490
526
  else:
527
+ if "name" not in self.config or not self.config.name:
528
+ self.config["name"] = "PipenCliGbatchDaemon"
529
+
530
+ if not self.config.workdir and self.mount_as_cwd:
531
+ self.config.workdir = f"{self.mount_as_cwd}/.pipen"
532
+
491
533
  if not self.config.workdir or not isinstance(
492
534
  AnyPath(self.config.workdir),
493
535
  GSPath,
@@ -498,8 +540,6 @@ class CliGbatchDaemon:
498
540
  )
499
541
  sys.exit(1)
500
542
 
501
- if "name" not in self.config or not self.config.name:
502
- self.config["name"] = "PipenCliGbatchDaemon"
503
543
 
504
544
  async def run(self): # pragma: no cover
505
545
  """Execute the daemon pipeline based on configuration.
@@ -804,22 +844,6 @@ class CliGbatchPlugin(CLIPlugin): # pragma: no cover
804
844
 
805
845
  setattr(known_parsed, key, val)
806
846
 
807
- mount_as_cwd = getattr(known_parsed, "mount_as_cwd", None)
808
- cwd = getattr(known_parsed, "cwd", None)
809
- delattr(known_parsed, "mount_as_cwd")
810
- if mount_as_cwd and cwd:
811
- print(
812
- "\033[1;4mError\033[0m: --mount-as-cwd and --cwd "
813
- "cannot be used together.\n"
814
- )
815
- sys.exit(1)
816
-
817
- mount = getattr(known_parsed, "mount", None) or []
818
- if mount_as_cwd:
819
- mount.append(f"{mount_as_cwd}:/mnt/disks/.cwd")
820
- setattr(known_parsed, "mount", mount)
821
- setattr(known_parsed, "cwd", "/mnt/disks/.cwd")
822
-
823
847
  return known_parsed
824
848
 
825
849
  def exec_command(self, args: Namespace) -> None:
@@ -133,7 +133,9 @@ flags = ["--mount-as-cwd"]
133
133
  type = "str"
134
134
  help = """The directory to mount as the current working directory of the command.
135
135
  This is a shortcut for `--mount <cloudpath>:/mnt/disks/.cwd --cwd /mnt/disks/.cwd`.
136
- The <cloudpath> must be a Google Storage Bucket path (gs://...).
136
+ The <cloudpath> must be a Google Storage Bucket path (gs://...). When this option is used,
137
+ and `--workdir` is not provided, the workdir will be set to `<cloudpath>/.pipen/<command_name>`,
138
+ where <command_name> is the name of the command (or the value of `--name` if provided).
137
139
  """
138
140
 
139
141
  [[groups.arguments]]
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pipen-cli-gbatch"
3
- version = "0.0.5"
3
+ version = "0.0.7"
4
4
  description = "A pipen cli plugin to run command via Google Cloud Batch"
5
5
  authors = ["pwwang <pwwang@pwwang.com>"]
6
6
  license = "MIT"
@@ -17,7 +17,7 @@ entry_points = \
17
17
 
18
18
  setup_kwargs = {
19
19
  'name': 'pipen-cli-gbatch',
20
- 'version': '0.0.5',
20
+ 'version': '0.0.7',
21
21
  'description': 'A pipen cli plugin to run command via Google Cloud Batch',
22
22
  'long_description': '# pipen-cli-gbatch\n\nA pipen CLI plugin to run commands via Google Cloud Batch.\n\nThe idea is to submit the command using xqute and use the gbatch scheduler to run it on Google Cloud Batch.\n\n## Installation\n\n```bash\npip install pipen-cli-gbatch\n```\n\n## Usage\n\n### Basic Command Execution\n\nTo run a command like:\n\n```bash\npython myscript.py --input input.txt --output output.txt\n```\n\nYou can run it with:\n\n```bash\npipen gbatch -- python myscript.py --input input.txt --output output.txt\n```\n\n### With Configuration File\n\nIn order to provide configurations like we do for a normal pipen pipeline, you can also provide a config file (the `[pipen-cli-gbatch]` section will be used):\n\n```bash\npipen gbatch @config.toml -- \\\n python myscript.py --input input.txt --output output.txt\n```\n\n### Detached Mode\n\nWe can also use the `--nowait` option to run the command in a detached mode:\n\n```bash\npipen gbatch --nowait -- \\\n python myscript.py --input input.txt --output output.txt\n```\n\nOr by default, it will wait for the command to complete:\n\n```bash\npipen gbatch -- \\\n python myscript.py --input input.txt --output output.txt\n```\n\nWhile waiting, the running logs will be pulled and shown in the terminal.\n\n### View Logs\n\nWhen running in detached mode, one can also pull the logs later by:\n\n```bash\npipen gbatch --view-logs -- \\\n python myscript.py --input input.txt --output output.txt\n\n# or just provide the workdir\npipen gbatch --view-logs --workdir gs://my-bucket/workdir\n```\n\n## Configuration\n\nBecause the daemon pipeline is running on Google Cloud Batch, a Google Storage Bucket path is required for the workdir. 
For example: `gs://my-bucket/workdir`\n\nA unique job ID will be generated per the name (`--name`) and workdir, so that if the same command is run again with the same name and workdir, it will not start a new job, but just attach to the existing job and pull the logs.\n\nIf `--name` is not provided in the command line, it will try to grab the name (`--name`) from the command line arguments after `--`, or else use "name" from the root section of the configuration file, with a "GbatchDaemon" suffix. If nothing can be found, a default name "PipenCliGbatchDaemon" will be used.\n\nThen a workdir `{workdir}/<daemon pipeline name>/` will be created to store the meta information.\n\nWith `--profile` provided, the scheduler options (`scheduler_opts`) defined in `~/.pipen.toml` and `./.pipen.toml` will be used as default.\n\n## All Options\n\n```bash\n> pipen gbatch --help\nUsage: pipen gbatch [-h] [--nowait | --view-logs {all,stdout,stderr}] [--workdir WORKDIR]\n [--error-strategy {retry,halt}] [--num-retries NUM_RETRIES] [--prescript PRESCRIPT]\n [--postscript POSTSCRIPT] [--jobname-prefix JOBNAME_PREFIX] [--recheck-interval RECHECK_INTERVAL]\n [--cwd CWD] [--project PROJECT] [--location LOCATION] [--mount MOUNT]\n [--service-account SERVICE_ACCOUNT] [--network NETWORK] [--subnetwork SUBNETWORK]\n [--no-external-ip-address] [--machine-type MACHINE_TYPE] [--provisioning-model {STANDARD,SPOT}]\n [--image-uri IMAGE_URI] [--entrypoint ENTRYPOINT] [--commands COMMANDS] [--runnables RUNNABLES]\n [--allocationPolicy ALLOCATIONPOLICY] [--taskGroups TASKGROUPS] [--labels LABELS] [--gcloud GCLOUD]\n [--name NAME] [--profile PROFILE] [--version]\n [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL,debug,info,warning,error,critical}]\n ...\n\nSimplify running commands via Google Cloud Batch.\n\nKey Options:\n The key options to run the command.\n\n --workdir WORKDIR The workdir (a Google Storage Bucket path is required) to store the meta information of the\n daemon pipeline.\n If not 
provided, the one from the command will be used.\n command The command passed after `--` to run, with all its arguments. Note that the command should be\n provided after `--`.\n\nScheduler Options:\n The options to configure the gbatch scheduler.\n\n --error-strategy {retry,halt}\n The strategy when there is error happened [default: halt]\n --num-retries NUM_RETRIES\n The number of retries when there is error happened. Only valid when --error-strategy is \'retry\'.\n [default: 0]\n --prescript PRESCRIPT\n The prescript to run before the main command.\n --postscript POSTSCRIPT\n The postscript to run after the main command.\n --jobname-prefix JOBNAME_PREFIX\n The prefix of the name prefix of the daemon job.\n If not provided, try to generate one from the command to run.\n If the command is also not provided, use \'pipen-gbatch-daemon\' as the prefix.\n --recheck-interval RECHECK_INTERVAL\n The interval to recheck the job status, each takes about 0.1 seconds. [default: 600]\n --cwd CWD The working directory to run the command. If not provided, the current directory is used. You\n can pass either a mounted path (inside the VM) or a Google Storage Bucket path (gs://...). If a\n Google Storage Bucket path is provided, the mounted path will be inferred from the mounted paths\n of the VM.\n --project PROJECT The Google Cloud project to run the job.\n --location LOCATION The location to run the job.\n --mount MOUNT The list of mounts to mount to the VM, each in the format of SOURCE:TARGET, where SOURCE must be\n either a Google Storage Bucket path (gs://...). 
[default: []]\n --service-account SERVICE_ACCOUNT\n The service account to run the job.\n --network NETWORK The network to run the job.\n --subnetwork SUBNETWORK\n The subnetwork to run the job.\n --no-external-ip-address\n Whether to disable external IP address for the VM.\n --machine-type MACHINE_TYPE\n The machine type of the VM.\n --provisioning-model {STANDARD,SPOT}\n The provisioning model of the VM.\n --image-uri IMAGE_URI\n The custom image URI of the VM.\n --entrypoint ENTRYPOINT\n The entry point of the container to run the command.\n --commands COMMANDS The list of commands to run in the container, each as a separate string. [default: []]\n --runnables RUNNABLES\n The JSON string of extra settings of runnables add to the job.json.\n Refer to https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#Runnable\n for details.\n You can have an extra key \'order\' for each runnable, where negative values mean to run before\n the main command,\n and positive values mean to run after the main command.\n --allocationPolicy ALLOCATIONPOLICY\n The JSON string of extra settings of allocationPolicy add to the job.json. Refer to\n https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#AllocationPolicy\n for details. [default: {}]\n --taskGroups TASKGROUPS\n The JSON string of extra settings of taskGroups add to the job.json. Refer to\n https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#TaskGroup for\n details. [default: []]\n --labels LABELS The JSON string of labels to add to the job. Refer to\n https://cloud.google.com/batch/docs/reference/rest/v1/projects.locations.jobs#Job.FIELDS.labels\n for details. [default: {}]\n --gcloud GCLOUD The path to the gcloud command. [default: gcloud]\n\nOptions:\n -h, --help show this help message and exit\n --nowait Run the command in a detached mode without waiting for its completion. 
[default: False]\n --view-logs {all,stdout,stderr}\n View the logs of a job.\n --name NAME The name of the daemon pipeline.\n If not provided, try to generate one from the command to run.\n If the command is also not provided, use \'PipenCliGbatchDaemon\' as the name.\n --profile PROFILE Use the `scheduler_opts` as the Scheduler Options of a given profile from pipen configuration\n files,\n including ~/.pipen.toml and ./pipen.toml.\n Note that if not provided, nothing will be loaded from the configuration files.\n --version Show the version of the pipen-cli-gbatch package. [default: False]\n --loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL,debug,info,warning,error,critical}\n Set the logging level for the daemon process. [default: INFO]\n\nExamples:\n \u200b\n # Run a command and wait for it to complete\n > pipen gbatch --workdir gs://my-bucket/workdir -- \\\n python myscript.py --input input.txt --output output.txt\n\n # Use named mounts\n > pipen gbatch --workdir gs://my-bucket/workdir --mount INFILE=gs://bucket/path/to/file \\\n --mount OUTDIR=gs://bucket/path/to/outdir -- \\\n cat $INFILE > $OUTDIR/output.txt\n \u200b\n # Run a command in a detached mode\n > pipen gbatch --nowait --project $PROJECT --location $LOCATION \\\n --workdir gs://my-bucket/workdir -- \\\n python myscript.py --input input.txt --output output.txt\n \u200b\n # If you have a profile defined in ~/.pipen.toml or ./.pipen.toml\n > pipen gbatch --profile myprofile -- \\\n python myscript.py --input input.txt --output output.txt\n \u200b\n # View the logs of a previously run command\n > pipen gbatch --view-logs all --name my-daemon-name \\\n --workdir gs://my-bucket/workdir\n```\n\n## API\n\nThe API can also be used to run commands programmatically:\n\n```python\nimport asyncio\nfrom pipen_cli_gbatch import CliGbatchDaemon\n\npipe = CliGbatchDaemon(config_for_daemon, command)\nasyncio.run(pipe.run())\n```\n\nNote that the daemon pipeline will always be running without caching, so that the command 
will always be executed when the pipeline is run.\n',
23
23
  'author': 'pwwang',