flux-batch 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flux_batch/__init__.py +7 -0
- flux_batch/jobspec.py +132 -0
- flux_batch/logger/__init__.py +1 -0
- flux_batch/logger/generate.py +208 -0
- flux_batch/logger/logger.py +194 -0
- flux_batch/models.py +63 -0
- flux_batch/service/__init__.py +31 -0
- flux_batch/service/scribe.py +12 -0
- flux_batch/submit.py +53 -0
- flux_batch/utils/__init__.py +2 -0
- flux_batch/utils/fileio.py +203 -0
- flux_batch/utils/text.py +22 -0
- flux_batch/utils/timer.py +24 -0
- flux_batch/version.py +16 -0
- flux_batch-0.0.0.dist-info/LICENSE +21 -0
- flux_batch-0.0.0.dist-info/METADATA +153 -0
- flux_batch-0.0.0.dist-info/NOTICE +21 -0
- flux_batch-0.0.0.dist-info/RECORD +21 -0
- flux_batch-0.0.0.dist-info/WHEEL +5 -0
- flux_batch-0.0.0.dist-info/entry_points.txt +2 -0
- flux_batch-0.0.0.dist-info/top_level.txt +1 -0
flux_batch/__init__.py
ADDED
flux_batch/jobspec.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import shlex
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
import flux_batch.models as models
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BatchJobspecV1:
    """
    A BatchJobspecV1 mirrors a JobspecV1. We need to:

    1. Add some number of commands or a script
    2. Add optional services (start/stop)
    """

    def __init__(self, attributes: models.BatchAttributesV1 = None):
        self.attributes = attributes or models.BatchAttributesV1()
        self.commands: List[str] = []
        self.prologs: List[str] = []
        self.epilogs: List[str] = []
        self.services: List[str] = []

    @classmethod
    def from_command(cls, command: List[str], **kwargs):
        """
        Create a spec whose single command is the shell-quoted argv list.
        Extra kwargs initialize BatchAttributesV1.
        """
        inst = cls(models.BatchAttributesV1(**kwargs))
        inst.commands = [shlex.join(command)]
        return inst

    @classmethod
    def from_jobs(cls, batch: models.BatchJobV1, **kwargs):
        """
        Generate the batch script from a set of jobs.

        With more than one job, we assume we are waiting.
        """
        inst = cls(models.BatchAttributesV1(**kwargs))
        if len(batch.jobs) > 1:
            for job_str in batch.jobs:
                inst.commands.append(f"flux submit --wait {job_str}")
            # Assume we want to wait for all jobs
            inst.commands.append("flux job wait --all")
        else:
            # Copy the list: assigning batch.jobs directly would alias the
            # caller's list, so later edits to this spec would mutate it
            inst.commands = list(batch.jobs)
        return inst

    def add_service(self, service: str):
        """Register a user-level systemd service to start/stop around the jobs."""
        self.services.append(service)

    def add_prolog(self, cmd: str):
        """Add a shell command to run before services and jobs."""
        self.prologs.append(cmd)

    def add_epilog(self, cmd: str):
        """Add a shell command to run after services are stopped."""
        self.epilogs.append(cmd)

    def get_cli_flags(self) -> List[str]:
        """
        Converts BatchAttributesV1 into a list of strings for subprocess.
        """
        flags = []
        attr = self.attributes

        # Mapping table for simple, single-valued flags
        mapping = {
            "nslots": "-n",
            "cores_per_slot": "-c",
            "gpus_per_slot": "-g",
            "nodes": "-N",
            "bank": "-B",
            "queue": "-q",
            "time_limit": "-t",
            "urgency": "--urgency",
            "job_name": "--job-name",
            "cwd": "--cwd",
            "dependency": "--dependency",
            "requires": "--requires",
            "begin_time": "--begin-time",
            "signal": "--signal",
            "broker_opts": "--broker-opts",
            "dump": "--dump",
            "flags": "--flags",
        }

        for field_name, flag in mapping.items():
            val = getattr(attr, field_name)
            if val is not None:
                flags.extend([flag, str(val)])

        # Boolean flags carry no value
        if attr.exclusive:
            flags.append("-x")
        if attr.unbuffered:
            flags.append("-u")
        if attr.wrap:
            flags.append("--wrap")

        # Multi-use flags: the flag is repeated once per value
        multi_mapping = {
            "setopt": "-o",
            "setattr": "-S",
            "add_file": "--add-file",
            "env": "--env",
            "env_remove": "--env-remove",
            "env_file": "--env-file",
            "rlimit": "--rlimit",
            "conf": "--conf",
        }
        for field_name, flag in multi_mapping.items():
            for val in getattr(attr, field_name):
                flags.extend([flag, str(val)])

        return flags

    def generate_wrapper_script(self) -> str:
        """
        Generate the wrapper script.

        1. Start with hashbang!
        2. Add prologs
        3. Add services start
        4. Add jobs/commands
        5. Stop services (in reverse start order)
        6. And epilogs
        """
        lines = ["#!/bin/bash"]
        lines.extend(self.prologs)
        for s in self.services:
            lines.append(f"systemctl --user start {s}")
        lines.extend(self.commands)
        for s in reversed(self.services):
            lines.append(f"systemctl --user stop {s}")
        lines.extend(self.epilogs)
        return "\n".join(lines)
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .logger import LogColors, logger, setup_logger
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
from random import choice
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class JobNamer:
    """Generates whimsical robot-style job names.

    A name looks like "<descriptor><delim><noun><delim><digits>",
    e.g. "chunky-soup-1234".
    """

    # fmt: off
    _descriptors = [
        "chunky", "buttery", "delicious", "scruptious", "dinosaur", "boopy",
        "lovely", "carnivorous", "hanky", "loopy", "doopy", "astute",
        "gloopy", "outstanding", "stinky", "conspicuous", "fugly", "frigid",
        "angry", "adorable", "sticky", "moolicious", "cowy", "spicy",
        "grated", "crusty", "stanky", "blank", "bumfuzzled", "fuzzy",
        "hairy", "peachy", "tart", "creamy", "arid", "strawberry",
        "butterscotch", "wobbly", "persnickety", "nerdy", "dirty", "placid",
        "bloated", "swampy", "pusheena", "hello", "goodbye", "milky",
        "purple", "rainbow", "bricky", "muffled", "anxious", "misunderstood",
        "eccentric", "quirky", "lovable", "reclusive", "faux", "evasive",
        "confused", "crunchy", "expensive", "ornery", "fat", "phat",
        "joyous", "expressive", "psycho", "chocolate", "salted", "gassy",
        "red", "blue",
    ]

    _nouns = [
        "squidward", "hippo", "butter", "animal", "peas", "lettuce",
        "carrot", "onion", "peanut", "cupcake", "muffin", "buttface",
        "leopard", "parrot", "parsnip", "poodle", "itch", "punk",
        "kerfuffle", "soup", "noodle", "avocado", "peanut-butter", "latke",
        "milkshake", "banana", "lizard", "lemur", "lentil", "bits",
        "house", "leader", "toaster", "signal", "pancake", "kitty",
        "cat", "cattywampus", "poo", "malarkey", "general", "rabbit",
        "chair", "staircase", "underoos", "snack", "lamp", "eagle",
        "hobbit", "diablo", "earthworm", "pot", "plant", "leg",
        "arm", "bike", "citrus", "dog", "puppy", "blackbean",
        "ricecake", "gato", "nalgas", "lemon", "caramel", "fudge",
        "cherry", "sundae", "truffle", "cinnamonbun", "pastry", "egg",
        "omelette", "fork", "knife", "spoon", "salad", "train",
        "car", "motorcycle", "bicycle", "platanos", "mango", "taco",
        "pedo", "nunchucks", "destiny", "hope", "despacito", "frito",
        "chip",
    ]
    # fmt: on

    def generate(self, delim="-", length=4, chars="0123456789"):
        """Build a random name (inspired by Haikunator).

        Parameters
        ==========
        delim: separator between the three name parts
        length: number of random token characters
        chars: alphabet the random token is drawn from
        """
        descriptor = self._select(self._descriptors)
        noun = self._select(self._nouns)
        token = "".join(self._select(chars) for _ in range(length))
        return delim.join([descriptor, noun, token])

    def _select(self, select_from):
        """Pick one element with random.choice; an empty input yields ""."""
        return choice(select_from) if select_from else ""
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def generate_name():
    """Convenience wrapper: return one random job name with defaults."""
    return JobNamer().generate()
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import logging as _logging
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import sys
|
|
6
|
+
import threading
|
|
7
|
+
|
|
8
|
+
from fastmcp.utilities.logging import get_logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LogColors:
    """ANSI escape sequences for colorizing terminal output.

    ENDC resets the terminal back to its default style; the others
    start a color/style span.
    """

    PURPLE = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    RED = "\033[91m"
    ENDC = "\033[0m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ColorizingStreamHandler(_logging.StreamHandler):
    """A logging StreamHandler that wraps messages in ANSI color codes
    keyed on the record's level, when the stream is a capable TTY.
    """

    # ANSI base color indexes; the actual escape is COLOR_SEQ % (30 + index)
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
    RESET_SEQ = "\033[0m"
    COLOR_SEQ = "\033[%dm"
    BOLD_SEQ = "\033[1m"

    # Level name -> color index used by decorate()
    colors = {
        "WARNING": YELLOW,
        "INFO": GREEN,
        "DEBUG": BLUE,
        "CRITICAL": RED,
        "ERROR": RED,
    }

    # NOTE(review): use_threads is accepted but never read — confirm whether
    # callers rely on it; emit() already serializes via _output_lock.
    def __init__(self, nocolor=False, stream=sys.stderr, use_threads=False):
        super().__init__(stream=stream)
        self._output_lock = threading.Lock()
        # Color is disabled explicitly OR when the stream cannot render it
        self.nocolor = nocolor or not self.can_color_tty()

    def can_color_tty(self):
        """Return True if the attached stream can display ANSI colors."""
        if "TERM" in os.environ and os.environ["TERM"] == "dumb":
            return False
        return self.is_tty and not platform.system() == "Windows"

    @property
    def is_tty(self):
        # Streams like StringIO have no isatty; treat those as non-TTY
        isatty = getattr(self.stream, "isatty", None)
        return isatty and isatty()

    def emit(self, record):
        """Write one decorated record to the stream, serialized by a lock."""
        with self._output_lock:
            try:
                self.format(record)  # add the message to the record
                self.stream.write(self.decorate(record))
                self.stream.write(getattr(self, "terminator", "\n"))
                self.flush()
            # Propagate broken pipes so callers can shut down cleanly
            except BrokenPipeError as e:
                raise e
            except (KeyboardInterrupt, SystemExit):
                # ignore any exceptions in these cases as any relevant messages have been printed before
                pass
            except Exception:
                self.handleError(record)

    def decorate(self, record):
        """Return the record's message, wrapped in color codes if enabled.

        Assumes format() was already called so record.message is set.
        """
        message = record.message
        message = [message]
        if not self.nocolor and record.levelname in self.colors:
            message.insert(0, self.COLOR_SEQ % (30 + self.colors[record.levelname]))
            message.append(self.RESET_SEQ)
        return "".join(message)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Logger:
    """
    Dispatching logger: each message is a dict routed through the registered
    handler functions (by default text_handler), which forward to a standard
    logging.Logger obtained from fastmcp.
    """

    def __init__(self):
        self.logger = get_logger(__name__)
        self.log_handler = [self.text_handler]
        self.stream_handler = None
        self.printshellcmds = False
        self.quiet = False
        self.logfile = None
        self.last_msg_was_job_info = False
        self.logfile_handler = None

    def cleanup(self):
        """Detach and close any logfile handler and restore the default."""
        if self.logfile_handler is not None:
            self.logger.removeHandler(self.logfile_handler)
            self.logfile_handler.close()
        self.log_handler = [self.text_handler]

    def handler(self, msg):
        """Dispatch a message dict to every registered handler function."""
        for handler in self.log_handler:
            handler(msg)

    def set_stream_handler(self, stream_handler):
        """Install a stream handler, replacing any previously installed one."""
        if self.stream_handler is not None:
            self.logger.removeHandler(self.stream_handler)
        self.stream_handler = stream_handler
        self.logger.addHandler(stream_handler)

    def set_level(self, level):
        """Set the level of the underlying logging.Logger."""
        self.logger.setLevel(level)

    def location(self, msg):
        """Debug-log a message with the caller's file, function, and line."""
        callerframerecord = inspect.stack()[1]
        frame = callerframerecord[0]
        info = inspect.getframeinfo(frame)
        self.debug("{}: {info.filename}, {info.function}, {info.lineno}".format(msg, info=info))

    def info(self, msg):
        self.handler(dict(level="info", msg=msg))

    def warning(self, msg):
        self.handler(dict(level="warning", msg=msg))

    def debug(self, msg):
        self.handler(dict(level="debug", msg=msg))

    def error(self, msg):
        self.handler(dict(level="error", msg=msg))

    def exit(self, msg, return_code=1):
        """Log an error message, then terminate the process."""
        self.handler(dict(level="error", msg=msg))
        sys.exit(return_code)

    def progress(self, done=None, total=None):
        self.handler(dict(level="progress", done=done, total=total))

    def shellcmd(self, msg):
        """Log a shell command (only printed when printshellcmds is set)."""
        if msg is not None:
            msg = dict(level="shellcmd", msg=msg)
            self.handler(msg)

    def success(self, message):
        """
        Wrapper to add success to output for LLM.
        """
        return "✅ SUCCESS: " + message

    def failure(self, message):
        """
        Wrapper to add failure to output for LLM.
        """
        return "❌ FAILED: " + message

    def text_handler(self, msg):
        """The default log handler: prints the message to the console.

        Args:
            msg (dict): the log message dictionary
        """
        level = msg["level"]
        if level == "info" and not self.quiet:
            self.logger.info(msg["msg"])
        if level == "warning":
            self.logger.warning(msg["msg"])
        elif level == "error":
            self.logger.error(msg["msg"])
        elif level == "debug":
            self.logger.debug(msg["msg"])
        elif level == "progress" and not self.quiet:
            done = msg["done"]
            total = msg["total"]
            # Guard: progress() defaults both to None, and total may be 0 —
            # the previous unconditional done / total raised in those cases
            if not total:
                self.logger.info("{} steps done".format(done))
            else:
                p = done / total
                # Show two decimals for tiny fractions that would round to 0%
                percent_fmt = ("{:.2%}" if p < 0.01 else "{:.0%}").format(p)
                self.logger.info("{} of {} steps ({}) done".format(done, total, percent_fmt))
        elif level == "shellcmd":
            if self.printshellcmds:
                self.logger.warning(msg["msg"])
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
logger = Logger()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def setup_logger(
    quiet=False,
    printshellcmds=False,
    nocolor=False,
    stdout=False,
    debug=False,
    use_threads=False,
):
    """Configure the module-level logger with a colorizing console handler.

    Output goes to stdout when requested, otherwise stderr; debug=True
    lowers the level from INFO to DEBUG.
    """
    target = sys.stdout if stdout else sys.stderr
    console = ColorizingStreamHandler(
        nocolor=nocolor,
        stream=target,
        use_threads=use_threads,
    )
    logger.set_stream_handler(console)
    logger.set_level(_logging.DEBUG if debug else _logging.INFO)
    logger.quiet = quiet
    logger.printshellcmds = printshellcmds
|
flux_batch/models.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Dict, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class BatchJobV1:
    """
    A collection of command strings to run inside the batch wrapper.

    This should mirror JobspecV1.
    """

    # Each entry is a fully shell-quoted command string
    jobs: List[str] = field(default_factory=list)

    def add_job(self, command: List[str]):
        """Quote an argv-style command list and append it as one job."""
        import shlex

        joined = shlex.join(command)
        self.jobs.append(joined)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class BatchAttributesV1:
    """
    Explicitly defined arguments allowed by flux batch for V1 spec.

    Each field maps to one flux batch CLI flag (noted beside the field);
    BatchJobspecV1.get_cli_flags performs the translation. None / False /
    empty-list values are omitted from the generated command line.
    """

    # Resources
    nslots: Optional[int] = None  # -n
    cores_per_slot: Optional[int] = None  # -c
    gpus_per_slot: Optional[int] = None  # -g
    nodes: Optional[int] = None  # -N
    exclusive: bool = False  # -x

    # Basic Options
    bank: Optional[str] = None  # -B
    queue: Optional[str] = None  # -q
    time_limit: Optional[str] = None  # -t
    urgency: Optional[int] = None  # --urgency
    job_name: Optional[str] = None  # --job-name
    cwd: Optional[str] = None  # --cwd

    # More complex options (repeatable flags: one flag emitted per entry)
    setopt: List[str] = field(default_factory=list)  # -o
    # NOTE: field name shadows the setattr builtin inside this class body only
    setattr: List[str] = field(default_factory=list)  # -S
    add_file: List[str] = field(default_factory=list)  # --add-file
    env: List[str] = field(default_factory=list)  # --env
    env_remove: List[str] = field(default_factory=list)  # --env-remove
    env_file: List[str] = field(default_factory=list)  # --env-file
    rlimit: List[str] = field(default_factory=list)  # --rlimit
    conf: List[str] = field(default_factory=list)  # --conf

    # Other Attributes
    dependency: Optional[str] = None  # --dependency
    requires: Optional[str] = None  # --requires
    begin_time: Optional[str] = None  # --begin-time
    signal: Optional[str] = None  # --signal
    broker_opts: Optional[str] = None  # --broker-opts
    dump: Optional[str] = None  # --dump

    # Flags
    unbuffered: bool = False  # -u
    wrap: bool = False  # --wrap
    flags: Optional[str] = None  # --flags (debug, waitable, etc)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from .scribe import SERVICE_TEMPLATE as scribe_template
|
|
6
|
+
|
|
7
|
+
# Lookup of known services
|
|
8
|
+
services = {"scribe": scribe_template}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def ensure_user_service(service_name: str):
    """
    Checks for the existence of a systemd service file in the user's home.
    If it doesn't exist, it creates it and reloads the daemon.
    """
    unit_dir = os.path.expanduser("~/.config/systemd/user")
    os.makedirs(unit_dir, exist_ok=True)
    unit_path = os.path.join(unit_dir, f"{service_name}.service")

    if not os.path.exists(unit_path):
        template = services.get(service_name)
        if template is not None:
            print(f"[*] Provisioning {service_name} at {unit_path}")
            with open(unit_path, "w") as fd:
                fd.write(template.format(python_path=sys.executable))
        else:
            print(f"[*] Service {service_name} is not known, assuming exists.")

    # Reload the user-session manager to recognize the new unit
    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Template for the Scribe Journal Consumer systemd user unit.
# {python_path} is substituted with sys.executable at provisioning time
# (see flux_batch.service.ensure_user_service).
# NOTE(review): ExecStart runs flux_mcp_server.scribe, which is not a module
# of this (flux_batch) package — confirm the module path is intentional.
SERVICE_TEMPLATE = """[Unit]
Description=Flux Scribe Journal Consumer
After=network.target

[Service]
ExecStart={python_path} -m flux_mcp_server.scribe
Restart=on-failure

[Install]
WantedBy=default.target
"""
|
flux_batch/submit.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import stat
|
|
3
|
+
import subprocess
|
|
4
|
+
import tempfile
|
|
5
|
+
|
|
6
|
+
import flux
|
|
7
|
+
import flux.job
|
|
8
|
+
|
|
9
|
+
import flux_batch.models as models
|
|
10
|
+
import flux_batch.utils as utils
|
|
11
|
+
from flux_batch.service import ensure_user_service
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def submit(handle: flux.Flux, spec, dry_run=False):
    """
    Orchestrates the submission process:

    1. Provisions any required user-space services.
    2. Generates the wrapper shell script.
    3. Uses 'flux batch --dry-run' to compile the Jobspec JSON.
    4. Submits the Jobspec to the Flux instance.

    NOTE: spec must provide .services, .generate_wrapper_script() and
    .get_cli_flags() (i.e. a BatchJobspecV1) — the previous
    models.BatchJobV1 annotation did not match how the object is used here.

    Returns the submitted jobid, or the wrapper script text when
    dry_run=True (which is why the old ``-> int`` annotation was dropped).
    """
    # Provision services (like flux-scribe) if requested
    for service in spec.services:
        ensure_user_service(service)

    with tempfile.TemporaryDirectory() as tmpdir:
        # Write the wrapper script (handling prologs, services, and jobs)
        wrapper_path = os.path.join(tmpdir, "wrapper.sh")

        # A dry run just returns the generated script without submitting
        script = spec.generate_wrapper_script()
        if dry_run:
            return script

        utils.write_file(script, wrapper_path)

        # Make the script executable so 'flux batch' can analyze it
        os.chmod(wrapper_path, os.stat(wrapper_path).st_mode | stat.S_IEXEC)

        # Generate the RFC 25 Jobspec JSON via the Flux CLI
        # This handles all resource mapping (-N, -n, etc.)
        cmd = ["flux", "batch"] + spec.get_cli_flags() + ["--dry-run", wrapper_path]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error during flux batch dryrun: {e.stderr}")
            raise

        # Submit the JSON string to the Flux instance
        # The result.stdout contains the raw JSON Jobspec
        return flux.job.submit(handle, result.stdout)
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import platform
|
|
4
|
+
import re
|
|
5
|
+
import stat
|
|
6
|
+
import subprocess
|
|
7
|
+
import tempfile
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run_sync(coroutine):
    """
    Runs an async coroutine synchronously.
    Patches the loop if running inside IPython/Jupyter.

    Note that I'm not currently using this - keeping here if need.
    """
    import asyncio

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        # We ARE already inside a running loop (e.g. Jupyter) -> patch it so
        # run_until_complete can nest. nest_asyncio is imported lazily here
        # so plain-script callers don't need the dependency at all.
        import nest_asyncio

        nest_asyncio.apply(loop)
        return loop.run_until_complete(coroutine)
    # Standard Python script with no running loop -> standard run
    return asyncio.run(coroutine)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_local_cluster():
    """
    Guess the local cluster based on the hostname (text before the first "-").
    """
    hostname = platform.node()
    return hostname.partition("-")[0]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def read_json(filename):
    """
    Read json from file
    """
    content = read_file(filename)
    return json.loads(content)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def write_json(obj, filename):
    """Serialize obj as 4-space-indented JSON into filename."""
    with open(filename, "w") as fd:
        fd.write(json.dumps(obj, indent=4))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def load_jobspec(filename):
    """
    Load a jobspec. First try yaml and fall back to json.

    Accepts an already-loaded dict, a raw yaml/json string, or a filename.
    """
    # It is already loaded!
    if isinstance(filename, dict):
        return filename
    # A string that is not an existing path is treated as raw content
    if isinstance(filename, str) and not os.path.exists(filename):
        return yaml.safe_load(filename)
    try:
        return read_yaml(filename)
    # Previously a bare "except:", which also swallowed
    # KeyboardInterrupt/SystemExit; only parse errors should fall back
    except Exception:
        return read_json(filename)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def read_file(filename):
    """
    Read in a file content
    """
    with open(filename, "r") as fd:
        return fd.read()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def make_executable(path):
    """
    Adds execute permission to a file.
    """
    # Extend the current mode with execute bits for owner, group, and others
    mode = os.stat(path).st_mode
    os.chmod(path, mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def recursive_find(base, pattern="[.]py"):
    """Yield file paths at all directory levels below a base path whose
    full path matches a regular expression.

    Arguments:
    - base (str) : the base directory to search
    - pattern: a regex to match against full paths, defaults to [.]py
    """
    for root, _, names in os.walk(base):
        candidates = (os.path.join(root, name) for name in names)
        yield from (path for path in candidates if re.search(pattern, path))
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_tmpfile(tmpdir=None, prefix="", suffix=None):
    """
    Get a temporary file with an optional prefix.
    """
    # The user-requested directory (if any) takes priority for the base
    base = get_tmpdir(tmpdir)

    # Fold the base directory into the mkstemp prefix
    if base:
        prefix = os.path.join(base, os.path.basename(prefix))

    fd, tmp_file = tempfile.mkstemp(prefix=prefix, suffix=suffix)
    # We only need the path; close the open descriptor right away
    os.close(fd)
    return tmp_file
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def get_tmpdir(tmpdir=None, prefix="", create=True):
    """
    Get a temporary directory for an operation.

    The directory is named "<prefix>.<token>" under tmpdir (or the system
    temp directory) and is created unless create=False.
    """
    import uuid

    tmpdir = tmpdir or tempfile.gettempdir()
    prefix = prefix or "jobspec"
    # Previously used the private tempfile._get_candidate_names(), which is
    # not a stable API; uuid4 gives an equally unique public token.
    prefix = "%s.%s" % (prefix, uuid.uuid4().hex[:8])
    tmpdir = os.path.join(tmpdir, prefix)

    if not os.path.exists(tmpdir) and create is True:
        os.mkdir(tmpdir)

    return tmpdir
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def read_yaml(filename):
    """
    Read yaml from file
    """
    with open(filename, "r") as fd:
        return yaml.safe_load(fd)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def write_file(content, filename):
    """
    Write content to file
    """
    with open(filename, "w") as out:
        out.write(content)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def write_yaml(obj, filename):
    """
    Write yaml to file
    """
    with open(filename, "w") as out:
        yaml.dump(obj, out)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@contextmanager
def workdir(dirname):
    """
    Context manager that runs the body inside dirname, e.g.,

    with workdir(name):
        # do stuff

    The original working directory is always restored on exit.
    """
    previous = os.getcwd()
    os.chdir(dirname)
    try:
        yield
    finally:
        os.chdir(previous)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def run_command(cmd, stream=False, check_output=False, return_code=0):
    """
    use subprocess to send a command to the terminal.

    If check_output is True, check against an expected return code.
    With stream=True, output goes straight to the caller's stdout and
    the returned "message" is None.
    """
    pipe = None if stream else subprocess.PIPE
    proc = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=pipe, env=os.environ.copy())
    message = proc.communicate()[0]
    rc = proc.returncode

    if isinstance(message, bytes):
        message = message.decode("utf-8")
    output = {"message": message, "return_code": rc}

    # Check the output and raise an error if not success
    if check_output and rc != return_code:
        if output["message"]:
            raise ValueError(output["message"].strip())
        raise ValueError(f"Failed execution, return code {rc}")
    return output
|
flux_batch/utils/text.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def format_rules(rules):
    """Render an iterable of rules as a newline-joined markdown bullet list."""
    bullets = [f"- {rule}" for rule in rules]
    return "\n".join(bullets)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_code_block(content, code_type):
    """
    Parse a fenced code block out of a (model) response.

    Tries a regex for a complete ``` fence first; if that fails, strips
    any leading/trailing fence markers manually.
    """
    pattern = f"```(?:{code_type})?\n(.*?)```"
    found = re.search(pattern, content, re.DOTALL)
    if found:
        return found.group(1).strip()

    # Fallback: peel off fence markers one at a time
    typed_fence = f"```{code_type}"
    if content.startswith(typed_fence):
        content = content[len(typed_fence) :]
    if content.startswith("```"):
        content = content[len("```") :]
    if content.endswith("```"):
        content = content[: -len("```")]
    return content.strip()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Timer:
    """
    A context timer! You can use in context OR explicitly start/stop.
    t = Timer()
    with t:
        do_stuff()
    # t.elapsed_time holds elapsed seconds
    """

    def __init__(self):
        # Initialize so attribute access before start()/stop() is well-defined
        # (previously stop() before start() raised a bare AttributeError)
        self.start_time = None
        self.end_time = None
        self.elapsed_time = None

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    def stop(self):
        """Record the end time and compute elapsed seconds.

        Raises RuntimeError when called before start().
        """
        if self.start_time is None:
            raise RuntimeError("Timer.stop() called before start()")
        self.end_time = time.perf_counter()
        self.elapsed_time = self.end_time - self.start_time

    def start(self):
        """Record the start time."""
        self.start_time = time.perf_counter()
|
flux_batch/version.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Package metadata; presumably consumed by setup.py / packaging -- TODO confirm.
__version__ = "0.0.0"
AUTHOR = "Vanessa Sochat"
AUTHOR_EMAIL = "vsoch@users.noreply.github.com"
NAME = "flux-batch"
PACKAGE_URL = "https://github.com/converged-computing/flux-batch"
KEYWORDS = "flux, flux framework, hpc, batch, workloads"
DESCRIPTION = "Python SDK for flux batch jobs and services"
LICENSE = "LICENSE"

# Dependency specs as (package_name, {"min_version": ...}) tuples; a
# min_version of None means "any version".
INSTALL_REQUIRES = (
    ("pyyaml", {"min_version": None}),
    ("ply", {"min_version": None}),
)

TESTS_REQUIRES = (("pytest", {"min_version": "4.6.2"}),)
# Combined set used for the "all" extra.
INSTALL_REQUIRES_ALL = INSTALL_REQUIRES + TESTS_REQUIRES
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2022-2023 LLNS, LLC and other HPCIC DevTools Developers.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: flux-batch
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Python SDK for flux batch jobs and services
|
|
5
|
+
Home-page: https://github.com/converged-computing/flux-batch
|
|
6
|
+
Author: Vanessa Sochat
|
|
7
|
+
Author-email: vsoch@users.noreply.github.com
|
|
8
|
+
Maintainer: Vanessa Sochat
|
|
9
|
+
License: LICENSE
|
|
10
|
+
Keywords: flux,flux framework,hpc,batch,workloads
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: C
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Topic :: Software Development
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Operating System :: Unix
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
License-File: NOTICE
|
|
23
|
+
Requires-Dist: pyyaml
|
|
24
|
+
Requires-Dist: ply
|
|
25
|
+
Provides-Extra: all
|
|
26
|
+
Requires-Dist: pyyaml ; extra == 'all'
|
|
27
|
+
Requires-Dist: ply ; extra == 'all'
|
|
28
|
+
Requires-Dist: pytest >=4.6.2 ; extra == 'all'
|
|
29
|
+
|
|
30
|
+
# flux-batch
|
|
31
|
+
|
|
32
|
+
> Python SDK to generate Flux batch jobs and services
|
|
33
|
+
|
|
34
|
+
[](https://badge.fury.io/py/flux-batch)
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
|
|
38
|
+
## Related Projects
|
|
39
|
+
|
|
40
|
+
- [flux-mcp](https://github.com/converged-computing/flux-mcp): MCP functions for Flux.
|
|
41
|
+
- [flux-mcp-server](https://github.com/converged-computing/flux-mcp-server): MCP server.
|
|
42
|
+
- [fractale-mcp](https://github.com/compspec/fractale-mcp): (fractale) MCP orchestration (agents, databases, ui interfaces).
|
|
43
|
+
- [hpc-mcp](https://github.com/converged-computing/hpc-mcp): HPC tools for a larger set of HPC and converged computing use cases.
|
|
44
|
+
|
|
45
|
+
## Services
|
|
46
|
+
|
|
47
|
+
- **flux-scribe**: Write job events to a local sqlite database via the JournalConsumer (not added yet, written and needs testing)
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
This is a small Flux utility that makes it easy to create Flux batch jobs and services.
|
|
52
|
+
The use case is to submit work (one or more jobs) under an instance, and run a custom service, or epilog and prolog commands. We will provision several services here, and you can also provide your own name to start / stop.
|
|
53
|
+
|
|
54
|
+
### Setup
|
|
55
|
+
|
|
56
|
+
Install the library and start (or be in) a flux instance.
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
flux start
|
|
60
|
+
pip install -e . --break-system-packages
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Example
|
|
64
|
+
|
|
65
|
+
Run the controlled example to see a batch job with prolog and epilog run and complete:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
python3 ./tests/test_flux_batch.py
|
|
69
|
+
```
|
|
70
|
+
```console
|
|
71
|
+
Flux Batch Module Test
|
|
72
|
+
[OK] Connected to Flux.
|
|
73
|
+
[*] Creating batch jobs...
|
|
74
|
+
[*] Mapping attributes to BatchJobspecV1...
|
|
75
|
+
[*] Previewing submission (Dryrun -> Wrapper)...
|
|
76
|
+
#!/bin/bash
|
|
77
|
+
echo 'Batch Wrapper Starting'
|
|
78
|
+
flux submit --wait /bin/echo 'Job 1 starting'
|
|
79
|
+
flux submit --wait /bin/sleep 5
|
|
80
|
+
flux submit --wait /bin/echo 'Job 2 finished'
|
|
81
|
+
flux job wait --all
|
|
82
|
+
echo 'Batch Wrapper Finished'
|
|
83
|
+
[*] Performing submission (Dryrun -> Wrapper -> Submit)...
|
|
84
|
+
[SUCCESS] Batch submitted! Flux Job ID: ƒMX29AwFu
|
|
85
|
+
```
|
|
86
|
+
```bash
|
|
87
|
+
$ flux jobs -a
|
|
88
|
+
JOBID USER NAME ST NTASKS NNODES TIME INFO
|
|
89
|
+
ƒMX29AwFu vscode test-batch R 1 1 4.213s 68e8c4399c15
|
|
90
|
+
```
|
|
91
|
+
```bash
|
|
92
|
+
$ flux jobs -a
|
|
93
|
+
JOBID USER NAME ST NTASKS NNODES TIME INFO
|
|
94
|
+
ƒMX29AwFu vscode test-batch CD 1 1 6.354s 68e8c4399c15
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Here is an explicit (manual) example to do the same:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import flux
|
|
101
|
+
import flux_batch
|
|
102
|
+
|
|
103
|
+
# for pretty printing
|
|
104
|
+
# from rich import print
|
|
105
|
+
|
|
106
|
+
handle = flux.Flux()
|
|
107
|
+
|
|
108
|
+
# Create your batch job with some number of commands
|
|
109
|
+
batch = flux_batch.BatchJobV1()
|
|
110
|
+
batch.add_job(["echo", "Job 1 starting"])
|
|
111
|
+
batch.add_job(["sleep", "5"])
|
|
112
|
+
batch.add_job(["echo", "Job 2 finished"])
|
|
113
|
+
|
|
114
|
+
# Wrap it up into a jobspec
|
|
115
|
+
jobspec = flux_batch.BatchJobspecV1.from_jobs(
|
|
116
|
+
batch,
|
|
117
|
+
nodes=1,
|
|
118
|
+
nslots=1,
|
|
119
|
+
time_limit="10m",
|
|
120
|
+
job_name="test-batch"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
# Add a prolog and epilog
|
|
124
|
+
jobspec.add_prolog("echo 'Batch Wrapper Starting'")
|
|
125
|
+
jobspec.add_epilog("echo 'Batch Wrapper Finished'")
|
|
126
|
+
|
|
127
|
+
# Add a service (this assumes user level that exists)
|
|
128
|
+
# jobspec.add_service("my-service")
|
|
129
|
+
|
|
130
|
+
# Preview it
|
|
131
|
+
print(flux_batch.submit(handle, jobspec, dry_run=True))
|
|
132
|
+
|
|
133
|
+
# Submit that bad boi.
|
|
134
|
+
jobid = flux_batch.submit(handle, jobspec)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## TODO
|
|
138
|
+
|
|
139
|
+
- Option for controlled output (that we can easily get after)
|
|
140
|
+
- Create flux-scribe service, add example (need to test on cluster with systemctl)
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
HPCIC DevTools is distributed under the terms of the MIT license.
|
|
145
|
+
All new contributions must be made under this license.
|
|
146
|
+
|
|
147
|
+
See [LICENSE](https://github.com/converged-computing/flux-batch/blob/main/LICENSE),
[COPYRIGHT](https://github.com/converged-computing/flux-batch/blob/main/COPYRIGHT), and
[NOTICE](https://github.com/converged-computing/flux-batch/blob/main/NOTICE) for details.
|
|
150
|
+
|
|
151
|
+
SPDX-License-Identifier: (MIT)
|
|
152
|
+
|
|
153
|
+
LLNL-CODE-842614
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
This work was produced under the auspices of the U.S. Department of
|
|
2
|
+
Energy by Lawrence Livermore National Laboratory under Contract
|
|
3
|
+
DE-AC52-07NA27344.
|
|
4
|
+
|
|
5
|
+
This work was prepared as an account of work sponsored by an agency of
|
|
6
|
+
the United States Government. Neither the United States Government nor
|
|
7
|
+
Lawrence Livermore National Security, LLC, nor any of their employees
|
|
8
|
+
makes any warranty, expressed or implied, or assumes any legal liability
|
|
9
|
+
or responsibility for the accuracy, completeness, or usefulness of any
|
|
10
|
+
information, apparatus, product, or process disclosed, or represents that
|
|
11
|
+
its use would not infringe privately owned rights.
|
|
12
|
+
|
|
13
|
+
Reference herein to any specific commercial product, process, or service
|
|
14
|
+
by trade name, trademark, manufacturer, or otherwise does not necessarily
|
|
15
|
+
constitute or imply its endorsement, recommendation, or favoring by the
|
|
16
|
+
United States Government or Lawrence Livermore National Security, LLC.
|
|
17
|
+
|
|
18
|
+
The views and opinions of authors expressed herein do not necessarily
|
|
19
|
+
state or reflect those of the United States Government or Lawrence
|
|
20
|
+
Livermore National Security, LLC, and shall not be used for advertising
|
|
21
|
+
or product endorsement purposes.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
flux_batch/__init__.py,sha256=ZAZF-E0SbVVY2U1_WkRGZFB4rge5BGqQZJ2MdjloZhY,230
|
|
2
|
+
flux_batch/jobspec.py,sha256=f0C3ba8kR9lS-ej-AH-q3fXd8F_WarB7qCSyIDKhhgk,3891
|
|
3
|
+
flux_batch/models.py,sha256=WACPnAvjEteLOq7Jx2zb8M2gKz5YVhz6V5pKjzRpPXI,1999
|
|
4
|
+
flux_batch/submit.py,sha256=TSDg1Dwa5HKhg7Rj9Due8hTDz5__ihyoUdGGKhtVqWo,1795
|
|
5
|
+
flux_batch/version.py,sha256=DX2m2hMpubLBjIWS8CfP-WZViMaSR2YD6eT8I_Akw8c,533
|
|
6
|
+
flux_batch/logger/__init__.py,sha256=eDdpw_uppR5mPLHE39qT_haqMxu-2wniLlJZDigRC2k,52
|
|
7
|
+
flux_batch/logger/generate.py,sha256=L9JyMY2oapp0ss7f7LGuihbLomzVJsMq7sByy9NhbZI,4017
|
|
8
|
+
flux_batch/logger/logger.py,sha256=HKymVBNcoPdX87QWy69er5wUzHVeriiKp9p0bIYboUo,5927
|
|
9
|
+
flux_batch/service/__init__.py,sha256=8IDdhIZY2B20RuJUcWsuTpwB6fm5BYxtTGot6pmn4Ag,1111
|
|
10
|
+
flux_batch/service/scribe.py,sha256=dY6geiLvXYIRcIzuP_naZscKgzX4Y5dPzxoWf9Wywg0,253
|
|
11
|
+
flux_batch/utils/__init__.py,sha256=CqMhw_mBfR0HBcHwv7LtFITq0J7LBV413VQE9xrz8ks,42
|
|
12
|
+
flux_batch/utils/fileio.py,sha256=Elz8WkNkJ9B6x7WmCwiIBW0GgsRSSFCcbuJh7aqu2z4,4879
|
|
13
|
+
flux_batch/utils/text.py,sha256=Ci1BqHs2IbOSn2o60zhLkT4kIA7CSNuGj8mdiGaDIGk,606
|
|
14
|
+
flux_batch/utils/timer.py,sha256=_Weec7Wd5hWQ1r4ZHjownG4YdoIowpVqilXhvYFmIgA,491
|
|
15
|
+
flux_batch-0.0.0.dist-info/LICENSE,sha256=AlyLB1m_z0CENCx1ob0PedLTTohtH2VLZhs2kfygrfc,1108
|
|
16
|
+
flux_batch-0.0.0.dist-info/METADATA,sha256=JGWbOpKTZIpesJokTG29z9p-x1L-tTajMlQJ7TcjQUQ,4877
|
|
17
|
+
flux_batch-0.0.0.dist-info/NOTICE,sha256=9CR93geVKl_4ZrJORbXN0fzkEM2y4DglWhY1hn9ZwQw,1167
|
|
18
|
+
flux_batch-0.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
19
|
+
flux_batch-0.0.0.dist-info/entry_points.txt,sha256=ynoKpD82xn2V2sD-aZIQoq7NnfOu9VEKqW55Y1AoPGI,67
|
|
20
|
+
flux_batch-0.0.0.dist-info/top_level.txt,sha256=jj8zAsZzMmbjiBISJL7lRtA37MSEAQYfObGLUncn9Lw,11
|
|
21
|
+
flux_batch-0.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flux_batch
|