flux-batch 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flux_batch/service/scribe/__main__.py CHANGED
@@ -26,9 +26,12 @@ class JournalScribe:
         """
         Initializes the Scribe with a synchronous DB backend and a Flux Journal Consumer.
         """
+        # Get settings from environment
+        self.init_settings()
+
         # Setup Database
         logger.info(f"Connecting to Database: {db_url}")
-        self.db = SQLAlchemyBackend(db_url)
+        self.db = SQLAlchemyBackend(db_url, self.settings["use_ssl"])
         self.db.initialize()
 
         try:
@@ -38,11 +41,22 @@ class JournalScribe:
             logger.critical(f"Failed to connect to Flux: {e}")
             sys.exit(1)
 
+        logger.info(f"🍳 Handle: {self.handle.attr_get('local-uri')}")
+
         # Initialize Journal Consumer
         # This consumes the global event log for the entire instance
         self.consumer = flux.job.JournalConsumer(self.handle)
         self.running = True
 
+    def init_settings(self):
+        """
+        Initialize settings.
+        """
+        self.settings = {
+            "use_ssl": os.environ.get("FLUX_SCRIBE_SSL") not in [None, "no", "false", "off"],
+            "uid": os.environ.get("FLUX_SCRIBE_UID"),
+        }
+
     def _normalize_event(self, event) -> dict:
         """
         Converts a Flux event object into the dictionary format expected by record_event.
@@ -79,7 +93,7 @@ class JournalScribe:
                         # We only care about events associated with a job
                         if hasattr(event, "jobid"):
                             clean_event = self._normalize_event(event)
-                            self.db.record_event("local", clean_event)
+                            self.db.record_event("local", self.settings['uid'], clean_event)
                     else:
                         # If no event, yield a tiny bit of CPU
                         time.sleep(0.01)
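
For context: the new init_settings() above reads two environment variables. FLUX_SCRIBE_SSL enables SSL for the database connection unless it is unset or set to "no", "false", or "off", and FLUX_SCRIBE_UID is an identifier stored alongside each recorded event. A small standalone sketch of how those values are interpreted (the values shown are made up, not package defaults):

import os

# Hypothetical values, set before the scribe daemon starts
os.environ["FLUX_SCRIBE_SSL"] = "yes"    # anything except unset/"no"/"false"/"off" enables SSL
os.environ["FLUX_SCRIBE_UID"] = "vsoch"  # identifier stored with each recorded event

# Mirrors the parsing in init_settings() above
settings = {
    "use_ssl": os.environ.get("FLUX_SCRIBE_SSL") not in [None, "no", "false", "off"],
    "uid": os.environ.get("FLUX_SCRIBE_UID"),
}
print(settings)  # {'use_ssl': True, 'uid': 'vsoch'}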
flux_batch/service/scribe/database.py CHANGED
@@ -8,7 +8,7 @@ from sqlalchemy.orm import sessionmaker
 from flux_batch.service.scribe.models import Base, EventModel, EventRecord, JobModel, JobRecord
 
 
-def _record_event_internal(session, cluster: str, event: Dict[str, Any]):
+def _record_event_internal(session, cluster: str, uid: str, event: Dict[str, Any]):
     """
     Shared synchronous logic for recording events.
     Used by both Sync and Async backends.
@@ -18,11 +18,13 @@ def _record_event_internal(session, cluster: str, event: Dict[str, Any]):
     data = event.get("payload", {})
     timestamp = event.get("timestamp", time.time())
 
+    # Add the new event with all metadata
     new_event = EventModel(
         job_id=job_id,
         cluster=cluster,
         timestamp=timestamp,
         event_type=event_type,
+        uid=uid,
         payload=data,
     )
     session.add(new_event)
@@ -40,6 +42,7 @@ def _record_event_internal(session, cluster: str, event: Dict[str, Any]):
                 workdir=data.get("cwd", ""),
                 submit_time=timestamp,
                 last_updated=timestamp,
+                uid=uid,
             )
             session.add(job)
         else:
@@ -63,8 +66,18 @@ class AsyncSQLAlchemyBackend:
     Asynchronous backend for the MCP Gateway.
     """
 
-    def __init__(self, db_url: str):
-        self.engine = create_async_engine(db_url, echo=False)
+    def __init__(self, db_url: str, use_ssl: bool = False):
+        # Use asyncio connection via asyncmy
+        if db_url.startswith("mysql://") or db_url.startswith("mariadb://"):
+            db_url = db_url.replace("mysql://", "mysql+asyncmy://", 1)
+            db_url = db_url.replace("mariadb://", "mysql+asyncmy://", 1)
+
+        connect_args = {"connect_timeout": 10}
+        if use_ssl:
+            print("SSL is enabled.")
+            connect_args["ssl"] = {"ssl_mode": "REQUIRED", "check_hostname": False}
+
+        self.engine = create_async_engine(db_url, echo=False, connect_args=connect_args)
         self.SessionLocal = async_sessionmaker(self.engine, expire_on_commit=False)
 
     async def initialize(self):
@@ -115,10 +128,13 @@ class SQLAlchemyBackend:
     Synchronous backend for the standalone Scribe daemon.
     """
 
-    def __init__(self, db_url: str):
-        # strip 'aiosqlite+' or similar if passed from shared config
-        url = db_url.replace("+aiosqlite", "").replace("+asyncpg", "")
-        self.engine = create_engine(url, echo=False)
+    def __init__(self, db_url: str, use_ssl: bool = False):
+        connect_args = {"connect_timeout": 10}
+        if use_ssl:
+            print("SSL is enabled.")
+            connect_args["ssl"] = {"ssl_mode": "REQUIRED", "check_hostname": False}
+
+        self.engine = create_engine(db_url, echo=False, connect_args=connect_args)
         self.SessionLocal = sessionmaker(bind=self.engine, expire_on_commit=False)
 
     def initialize(self):
@@ -127,10 +143,10 @@ class SQLAlchemyBackend:
     def close(self):
         self.engine.dispose()
 
-    def record_event(self, cluster: str, event: Dict[str, Any]):
+    def record_event(self, cluster: str, uid: str, event: Dict[str, Any]):
         with self.SessionLocal() as session:
             with session.begin():
-                _record_event_internal(session, cluster, event)
+                _record_event_internal(session, cluster, uid, event)
 
     def get_unwatched_job_ids(self, cluster: str) -> List[int]:
         """Specific for Scribe: find jobs that need a watcher."""
flux_batch/service/scribe/models.py CHANGED
@@ -16,7 +16,7 @@ class JobRecord:
     Returned by get_job() and search_jobs().
     """
 
-    job_id: int
+    job_id: str
     cluster: str
     state: str
     user: str
@@ -24,6 +24,7 @@ class JobRecord:
     exit_code: Optional[int] = None
     submit_time: float = 0.0
     last_updated: float = 0.0
+    uid: str = None
 
 
 @dataclass
@@ -36,6 +37,7 @@ class EventRecord:
     timestamp: float
     event_type: str
     payload: Dict[str, Any]
+    uid: str = None
 
 
 # Database models for SQLAlchemy ORM
@@ -49,12 +51,16 @@ class JobModel(Base):
     __tablename__ = "jobs"
 
     # Composite Primary Key
-    job_id: Mapped[int] = mapped_column(Integer, primary_key=True)
     cluster: Mapped[str] = mapped_column(String(255), primary_key=True)
+    job_id: Mapped[str] = mapped_column(String(255), primary_key=True)
+    uid: Mapped[str] = mapped_column(String(255), nullable=True)
 
     state: Mapped[str] = mapped_column(String(50))
     user: Mapped[str] = mapped_column(String(255), nullable=True)
-    workdir: Mapped[Optional[str]] = mapped_column(String, nullable=True)
+
+    # Fixed: Added length 1024 for MySQL/MariaDB compatibility
+    workdir: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)
+
     exit_code: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
     submit_time: Mapped[float] = mapped_column(Float, default=0.0)
     last_updated: Mapped[float] = mapped_column(Float, default=0.0)
@@ -72,6 +78,7 @@ class JobModel(Base):
             exit_code=self.exit_code,
             submit_time=self.submit_time,
             last_updated=self.last_updated,
+            uid=self.uid,
         )
 
 
@@ -79,16 +86,22 @@ class EventModel(Base):
     __tablename__ = "events"
 
     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
-    job_id: Mapped[int] = mapped_column(Integer, index=True)
+    job_id: Mapped[str] = mapped_column(String(255), primary_key=True)
     cluster: Mapped[str] = mapped_column(String(255), index=True)
     timestamp: Mapped[float] = mapped_column(Float)
     event_type: Mapped[str] = mapped_column(String(50))
     payload: Mapped[Dict[str, Any]] = mapped_column(JSON)
+    uid: Mapped[str] = mapped_column(String(255), nullable=True)
 
     def to_record(self) -> EventRecord:
         """
         Helper to convert ORM model to public DTO
         """
         return EventRecord(
-            timestamp=self.timestamp, event_type=self.event_type, payload=self.payload
+            timestamp=self.timestamp,
+            event_type=self.event_type,
+            payload=self.payload,
+            cluster=self.cluster,
+            job_id=self.job_id,
+            uid=self.uid,
         )
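
To make the schema change concrete: the jobs table now keys on (cluster, job_id) with job_id stored as a string, and both jobs and events carry an optional uid. A minimal sketch against an in-memory SQLite database (only the jobs table is created here; names and values are made up):

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from flux_batch.service.scribe.models import JobModel

engine = create_engine("sqlite://")
JobModel.__table__.create(engine)

with Session(engine) as session:
    session.add(JobModel(job_id="f123456", cluster="local", state="submitted", uid="vsoch"))
    session.commit()
    job = session.scalars(select(JobModel).where(JobModel.job_id == "f123456")).one()
    print(job.to_record().uid)  # vsoch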
flux_batch/service/usernetes/__init__.py ADDED
@@ -0,0 +1,2 @@
+MODULE_NAME = "flux_batch.service.usernetes"
+from .template import SERVICE_TEMPLATE, START_MODULE_TEMPLATE, STOP_MODULE_TEMPLATE
flux_batch/service/usernetes/__main__.py ADDED
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+import errno
+import logging
+import os
+import sys
+import time
+
+import flux
+import flux.job
+
+# Not necessary, but it makes it pretty
+from rich import print
+
+# Use the synchronous version of the backend to avoid asyncio-in-thread conflicts
+from flux_batch.service.scribe.database import SQLAlchemyBackend
+
+# Setup logging to stderr (to avoid polluting stdout if run manually)
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s", stream=sys.stderr
+)
+logger = logging.getLogger("flux-scribe")
+
+
+class JournalScribe:
+    def __init__(self, db_url: str):
+        """
+        Initializes the Scribe with a synchronous DB backend and a Flux Journal Consumer.
+        """
+        # Setup Database
+        logger.info(f"Connecting to Database: {db_url}")
+        self.db = SQLAlchemyBackend(db_url)
+        self.db.initialize()
+
+        try:
+            self.handle = flux.Flux()
+            logger.info("Connected to Flux instance.")
+        except Exception as e:
+            logger.critical(f"Failed to connect to Flux: {e}")
+            sys.exit(1)
+
+        logger.info(f"🍳 Handle: {self.handle.attr_get('local-uri')}")
+
+        # Initialize Journal Consumer
+        # This consumes the global event log for the entire instance
+        self.consumer = flux.job.JournalConsumer(self.handle)
+        self.running = True
+
+    def _normalize_event(self, event) -> dict:
+        """
+        Converts a Flux event object into the dictionary format expected by record_event.
+        Matches the logic provided in your EventsEngine reference.
+        """
+        # Convert the SWIG/CFFI event object to a dictionary
+        payload = dict(event)
+
+        return {
+            "id": str(getattr(event, "jobid", "unknown")),
+            "type": getattr(event, "name", "unknown"),
+            "timestamp": getattr(event, "timestamp", time.time()),
+            "payload": payload,
+            "R": getattr(event, "R", None),
+            "jobspec": getattr(event, "jobspec", None),
+        }
+
+    def run(self):
+        """
+        Main execution loop. Polls the journal and writes to the DB.
+        """
+        try:
+            logger.info("🚀 Flux Scribe (Journal Consumer) started.")
+            self.consumer.start()
+
+            while self.running:
+                try:
+                    # Non-blocking poll (100ms timeout)
+                    # This allows the loop to check for shutdown signals regularly
+                    event = self.consumer.poll(timeout=0.1)
+
+                    if event:
+                        print(event)
+                        # We only care about events associated with a job
+                        if hasattr(event, "jobid"):
+                            clean_event = self._normalize_event(event)
+                            self.db.record_event("local", clean_event)
+                    else:
+                        # If no event, yield a tiny bit of CPU
+                        time.sleep(0.01)
+
+                except EnvironmentError as e:
+                    # Ignore timeouts (no data)
+                    if e.errno == errno.ETIMEDOUT:
+                        continue
+                    logger.error(f"Flux connection error: {e}")
+                    time.sleep(1)
+
+                except Exception as e:
+                    logger.error(f"Unexpected error in event loop: {e}")
+                    time.sleep(1)
+
+        except Exception as e:
+            logger.critical(f"EventsEngine crashed: {e}")
+        finally:
+            self.db.close()
+            logger.info("EventsEngine thread exiting.")
+
+
+def main():
+    # Retrieve DB path from environment or use a default
+    db_path = os.environ.get("FLUX_SCRIBE_DATABASE", "sqlite:///server_state.db")
+    scribe = JournalScribe(db_path)
+    scribe.run()
+
+
+if __name__ == "__main__":
+    main()
flux_batch/service/usernetes/database.py ADDED
@@ -0,0 +1,151 @@
+import time
+from typing import Any, Dict, List, Optional
+
+from sqlalchemy import and_, create_engine, select, update
+from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
+from sqlalchemy.orm import sessionmaker
+
+from flux_batch.service.scribe.models import Base, EventModel, EventRecord, JobModel, JobRecord
+
+
+def _record_event_internal(session, cluster: str, event: Dict[str, Any]):
+    """
+    Shared synchronous logic for recording events.
+    Used by both Sync and Async backends.
+    """
+    job_id = event.get("id")
+    event_type = event.get("type")
+    data = event.get("payload", {})
+    timestamp = event.get("timestamp", time.time())
+
+    # Add the new event with all metadata
+    new_event = EventModel(
+        job_id=job_id,
+        cluster=cluster,
+        timestamp=timestamp,
+        event_type=event_type,
+        payload=data,
+    )
+    session.add(new_event)
+
+    if event_type == "submit":
+        stmt = select(JobModel).where(and_(JobModel.job_id == job_id, JobModel.cluster == cluster))
+        job = session.execute(stmt).scalar_one_or_none()
+
+        if not job:
+            job = JobModel(
+                job_id=job_id,
+                cluster=cluster,
+                user=str(data.get("userid", "unknown")),
+                state="submitted",
+                workdir=data.get("cwd", ""),
+                submit_time=timestamp,
+                last_updated=timestamp,
+            )
+            session.add(job)
+        else:
+            job.state = "submitted"
+            job.last_updated = timestamp
+
+    # state transitions
+    elif event_type == "state" or (event_type and event_type.endswith(".finish")):
+        state_name = data.get("state_name", event_type)
+        stmt = select(JobModel).where(and_(JobModel.job_id == job_id, JobModel.cluster == cluster))
+        job = session.execute(stmt).scalar_one_or_none()
+        if job:
+            job.state = state_name
+            job.last_updated = time.time()
+            if "status" in data:
+                job.exit_code = data["status"]
+
+
+class AsyncSQLAlchemyBackend:
+    """
+    Asynchronous backend for the MCP Gateway.
+    """
+
+    def __init__(self, db_url: str):
+        self.engine = create_async_engine(db_url, echo=False)
+        self.SessionLocal = async_sessionmaker(self.engine, expire_on_commit=False)
+
+    async def initialize(self):
+        async with self.engine.begin() as conn:
+            await conn.run_sync(Base.metadata.create_all)
+
+    async def close(self):
+        await self.engine.dispose()
+
+    async def record_event(self, cluster: str, event: Dict[str, Any]):
+        async with self.SessionLocal() as session:
+            # run_sync bridges our shared logic into the async session
+            await session.run_sync(_record_event_internal, cluster, event)
+            await session.commit()
+
+    async def get_job(self, cluster: str, job_id: int) -> Optional[JobRecord]:
+        async with self.SessionLocal() as session:
+            result = await session.execute(
+                select(JobModel).where(and_(JobModel.job_id == job_id, JobModel.cluster == cluster))
+            )
+            job = result.scalar_one_or_none()
+            return job.to_record() if job else None
+
+    async def get_event_history(self, cluster: str, job_id: int) -> List[EventRecord]:
+        async with self.SessionLocal() as session:
+            result = await session.execute(
+                select(EventModel)
+                .where(and_(EventModel.job_id == job_id, EventModel.cluster == cluster))
+                .order_by(EventModel.timestamp.asc())
+            )
+            return [e.to_record() for e in result.scalars().all()]
+
+    async def search_jobs(
+        self, cluster: str = None, state: str = None, limit: int = 10
+    ) -> List[JobRecord]:
+        async with self.SessionLocal() as session:
+            stmt = select(JobModel)
+            if cluster:
+                stmt = stmt.where(JobModel.cluster == cluster)
+            if state:
+                stmt = stmt.where(JobModel.state == state)
+            result = await session.execute(stmt.limit(limit))
+            return [j.to_record() for j in result.scalars().all()]
+
+
+class SQLAlchemyBackend:
+    """
+    Synchronous backend for the standalone Scribe daemon.
+    """
+
+    def __init__(self, db_url: str):
+        # strip 'aiosqlite+' or similar if passed from shared config
+        url = db_url.replace("+aiosqlite", "").replace("+asyncpg", "")
+        self.engine = create_engine(url, echo=False)
+        self.SessionLocal = sessionmaker(bind=self.engine, expire_on_commit=False)
+
+    def initialize(self):
+        Base.metadata.create_all(self.engine)
+
+    def close(self):
+        self.engine.dispose()
+
+    def record_event(self, cluster: str, event: Dict[str, Any]):
+        with self.SessionLocal() as session:
+            with session.begin():
+                _record_event_internal(session, cluster, event)
+
+    def get_unwatched_job_ids(self, cluster: str) -> List[int]:
+        """Specific for Scribe: find jobs that need a watcher."""
+        with self.SessionLocal() as session:
+            stmt = select(JobModel.job_id).where(
+                and_(JobModel.cluster == cluster, JobModel.state == "submitted")
+            )
+            return list(session.execute(stmt).scalars().all())
+
+    def mark_job_as_watched(self, cluster: str, job_id: int):
+        with self.SessionLocal() as session:
+            with session.begin():
+                session.execute(
+                    update(JobModel)
+                    .where(and_(JobModel.job_id == job_id, JobModel.cluster == cluster))
+                    .values(state="watching")
+                )
flux_batch/service/usernetes/models.py ADDED
@@ -0,0 +1,98 @@
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+from sqlalchemy import JSON, Float, Integer, String
+from sqlalchemy.ext.asyncio import AsyncAttrs
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+# DTOs are "Public Data Transfer Objects" and they are used by
+# our interfaces and tools
+
+
+@dataclass
+class JobRecord:
+    """
+    Represents a snapshot of a job state.
+    Returned by get_job() and search_jobs().
+    """
+
+    job_id: int
+    cluster: str
+    state: str
+    user: str
+    workdir: Optional[str] = None
+    exit_code: Optional[int] = None
+    submit_time: float = 0.0
+    last_updated: float = 0.0
+
+
+@dataclass
+class EventRecord:
+    """
+    Represents a single historical event.
+    Returned by get_event_history().
+    """
+
+    timestamp: float
+    event_type: str
+    payload: Dict[str, Any]
+
+
+# Database models for SQLAlchemy ORM
+
+
+class Base(AsyncAttrs, DeclarativeBase):
+    pass
+
+
+class JobModel(Base):
+    __tablename__ = "jobs"
+
+    # Composite Primary Key
+    job_id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    cluster: Mapped[str] = mapped_column(String(255), primary_key=True)
+
+    state: Mapped[str] = mapped_column(String(50))
+    user: Mapped[str] = mapped_column(String(255), nullable=True)
+    workdir: Mapped[Optional[str]] = mapped_column(String, nullable=True)
+    exit_code: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+    submit_time: Mapped[float] = mapped_column(Float, default=0.0)
+    last_updated: Mapped[float] = mapped_column(Float, default=0.0)
+
+    def to_record(self) -> JobRecord:
+        """
+        Helper to convert ORM model to public DTO
+        """
+        return JobRecord(
+            job_id=self.job_id,
+            cluster=self.cluster,
+            state=self.state,
+            user=self.user,
+            workdir=self.workdir,
+            exit_code=self.exit_code,
+            submit_time=self.submit_time,
+            last_updated=self.last_updated,
+        )
+
+
+class EventModel(Base):
+    __tablename__ = "events"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    job_id: Mapped[int] = mapped_column(Integer, index=True)
+    cluster: Mapped[str] = mapped_column(String(255), index=True)
+    timestamp: Mapped[float] = mapped_column(Float)
+    event_type: Mapped[str] = mapped_column(String(50))
+    payload: Mapped[Dict[str, Any]] = mapped_column(JSON)
+
+    def to_record(self) -> EventRecord:
+        """
+        Helper to convert ORM model to public DTO
+        """
+        return EventRecord(
+            timestamp=self.timestamp,
+            event_type=self.event_type,
+            payload=self.payload,
+            cluster=self.cluster,
+            job_id=self.job_id,
+        )
flux_batch/service/usernetes/template.py ADDED
@@ -0,0 +1,53 @@
+# Template for the Scribe Journal Consumer
+SERVICE_TEMPLATE = """[Unit]
+Description=Flux Scribe Journal Consumer
+After=network.target
+
+[Service]
+ExecStart={python_path} -m flux_batch.service.scribe
+Restart=on-failure
+
+[Install]
+WantedBy=default.target
+"""
+
+START_MODULE_TEMPLATE = """
+from flux.modprobe import task
+import flux.subprocess as subprocess
+
+@task(
+    "start-{service_name}",
+    ranks="0",
+    needs_config=["{service_name}"],
+    after=["resource", "job-list"],
+)
+def start_{service_func}(context):
+    # This triggers the systemd user service provisioned earlier
+    # context.bash("systemctl --user start {service_name}")
+    subprocess.rexec_bg(
+        context.handle,
+        ["{python_bin}", "-m", "{module_name}"],
+        label="{service_name}",
+        nodeid=0
+    )
+"""
+
+STOP_MODULE_TEMPLATE = """
+from flux.modprobe import task
+import flux.subprocess as subprocess
+
+@task(
+    "stop-{service_name}",
+    ranks="0",
+    needs_config=["{service_name}"],
+    before=["resource", "job-list"],
+)
+def stop_{service_func}(context):
+    # context.bash("systemctl --user stop {service_name}")
+    subprocess.kill(context.handle, signum=2, label="{service_name}").get()
+    try:
+        status = subprocess.wait(context.handle, label="{service_name}").get()["status"]
+        print(status)
+    except:
+        pass
+"""
flux_batch/version.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.0.11"
+__version__ = "0.0.13"
 AUTHOR = "Vanessa Sochat"
 AUTHOR_EMAIL = "vsoch@users.noreply.github.com"
 NAME = "flux-batch"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flux-batch
-Version: 0.0.11
+Version: 0.0.13
 Summary: Python SDK for flux batch jobs and services
 Home-page: https://github.com/converged-computing/flux-batch
 Author: Vanessa Sochat
@@ -47,6 +47,8 @@ Requires-Dist: rich ; extra == 'scribe'
 - [fractale-mcp](https://github.com/compspec/fractale-mcp): (fractale) MCP orchestration (agents, databases, ui interfaces).
 - [hpc-mcp](https://github.com/converged-computing/hpc-mcp): HPC tools for a larger set of HPC and converged computing use cases.
 
+If you are looking for `flux batch` please see the documentation [here](https://flux-framework.readthedocs.io/projects/flux-core/en/latest/man1/flux-batch.html). This library supports Flux Framework and is experimental.
+
 ## Services
 
 - **flux-scribe**: Write job events to a local sqlite database via the JournalConsumer (not added yet, written and needs testing)
@@ -135,7 +137,7 @@ batch.add_job(["sleep", "5"])
 batch.add_job(["echo", "Job 2 finished"])
 
 # Wrap it up into a jobspec
-jobspec = flux_batch.BatchJobspecV1.from_jobs(
+spec = flux_batch.BatchJobspecV1.from_jobs(
     batch,
     nodes=1,
     nslots=1,
@@ -146,14 +148,15 @@ jobspec = flux_batch.BatchJobspecV1.from_jobs(
 )
 
 # Add a prolog and epilog
-jobspec.add_prolog("echo 'Batch Wrapper Starting'")
-jobspec.add_epilog("echo 'Batch Wrapper Finished'")
+spec.add_prolog("echo 'Batch Wrapper Starting'")
+spec.add_epilog("echo 'Batch Wrapper Finished'")
 
 # Add a service (this assumes user level that exists)
-jobspec.add_service("flux-scribe")
+spec.add_service("flux-scribe")
 
-# Preview it
-print(flux_batch.submit(handle, jobspec, dry_run=True))
+# Preview it (batch wrapper), or generate the jobspec (json)
+print(flux_batch.submit(handle, spec, dry_run=True))
+jobspec = flux_batch.jobspec(spec)
 
 # Submit that bad boi.
 jobid = flux_batch.submit(handle, jobspec)
@@ -2,7 +2,7 @@ flux_batch/__init__.py,sha256=35032OVyHkBiQIptg7NBY_6mdhthFu1hpqOCsREm5lQ,280
 flux_batch/jobspec.py,sha256=MUMI4Y_DgjCRssgzUDagTTFw8L3t4L_5az1E3tmT1FI,5464
 flux_batch/models.py,sha256=JjrFqi4Skrop_cyIWfgAHTdY53kotkiGD0JpTnVlByI,2200
 flux_batch/submit.py,sha256=tNJMDvnsxbAuXg_5TkB8HurzQ9I-Dot_gXXpIU5LtTA,2654
-flux_batch/version.py,sha256=h-6jDEwrQuFPdUK7HJbFYJISN7mt1MwBKK0UnnS1-cM,643
+flux_batch/version.py,sha256=W-9wI-nhKTdrBIA6HgtR7eb_HyV0qAzlIAHd9UibgX4,643
 flux_batch/logger/__init__.py,sha256=eDdpw_uppR5mPLHE39qT_haqMxu-2wniLlJZDigRC2k,52
 flux_batch/logger/generate.py,sha256=L9JyMY2oapp0ss7f7LGuihbLomzVJsMq7sByy9NhbZI,4017
 flux_batch/logger/logger.py,sha256=HKymVBNcoPdX87QWy69er5wUzHVeriiKp9p0bIYboUo,5927
@@ -11,18 +11,23 @@ flux_batch/script/save_logs.sh,sha256=HeapqvL0iR8aX7LtbwlcFTy19j5WxkQ-1M2J9epu-E
 flux_batch/service/__init__.py,sha256=pD7RYpdSLZvYU4qwShYXA6UcaJy191fKtzqJL3uttEc,2724
 flux_batch/service/scribe.py,sha256=dY6geiLvXYIRcIzuP_naZscKgzX4Y5dPzxoWf9Wywg0,253
 flux_batch/service/scribe/__init__.py,sha256=773-AzF_WRY6udG5nQSexf7Vga4K1YZLy1sfQAEd1uo,126
-flux_batch/service/scribe/__main__.py,sha256=3S0dyhkHk-bPT_Z0laNUg-HydrCFql4hPOV2ZNF5rO0,3777
-flux_batch/service/scribe/database.py,sha256=EB8OEMfNvfCplGaz-ZNMsfIpd305eP-mfCvSd-fg_k4,5626
-flux_batch/service/scribe/models.py,sha256=7lUrRosnQ2douFL_xD9GMYex4Z4lkN-CcBeWDhzmD8c,2668
+flux_batch/service/scribe/__main__.py,sha256=tyaETaimVptrg1qfWasgcavhh0AFj0kGAhFp2LKtwm4,4236
+flux_batch/service/scribe/database.py,sha256=0BImmobV8yzskcu4kagQCLqYDzUAtDyxUUJRlj-Yf2A,6343
+flux_batch/service/scribe/models.py,sha256=9eHYp_lO-oVYidY3G3khA1mDgm2AzmuPTpzPGEgxruA,3066
 flux_batch/service/scribe/template.py,sha256=yDsheid2FXeYr458loxB3HyZKSWpGQpM8iUQfnfOA-s,1336
+flux_batch/service/usernetes/__init__.py,sha256=BvrHIsjevkT-nFqUKcn9VKwvtUGA1G48jqk4z0O9xK4,129
+flux_batch/service/usernetes/__main__.py,sha256=xQHIxRd2pjGxpVcIyv2tq88ts3e2IsmhrQuMVG-wrOw,3851
+flux_batch/service/usernetes/database.py,sha256=kagh_YJFpKMtixJ5-Zs762vLFDGEU7Fq9lOYEgeKPmo,5668
+flux_batch/service/usernetes/models.py,sha256=wVHgX5X9PUpUSDtZtMaz1M_fGC1Vyv8JdUt58AoEA3c,2759
+flux_batch/service/usernetes/template.py,sha256=yDsheid2FXeYr458loxB3HyZKSWpGQpM8iUQfnfOA-s,1336
 flux_batch/utils/__init__.py,sha256=CqMhw_mBfR0HBcHwv7LtFITq0J7LBV413VQE9xrz8ks,42
 flux_batch/utils/fileio.py,sha256=Elz8WkNkJ9B6x7WmCwiIBW0GgsRSSFCcbuJh7aqu2z4,4879
 flux_batch/utils/text.py,sha256=Ci1BqHs2IbOSn2o60zhLkT4kIA7CSNuGj8mdiGaDIGk,606
 flux_batch/utils/timer.py,sha256=_Weec7Wd5hWQ1r4ZHjownG4YdoIowpVqilXhvYFmIgA,491
-flux_batch-0.0.11.dist-info/LICENSE,sha256=AlyLB1m_z0CENCx1ob0PedLTTohtH2VLZhs2kfygrfc,1108
-flux_batch-0.0.11.dist-info/METADATA,sha256=A0LLvVm-RKGxSM26HMfHlOKe5OxF22O7APLNWfvIBj0,5352
-flux_batch-0.0.11.dist-info/NOTICE,sha256=9CR93geVKl_4ZrJORbXN0fzkEM2y4DglWhY1hn9ZwQw,1167
-flux_batch-0.0.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-flux_batch-0.0.11.dist-info/entry_points.txt,sha256=ynoKpD82xn2V2sD-aZIQoq7NnfOu9VEKqW55Y1AoPGI,67
-flux_batch-0.0.11.dist-info/top_level.txt,sha256=jj8zAsZzMmbjiBISJL7lRtA37MSEAQYfObGLUncn9Lw,11
-flux_batch-0.0.11.dist-info/RECORD,,
+flux_batch-0.0.13.dist-info/LICENSE,sha256=AlyLB1m_z0CENCx1ob0PedLTTohtH2VLZhs2kfygrfc,1108
+flux_batch-0.0.13.dist-info/METADATA,sha256=Y0somwoZuJzlzyT1KmkwWliawzy9foVPGd4Y2UZt4J0,5641
+flux_batch-0.0.13.dist-info/NOTICE,sha256=9CR93geVKl_4ZrJORbXN0fzkEM2y4DglWhY1hn9ZwQw,1167
+flux_batch-0.0.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+flux_batch-0.0.13.dist-info/entry_points.txt,sha256=ynoKpD82xn2V2sD-aZIQoq7NnfOu9VEKqW55Y1AoPGI,67
+flux_batch-0.0.13.dist-info/top_level.txt,sha256=jj8zAsZzMmbjiBISJL7lRtA37MSEAQYfObGLUncn9Lw,11
+flux_batch-0.0.13.dist-info/RECORD,,