fdsx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fdsx/__init__.py +8 -0
- fdsx/checkpoint/__init__.py +1 -0
- fdsx/checkpoint/manager.py +337 -0
- fdsx/cli/__init__.py +1 -0
- fdsx/cli/main.py +366 -0
- fdsx/core/__init__.py +1 -0
- fdsx/core/batch.py +527 -0
- fdsx/core/compiler/__init__.py +42 -0
- fdsx/core/compiler/aggregation.py +45 -0
- fdsx/core/compiler/compile.py +412 -0
- fdsx/core/compiler/execution.py +163 -0
- fdsx/core/compiler/helpers.py +218 -0
- fdsx/core/compiler/nodes.py +283 -0
- fdsx/core/compiler/parallel.py +306 -0
- fdsx/core/compiler/routing.py +47 -0
- fdsx/core/config.py +233 -0
- fdsx/core/engine/__init__.py +41 -0
- fdsx/core/engine/batch.py +126 -0
- fdsx/core/engine/interrupts.py +59 -0
- fdsx/core/engine/results.py +74 -0
- fdsx/core/engine/resume.py +231 -0
- fdsx/core/engine/run.py +189 -0
- fdsx/core/engine/signals.py +156 -0
- fdsx/core/engine/tasks_dir.py +415 -0
- fdsx/core/engine/validate.py +24 -0
- fdsx/core/extraction.py +253 -0
- fdsx/core/graph_utils.py +52 -0
- fdsx/core/hooks.py +200 -0
- fdsx/core/loader.py +178 -0
- fdsx/core/paths.py +40 -0
- fdsx/core/selector.py +430 -0
- fdsx/core/thread_id.py +8 -0
- fdsx/core/variables.py +390 -0
- fdsx/display/__init__.py +1 -0
- fdsx/display/terminal.py +674 -0
- fdsx/logging/__init__.py +6 -0
- fdsx/logging/recorder.py +204 -0
- fdsx/logging/stream_logger.py +99 -0
- fdsx/models/__init__.py +1 -0
- fdsx/models/flow.py +485 -0
- fdsx/models/task.py +229 -0
- fdsx/models/validators.py +30 -0
- fdsx/notify/__init__.py +1 -0
- fdsx/notify/webhook.py +72 -0
- fdsx/providers/__init__.py +1 -0
- fdsx/providers/base.py +381 -0
- fdsx/providers/claude.py +294 -0
- fdsx/providers/codex.py +236 -0
- fdsx/providers/opencode.py +96 -0
- fdsx/providers/system.py +50 -0
- fdsx-0.1.0.dist-info/METADATA +197 -0
- fdsx-0.1.0.dist-info/RECORD +56 -0
- fdsx-0.1.0.dist-info/WHEEL +5 -0
- fdsx-0.1.0.dist-info/entry_points.txt +2 -0
- fdsx-0.1.0.dist-info/licenses/LICENSE +21 -0
- fdsx-0.1.0.dist-info/top_level.txt +1 -0
fdsx/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Checkpoint management for flow execution."""
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from langchain_core.runnables.config import RunnableConfig
|
|
9
|
+
from langgraph.checkpoint.base import Checkpoint
|
|
10
|
+
from langgraph.checkpoint.sqlite import SqliteSaver
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _extract_meta_from_checkpoint(
    checkpoint_data: Checkpoint | dict[str, Any],
) -> dict[str, Any]:
    """Pull the ``_meta`` dict out of a checkpoint's channel_values.

    Handles both storage layouts: named channels (TypedDict schema),
    where ``_meta`` sits directly in ``channel_values``, and the
    ``__root__`` layout (object schema), where it is nested one level
    down.  Returns an empty dict when no ``_meta`` mapping is found.
    """
    channels = checkpoint_data.get("channel_values", {})

    # Candidate containers, checked in priority order: the channel mapping
    # itself (named-channel layout), then the nested __root__ mapping
    # (object-schema layout) when that value is actually a dict.
    containers = [channels]
    root = channels.get("__root__")
    if isinstance(root, dict):
        containers.append(root)

    for container in containers:
        candidate = container.get("_meta")
        if isinstance(candidate, dict):
            return candidate
    return {}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Thread IDs are embedded in lock-file names; restricting them to this
# character set (letters, digits, underscore, hyphen) prevents path
# traversal out of the locks directory.
_SAFE_THREAD_ID = re.compile(r"^[a-zA-Z0-9_\-]+$")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CheckpointManager:
    """Manages checkpoints and PID-based locks for flow execution.

    Wraps LangGraph's SqliteSaver to provide:
    - Checkpoint persistence to SQLite
    - PID-based lock files to prevent concurrent execution
    - Stale lock detection and cleanup
    - Thread listing functionality
    """

    # Default location for all state, relative to the current working directory.
    DEFAULT_BASE_DIR = Path(".fdsx")

    def __init__(self, base_dir: Path | None = None):
        """Initialize the CheckpointManager.

        Args:
            base_dir: Base directory for checkpoints and locks.
                Defaults to '.fdsx/' relative to CWD.
        """
        self.base_dir = base_dir if base_dir is not None else self.DEFAULT_BASE_DIR
        self.checkpoints_dir = self.base_dir / "checkpoints"
        self.locks_dir = self.base_dir / "locks"

        # mode=0o700: state may contain sensitive flow data, so keep the
        # directories owner-only.  exist_ok makes construction idempotent.
        self.base_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
        self.checkpoints_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
        self.locks_dir.mkdir(parents=True, exist_ok=True, mode=0o700)

    def get_checkpointer(self) -> SqliteSaver:
        """Get a SqliteSaver checkpointer for the checkpoint directory.

        Returns:
            SqliteSaver configured to use the checkpoints database
        """
        db_path = self.checkpoints_dir / "checkpoints.db"
        # check_same_thread=False: the saver may be used from worker threads.
        conn = sqlite3.connect(str(db_path), check_same_thread=False)
        # Best-effort tightening of the DB file permissions; failure (e.g. on
        # filesystems without POSIX perms) is deliberately non-fatal.
        try:
            os.chmod(str(db_path), 0o600)
        except OSError:
            pass
        return SqliteSaver(conn)

    def _get_lock_path(self, thread_id: str) -> Path:
        """Get the path to the lock file for a thread.

        Raises:
            ValueError: If thread_id contains unsafe characters or escapes locks_dir.
        """
        if not _SAFE_THREAD_ID.match(thread_id):
            raise ValueError(f"Invalid thread ID: {thread_id!r}")
        lock_path = (self.locks_dir / f"{thread_id}.lock").resolve()
        # Defense in depth: even after the regex check, verify the resolved
        # path is still inside the locks directory.
        if not str(lock_path).startswith(str(self.locks_dir.resolve())):
            raise ValueError(f"Thread ID escapes lock directory: {thread_id!r}")
        return lock_path

    def _create_lock_file(self, lock_path: Path) -> bool:
        """Atomically create a lock file and write the current PID.

        Returns:
            True if the file was created, False if it already exists.
        """
        try:
            # O_CREAT|O_EXCL makes creation atomic: exactly one process wins.
            fd = os.open(str(lock_path), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
            try:
                os.write(fd, str(os.getpid()).encode())
            finally:
                os.close(fd)
            return True
        except FileExistsError:
            return False

    def acquire_lock(self, thread_id: str) -> bool:
        """Acquire a lock for the given thread ID.

        Uses O_CREAT|O_EXCL for atomic creation to prevent TOCTOU race conditions.
        Automatically recovers stale locks from dead processes.

        Args:
            thread_id: The thread ID to lock

        Returns:
            True if lock was acquired, False if already locked by alive process
        """
        lock_path = self._get_lock_path(thread_id)

        if self._create_lock_file(lock_path):
            return True

        # Lock file already exists — check if the owning process is still alive
        try:
            with open(lock_path) as f:
                pid = int(f.read().strip())
            try:
                # Signal 0 performs liveness/permission checks without
                # delivering a signal.
                os.kill(pid, 0)
                # Process is alive — lock is legitimately held
                return False
            except OSError:
                # Process is dead — stale lock
                logger.warning(
                    "Removing stale lock for thread %r (dead PID %d)", thread_id, pid
                )
        except (ValueError, IOError):
            # Corrupt or empty lock file — treat as stale
            logger.warning("Removing corrupt lock file for thread %r", thread_id)

        # Remove the stale/corrupt lock and retry once.
        # NOTE(review): between unlink and re-create another process could
        # grab the lock; the single retry keeps this window honest (we
        # return False if we lose the race).
        lock_path.unlink(missing_ok=True)
        return self._create_lock_file(lock_path)

    def release_lock(self, thread_id: str) -> None:
        """Release the lock for the given thread ID.

        Args:
            thread_id: The thread ID to unlock
        """
        lock_path = self._get_lock_path(thread_id)
        # missing_ok: releasing an already-released lock is a no-op.
        lock_path.unlink(missing_ok=True)

    def is_locked(self, thread_id: str) -> tuple[bool, int | None]:
        """Check if a thread is locked.

        Args:
            thread_id: The thread ID to check

        Returns:
            Tuple of (is_locked, pid) where pid is the locking PID if locked
        """
        lock_path = self._get_lock_path(thread_id)

        if not lock_path.exists():
            return False, None

        try:
            with open(lock_path) as f:
                pid = int(f.read().strip())
            try:
                # Liveness probe; a stale lock (dead PID) reports as unlocked.
                os.kill(pid, 0)
                return True, pid
            except OSError:
                return False, None
        except (ValueError, IOError):
            # Corrupt/empty lock file counts as unlocked.
            return False, None

    def verify_checkpoint(self, thread_id: str) -> bool:
        """Verify checkpoint integrity for a thread ID.

        Args:
            thread_id: The thread ID to verify

        Returns:
            True if checkpoint is valid, False otherwise
        """
        db_path = self.checkpoints_dir / "checkpoints.db"
        if not db_path.exists():
            return False

        try:
            # NOTE(review): conn is not closed if execute/fetch raises —
            # consider contextlib.closing or try/finally.
            conn = sqlite3.connect(str(db_path))
            cursor = conn.cursor()
            cursor.execute(
                "SELECT COUNT(*) FROM checkpoints WHERE thread_id = ?",
                (thread_id,),
            )
            count = cursor.fetchone()[0]
            conn.close()
            return bool(count > 0)
        except Exception:
            # Any DB error (missing table, corruption) reads as "not valid".
            return False

    def list_threads(self) -> list[dict[str, Any]]:
        """List all known thread executions.

        Merges threads from the checkpoint database and from run log directories
        under <base_dir>/runs/.

        Returns:
            List of thread info dictionaries with thread_id, status, flow_name
        """
        # Local import avoids a module-level import cycle with the logging
        # package.
        from fdsx.logging.recorder import RUNS_DIR_NAME, RUN_FILENAME

        # Collect thread IDs from checkpoint DB
        checkpoint_thread_ids: list[str] = []
        db_path = self.checkpoints_dir / "checkpoints.db"
        if db_path.exists():
            try:
                conn = sqlite3.connect(str(db_path), check_same_thread=False)
                cursor = conn.cursor()
                cursor.execute("SELECT DISTINCT thread_id FROM checkpoints")
                checkpoint_thread_ids = [row[0] for row in cursor.fetchall()]
                conn.close()
            except Exception:
                # Unreadable DB: fall back to run-log discovery only.
                pass

        # Collect thread IDs from run log directories
        runs_dir = self.base_dir / RUNS_DIR_NAME
        run_log_thread_ids: list[str] = []
        if runs_dir.is_dir():
            for entry in runs_dir.iterdir():
                if entry.is_dir() and (entry / RUN_FILENAME).is_file():
                    run_log_thread_ids.append(entry.name)

        # Merge, preserving checkpoint-DB entries first, then run-log-only entries
        seen: set[str] = set(checkpoint_thread_ids)
        all_thread_ids = list(checkpoint_thread_ids)
        for tid in run_log_thread_ids:
            if tid not in seen:
                seen.add(tid)
                all_thread_ids.append(tid)

        if not all_thread_ids:
            return []

        try:
            threads = []
            checkpointer = self.get_checkpointer() if db_path.exists() else None
            for thread_id in all_thread_ids:
                is_locked, pid = self.is_locked(thread_id)
                status = "running" if is_locked else "stopped"
                flow_name = thread_id  # fallback default

                current_state = ""
                started_at = ""
                config: RunnableConfig = {"configurable": {"thread_id": thread_id}}
                try:
                    checkpoint_tuple = (
                        checkpointer.get_tuple(config)
                        if checkpointer is not None
                        else None
                    )
                    if checkpoint_tuple is not None:
                        checkpoint_data = checkpoint_tuple.checkpoint
                        meta = _extract_meta_from_checkpoint(checkpoint_data)
                        flow_name = meta.get("flow_name", thread_id)
                        if not is_locked:
                            # Refine "stopped" using pending writes: an
                            # "__error__" write means the flow died; pending
                            # non-error writes mean it is waiting to resume;
                            # no pending writes means it finished.
                            if checkpoint_tuple.pending_writes:
                                has_error = any(
                                    pw[1] == "__error__"
                                    for pw in checkpoint_tuple.pending_writes
                                    if isinstance(pw, (list, tuple)) and len(pw) >= 2
                                )
                                status = "stopped" if has_error else "waiting"
                            else:
                                status = "completed"
                        # Extract current_state from checkpoint.
                        # For stopped/waiting flows, prefer _meta.next_state (the node
                        # about to execute when the crash/interrupt happened).
                        # For completed/running flows, use last entry in versions_seen.
                        if status in ("stopped", "waiting"):
                            next_state_val = meta.get("next_state", "")
                            if next_state_val and next_state_val != "__end__":
                                current_state = next_state_val
                            else:
                                versions_seen = checkpoint_data.get("versions_seen", {})
                                if isinstance(versions_seen, dict) and versions_seen:
                                    current_state = list(versions_seen.keys())[-1]
                        else:
                            versions_seen = checkpoint_data.get("versions_seen", {})
                            if isinstance(versions_seen, dict) and versions_seen:
                                current_state = list(versions_seen.keys())[-1]
                        # Extract started_at from checkpoint ts
                        # (ISO timestamp trimmed to "YYYY-MM-DD HH:MM").
                        ts = checkpoint_data.get("ts", "")
                        if ts and "T" in str(ts):
                            started_at = str(ts)[:16].replace("T", " ")
                except Exception:
                    # Best-effort enrichment; an unreadable checkpoint must
                    # not hide the thread from the listing.
                    pass

                # Fallback: read flow_name and started_at from run log when
                # the checkpoint did not provide them.
                if flow_name == thread_id or not started_at:
                    try:
                        import json

                        run_log_path = runs_dir / thread_id / RUN_FILENAME
                        if run_log_path.is_file():
                            with open(run_log_path, "r") as f:
                                run_log = json.load(f)
                            if flow_name == thread_id:
                                flow_name = run_log.get("flow_name", thread_id)
                            if not started_at:
                                ts_str = run_log.get("started_at", "")
                                if ts_str and "T" in ts_str:
                                    started_at = ts_str[:16].replace("T", " ")
                            if not is_locked and flow_name != thread_id:
                                # Run-log-only thread: derive status from log status
                                log_status = run_log.get("status", "")
                                if log_status == "completed":
                                    status = "completed"
                    except (json.JSONDecodeError, OSError, KeyError):
                        pass

                threads.append(
                    {
                        "thread_id": thread_id,
                        "status": status,
                        "flow_name": flow_name,
                        "current_state": current_state,
                        "started_at": started_at,
                    }
                )
            return threads
        except Exception:
            # Catastrophic failure (e.g. checkpointer construction): report
            # no threads rather than crash the caller.
            return []
|
fdsx/cli/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI module for fdsx."""
|