pyworkflow-engine 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +18 -0
- pyworkflow/celery/tasks.py +61 -71
- pyworkflow/storage/base.py +36 -0
- pyworkflow/storage/cassandra.py +34 -0
- pyworkflow/storage/dynamodb.py +34 -0
- pyworkflow/storage/file.py +52 -0
- pyworkflow/storage/memory.py +37 -0
- pyworkflow/storage/migrations/__init__.py +15 -0
- pyworkflow/storage/migrations/base.py +299 -0
- pyworkflow/storage/mysql.py +186 -5
- pyworkflow/storage/postgres.py +197 -6
- pyworkflow/storage/sqlite.py +171 -5
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/RECORD +19 -17
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/top_level.txt +0 -0
pyworkflow/storage/postgres.py
CHANGED
@@ -23,6 +23,7 @@ import asyncpg
 
 from pyworkflow.engine.events import Event, EventType
 from pyworkflow.storage.base import StorageBackend
+from pyworkflow.storage.migrations import Migration, MigrationRegistry, MigrationRunner
 from pyworkflow.storage.schemas import (
     Hook,
     HookStatus,
@@ -37,6 +38,115 @@ from pyworkflow.storage.schemas import (
 )
 
 
+class PostgresMigrationRunner(MigrationRunner):
+    """PostgreSQL-specific migration runner."""
+
+    def __init__(self, pool: asyncpg.Pool, registry: MigrationRegistry | None = None) -> None:
+        super().__init__(registry)
+        self._pool = pool
+
+    async def ensure_schema_versions_table(self) -> None:
+        """Create schema_versions table if it doesn't exist."""
+        async with self._pool.acquire() as conn:
+            await conn.execute("""
+                CREATE TABLE IF NOT EXISTS schema_versions (
+                    version INTEGER PRIMARY KEY,
+                    applied_at TIMESTAMPTZ NOT NULL,
+                    description TEXT
+                )
+            """)
+
+    async def get_current_version(self) -> int:
+        """Get the highest applied migration version."""
+        async with self._pool.acquire() as conn:
+            row = await conn.fetchrow(
+                "SELECT COALESCE(MAX(version), 0) as version FROM schema_versions"
+            )
+            return row["version"] if row else 0
+
+    async def detect_existing_schema(self) -> bool:
+        """Check if the events table exists (pre-versioning database)."""
+        async with self._pool.acquire() as conn:
+            row = await conn.fetchrow("""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables
+                    WHERE table_name = 'events'
+                ) as exists
+            """)
+            return row["exists"] if row else False
+
+    async def record_baseline_version(self, version: int, description: str) -> None:
+        """Record a baseline version without running migrations."""
+        async with self._pool.acquire() as conn:
+            await conn.execute(
+                """
+                INSERT INTO schema_versions (version, applied_at, description)
+                VALUES ($1, $2, $3)
+                ON CONFLICT (version) DO NOTHING
+                """,
+                version,
+                datetime.now(UTC),
+                description,
+            )
+
+    async def apply_migration(self, migration: Migration) -> None:
+        """Apply a migration with PostgreSQL-specific handling."""
+        async with self._pool.acquire() as conn, conn.transaction():
+            if migration.version == 2:
+                # V2: Add step_id column to events table
+                # First check if events table exists (fresh databases won't have it yet)
+                table_exists = await conn.fetchrow("""
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.tables
+                        WHERE table_name = 'events'
+                    ) as exists
+                """)
+
+                if table_exists and table_exists["exists"]:
+                    # Use IF NOT EXISTS for idempotency
+                    await conn.execute("""
+                        DO $$
+                        BEGIN
+                            IF NOT EXISTS (
+                                SELECT 1 FROM information_schema.columns
+                                WHERE table_name = 'events' AND column_name = 'step_id'
+                            ) THEN
+                                ALTER TABLE events ADD COLUMN step_id TEXT;
+                            END IF;
+                        END $$
+                    """)
+
+                    # Create index for optimized has_event() queries
+                    await conn.execute("""
+                        CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type
+                        ON events(run_id, step_id, type)
+                    """)
+
+                    # Backfill step_id from JSON data
+                    await conn.execute("""
+                        UPDATE events
+                        SET step_id = (data::jsonb)->>'step_id'
+                        WHERE step_id IS NULL
+                        AND (data::jsonb)->>'step_id' IS NOT NULL
+                    """)
+                # If table doesn't exist, schema will be created with step_id column
+            elif migration.up_func:
+                await migration.up_func(conn)
+            elif migration.up_sql and migration.up_sql != "SELECT 1":
+                await conn.execute(migration.up_sql)
+
+            # Record the migration
+            await conn.execute(
+                """
+                INSERT INTO schema_versions (version, applied_at, description)
+                VALUES ($1, $2, $3)
+                """,
+                migration.version,
+                datetime.now(UTC),
+                migration.description,
+            )
+
+
 class PostgresStorageBackend(StorageBackend):
     """
     PostgreSQL storage backend using asyncpg for async operations.
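The runner above only supplies backend-specific primitives; the shared orchestration lives in the new pyworkflow/storage/migrations/base.py, whose body is not shown in this diff. A minimal sketch of the control flow those primitives imply, assuming a hypothetical pending_migrations() helper:

    # Sketch only: the real MigrationRunner.run_migrations() lives in
    # migrations/base.py and is not part of this diff.
    async def run_migrations(self) -> None:
        await self.ensure_schema_versions_table()
        current = await self.get_current_version()
        if current == 0 and await self.detect_existing_schema():
            # Pre-versioning database: record a baseline instead of re-running V1
            await self.record_baseline_version(1, "Baseline: pre-versioning schema")
            current = await self.get_current_version()
        for migration in self.pending_migrations(current):  # hypothetical helper
            await self.apply_migration(migration)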
@@ -55,6 +165,8 @@ class PostgresStorageBackend(StorageBackend):
         database: str = "pyworkflow",
         min_pool_size: int = 1,
         max_pool_size: int = 10,
+        max_inactive_connection_lifetime: float = 1800.0,
+        command_timeout: float | None = 60.0,
     ):
         """
         Initialize PostgreSQL storage backend.
@@ -68,6 +180,9 @@ class PostgresStorageBackend(StorageBackend):
             database: Database name (used if dsn not provided)
             min_pool_size: Minimum connections in pool
             max_pool_size: Maximum connections in pool
+            max_inactive_connection_lifetime: How long (seconds) an idle connection can
+                stay in the pool before being closed. Default 1800s (30 min).
+            command_timeout: Default timeout (seconds) for queries. None for no timeout. Default 60s.
         """
         self.dsn = dsn
         self.host = host
@@ -77,6 +192,8 @@ class PostgresStorageBackend(StorageBackend):
         self.database = database
         self.min_pool_size = min_pool_size
         self.max_pool_size = max_pool_size
+        self.max_inactive_connection_lifetime = max_inactive_connection_lifetime
+        self.command_timeout = command_timeout
         self._pool: asyncpg.Pool | None = None
         self._pool_loop_id: int | None = None  # Track which loop the pool was created on
         self._initialized = False
@@ -111,6 +228,11 @@ class PostgresStorageBackend(StorageBackend):
                 dsn=self.dsn or self._build_dsn(),
                 min_size=self.min_pool_size,
                 max_size=self.max_pool_size,
+                max_inactive_connection_lifetime=self.max_inactive_connection_lifetime,
+                command_timeout=self.command_timeout,
+                # Disable statement caching to avoid InvalidCachedStatementError
+                # after schema migrations
+                statement_cache_size=0,
             )
             self._pool_loop_id = current_loop_id
 
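The two new constructor parameters feed straight through to asyncpg.create_pool(), alongside a pinned statement_cache_size=0; disabling the statement cache trades a little per-query speed for robustness, since cached prepared statements would otherwise raise InvalidCachedStatementError once a migration alters a table they reference. A usage sketch (the DSN and values are illustrative, not defaults):

    # Illustrative: tune idle-connection recycling and per-query timeouts.
    backend = PostgresStorageBackend(
        dsn="postgresql://user:pass@db.example.com/pyworkflow",
        max_inactive_connection_lifetime=600.0,  # close connections idle > 10 min
        command_timeout=30.0,                    # per-query timeout in seconds
    )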
@@ -127,11 +249,16 @@ class PostgresStorageBackend(StorageBackend):
         self._initialized = False
 
     async def _initialize_schema(self) -> None:
-        """Create database tables if they don't exist."""
+        """Create database tables if they don't exist and run migrations."""
         if not self._pool:
             await self.connect()
 
         pool = await self._get_pool()
+
+        # Run migrations first (handles schema versioning)
+        runner = PostgresMigrationRunner(pool)
+        await runner.run_migrations()
+
         async with pool.acquire() as conn:
             # Workflow runs table
             await conn.execute("""
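Note the bootstrap ordering here: migrations run before the CREATE TABLE IF NOT EXISTS statements. On a fresh database the V2 migration no-ops (the events table does not exist yet) and is only recorded in schema_versions, after which the tables below are created with step_id already in place; on a pre-existing database, detect_existing_schema() presumably triggers a baseline via record_baseline_version() and V2 performs the ALTER and backfill.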
@@ -177,7 +304,7 @@ class PostgresStorageBackend(StorageBackend):
                 "CREATE INDEX IF NOT EXISTS idx_runs_parent_run_id ON workflow_runs(parent_run_id)"
             )
 
-            # Events table
+            # Events table (includes step_id column added in V2 migration)
             await conn.execute("""
                 CREATE TABLE IF NOT EXISTS events (
                     event_id TEXT PRIMARY KEY,
@@ -185,7 +312,8 @@ class PostgresStorageBackend(StorageBackend):
                     sequence INTEGER NOT NULL,
                     type TEXT NOT NULL,
                     timestamp TIMESTAMPTZ NOT NULL,
-                    data TEXT NOT NULL DEFAULT '{}'
+                    data TEXT NOT NULL DEFAULT '{}',
+                    step_id TEXT
                 )
             """)
 
@@ -193,7 +321,14 @@ class PostgresStorageBackend(StorageBackend):
             await conn.execute(
                 "CREATE INDEX IF NOT EXISTS idx_events_run_id_sequence ON events(run_id, sequence)"
             )
-
+            # Composite index for get_events() with type filter
+            await conn.execute(
+                "CREATE INDEX IF NOT EXISTS idx_events_run_id_type ON events(run_id, type)"
+            )
+            # Optimized index for has_event() with step_id filter (V2 migration)
+            await conn.execute(
+                "CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type ON events(run_id, step_id, type)"
+            )
 
             # Steps table
             await conn.execute("""
@@ -554,6 +689,9 @@ class PostgresStorageBackend(StorageBackend):
         """Record an event to the append-only event log."""
         pool = await self._get_pool()
 
+        # Extract step_id from event data for indexed column
+        step_id = event.data.get("step_id") if event.data else None
+
         async with pool.acquire() as conn, conn.transaction():
             # Get next sequence number and insert in a transaction
             row = await conn.fetchrow(
@@ -564,8 +702,8 @@ class PostgresStorageBackend(StorageBackend):
 
             await conn.execute(
                 """
-                INSERT INTO events (event_id, run_id, sequence, type, timestamp, data)
-                VALUES ($1, $2, $3, $4, $5, $6)
+                INSERT INTO events (event_id, run_id, sequence, type, timestamp, data, step_id)
+                VALUES ($1, $2, $3, $4, $5, $6, $7)
                 """,
                 event.event_id,
                 event.run_id,
@@ -573,6 +711,7 @@ class PostgresStorageBackend(StorageBackend):
                 event.type.value,
                 event.timestamp,
                 json.dumps(event.data),
+                step_id,
             )
 
     async def get_events(
@@ -638,6 +777,58 @@ class PostgresStorageBackend(StorageBackend):
 
         return self._row_to_event(row)
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists using optimized indexed queries.
+
+        When step_id is the only filter, uses a direct indexed query (O(1) lookup).
+        For other filters, falls back to loading events of the type and filtering in Python.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        pool = await self._get_pool()
+
+        # Optimized path: if only filtering by step_id, use indexed column directly
+        if filters.keys() == {"step_id"}:
+            step_id = str(filters["step_id"])
+            async with pool.acquire() as conn:
+                row = await conn.fetchrow(
+                    """
+                    SELECT 1 FROM events
+                    WHERE run_id = $1 AND type = $2 AND step_id = $3
+                    LIMIT 1
+                    """,
+                    run_id,
+                    event_type,
+                    step_id,
+                )
+                return row is not None
+
+        # Fallback: load events of type and filter in Python
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
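The new has_event() fast path is what makes replay checks cheap: when step_id is the only filter, the probe is a single LIMIT 1 lookup against idx_events_run_id_step_id_type instead of decoding every event payload for the run. A usage sketch (the run ID, event-type strings, and filter values are illustrative, not names defined by the library):

    # Single-filter call: hits the indexed fast path.
    done = await backend.has_event("run-123", "step_completed", step_id="charge_card")

    # Any other filter key falls back to fetching events of that type and
    # comparing data fields in Python.
    seen = await backend.has_event("run-123", "hook_received", hook_id="hook-7")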
pyworkflow/storage/sqlite.py
CHANGED
@@ -18,6 +18,7 @@ import aiosqlite
 
 from pyworkflow.engine.events import Event, EventType
 from pyworkflow.storage.base import StorageBackend
+from pyworkflow.storage.migrations import Migration, MigrationRegistry, MigrationRunner
 from pyworkflow.storage.schemas import (
     Hook,
     HookStatus,
@@ -31,6 +32,103 @@ from pyworkflow.storage.schemas import (
 )
 
 
+class SQLiteMigrationRunner(MigrationRunner):
+    """SQLite-specific migration runner."""
+
+    def __init__(self, db: aiosqlite.Connection, registry: MigrationRegistry | None = None) -> None:
+        super().__init__(registry)
+        self._db = db
+
+    async def ensure_schema_versions_table(self) -> None:
+        """Create schema_versions table if it doesn't exist."""
+        await self._db.execute("""
+            CREATE TABLE IF NOT EXISTS schema_versions (
+                version INTEGER PRIMARY KEY,
+                applied_at TIMESTAMP NOT NULL,
+                description TEXT
+            )
+        """)
+        await self._db.commit()
+
+    async def get_current_version(self) -> int:
+        """Get the highest applied migration version."""
+        async with self._db.execute(
+            "SELECT COALESCE(MAX(version), 0) as version FROM schema_versions"
+        ) as cursor:
+            row = await cursor.fetchone()
+            return row[0] if row else 0
+
+    async def detect_existing_schema(self) -> bool:
+        """Check if the events table exists (pre-versioning database)."""
+        async with self._db.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='events'"
+        ) as cursor:
+            row = await cursor.fetchone()
+            return row is not None
+
+    async def record_baseline_version(self, version: int, description: str) -> None:
+        """Record a baseline version without running migrations."""
+        await self._db.execute(
+            """
+            INSERT OR IGNORE INTO schema_versions (version, applied_at, description)
+            VALUES (?, ?, ?)
+            """,
+            (version, datetime.now(UTC).isoformat(), description),
+        )
+        await self._db.commit()
+
+    async def apply_migration(self, migration: Migration) -> None:
+        """Apply a migration with SQLite-specific handling."""
+        if migration.version == 2:
+            # V2: Add step_id column to events table
+            # First check if events table exists (fresh databases won't have it yet)
+            async with self._db.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='events'"
+            ) as cursor:
+                table_exists = await cursor.fetchone() is not None
+
+            if table_exists:
+                # SQLite doesn't have IF NOT EXISTS for columns, so check first
+                async with self._db.execute("PRAGMA table_info(events)") as cursor:
+                    columns = await cursor.fetchall()
+                    column_names = [col[1] for col in columns]
+
+                if "step_id" not in column_names:
+                    await self._db.execute("ALTER TABLE events ADD COLUMN step_id TEXT")
+
+                # Create index for optimized has_event() queries
+                await self._db.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type
+                    ON events(run_id, step_id, type)
+                """)
+
+                # Backfill step_id from JSON data using json_extract
+                await self._db.execute("""
+                    UPDATE events
+                    SET step_id = json_extract(data, '$.step_id')
+                    WHERE step_id IS NULL
+                    AND json_extract(data, '$.step_id') IS NOT NULL
+                """)
+
+                await self._db.commit()
+            # If table doesn't exist, schema will be created with step_id column
+        elif migration.up_func:
+            await migration.up_func(self._db)
+        elif migration.up_sql and migration.up_sql != "SELECT 1":
+            await self._db.execute(migration.up_sql)
+            await self._db.commit()
+
+        # Record the migration
+        await self._db.execute(
+            """
+            INSERT INTO schema_versions (version, applied_at, description)
+            VALUES (?, ?, ?)
+            """,
+            (migration.version, datetime.now(UTC).isoformat(), migration.description),
+        )
+        await self._db.commit()
+
+
 class SQLiteStorageBackend(StorageBackend):
     """
     SQLite storage backend using aiosqlite for async operations.
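SQLite has no ALTER TABLE ... ADD COLUMN IF NOT EXISTS, hence the PRAGMA probe in apply_migration(): each row returned by PRAGMA table_info is (cid, name, type, notnull, dflt_value, pk), which is why the runner reads col[1]. The same check in isolation, as a runnable sketch (the database path and table name are illustrative):

    import asyncio
    import aiosqlite

    async def column_names(path: str, table: str) -> list[str]:
        # PRAGMA table_info rows: (cid, name, type, notnull, dflt_value, pk)
        async with aiosqlite.connect(path) as db:
            async with db.execute(f"PRAGMA table_info({table})") as cursor:
                return [row[1] for row in await cursor.fetchall()]

    print(asyncio.run(column_names("pyworkflow.db", "events")))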
@@ -72,7 +170,7 @@ class SQLiteStorageBackend(StorageBackend):
         self._initialized = False
 
     async def _initialize_schema(self) -> None:
-        """Create database tables if they don't exist."""
+        """Create database tables if they don't exist and run migrations."""
         if not self._db:
             await self.connect()
 
@@ -80,6 +178,10 @@ class SQLiteStorageBackend(StorageBackend):
         assert self._db is not None
         db = self._db
 
+        # Run migrations first (handles schema versioning)
+        runner = SQLiteMigrationRunner(db)
+        await runner.run_migrations()
+
         # Workflow runs table
         await db.execute("""
             CREATE TABLE IF NOT EXISTS workflow_runs (
@@ -123,7 +225,7 @@ class SQLiteStorageBackend(StorageBackend):
             "CREATE INDEX IF NOT EXISTS idx_runs_parent_run_id ON workflow_runs(parent_run_id)"
         )
 
-        # Events table
+        # Events table (includes step_id column added in V2 migration)
         await db.execute("""
             CREATE TABLE IF NOT EXISTS events (
                 event_id TEXT PRIMARY KEY,
@@ -132,6 +234,7 @@ class SQLiteStorageBackend(StorageBackend):
                 type TEXT NOT NULL,
                 timestamp TIMESTAMP NOT NULL,
                 data TEXT NOT NULL DEFAULT '{}',
+                step_id TEXT,
                 FOREIGN KEY (run_id) REFERENCES workflow_runs(run_id) ON DELETE CASCADE
             )
         """)
@@ -140,7 +243,14 @@ class SQLiteStorageBackend(StorageBackend):
         await db.execute(
             "CREATE INDEX IF NOT EXISTS idx_events_run_id_sequence ON events(run_id, sequence)"
         )
-
+        # Composite index for get_events() with type filter
+        await db.execute(
+            "CREATE INDEX IF NOT EXISTS idx_events_run_id_type ON events(run_id, type)"
+        )
+        # Optimized index for has_event() with step_id filter (V2 migration)
+        await db.execute(
+            "CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type ON events(run_id, step_id, type)"
+        )
 
         # Steps table
         await db.execute("""
@@ -447,6 +557,9 @@ class SQLiteStorageBackend(StorageBackend):
         """Record an event to the append-only event log."""
        db = self._ensure_connected()
 
+        # Extract step_id from event data for indexed column
+        step_id = event.data.get("step_id") if event.data else None
+
         # Get next sequence number
         async with db.execute(
             "SELECT COALESCE(MAX(sequence), -1) + 1 FROM events WHERE run_id = ?",
@@ -457,8 +570,8 @@ class SQLiteStorageBackend(StorageBackend):
 
         await db.execute(
             """
-            INSERT INTO events (event_id, run_id, sequence, type, timestamp, data)
-            VALUES (?, ?, ?, ?, ?, ?)
+            INSERT INTO events (event_id, run_id, sequence, type, timestamp, data, step_id)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
             """,
             (
                 event.event_id,
@@ -467,6 +580,7 @@ class SQLiteStorageBackend(StorageBackend):
                 event.type.value,
                 event.timestamp.isoformat(),
                 json.dumps(event.data),
+                step_id,
             ),
         )
         await db.commit()
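Both backends now apply the same denormalization rule in record_event(): the "step_id" key of the event's JSON payload, if present, is mirrored into the dedicated column, while the payload itself remains the source of truth. The rule in isolation:

    # Mirrors the extraction both backends perform before the INSERT.
    def extract_step_id(data: dict | None) -> str | None:
        return data.get("step_id") if data else None

    assert extract_step_id({"step_id": "charge_card"}) == "charge_card"
    assert extract_step_id({}) is None
    assert extract_step_id(None) is None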
@@ -529,6 +643,56 @@ class SQLiteStorageBackend(StorageBackend):
 
         return self._row_to_event(row)
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists using optimized indexed queries.
+
+        When step_id is the only filter, uses a direct indexed query (O(1) lookup).
+        For other filters, falls back to loading events of the type and filtering in Python.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        db = self._ensure_connected()
+
+        # Optimized path: if only filtering by step_id, use indexed column directly
+        if filters.keys() == {"step_id"}:
+            step_id = str(filters["step_id"])
+            async with db.execute(
+                """
+                SELECT 1 FROM events
+                WHERE run_id = ? AND type = ? AND step_id = ?
+                LIMIT 1
+                """,
+                (run_id, event_type, step_id),
+            ) as cursor:
+                row = await cursor.fetchone()
+                return row is not None
+
+        # Fallback: load events of type and filter in Python
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
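To confirm that the fast path actually uses the V2 index on a given database file, EXPLAIN QUERY PLAN can be run against the same probe. A sketch (path and values illustrative; the exact plan text varies across SQLite versions):

    import asyncio
    import aiosqlite

    async def explain(path: str) -> None:
        async with aiosqlite.connect(path) as db:
            async with db.execute(
                "EXPLAIN QUERY PLAN "
                "SELECT 1 FROM events WHERE run_id = ? AND type = ? AND step_id = ? LIMIT 1",
                ("run-123", "step_completed", "charge_card"),
            ) as cursor:
                async for row in cursor:
                    # Expect a SEARCH ... USING INDEX idx_events_run_id_step_id_type row
                    print(row)

    asyncio.run(explain("pyworkflow.db"))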
@@ -1119,6 +1283,7 @@ class SQLiteStorageBackend(StorageBackend):
 
     def _row_to_event(self, row: Any) -> Event:
         """Convert database row to Event object."""
+        # Column order: event_id[0], run_id[1], sequence[2], type[3], timestamp[4], data[5], step_id[6]
         return Event(
             event_id=row[0],
             run_id=row[1],
@@ -1126,6 +1291,7 @@ class SQLiteStorageBackend(StorageBackend):
             type=EventType(row[3]),
             timestamp=datetime.fromisoformat(row[4]),
             data=json.loads(row[5]) if row[5] else {},
+            # step_id is in row[6] but not used in Event object (it's denormalized for query optimization)
         )
 
     def _row_to_step_execution(self, row: Any) -> StepExecution:
{pyworkflow_engine-0.1.22.dist-info → pyworkflow_engine-0.1.24.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-pyworkflow/__init__.py,sha256=
+pyworkflow/__init__.py,sha256=ySBxZ69-klgc95RAB5PIB2p1g155-5YtVpXph7N5kME,6281
 pyworkflow/config.py,sha256=pKwPrpCwBJiDpB-MIjM0U7GW1TFmQFO341pihL5-vTM,14455
 pyworkflow/discovery.py,sha256=snW3l4nvY3Nc067TGlwtn_qdzTU9ybN7YPr8FbvY8iM,8066
 pyworkflow/aws/__init__.py,sha256=Ak_xHcR9LTRX-CwcS0XecYmzrXZw4EM3V9aKBBDEmIk,1741
@@ -6,11 +6,11 @@ pyworkflow/aws/context.py,sha256=C_wBr_YRUYT4IHyaAGPdiqRPNbH8mDLkoMpVtQVIg28,822
 pyworkflow/aws/handler.py,sha256=0SnQuIfQVD99QKMCRFPtrsrV_l1LYKFkzPIRx_2UkSI,5849
 pyworkflow/aws/testing.py,sha256=WrRk9wjbycM-UyHFQWNnA83UE9IrYnhfT38WrbxQT2U,8844
 pyworkflow/celery/__init__.py,sha256=FywVyqnT8AYz9cXkr-wel7_-N7dHFsPNASEPMFESf4Q,1179
-pyworkflow/celery/app.py,sha256=
+pyworkflow/celery/app.py,sha256=MXmD5N8DCrX9YpdccOWPy2ob7k4vg9r0ix1kDgDuR3w,15753
 pyworkflow/celery/loop.py,sha256=mu8cIfMJYgHAoGCN_DdDoNoXK3QHzHpLmrPCyFDQYIY,3016
 pyworkflow/celery/scheduler.py,sha256=Ms4rqRpdpMiLM8l4y3DK-Divunj9afYuUaGGoNQe7P4,11288
 pyworkflow/celery/singleton.py,sha256=9gdVHzqFjShZ9OJOJlJNABUg9oqnl6ITGROtomcOtsg,16070
-pyworkflow/celery/tasks.py,sha256=
+pyworkflow/celery/tasks.py,sha256=FiZFBTHUQz16AWYmsHUGmTIxXQ5zNewyFWUGRizS4b8,88813
 pyworkflow/cli/__init__.py,sha256=tcbe-fcZmyeEKUy_aEo8bsEF40HsNKOwvyMBZIJZPwc,3844
 pyworkflow/cli/__main__.py,sha256=LxLLS4FEEPXa5rWpLTtKuivn6Xp9pGia-QKGoxt9SS0,148
 pyworkflow/cli/commands/__init__.py,sha256=IXvnTgukALckkO8fTlZhVRq80ojSqpnIIgboAg_-yZU,39
@@ -73,22 +73,24 @@ pyworkflow/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 pyworkflow/serialization/decoder.py,sha256=F7Ofuw1Yzo82iSFFXiK2yoW_v2YRbLMpX3CQbKjm0Ls,3860
 pyworkflow/serialization/encoder.py,sha256=ZBwAxe5Bb4MCfFJePHw7ArJlIbBieSwUgsysGCI2iPU,4108
 pyworkflow/storage/__init__.py,sha256=LhVjLNZdo4Mi5dEC75hjSPnbQr9jBoIsTOrC8vzTGOM,1924
-pyworkflow/storage/base.py,sha256=
-pyworkflow/storage/cassandra.py,sha256=
+pyworkflow/storage/base.py,sha256=CTvZwy8cHMdbOV3ZYVlbsx0fBNZN9cusbbh3oqGmrao,18598
+pyworkflow/storage/cassandra.py,sha256=3VDwiM9zaxKNwp5vdX1FdqpwFm0AZfTWtVDTmIKBA7Q,63538
 pyworkflow/storage/config.py,sha256=45UMPxRoqgK4ZwE7HIK9ctxE_eoK3eAE_1tRhn3Psd4,12410
-pyworkflow/storage/dynamodb.py,sha256=
-pyworkflow/storage/file.py,sha256=
-pyworkflow/storage/memory.py,sha256=
-pyworkflow/storage/mysql.py,sha256=
-pyworkflow/storage/postgres.py,sha256=
+pyworkflow/storage/dynamodb.py,sha256=hER4rOc0GWh0-yk0YiFrxZkiHJ6DKEOpvZQ9GCEx8QQ,56501
+pyworkflow/storage/file.py,sha256=Ag4K5Rt-8YaENZLx-HnN77EAfNc37WqkymmaPlVJbYg,32665
+pyworkflow/storage/memory.py,sha256=HiuGU9jgzKwNGrF98YHdHYrzQTNUu96VyL5HVVeII7A,22179
+pyworkflow/storage/mysql.py,sha256=lS9dZHmtPL14cY1lrzDFilFKmsjMD_b0NnYJfiPDFKQ,52537
+pyworkflow/storage/postgres.py,sha256=Rg6h9syMgjw32-lxbNFroSxD9lBvv-WjaO4uZnglvOo,54056
 pyworkflow/storage/schemas.py,sha256=o1ntTYNgQQ5YVuXtPCShtENEsndVjdrXclWrkCgkitg,18002
-pyworkflow/storage/sqlite.py,sha256=
+pyworkflow/storage/sqlite.py,sha256=EJ8n66WfhZnfFLKcNdUOoOCswdftttoH6T31spS78Qo,47667
+pyworkflow/storage/migrations/__init__.py,sha256=R8b-VoVzpYJ6A-Z5FDNbrbOO-2Nnt4NrcAS8Wo08VPo,425
+pyworkflow/storage/migrations/base.py,sha256=Thq38E8BDpC7EmBlwpfm727Zyz4TVmADQ1kEttWo0oY,9237
 pyworkflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/utils/duration.py,sha256=C-itmiSQQlplw7j6XB679hLF9xYGnyCwm7twO88OF8U,3978
 pyworkflow/utils/schedule.py,sha256=dO_MkGFyfwZpb0LDlW6BGyZzlPuQIA6dc6j9nk9lc4Y,10691
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
+pyworkflow_engine-0.1.24.dist-info/licenses/LICENSE,sha256=Y49RCTZ5ayn_yzBcRxnyIFdcMCyuYm150aty_FIznfY,1080
+pyworkflow_engine-0.1.24.dist-info/METADATA,sha256=OoG1XNfV_RIwP4cL0QV66cbFKKwv6QqQNl_Mt6FwTgk,19628
+pyworkflow_engine-0.1.24.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+pyworkflow_engine-0.1.24.dist-info/entry_points.txt,sha256=3IGAfuylnS39U0YX0pxnjrj54kB4iT_bNYrmsiDB-dE,51
+pyworkflow_engine-0.1.24.dist-info/top_level.txt,sha256=FLTv9pQmLDBXrQdLOhTMIS3njFibliMsQEfumqmdzBE,11
+pyworkflow_engine-0.1.24.dist-info/RECORD,,