pyworkflow-engine 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflow/__init__.py +1 -1
- pyworkflow/celery/app.py +18 -0
- pyworkflow/celery/tasks.py +148 -106
- pyworkflow/storage/base.py +36 -0
- pyworkflow/storage/cassandra.py +34 -0
- pyworkflow/storage/dynamodb.py +34 -0
- pyworkflow/storage/file.py +52 -0
- pyworkflow/storage/memory.py +37 -0
- pyworkflow/storage/migrations/__init__.py +15 -0
- pyworkflow/storage/migrations/base.py +299 -0
- pyworkflow/storage/mysql.py +186 -5
- pyworkflow/storage/postgres.py +194 -6
- pyworkflow/storage/sqlite.py +171 -5
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/METADATA +1 -1
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/RECORD +19 -17
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/WHEEL +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/entry_points.txt +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/licenses/LICENSE +0 -0
- {pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/top_level.txt +0 -0
pyworkflow/storage/postgres.py
CHANGED

```diff
@@ -23,6 +23,7 @@ import asyncpg
 
 from pyworkflow.engine.events import Event, EventType
 from pyworkflow.storage.base import StorageBackend
+from pyworkflow.storage.migrations import Migration, MigrationRegistry, MigrationRunner
 from pyworkflow.storage.schemas import (
     Hook,
     HookStatus,
@@ -37,6 +38,115 @@ from pyworkflow.storage.schemas import (
 )
 
 
+class PostgresMigrationRunner(MigrationRunner):
+    """PostgreSQL-specific migration runner."""
+
+    def __init__(self, pool: asyncpg.Pool, registry: MigrationRegistry | None = None) -> None:
+        super().__init__(registry)
+        self._pool = pool
+
+    async def ensure_schema_versions_table(self) -> None:
+        """Create schema_versions table if it doesn't exist."""
+        async with self._pool.acquire() as conn:
+            await conn.execute("""
+                CREATE TABLE IF NOT EXISTS schema_versions (
+                    version INTEGER PRIMARY KEY,
+                    applied_at TIMESTAMPTZ NOT NULL,
+                    description TEXT
+                )
+            """)
+
+    async def get_current_version(self) -> int:
+        """Get the highest applied migration version."""
+        async with self._pool.acquire() as conn:
+            row = await conn.fetchrow(
+                "SELECT COALESCE(MAX(version), 0) as version FROM schema_versions"
+            )
+            return row["version"] if row else 0
+
+    async def detect_existing_schema(self) -> bool:
+        """Check if the events table exists (pre-versioning database)."""
+        async with self._pool.acquire() as conn:
+            row = await conn.fetchrow("""
+                SELECT EXISTS (
+                    SELECT FROM information_schema.tables
+                    WHERE table_name = 'events'
+                ) as exists
+            """)
+            return row["exists"] if row else False
+
+    async def record_baseline_version(self, version: int, description: str) -> None:
+        """Record a baseline version without running migrations."""
+        async with self._pool.acquire() as conn:
+            await conn.execute(
+                """
+                INSERT INTO schema_versions (version, applied_at, description)
+                VALUES ($1, $2, $3)
+                ON CONFLICT (version) DO NOTHING
+                """,
+                version,
+                datetime.now(UTC),
+                description,
+            )
+
+    async def apply_migration(self, migration: Migration) -> None:
+        """Apply a migration with PostgreSQL-specific handling."""
+        async with self._pool.acquire() as conn, conn.transaction():
+            if migration.version == 2:
+                # V2: Add step_id column to events table
+                # First check if events table exists (fresh databases won't have it yet)
+                table_exists = await conn.fetchrow("""
+                    SELECT EXISTS (
+                        SELECT FROM information_schema.tables
+                        WHERE table_name = 'events'
+                    ) as exists
+                """)
+
+                if table_exists and table_exists["exists"]:
+                    # Use IF NOT EXISTS for idempotency
+                    await conn.execute("""
+                        DO $$
+                        BEGIN
+                            IF NOT EXISTS (
+                                SELECT 1 FROM information_schema.columns
+                                WHERE table_name = 'events' AND column_name = 'step_id'
+                            ) THEN
+                                ALTER TABLE events ADD COLUMN step_id TEXT;
+                            END IF;
+                        END $$
+                    """)
+
+                    # Create index for optimized has_event() queries
+                    await conn.execute("""
+                        CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type
+                        ON events(run_id, step_id, type)
+                    """)
+
+                    # Backfill step_id from JSON data
+                    await conn.execute("""
+                        UPDATE events
+                        SET step_id = (data::jsonb)->>'step_id'
+                        WHERE step_id IS NULL
+                        AND (data::jsonb)->>'step_id' IS NOT NULL
+                    """)
+                # If table doesn't exist, schema will be created with step_id column
+            elif migration.up_func:
+                await migration.up_func(conn)
+            elif migration.up_sql and migration.up_sql != "SELECT 1":
+                await conn.execute(migration.up_sql)
+
+            # Record the migration
+            await conn.execute(
+                """
+                INSERT INTO schema_versions (version, applied_at, description)
+                VALUES ($1, $2, $3)
+                """,
+                migration.version,
+                datetime.now(UTC),
+                migration.description,
+            )
+
+
 class PostgresStorageBackend(StorageBackend):
     """
     PostgreSQL storage backend using asyncpg for async operations.
@@ -55,6 +165,8 @@ class PostgresStorageBackend(StorageBackend):
         database: str = "pyworkflow",
         min_pool_size: int = 1,
         max_pool_size: int = 10,
+        max_inactive_connection_lifetime: float = 1800.0,
+        command_timeout: float | None = 60.0,
     ):
         """
         Initialize PostgreSQL storage backend.
@@ -68,6 +180,9 @@ class PostgresStorageBackend(StorageBackend):
             database: Database name (used if dsn not provided)
             min_pool_size: Minimum connections in pool
             max_pool_size: Maximum connections in pool
+            max_inactive_connection_lifetime: How long (seconds) an idle connection can
+                stay in the pool before being closed. Default 1800s (30 min).
+            command_timeout: Default timeout (seconds) for queries. None for no timeout. Default 60s.
         """
         self.dsn = dsn
         self.host = host
@@ -77,6 +192,8 @@ class PostgresStorageBackend(StorageBackend):
         self.database = database
         self.min_pool_size = min_pool_size
         self.max_pool_size = max_pool_size
+        self.max_inactive_connection_lifetime = max_inactive_connection_lifetime
+        self.command_timeout = command_timeout
         self._pool: asyncpg.Pool | None = None
         self._pool_loop_id: int | None = None  # Track which loop the pool was created on
         self._initialized = False
@@ -111,6 +228,8 @@ class PostgresStorageBackend(StorageBackend):
                 dsn=self.dsn or self._build_dsn(),
                 min_size=self.min_pool_size,
                 max_size=self.max_pool_size,
+                max_inactive_connection_lifetime=self.max_inactive_connection_lifetime,
+                command_timeout=self.command_timeout,
             )
             self._pool_loop_id = current_loop_id
 
@@ -127,11 +246,16 @@ class PostgresStorageBackend(StorageBackend):
         self._initialized = False
 
     async def _initialize_schema(self) -> None:
-        """Create database tables if they don't exist."""
+        """Create database tables if they don't exist and run migrations."""
        if not self._pool:
             await self.connect()
 
         pool = await self._get_pool()
+
+        # Run migrations first (handles schema versioning)
+        runner = PostgresMigrationRunner(pool)
+        await runner.run_migrations()
+
         async with pool.acquire() as conn:
             # Workflow runs table
             await conn.execute("""
```
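The two new constructor knobs are passed straight through to asyncpg's `create_pool()` arguments of the same names, as the `@@ -111,6 +228,8 @@` hunk shows. A minimal usage sketch, assuming only the signature visible above; the DSN and the tuned values are illustrative, not recommendations:

```python
# Minimal sketch, assuming the PostgresStorageBackend signature shown in the
# diff above. The DSN and the tuned values here are hypothetical examples.
import asyncio

from pyworkflow.storage.postgres import PostgresStorageBackend


async def main() -> None:
    storage = PostgresStorageBackend(
        dsn="postgresql://app:secret@localhost:5432/pyworkflow",  # hypothetical DSN
        min_pool_size=2,
        max_pool_size=20,
        max_inactive_connection_lifetime=600.0,  # close idle connections after 10 min
        command_timeout=30.0,  # per-query default timeout; None disables it
    )
    await storage.connect()  # creates the asyncpg pool with the settings above
    # ... use the backend


asyncio.run(main())
```

Recycling idle connections and bounding query time are the usual reasons to surface these asyncpg options in long-lived workers, which is plausibly why they were added here.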
```diff
@@ -177,7 +301,7 @@ class PostgresStorageBackend(StorageBackend):
                 "CREATE INDEX IF NOT EXISTS idx_runs_parent_run_id ON workflow_runs(parent_run_id)"
             )
 
-            # Events table
+            # Events table (includes step_id column added in V2 migration)
             await conn.execute("""
                 CREATE TABLE IF NOT EXISTS events (
                     event_id TEXT PRIMARY KEY,
@@ -185,7 +309,8 @@ class PostgresStorageBackend(StorageBackend):
                     sequence INTEGER NOT NULL,
                     type TEXT NOT NULL,
                     timestamp TIMESTAMPTZ NOT NULL,
-                    data TEXT NOT NULL DEFAULT '{}'
+                    data TEXT NOT NULL DEFAULT '{}',
+                    step_id TEXT
                 )
             """)
 
@@ -193,7 +318,14 @@ class PostgresStorageBackend(StorageBackend):
             await conn.execute(
                 "CREATE INDEX IF NOT EXISTS idx_events_run_id_sequence ON events(run_id, sequence)"
             )
-
+            # Composite index for get_events() with type filter
+            await conn.execute(
+                "CREATE INDEX IF NOT EXISTS idx_events_run_id_type ON events(run_id, type)"
+            )
+            # Optimized index for has_event() with step_id filter (V2 migration)
+            await conn.execute(
+                "CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type ON events(run_id, step_id, type)"
+            )
 
             # Steps table
             await conn.execute("""
@@ -554,6 +686,9 @@ class PostgresStorageBackend(StorageBackend):
         """Record an event to the append-only event log."""
         pool = await self._get_pool()
 
+        # Extract step_id from event data for indexed column
+        step_id = event.data.get("step_id") if event.data else None
+
         async with pool.acquire() as conn, conn.transaction():
             # Get next sequence number and insert in a transaction
             row = await conn.fetchrow(
@@ -564,8 +699,8 @@ class PostgresStorageBackend(StorageBackend):
 
             await conn.execute(
                 """
-                INSERT INTO events (event_id, run_id, sequence, type, timestamp, data)
-                VALUES ($1, $2, $3, $4, $5, $6)
+                INSERT INTO events (event_id, run_id, sequence, type, timestamp, data, step_id)
+                VALUES ($1, $2, $3, $4, $5, $6, $7)
                 """,
                 event.event_id,
                 event.run_id,
@@ -573,6 +708,7 @@ class PostgresStorageBackend(StorageBackend):
                 event.type.value,
                 event.timestamp,
                 json.dumps(event.data),
+                step_id,
             )
 
     async def get_events(
@@ -638,6 +774,58 @@ class PostgresStorageBackend(StorageBackend):
 
         return self._row_to_event(row)
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists using optimized indexed queries.
+
+        When step_id is the only filter, uses a direct indexed query (O(1) lookup).
+        For other filters, falls back to loading events of the type and filtering in Python.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        pool = await self._get_pool()
+
+        # Optimized path: if only filtering by step_id, use indexed column directly
+        if filters.keys() == {"step_id"}:
+            step_id = str(filters["step_id"])
+            async with pool.acquire() as conn:
+                row = await conn.fetchrow(
+                    """
+                    SELECT 1 FROM events
+                    WHERE run_id = $1 AND type = $2 AND step_id = $3
+                    LIMIT 1
+                    """,
+                    run_id,
+                    event_type,
+                    step_id,
+                )
+                return row is not None
+
+        # Fallback: load events of type and filter in Python
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
```
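Note that the fast path fires only when `step_id` is the *sole* keyword filter (the check is `filters.keys() == {"step_id"}`); adding any other key drops back to deserializing every event of that type. A short sketch, to be run inside an async function with a connected backend; the event-type string and filter values are placeholders, not confirmed pyworkflow event names:

```python
# Sketch only: "step.completed" and the filter values below are placeholders.
run_id = "run-123"

# Sole step_id filter -> one indexed `SELECT 1 ... LIMIT 1` (fast path)
seen = await storage.has_event(run_id, "step.completed", step_id="fetch_data")

# Any extra filter key -> get_events() plus Python-side matching (fallback path)
seen = await storage.has_event(run_id, "step.completed", step_id="fetch_data", attempt="2")
```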
pyworkflow/storage/sqlite.py
CHANGED

```diff
@@ -18,6 +18,7 @@ import aiosqlite
 
 from pyworkflow.engine.events import Event, EventType
 from pyworkflow.storage.base import StorageBackend
+from pyworkflow.storage.migrations import Migration, MigrationRegistry, MigrationRunner
 from pyworkflow.storage.schemas import (
     Hook,
     HookStatus,
@@ -31,6 +32,103 @@ from pyworkflow.storage.schemas import (
 )
 
 
+class SQLiteMigrationRunner(MigrationRunner):
+    """SQLite-specific migration runner."""
+
+    def __init__(self, db: aiosqlite.Connection, registry: MigrationRegistry | None = None) -> None:
+        super().__init__(registry)
+        self._db = db
+
+    async def ensure_schema_versions_table(self) -> None:
+        """Create schema_versions table if it doesn't exist."""
+        await self._db.execute("""
+            CREATE TABLE IF NOT EXISTS schema_versions (
+                version INTEGER PRIMARY KEY,
+                applied_at TIMESTAMP NOT NULL,
+                description TEXT
+            )
+        """)
+        await self._db.commit()
+
+    async def get_current_version(self) -> int:
+        """Get the highest applied migration version."""
+        async with self._db.execute(
+            "SELECT COALESCE(MAX(version), 0) as version FROM schema_versions"
+        ) as cursor:
+            row = await cursor.fetchone()
+            return row[0] if row else 0
+
+    async def detect_existing_schema(self) -> bool:
+        """Check if the events table exists (pre-versioning database)."""
+        async with self._db.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='events'"
+        ) as cursor:
+            row = await cursor.fetchone()
+            return row is not None
+
+    async def record_baseline_version(self, version: int, description: str) -> None:
+        """Record a baseline version without running migrations."""
+        await self._db.execute(
+            """
+            INSERT OR IGNORE INTO schema_versions (version, applied_at, description)
+            VALUES (?, ?, ?)
+            """,
+            (version, datetime.now(UTC).isoformat(), description),
+        )
+        await self._db.commit()
+
+    async def apply_migration(self, migration: Migration) -> None:
+        """Apply a migration with SQLite-specific handling."""
+        if migration.version == 2:
+            # V2: Add step_id column to events table
+            # First check if events table exists (fresh databases won't have it yet)
+            async with self._db.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='events'"
+            ) as cursor:
+                table_exists = await cursor.fetchone() is not None
+
+            if table_exists:
+                # SQLite doesn't have IF NOT EXISTS for columns, so check first
+                async with self._db.execute("PRAGMA table_info(events)") as cursor:
+                    columns = await cursor.fetchall()
+                    column_names = [col[1] for col in columns]
+
+                if "step_id" not in column_names:
+                    await self._db.execute("ALTER TABLE events ADD COLUMN step_id TEXT")
+
+                # Create index for optimized has_event() queries
+                await self._db.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type
+                    ON events(run_id, step_id, type)
+                """)
+
+                # Backfill step_id from JSON data using json_extract
+                await self._db.execute("""
+                    UPDATE events
+                    SET step_id = json_extract(data, '$.step_id')
+                    WHERE step_id IS NULL
+                    AND json_extract(data, '$.step_id') IS NOT NULL
+                """)
+
+                await self._db.commit()
+            # If table doesn't exist, schema will be created with step_id column
+        elif migration.up_func:
+            await migration.up_func(self._db)
+        elif migration.up_sql and migration.up_sql != "SELECT 1":
+            await self._db.execute(migration.up_sql)
+            await self._db.commit()
+
+        # Record the migration
+        await self._db.execute(
+            """
+            INSERT INTO schema_versions (version, applied_at, description)
+            VALUES (?, ?, ?)
+            """,
+            (migration.version, datetime.now(UTC).isoformat(), migration.description),
+        )
+        await self._db.commit()
+
+
 class SQLiteStorageBackend(StorageBackend):
     """
     SQLite storage backend using aiosqlite for async operations.
```
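Both runners implement the same five hooks and are driven by `MigrationRunner.run_migrations()` from the new `pyworkflow/storage/migrations/base.py`, whose body this diff does not show. A plausible sketch of that driver loop, inferred from the hook names alone; the `pending_migrations()` registry call is a hypothetical API, not a confirmed one:

```python
# Assumed shape of the base-class driver; the real implementation lives in
# pyworkflow/storage/migrations/base.py, which is not part of this diff.
async def run_migrations(self) -> None:
    await self.ensure_schema_versions_table()
    current = await self.get_current_version()

    if current == 0 and await self.detect_existing_schema():
        # Pre-versioning database: record a baseline instead of replaying V1 DDL
        await self.record_baseline_version(1, "Baseline: pre-existing schema")
        current = 1

    for migration in self.registry.pending_migrations(current):  # hypothetical API
        await self.apply_migration(migration)
```

Whatever the exact loop looks like, the baseline step explains why both runners expose `detect_existing_schema()` and `record_baseline_version()`: databases created before versioning existed must not have V1 DDL re-applied.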
```diff
@@ -72,7 +170,7 @@ class SQLiteStorageBackend(StorageBackend):
         self._initialized = False
 
     async def _initialize_schema(self) -> None:
-        """Create database tables if they don't exist."""
+        """Create database tables if they don't exist and run migrations."""
         if not self._db:
             await self.connect()
 
@@ -80,6 +178,10 @@ class SQLiteStorageBackend(StorageBackend):
         assert self._db is not None
         db = self._db
 
+        # Run migrations first (handles schema versioning)
+        runner = SQLiteMigrationRunner(db)
+        await runner.run_migrations()
+
         # Workflow runs table
         await db.execute("""
             CREATE TABLE IF NOT EXISTS workflow_runs (
@@ -123,7 +225,7 @@ class SQLiteStorageBackend(StorageBackend):
             "CREATE INDEX IF NOT EXISTS idx_runs_parent_run_id ON workflow_runs(parent_run_id)"
         )
 
-        # Events table
+        # Events table (includes step_id column added in V2 migration)
         await db.execute("""
             CREATE TABLE IF NOT EXISTS events (
                 event_id TEXT PRIMARY KEY,
@@ -132,6 +234,7 @@ class SQLiteStorageBackend(StorageBackend):
                 type TEXT NOT NULL,
                 timestamp TIMESTAMP NOT NULL,
                 data TEXT NOT NULL DEFAULT '{}',
+                step_id TEXT,
                 FOREIGN KEY (run_id) REFERENCES workflow_runs(run_id) ON DELETE CASCADE
             )
         """)
@@ -140,7 +243,14 @@ class SQLiteStorageBackend(StorageBackend):
         await db.execute(
             "CREATE INDEX IF NOT EXISTS idx_events_run_id_sequence ON events(run_id, sequence)"
         )
-
+        # Composite index for get_events() with type filter
+        await db.execute(
+            "CREATE INDEX IF NOT EXISTS idx_events_run_id_type ON events(run_id, type)"
+        )
+        # Optimized index for has_event() with step_id filter (V2 migration)
+        await db.execute(
+            "CREATE INDEX IF NOT EXISTS idx_events_run_id_step_id_type ON events(run_id, step_id, type)"
+        )
 
         # Steps table
         await db.execute("""
```
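One way to check that SQLite actually uses the new composite index for the `has_event()` fast-path query is `EXPLAIN QUERY PLAN` against a database file created by this backend. A small sketch; the file path and bound values are placeholders:

```python
# Sketch: verify the V2 index is picked up for the has_event() fast-path query.
# "workflows.db" and the bound values below are placeholders.
import asyncio

import aiosqlite


async def explain() -> None:
    async with aiosqlite.connect("workflows.db") as db:
        async with db.execute(
            "EXPLAIN QUERY PLAN "
            "SELECT 1 FROM events WHERE run_id = ? AND type = ? AND step_id = ? LIMIT 1",
            ("run-123", "step.completed", "fetch_data"),
        ) as cursor:
            async for row in cursor:
                # Expect something like:
                #   SEARCH events USING COVERING INDEX idx_events_run_id_step_id_type (...)
                print(row)


asyncio.run(explain())
```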
```diff
@@ -447,6 +557,9 @@ class SQLiteStorageBackend(StorageBackend):
         """Record an event to the append-only event log."""
         db = self._ensure_connected()
 
+        # Extract step_id from event data for indexed column
+        step_id = event.data.get("step_id") if event.data else None
+
         # Get next sequence number
         async with db.execute(
             "SELECT COALESCE(MAX(sequence), -1) + 1 FROM events WHERE run_id = ?",
@@ -457,8 +570,8 @@ class SQLiteStorageBackend(StorageBackend):
 
         await db.execute(
             """
-            INSERT INTO events (event_id, run_id, sequence, type, timestamp, data)
-            VALUES (?, ?, ?, ?, ?, ?)
+            INSERT INTO events (event_id, run_id, sequence, type, timestamp, data, step_id)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
             """,
             (
                 event.event_id,
@@ -467,6 +580,7 @@ class SQLiteStorageBackend(StorageBackend):
                 event.type.value,
                 event.timestamp.isoformat(),
                 json.dumps(event.data),
+                step_id,
             ),
         )
         await db.commit()
@@ -529,6 +643,56 @@ class SQLiteStorageBackend(StorageBackend):
 
         return self._row_to_event(row)
 
+    async def has_event(
+        self,
+        run_id: str,
+        event_type: str,
+        **filters: str,
+    ) -> bool:
+        """
+        Check if an event exists using optimized indexed queries.
+
+        When step_id is the only filter, uses a direct indexed query (O(1) lookup).
+        For other filters, falls back to loading events of the type and filtering in Python.
+
+        Args:
+            run_id: Workflow run identifier
+            event_type: Event type to check for
+            **filters: Additional filters for event data fields
+
+        Returns:
+            True if a matching event exists, False otherwise
+        """
+        db = self._ensure_connected()
+
+        # Optimized path: if only filtering by step_id, use indexed column directly
+        if filters.keys() == {"step_id"}:
+            step_id = str(filters["step_id"])
+            async with db.execute(
+                """
+                SELECT 1 FROM events
+                WHERE run_id = ? AND type = ? AND step_id = ?
+                LIMIT 1
+                """,
+                (run_id, event_type, step_id),
+            ) as cursor:
+                row = await cursor.fetchone()
+                return row is not None
+
+        # Fallback: load events of type and filter in Python
+        events = await self.get_events(run_id, event_types=[event_type])
+
+        for event in events:
+            match = True
+            for key, value in filters.items():
+                if str(event.data.get(key)) != str(value):
+                    match = False
+                    break
+            if match:
+                return True
+
+        return False
+
     # Step Operations
 
     async def create_step(self, step: StepExecution) -> None:
@@ -1119,6 +1283,7 @@ class SQLiteStorageBackend(StorageBackend):
 
     def _row_to_event(self, row: Any) -> Event:
         """Convert database row to Event object."""
+        # Column order: event_id[0], run_id[1], sequence[2], type[3], timestamp[4], data[5], step_id[6]
         return Event(
             event_id=row[0],
             run_id=row[1],
@@ -1126,6 +1291,7 @@ class SQLiteStorageBackend(StorageBackend):
             type=EventType(row[3]),
             timestamp=datetime.fromisoformat(row[4]),
             data=json.loads(row[5]) if row[5] else {},
+            # step_id is in row[6] but not used in Event object (it's denormalized for query optimization)
         )
 
     def _row_to_step_execution(self, row: Any) -> StepExecution:
```
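Callers don't have to change anything to benefit from the new column: `step_id` keeps living inside `event.data`, `record_event()` copies it into the indexed column on write, and `_row_to_event()` ignores it on read. A sketch, to be run inside an async function with a connected backend, using only the `Event` fields visible in `_row_to_event()`; the enum member name and the payload are placeholders:

```python
# Sketch using only the Event fields visible in _row_to_event(); the
# EventType member name and the data payload below are placeholders.
from datetime import UTC, datetime
from uuid import uuid4

from pyworkflow.engine.events import Event, EventType

event = Event(
    event_id=str(uuid4()),
    run_id="run-123",
    sequence=0,  # placeholder; record_event() computes the real next sequence
    type=EventType.STEP_COMPLETED,  # assumed member name, for illustration
    timestamp=datetime.now(UTC),
    data={"step_id": "fetch_data", "result": "ok"},
)
await storage.record_event(event)  # "fetch_data" lands in the indexed step_id column
```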
{pyworkflow_engine-0.1.21.dist-info → pyworkflow_engine-0.1.23.dist-info}/RECORD
CHANGED

```diff
@@ -1,4 +1,4 @@
-pyworkflow/__init__.py,sha256=
+pyworkflow/__init__.py,sha256=g24ALTMah13y4dLj3Doyb-0wsQnTdu3_9wCXFsshdW4,6281
 pyworkflow/config.py,sha256=pKwPrpCwBJiDpB-MIjM0U7GW1TFmQFO341pihL5-vTM,14455
 pyworkflow/discovery.py,sha256=snW3l4nvY3Nc067TGlwtn_qdzTU9ybN7YPr8FbvY8iM,8066
 pyworkflow/aws/__init__.py,sha256=Ak_xHcR9LTRX-CwcS0XecYmzrXZw4EM3V9aKBBDEmIk,1741
@@ -6,11 +6,11 @@ pyworkflow/aws/context.py,sha256=C_wBr_YRUYT4IHyaAGPdiqRPNbH8mDLkoMpVtQVIg28,822
 pyworkflow/aws/handler.py,sha256=0SnQuIfQVD99QKMCRFPtrsrV_l1LYKFkzPIRx_2UkSI,5849
 pyworkflow/aws/testing.py,sha256=WrRk9wjbycM-UyHFQWNnA83UE9IrYnhfT38WrbxQT2U,8844
 pyworkflow/celery/__init__.py,sha256=FywVyqnT8AYz9cXkr-wel7_-N7dHFsPNASEPMFESf4Q,1179
-pyworkflow/celery/app.py,sha256=
+pyworkflow/celery/app.py,sha256=MXmD5N8DCrX9YpdccOWPy2ob7k4vg9r0ix1kDgDuR3w,15753
 pyworkflow/celery/loop.py,sha256=mu8cIfMJYgHAoGCN_DdDoNoXK3QHzHpLmrPCyFDQYIY,3016
 pyworkflow/celery/scheduler.py,sha256=Ms4rqRpdpMiLM8l4y3DK-Divunj9afYuUaGGoNQe7P4,11288
 pyworkflow/celery/singleton.py,sha256=9gdVHzqFjShZ9OJOJlJNABUg9oqnl6ITGROtomcOtsg,16070
-pyworkflow/celery/tasks.py,sha256=
+pyworkflow/celery/tasks.py,sha256=FiZFBTHUQz16AWYmsHUGmTIxXQ5zNewyFWUGRizS4b8,88813
 pyworkflow/cli/__init__.py,sha256=tcbe-fcZmyeEKUy_aEo8bsEF40HsNKOwvyMBZIJZPwc,3844
 pyworkflow/cli/__main__.py,sha256=LxLLS4FEEPXa5rWpLTtKuivn6Xp9pGia-QKGoxt9SS0,148
 pyworkflow/cli/commands/__init__.py,sha256=IXvnTgukALckkO8fTlZhVRq80ojSqpnIIgboAg_-yZU,39
@@ -73,22 +73,24 @@ pyworkflow/serialization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 pyworkflow/serialization/decoder.py,sha256=F7Ofuw1Yzo82iSFFXiK2yoW_v2YRbLMpX3CQbKjm0Ls,3860
 pyworkflow/serialization/encoder.py,sha256=ZBwAxe5Bb4MCfFJePHw7ArJlIbBieSwUgsysGCI2iPU,4108
 pyworkflow/storage/__init__.py,sha256=LhVjLNZdo4Mi5dEC75hjSPnbQr9jBoIsTOrC8vzTGOM,1924
-pyworkflow/storage/base.py,sha256=
-pyworkflow/storage/cassandra.py,sha256=
+pyworkflow/storage/base.py,sha256=CTvZwy8cHMdbOV3ZYVlbsx0fBNZN9cusbbh3oqGmrao,18598
+pyworkflow/storage/cassandra.py,sha256=3VDwiM9zaxKNwp5vdX1FdqpwFm0AZfTWtVDTmIKBA7Q,63538
 pyworkflow/storage/config.py,sha256=45UMPxRoqgK4ZwE7HIK9ctxE_eoK3eAE_1tRhn3Psd4,12410
-pyworkflow/storage/dynamodb.py,sha256=
-pyworkflow/storage/file.py,sha256=
-pyworkflow/storage/memory.py,sha256=
-pyworkflow/storage/mysql.py,sha256=
-pyworkflow/storage/postgres.py,sha256=
+pyworkflow/storage/dynamodb.py,sha256=hER4rOc0GWh0-yk0YiFrxZkiHJ6DKEOpvZQ9GCEx8QQ,56501
+pyworkflow/storage/file.py,sha256=Ag4K5Rt-8YaENZLx-HnN77EAfNc37WqkymmaPlVJbYg,32665
+pyworkflow/storage/memory.py,sha256=HiuGU9jgzKwNGrF98YHdHYrzQTNUu96VyL5HVVeII7A,22179
+pyworkflow/storage/mysql.py,sha256=lS9dZHmtPL14cY1lrzDFilFKmsjMD_b0NnYJfiPDFKQ,52537
+pyworkflow/storage/postgres.py,sha256=Afjh1qQ9KL7Xb-tT3fVRGK1vJ_j46r-4Lym0XHhOkf8,53893
 pyworkflow/storage/schemas.py,sha256=o1ntTYNgQQ5YVuXtPCShtENEsndVjdrXclWrkCgkitg,18002
-pyworkflow/storage/sqlite.py,sha256=
+pyworkflow/storage/sqlite.py,sha256=EJ8n66WfhZnfFLKcNdUOoOCswdftttoH6T31spS78Qo,47667
+pyworkflow/storage/migrations/__init__.py,sha256=R8b-VoVzpYJ6A-Z5FDNbrbOO-2Nnt4NrcAS8Wo08VPo,425
+pyworkflow/storage/migrations/base.py,sha256=Thq38E8BDpC7EmBlwpfm727Zyz4TVmADQ1kEttWo0oY,9237
 pyworkflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyworkflow/utils/duration.py,sha256=C-itmiSQQlplw7j6XB679hLF9xYGnyCwm7twO88OF8U,3978
 pyworkflow/utils/schedule.py,sha256=dO_MkGFyfwZpb0LDlW6BGyZzlPuQIA6dc6j9nk9lc4Y,10691
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
-pyworkflow_engine-0.1.
+pyworkflow_engine-0.1.23.dist-info/licenses/LICENSE,sha256=Y49RCTZ5ayn_yzBcRxnyIFdcMCyuYm150aty_FIznfY,1080
+pyworkflow_engine-0.1.23.dist-info/METADATA,sha256=PrBwRu_9khJBesYc1p_cyXTNgHvM_uZU-XUyejvOjkk,19628
+pyworkflow_engine-0.1.23.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+pyworkflow_engine-0.1.23.dist-info/entry_points.txt,sha256=3IGAfuylnS39U0YX0pxnjrj54kB4iT_bNYrmsiDB-dE,51
+pyworkflow_engine-0.1.23.dist-info/top_level.txt,sha256=FLTv9pQmLDBXrQdLOhTMIS3njFibliMsQEfumqmdzBE,11
+pyworkflow_engine-0.1.23.dist-info/RECORD,,
```
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|