pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +5 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,713 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from io import StringIO
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Callable, Protocol
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
from click.exceptions import Exit
|
|
10
|
+
|
|
11
|
+
from trilogy import Executor
|
|
12
|
+
from trilogy.scripts.dependency import (
|
|
13
|
+
DependencyResolver,
|
|
14
|
+
DependencyStrategy,
|
|
15
|
+
ScriptNode,
|
|
16
|
+
create_script_nodes,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ExecutionResult:
    """Result of executing a single script."""

    # Script that was executed (or skipped).
    node: ScriptNode
    # True when the script ran to completion without raising.
    success: bool
    # Exception raised during execution, or a RuntimeError("Skipped due to
    # failed dependency") for nodes skipped because a dependency failed;
    # None on success.
    error: Exception | None = None
    duration: float = 0.0  # seconds
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ParallelExecutionSummary:
    """Summary of a parallel execution run."""

    # Total number of scripts discovered for the run.
    total_scripts: int
    # Count of scripts that ran successfully.
    successful: int
    # Count of scripts that failed or were skipped due to a failed dependency.
    failed: int
    # Wall-clock duration of the whole run, in seconds.
    total_duration: float
    # Per-script results, including synthetic "skipped" entries.
    results: list[ExecutionResult]

    @property
    def all_succeeded(self) -> bool:
        """True when no script failed or was skipped."""
        return self.failed == 0
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ExecutionStrategy(Protocol):
    """Protocol for execution traversal strategies."""

    def execute(
        self,
        graph: nx.DiGraph,
        resolver: DependencyResolver,
        max_workers: int,
        executor_factory: Callable[[ScriptNode], Any],
        execution_fn: Callable[[Any, ScriptNode], None],
        on_script_start: Callable[[ScriptNode], None] | None = None,
        on_script_complete: Callable[[ExecutionResult], None] | None = None,
    ) -> list[ExecutionResult]:
        """
        Execute scripts according to the strategy.

        Args:
            graph: The dependency graph (edges point from deps to dependents).
            resolver: The resolver that built the graph, available to
                strategies that need to re-inspect dependencies.
            max_workers: Maximum parallel workers.
            executor_factory: Factory to create executor for each script.
            execution_fn: Function to execute a script.
            on_script_start: Optional callback invoked before a script runs.
            on_script_complete: Optional callback invoked with every
                ExecutionResult, including synthetic "skipped" results.

        Returns:
            List of ExecutionResult for all scripts.
        """
        ...
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Type aliases for cleaner signatures
CompletedSet = set[ScriptNode]  # nodes with a recorded outcome (ran, failed, or skipped)
FailedSet = set[ScriptNode]  # nodes that failed, or were skipped due to a failed dependency
InProgressSet = set[ScriptNode]  # nodes currently claimed by a worker thread
ResultsList = list[ExecutionResult]  # accumulated results, including skip entries
RemainingDepsDict = dict[ScriptNode, int]  # node -> count of not-yet-successful dependencies
ReadyList = list[ScriptNode]  # FIFO queue (popped from index 0) of runnable nodes
OnCompleteCallback = Callable[[ExecutionResult], None] | None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _propagate_failure(
    failed_node: ScriptNode,
    graph: nx.DiGraph,
    completed: CompletedSet,
    in_progress: InProgressSet,
    results: ResultsList,
    failed: FailedSet,
    on_script_complete: OnCompleteCallback,
) -> None:
    """
    Mark all *unstarted* transitive dependents of a failed node as failed and skipped.

    Each skipped dependent receives a synthetic ExecutionResult carrying a
    RuntimeError, is added to both ``completed`` and ``failed``, and is
    reported through ``on_script_complete`` when provided. Nodes already
    completed or currently in progress are left alone.

    Uses an explicit worklist instead of recursion so that a very long
    dependency chain cannot exhaust the interpreter's recursion limit.

    Args:
        failed_node: The node whose failure is being propagated.
        graph: Dependency graph; edges point from dependency to dependent.
        completed: Set of nodes with a recorded outcome (mutated in place).
        in_progress: Nodes currently executing (never marked skipped here).
        results: Result accumulator (mutated in place).
        failed: Set of failed/skipped nodes (mutated in place).
        on_script_complete: Optional per-result callback.
    """
    pending = [failed_node]
    while pending:
        current = pending.pop()
        for dependent in graph.successors(current):
            if dependent in completed or dependent in in_progress:
                continue
            skip_result = ExecutionResult(
                node=dependent,
                success=False,
                error=RuntimeError("Skipped due to failed dependency"),
                duration=0.0,
            )
            results.append(skip_result)
            # Mark before enqueueing so each node is visited at most once.
            completed.add(dependent)
            failed.add(dependent)
            if on_script_complete:
                on_script_complete(skip_result)
            pending.append(dependent)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _get_next_ready(ready: ReadyList) -> ScriptNode | None:
    """Pop and return the head of the ready queue, or None when it is empty."""
    return ready.pop(0) if ready else None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _mark_node_complete(
    node: ScriptNode,
    success: bool,
    graph: nx.DiGraph,
    completed: CompletedSet,
    failed: FailedSet,
    in_progress: InProgressSet,
    remaining_deps: RemainingDepsDict,
    ready: ReadyList,
    results: ResultsList,
    on_script_complete: OnCompleteCallback,
) -> None:
    """
    Mark a node as complete, update dependent counts, and add newly ready/skipped nodes.

    On success, each unstarted dependent's remaining-dependency count is
    decremented; when it hits zero the dependent is either queued as ready or,
    if any of its dependencies failed earlier, skipped. On failure, all
    unstarted dependents are skipped immediately, with the skip cascading
    transitively via _propagate_failure.

    Must be called while holding the shared state lock — every collection here
    is shared worker state.

    Args:
        node: The node that just finished executing.
        success: Whether the node's execution succeeded.
        graph: Dependency graph; edges point from dependency to dependent.
        completed/failed/in_progress: Shared state sets (mutated in place).
        remaining_deps: Per-node count of unfinished dependencies.
        ready: FIFO queue of runnable nodes (appended to here).
        results: Result accumulator.
        on_script_complete: Optional per-result callback.
    """

    def _skip_dependent(dependent: ScriptNode) -> None:
        # Record a synthetic "skipped" failure for a dependent whose
        # dependency failed, then cascade the skip to its own dependents.
        skip_result = ExecutionResult(
            node=dependent,
            success=False,
            error=RuntimeError("Skipped due to failed dependency"),
            duration=0.0,
        )
        results.append(skip_result)
        completed.add(dependent)
        failed.add(dependent)
        if on_script_complete:
            on_script_complete(skip_result)
        _propagate_failure(
            dependent,
            graph,
            completed,
            in_progress,
            results,
            failed,
            on_script_complete,
        )

    in_progress.discard(node)
    completed.add(node)
    if not success:
        failed.add(node)

    # Update dependents
    for dependent in graph.successors(node):
        if dependent in completed or dependent in in_progress:
            continue

        if success:
            remaining_deps[dependent] -= 1
            if remaining_deps[dependent] == 0:
                # All dependencies finished - but one may have failed earlier.
                deps = set(graph.predecessors(dependent))
                if deps & failed:
                    _skip_dependent(dependent)
                else:
                    ready.append(dependent)
        else:
            # Current node failed - mark this dependent as skipped
            if dependent not in failed:
                _skip_dependent(dependent)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _is_execution_done(completed: CompletedSet, total_count: int) -> bool:
    """Return True once every node in the graph has a recorded outcome."""
    return total_count <= len(completed)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _execute_single(
    node: ScriptNode,
    executor_factory: Callable[[ScriptNode], Executor],
    execution_fn: Callable[[Any, ScriptNode], None],
) -> ExecutionResult:
    """Execute a single script and return the result.

    Never raises: exceptions from the factory, the execution function, and
    ``executor.close()`` are all captured in the returned ExecutionResult.
    This matters because the caller is a worker thread — if this function
    raised, the node would never be marked complete and the other workers
    would wait on the condition variable forever.

    Args:
        node: Script to execute.
        executor_factory: Creates the executor for this script.
        execution_fn: Runs the script given (executor, node).

    Returns:
        ExecutionResult with success flag, captured error, and duration.
    """
    start_time = datetime.now()
    executor = None
    error: Exception | None = None
    try:
        executor = executor_factory(node)
        execution_fn(executor, node)
    except Exception as e:
        error = e
    finally:
        # Close in a finally so the executor is released on every path;
        # original code let close() exceptions escape and kill the worker.
        if executor is not None:
            try:
                executor.close()
            except Exception as close_error:
                # Report a close failure only if nothing else already failed.
                if error is None:
                    error = close_error
    duration = (datetime.now() - start_time).total_seconds()
    return ExecutionResult(
        node=node,
        success=error is None,
        error=error,
        duration=duration,
    )
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _create_worker(
    graph: nx.DiGraph,
    lock: threading.Lock,
    work_available: threading.Condition,
    completed: CompletedSet,
    failed: FailedSet,
    in_progress: InProgressSet,
    remaining_deps: RemainingDepsDict,
    ready: ReadyList,
    results: ResultsList,
    total_count: int,
    executor_factory: Callable[[ScriptNode], Any],
    execution_fn: Callable[[Any, ScriptNode], None],
    on_script_start: Callable[[ScriptNode], None] | None,
    on_script_complete: OnCompleteCallback,
) -> Callable[[], None]:
    """
    Create a worker function for thread execution to process the dependency graph.

    The returned closure loops forever: wait for a ready node (or global
    completion), claim it under the condition's lock, execute it outside the
    lock, then re-acquire the lock to record the result and wake the other
    workers. All shared collections are only mutated while the lock is held
    (``work_available`` is constructed over the same ``lock``).

    Args:
        graph: Dependency graph; edges point from dependency to dependent.
        lock: Lock guarding all shared state below.
        work_available: Condition (built on ``lock``) signalled when ready or
            completion state changes.
        completed/failed/in_progress/remaining_deps/ready/results: Shared
            mutable state owned by the calling strategy.
        total_count: Total node count; execution is done once
            ``len(completed) >= total_count``.
        executor_factory/execution_fn: How to build an executor and run a node.
        on_script_start/on_script_complete: Optional progress callbacks.

    Returns:
        A zero-argument worker function suitable as a Thread target.
    """

    def worker() -> None:
        while True:
            node = None

            with work_available:
                # Wait for work or global completion
                while not ready and not _is_execution_done(completed, total_count):
                    work_available.wait()

                if _is_execution_done(completed, total_count):
                    return

                node = _get_next_ready(ready)
                if node is None:
                    # Should be impossible if total_count check is correct, but handles race condition safety
                    continue

                # Claim the node before releasing the lock so no other worker
                # (or failure propagation) touches it.
                in_progress.add(node)

            # Execute outside the lock
            if node is not None:
                # NOTE(review): on_script_start runs outside the lock; an
                # exception here would kill this worker thread and could leave
                # other workers waiting — confirm callbacks are exception-safe.
                if on_script_start:
                    on_script_start(node)
                result = _execute_single(node, executor_factory, execution_fn)

                # Use the lock for state updates and notification
                with lock:
                    results.append(result)

                    # NOTE(review): on_script_complete is invoked while holding
                    # the lock (here and inside _mark_node_complete) — keep
                    # callbacks fast and non-raising.
                    if on_script_complete:
                        on_script_complete(result)

                    _mark_node_complete(
                        node,
                        result.success,
                        graph,
                        completed,
                        failed,
                        in_progress,
                        remaining_deps,
                        ready,
                        results,
                        on_script_complete,
                    )
                    work_available.notify_all()  # Notify other workers of new ready/completed state

    return worker
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
class EagerBFSStrategy:
    """
    Eager Breadth-First Search (BFS) execution strategy.

    Scripts execute as soon as all their dependencies complete, maximizing parallelism.
    Uses a thread pool coordinated by locks and condition variables.
    """

    def execute(
        self,
        graph: nx.DiGraph,
        resolver: DependencyResolver,
        max_workers: int,
        executor_factory: Callable[[ScriptNode], Any],
        execution_fn: Callable[[Any, ScriptNode], None],
        on_script_start: Callable[[ScriptNode], None] | None = None,
        on_script_complete: Callable[[ExecutionResult], None] | None = None,
    ) -> list[ExecutionResult]:
        """Run every script in ``graph``, launching each as soon as its deps finish."""
        if not graph.nodes():
            return []

        state_lock = threading.Lock()
        work_signal = threading.Condition(state_lock)

        # Shared mutable state, guarded by state_lock.
        done_nodes: CompletedSet = set()
        failed_nodes: FailedSet = set()
        running_nodes: InProgressSet = set()
        outcomes: ResultsList = []

        # Each node starts with its in-degree as the count of unfinished deps.
        pending_counts: RemainingDepsDict = {
            n: graph.in_degree(n) for n in graph.nodes()
        }

        # Nodes with zero dependencies are immediately runnable.
        runnable: ReadyList = [n for n in graph.nodes() if pending_counts[n] == 0]

        node_total = len(graph.nodes())

        # Build the shared worker closure.
        worker = _create_worker(
            graph=graph,
            lock=state_lock,
            work_available=work_signal,
            completed=done_nodes,
            failed=failed_nodes,
            in_progress=running_nodes,
            remaining_deps=pending_counts,
            ready=runnable,
            results=outcomes,
            total_count=node_total,
            executor_factory=executor_factory,
            execution_fn=execution_fn,
            on_script_start=on_script_start,
            on_script_complete=on_script_complete,
        )

        # Spin up at most one thread per node.
        threads: list[threading.Thread] = [
            threading.Thread(target=worker, daemon=True)
            for _ in range(min(max_workers, node_total))
        ]
        for thread in threads:
            thread.start()

        # Kick any workers that went to sleep before seeing the initial queue.
        with work_signal:
            work_signal.notify_all()

        # Block until every worker has drained the graph.
        for thread in threads:
            thread.join()

        return outcomes
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
class ParallelExecutor:
    """
    Executes scripts in parallel while respecting dependencies.

    Uses an Eager BFS traversal by default, running scripts as soon as their
    dependencies complete.
    """

    def __init__(
        self,
        max_workers: int = 5,
        dependency_strategy: DependencyStrategy | None = None,
        execution_strategy: ExecutionStrategy | None = None,
    ):
        """
        Initialize the parallel executor.

        Args:
            max_workers: Maximum number of parallel workers.
            dependency_strategy: Strategy for resolving dependencies.
            execution_strategy: Strategy for traversing the graph during execution.
        """
        self.max_workers = max_workers
        # Resolver discovers dependencies and builds the graph.
        self.resolver = DependencyResolver(strategy=dependency_strategy)
        # Traversal strategy; defaults to eager BFS when none is supplied.
        self.execution_strategy = execution_strategy or EagerBFSStrategy()

    def execute(
        self,
        root: Path,
        executor_factory: Callable[[ScriptNode], Any],
        execution_fn: Callable[[Any, ScriptNode], None],
        on_script_start: Callable[[ScriptNode], None] | None = None,
        on_script_complete: Callable[[ExecutionResult], None] | None = None,
    ) -> ParallelExecutionSummary:
        """
        Execute scripts in parallel respecting dependencies.

        Args:
            root: Root path (folder or single file) to find scripts.
            executor_factory: Factory function to create an executor for a script.
            execution_fn: Function that executes a script given (executor, node).
            on_script_start: Optional callback before each script runs.
            on_script_complete: Optional callback after each script finishes.

        Returns:
            ParallelExecutionSummary with all results.
        """
        run_started = datetime.now()

        # Build the dependency graph from a folder or a single file.
        if root.is_dir():
            graph = self.resolver.build_folder_graph(root)
            script_nodes = list(graph.nodes())
        else:
            script_nodes = create_script_nodes([root])
            graph = self.resolver.build_graph(script_nodes)

        script_count = len(script_nodes)

        # Delegate traversal + execution to the configured strategy.
        results = self.execution_strategy.execute(
            graph=graph,
            resolver=self.resolver,
            max_workers=self.max_workers,
            executor_factory=executor_factory,
            execution_fn=execution_fn,
            on_script_start=on_script_start,
            on_script_complete=on_script_complete,
        )

        elapsed = (datetime.now() - run_started).total_seconds()
        ok_count = sum(1 for outcome in results if outcome.success)

        return ParallelExecutionSummary(
            total_scripts=script_count,
            successful=ok_count,
            failed=script_count - ok_count,
            total_duration=elapsed,
            results=results,
        )

    def get_folder_execution_plan(self, folder: Path) -> nx.DiGraph:
        """
        Get the execution plan (dependency graph) for all scripts in a folder.
        """
        return self.resolver.build_folder_graph(folder)

    def get_execution_plan(self, files: list[Path]) -> nx.DiGraph:
        """
        Get the execution plan (dependency graph) without executing.
        """
        return self.resolver.build_graph(create_script_nodes(files))
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def run_single_script_execution(
    files: list[StringIO | Path],
    directory: Path,
    input_type: str,
    input_name: str,
    edialect,
    param: tuple[str, ...],
    conn_args,
    debug: bool,
    execution_mode: str,
    config,
) -> None:
    """
    Run single script execution with polished multi-statement progress display.

    Only the first entry of ``files`` is read and executed.

    Args:
        files: Script sources; either in-memory StringIO buffers or file paths.
        directory: Working directory
        input_type: Type of input (file, query, etc.)
        input_name: Name of the input
        edialect: Dialect to use (has a ``.value`` attribute; presumably an
            enum member — confirm against trilogy.dialect.enums)
        param: Environment parameters
        conn_args: Connection arguments
        debug: Debug mode flag
        execution_mode: One of 'run', 'integration', or 'unit'
        config: Runtime config passed through to executor creation
    """
    # Imports are local to avoid paying CLI import costs at module load.
    from trilogy.scripts.common import (
        create_executor,
        handle_execution_exception,
        validate_datasources,
    )
    from trilogy.scripts.display import (
        RICH_AVAILABLE,
        create_progress_context,
        print_success,
        show_execution_info,
        show_execution_start,
        show_execution_summary,
    )
    from trilogy.scripts.single_execution import (
        execute_queries_simple,
        execute_queries_with_progress,
    )

    show_execution_info(input_type, input_name, edialect.value, debug)

    # NOTE(review): `exec` shadows the builtin; consider renaming in a follow-up.
    exec = create_executor(param, directory, conn_args, edialect, debug, config)
    # Load script text from the first (and only used) input.
    base = files[0]
    if isinstance(base, StringIO):
        text = [base.getvalue()]
    else:
        with open(base, "r") as raw:
            text = [raw.read()]

    if execution_mode == "run":
        # Parse all scripts and collect queries
        queries = []
        try:
            for script in text:
                queries += exec.parse_text(script)
        except Exception as e:
            handle_execution_exception(e, debug=debug)

        start = datetime.now()
        show_execution_start(len(queries))

        # Execute with progress tracking for multiple statements
        # NOTE(review): `progress` is created but never entered or passed on —
        # presumably execute_queries_with_progress builds its own context;
        # confirm whether this object is needed at all.
        if len(queries) > 1 and RICH_AVAILABLE:
            progress = create_progress_context()
        else:
            progress = None

        try:
            if progress:
                exception = execute_queries_with_progress(exec, queries)
            else:
                exception = execute_queries_simple(exec, queries)

            total_duration = datetime.now() - start
            show_execution_summary(len(queries), total_duration, exception is None)

            if exception:
                raise Exit(1) from exception
        except Exit:
            # Re-raise CLI exits untouched so the exit code is preserved.
            raise
        except Exception as e:
            handle_execution_exception(e, debug=debug)

    elif execution_mode == "integration":
        # Integration tests: parse, then validate against real datasources.
        for script in text:
            exec.parse_text(script)
        validate_datasources(exec, mock=False, quiet=False)
        print_success("Integration tests passed successfully!")

    elif execution_mode == "unit":
        # Unit tests: parse, then validate against mocked datasources.
        for script in text:
            exec.parse_text(script)
        validate_datasources(exec, mock=True, quiet=False)
        print_success("Unit tests passed successfully!")
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def get_execution_strategy(strategy_name: str):
    """Look up an execution strategy by name and return a fresh instance.

    Raises:
        ValueError: If ``strategy_name`` is not a registered strategy.
    """
    strategies = {
        "eager_bfs": EagerBFSStrategy,
    }
    strategy_cls = strategies.get(strategy_name)
    if strategy_cls is None:
        available = ", ".join(strategies.keys())
        raise ValueError(
            f"Unknown execution strategy: {strategy_name}. "
            f"Available: {available}"
        )
    return strategy_cls()
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def run_parallel_execution(
    cli_params,
    execution_fn,
    execution_mode: str = "run",
) -> None:
    """
    Run parallel execution for directory inputs, or single-script execution
    with polished progress display for single files/inline queries.

    Raises click.exceptions.Exit(1) when any script fails, and
    FileNotFoundError when the input path cannot be classified.

    Args:
        cli_params: CLI runtime parameters containing all execution settings
            (reads .input, .config_path, .param, .conn_args, .debug,
            .execution_strategy)
        execution_fn: Function to execute each script (exec, node, quiet) -> None
        execution_mode: One of 'run', 'integration', or 'unit'
    """
    # Imports are local to avoid paying CLI import costs at module load.
    from trilogy.scripts.common import (
        create_executor_for_script,
        merge_runtime_config,
        resolve_input_information,
    )
    from trilogy.scripts.dependency import ETLDependencyStrategy
    from trilogy.scripts.display import (
        print_error,
        print_success,
        show_execution_info,
        show_parallel_execution_start,
        show_parallel_execution_summary,
        show_script_result,
    )

    # Check if input is a directory (parallel execution)
    pathlib_input = Path(cli_params.input)
    files_iter, directory, input_type, input_name, config = resolve_input_information(
        cli_params.input, cli_params.config_path
    )
    files = list(files_iter)

    # Merge CLI params with config file
    edialect, parallelism = merge_runtime_config(cli_params, config)
    # Non-existent path (inline query) or a single file: use the simpler
    # single-script path instead of the thread pool.
    if not pathlib_input.exists() or len(files) == 1:
        # Inline query - use polished single-script execution

        run_single_script_execution(
            files=files,
            directory=directory,
            input_type=input_type,
            input_name=input_name,
            edialect=edialect,
            param=cli_params.param,
            conn_args=cli_params.conn_args,
            debug=cli_params.debug,
            execution_mode=execution_mode,
            config=config,
        )
        return
    # Multiple files - use parallel execution
    show_execution_info(input_type, input_name, edialect.value, cli_params.debug)

    # Get execution strategy
    strategy = get_execution_strategy(cli_params.execution_strategy)

    # Set up parallel executor
    parallel_exec = ParallelExecutor(
        max_workers=parallelism,
        dependency_strategy=ETLDependencyStrategy(),
        execution_strategy=strategy,
    )

    # Get execution plan for display
    # NOTE(review): the plan graph is rebuilt inside parallel_exec.execute();
    # this copy is only for the start-banner stats.
    if pathlib_input.is_dir():
        execution_plan = parallel_exec.get_folder_execution_plan(pathlib_input)
    elif pathlib_input.is_file():
        execution_plan = parallel_exec.get_execution_plan([pathlib_input])
    else:
        raise FileNotFoundError(f"Input path '{pathlib_input}' does not exist.")

    num_edges = execution_plan.number_of_edges()
    num_nodes = execution_plan.number_of_nodes()

    show_parallel_execution_start(
        num_nodes, num_edges, parallelism, cli_params.execution_strategy
    )

    # Factory to create executor for each script
    def executor_factory(node: ScriptNode) -> Executor:
        return create_executor_for_script(
            node,
            cli_params.param,
            cli_params.conn_args,
            edialect,
            cli_params.debug,
            config,
        )

    # Wrap execution_fn to pass quiet=True for parallel execution
    def quiet_execution_fn(exec: Executor, node: ScriptNode) -> None:
        execution_fn(exec, node, quiet=True)

    # Run parallel execution
    summary = parallel_exec.execute(
        root=pathlib_input,
        executor_factory=executor_factory,
        execution_fn=quiet_execution_fn,
        on_script_complete=show_script_result,
    )

    show_parallel_execution_summary(summary)

    if not summary.all_succeeded:
        print_error("Some scripts failed during execution.")
        raise Exit(1)

    print_success("All scripts executed successfully!")
|