palimpzest 0.7.21__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- palimpzest/__init__.py +37 -6
- palimpzest/agents/__init__.py +0 -0
- palimpzest/agents/compute_agents.py +0 -0
- palimpzest/agents/search_agents.py +637 -0
- palimpzest/constants.py +259 -197
- palimpzest/core/data/context.py +393 -0
- palimpzest/core/data/context_manager.py +163 -0
- palimpzest/core/data/dataset.py +634 -0
- palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
- palimpzest/core/elements/groupbysig.py +16 -13
- palimpzest/core/elements/records.py +166 -75
- palimpzest/core/lib/schemas.py +152 -390
- palimpzest/core/{data/dataclasses.py → models.py} +306 -170
- palimpzest/policy.py +2 -27
- palimpzest/prompts/__init__.py +35 -5
- palimpzest/prompts/agent_prompts.py +357 -0
- palimpzest/prompts/context_search.py +9 -0
- palimpzest/prompts/convert_prompts.py +61 -5
- palimpzest/prompts/filter_prompts.py +50 -5
- palimpzest/prompts/join_prompts.py +163 -0
- palimpzest/prompts/moa_proposer_convert_prompts.py +5 -5
- palimpzest/prompts/prompt_factory.py +358 -46
- palimpzest/prompts/validator.py +239 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
- palimpzest/query/execution/execution_strategy.py +210 -317
- palimpzest/query/execution/execution_strategy_type.py +5 -7
- palimpzest/query/execution/mab_execution_strategy.py +249 -136
- palimpzest/query/execution/parallel_execution_strategy.py +153 -244
- palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
- palimpzest/query/generators/generators.py +157 -330
- palimpzest/query/operators/__init__.py +15 -5
- palimpzest/query/operators/aggregate.py +50 -33
- palimpzest/query/operators/compute.py +201 -0
- palimpzest/query/operators/convert.py +27 -21
- palimpzest/query/operators/critique_and_refine_convert.py +7 -5
- palimpzest/query/operators/distinct.py +62 -0
- palimpzest/query/operators/filter.py +22 -13
- palimpzest/query/operators/join.py +402 -0
- palimpzest/query/operators/limit.py +3 -3
- palimpzest/query/operators/logical.py +198 -80
- palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
- palimpzest/query/operators/physical.py +27 -21
- palimpzest/query/operators/project.py +3 -3
- palimpzest/query/operators/rag_convert.py +7 -7
- palimpzest/query/operators/retrieve.py +9 -9
- palimpzest/query/operators/scan.py +81 -42
- palimpzest/query/operators/search.py +524 -0
- palimpzest/query/operators/split_convert.py +10 -8
- palimpzest/query/optimizer/__init__.py +7 -9
- palimpzest/query/optimizer/cost_model.py +108 -441
- palimpzest/query/optimizer/optimizer.py +123 -181
- palimpzest/query/optimizer/optimizer_strategy.py +66 -61
- palimpzest/query/optimizer/plan.py +352 -67
- palimpzest/query/optimizer/primitives.py +43 -19
- palimpzest/query/optimizer/rules.py +484 -646
- palimpzest/query/optimizer/tasks.py +127 -58
- palimpzest/query/processor/config.py +41 -76
- palimpzest/query/processor/query_processor.py +73 -18
- palimpzest/query/processor/query_processor_factory.py +46 -38
- palimpzest/schemabuilder/schema_builder.py +15 -28
- palimpzest/utils/model_helpers.py +27 -77
- palimpzest/utils/progress.py +114 -102
- palimpzest/validator/__init__.py +0 -0
- palimpzest/validator/validator.py +306 -0
- {palimpzest-0.7.21.dist-info → palimpzest-0.8.0.dist-info}/METADATA +6 -1
- palimpzest-0.8.0.dist-info/RECORD +95 -0
- palimpzest/core/lib/fields.py +0 -141
- palimpzest/prompts/code_synthesis_prompts.py +0 -28
- palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
- palimpzest/query/generators/api_client_factory.py +0 -30
- palimpzest/query/operators/code_synthesis_convert.py +0 -488
- palimpzest/query/operators/map.py +0 -130
- palimpzest/query/processor/nosentinel_processor.py +0 -33
- palimpzest/query/processor/processing_strategy_type.py +0 -28
- palimpzest/query/processor/sentinel_processor.py +0 -88
- palimpzest/query/processor/streaming_processor.py +0 -149
- palimpzest/sets.py +0 -405
- palimpzest/utils/datareader_helpers.py +0 -61
- palimpzest/utils/demo_helpers.py +0 -75
- palimpzest/utils/field_helpers.py +0 -69
- palimpzest/utils/generation_helpers.py +0 -69
- palimpzest/utils/sandbox.py +0 -183
- palimpzest-0.7.21.dist-info/RECORD +0 -95
- /palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
- {palimpzest-0.7.21.dist-info → palimpzest-0.8.0.dist-info}/WHEEL +0 -0
- {palimpzest-0.7.21.dist-info → palimpzest-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.7.21.dist-info → palimpzest-0.8.0.dist-info}/top_level.txt +0 -0
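The file list above shows the core module reorganization in 0.8.0: `core/data/dataclasses.py` becomes `core/models.py`, `core/data/datareaders.py` becomes `core/data/iter_dataset.py`, and the removed `sets.py` is succeeded by the new `core/data/dataset.py`. For downstream code, the one import change confirmed by the diff below is the `PlanStats` move; here is a minimal migration sketch (the other paths are read off the rename entries above, and their exported names should be verified against the 0.8.0 sources):

```python
# 0.7.21: PlanStats lived in palimpzest.core.data.dataclasses
# from palimpzest.core.data.dataclasses import PlanStats

# 0.8.0: that module was renamed to palimpzest.core.models (confirmed below)
from palimpzest.core.models import PlanStats

# Other renames from the file list (exported names unverified -- check the
# 0.8.0 sources before relying on these paths):
#   palimpzest/core/data/datareaders.py -> palimpzest/core/data/iter_dataset.py
#   palimpzest/sets.py                  -> palimpzest/core/data/dataset.py
```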
```diff
--- a/palimpzest/query/execution/single_threaded_execution_strategy.py
+++ b/palimpzest/query/execution/single_threaded_execution_strategy.py
@@ -1,11 +1,13 @@
 import logging
 
-from palimpzest.core.data.dataclasses import PlanStats
 from palimpzest.core.elements.records import DataRecord
+from palimpzest.core.models import PlanStats
 from palimpzest.query.execution.execution_strategy import ExecutionStrategy
 from palimpzest.query.operators.aggregate import AggregateOp
+from palimpzest.query.operators.join import JoinOp
 from palimpzest.query.operators.limit import LimitScanOp
-from palimpzest.query.operators.
+from palimpzest.query.operators.physical import PhysicalOperator
+from palimpzest.query.operators.scan import ContextScanOp, ScanPhysicalOp
 from palimpzest.query.optimizer.plan import PhysicalPlan
 from palimpzest.utils.progress import create_progress_manager
 
@@ -25,61 +27,80 @@ class SequentialSingleThreadExecutionStrategy(ExecutionStrategy):
         super().__init__(*args, **kwargs)
         self.max_workers = 1
 
-    def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, list], plan_stats: PlanStats) -> tuple[list[DataRecord], PlanStats]:
+    def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, dict[str, list]], plan_stats: PlanStats) -> tuple[list[DataRecord], PlanStats]:
         # execute the plan one operator at a time
         output_records = []
-        for
+        for topo_idx, operator in enumerate(plan):
             # if we've filtered out all records, terminate early
-
-
+            source_unique_full_op_ids = (
+                [f"source_{operator.get_full_op_id()}"]
+                if isinstance(operator, (ContextScanOp, ScanPhysicalOp))
+                else plan.get_source_unique_full_op_ids(topo_idx, operator)
+            )
+            unique_full_op_id = f"{topo_idx}-{operator.get_full_op_id()}"
+            num_inputs = sum(len(input_queues[unique_full_op_id][source_unique_full_op_id]) for source_unique_full_op_id in source_unique_full_op_ids)
             if num_inputs == 0:
                 break
 
             # begin to process this operator
             records, record_op_stats = [], []
-            logger.info(f"Processing operator {operator.op_name()} ({
+            logger.info(f"Processing operator {operator.op_name()} ({unique_full_op_id})")
 
             # if this operator is an aggregate, process all the records in the input_queue
             if isinstance(operator, AggregateOp):
-
+                source_unique_full_op_id = source_unique_full_op_ids[0]
+                record_set = operator(candidates=input_queues[unique_full_op_id][source_unique_full_op_id])
                 records = record_set.data_records
                 record_op_stats = record_set.record_op_stats
                 num_outputs = sum(record.passed_operator for record in records)
 
                 # update the progress manager
-                self.progress_manager.incr(
+                self.progress_manager.incr(unique_full_op_id, num_inputs=1, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
+
+            # if this operator is a join, process all pairs of records from the two input queues
+            elif isinstance(operator, JoinOp):
+                left_full_source_op_id = source_unique_full_op_ids[0]
+                left_num_inputs = len(input_queues[unique_full_op_id][left_full_source_op_id])
+                left_input_records = [input_queues[unique_full_op_id][left_full_source_op_id].pop(0) for _ in range(left_num_inputs)]
+
+                right_full_source_op_id = source_unique_full_op_ids[1]
+                right_num_inputs = len(input_queues[unique_full_op_id][right_full_source_op_id])
+                right_input_records = [input_queues[unique_full_op_id][right_full_source_op_id].pop(0) for _ in range(right_num_inputs)]
+
+                record_set, num_inputs_processed = operator(left_input_records, right_input_records)
+                records = record_set.data_records
+                record_op_stats = record_set.record_op_stats
+                num_outputs = sum(record.passed_operator for record in records)
+
+                # update the progress manager
+                self.progress_manager.incr(unique_full_op_id, num_inputs=num_inputs_processed, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
 
             # otherwise, process the records in the input queue for this operator one at a time
             else:
-
+                source_unique_full_op_id = source_unique_full_op_ids[0]
+                for input_record in input_queues[unique_full_op_id][source_unique_full_op_id]:
                     record_set = operator(input_record)
                     records.extend(record_set.data_records)
                     record_op_stats.extend(record_set.record_op_stats)
                     num_outputs = sum(record.passed_operator for record in record_set.data_records)
 
                     # update the progress manager
-                    self.progress_manager.incr(
+                    self.progress_manager.incr(unique_full_op_id, num_inputs=1, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
 
                     # finish early if this is a limit
                     if isinstance(operator, LimitScanOp) and len(records) == operator.limit:
                         break
 
             # update plan stats
-            plan_stats.add_record_op_stats(record_op_stats)
-
-            # add records to the cache
-            self._add_records_to_cache(operator.target_cache_id, records)
+            plan_stats.add_record_op_stats(unique_full_op_id, record_op_stats)
 
             # update next input_queue (if it exists)
-            output_records = [record for record in records if record.passed_operator]
-
-
-            input_queues[
+            output_records = [record for record in records if record.passed_operator]
+            next_unique_full_op_id = plan.get_next_unique_full_op_id(topo_idx, operator)
+            if next_unique_full_op_id is not None:
+                input_queues[next_unique_full_op_id][unique_full_op_id] = output_records
 
-            logger.info(f"Finished processing operator {operator.op_name()} ({
-
-            # close the cache
-            self._close_cache([op.target_cache_id for op in plan.operators])
+            logger.info(f"Finished processing operator {operator.op_name()} ({unique_full_op_id}), and generated {len(records)} records")
 
         # finalize plan stats
         plan_stats.finish()
```
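The main structural change in `_execute_plan` is the shape of `input_queues`: it is now nested, keyed first by the consuming operator's unique id (`f"{topo_idx}-{operator.get_full_op_id()}"`) and then by each source operator's unique id, so a `JoinOp` can keep its left and right inputs in separate queues. A toy sketch of that layout (the operator ids are fabricated for illustration, not real palimpzest ids):

```python
# Sketch of the nested input_queues layout used by the new _execute_plan.
input_queues: dict[str, dict[str, list]] = {
    # scans read from a synthetic "source_..." queue seeded with their inputs
    "0-scan_a1b2": {"source_scan_a1b2": ["rec0", "rec1"]},
    "1-scan_c3d4": {"source_scan_c3d4": ["rec2"]},
    # a join keeps one inner queue per source operator (left and right)
    "2-join_e5f6": {"0-scan_a1b2": [], "1-scan_c3d4": []},
}

# num_inputs sums over all of an operator's source queues, mirroring the
# sum(...) expression in the hunk above
unique_full_op_id = "2-join_e5f6"
num_inputs = sum(len(queue) for queue in input_queues[unique_full_op_id].values())
print(num_inputs)  # 0 -- the join has no work until the scans push outputs downstream
```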
```diff
@@ -88,8 +109,6 @@ class SequentialSingleThreadExecutionStrategy(ExecutionStrategy):
 
     def execute_plan(self, plan: PhysicalPlan) -> tuple[list[DataRecord], PlanStats]:
         """Initialize the stats and execute the plan."""
-        # for now, assert that the first operator in the plan is a ScanPhysicalOp
-        assert isinstance(plan.operators[0], ScanPhysicalOp), "First operator in physical plan must be a ScanPhysicalOp"
         logger.info(f"Executing plan {plan.plan_id} with {self.max_workers} workers")
         logger.info(f"Plan Details: {plan}")
 
@@ -104,7 +123,7 @@ class SequentialSingleThreadExecutionStrategy(ExecutionStrategy):
         self.progress_manager = create_progress_manager(plan, num_samples=self.num_samples, progress=self.progress)
         self.progress_manager.start()
 
-        # NOTE: we must handle progress manager outside of
+        # NOTE: we must handle progress manager outside of _execute_plan to ensure that it is shut down correctly;
         # if we don't have the `finally:` branch, then program crashes can cause future program runs to fail
         # because the progress manager cannot get a handle to the console
         try:
@@ -139,31 +158,43 @@ class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
         super().__init__(*args, **kwargs)
         self.max_workers = 1
 
-    def _any_queue_not_empty(self, queues: dict[str, list]) -> bool:
+    def _any_queue_not_empty(self, queues: dict[str, list] | dict[str, dict[str, list]]) -> bool:
         """Helper function to check if any queue is not empty."""
-
-
-
-
-
-
-
-
-
-
-
-
+        for _, value in queues.items():
+            if isinstance(value, dict):
+                if any(len(subqueue) > 0 for subqueue in value.values()):
+                    return True
+            elif len(value) > 0:
+                return True
+        return False
+
+    def _upstream_ops_finished(self, plan: PhysicalPlan, topo_idx: int, operator: PhysicalOperator, input_queues: dict[str, dict[str, list]]) -> bool:
+        """Helper function to check if agg / join operator is ready to process its inputs."""
+        # for agg / join operator, we can only process it when all upstream operators have finished processing their inputs
+        upstream_unique_full_op_ids = plan.get_upstream_unique_full_op_ids(topo_idx, operator)
+        upstream_input_queues = {upstream_unique_full_op_id: input_queues[upstream_unique_full_op_id] for upstream_unique_full_op_id in upstream_unique_full_op_ids}
+        return not self._any_queue_not_empty(upstream_input_queues)
+
+
+    def _execute_plan(self, plan: PhysicalPlan, input_queues: dict[str, dict[str, list]], plan_stats: PlanStats) -> tuple[list[DataRecord], PlanStats]:
         # execute the plan until either:
         # 1. all records have been processed, or
         # 2. the final limit operation has completed (we break out of the loop if this happens)
         final_output_records = []
         while self._any_queue_not_empty(input_queues):
-            for
+            for topo_idx, operator in enumerate(plan):
                 # if this operator does not have enough inputs to execute, then skip it
-
-
-
-
+                source_unique_full_op_ids = (
+                    [f"source_{operator.get_full_op_id()}"]
+                    if isinstance(operator, (ContextScanOp, ScanPhysicalOp))
+                    else plan.get_source_unique_full_op_ids(topo_idx, operator)
+                )
+                unique_full_op_id = f"{topo_idx}-{operator.get_full_op_id()}"
+
+                num_inputs = sum(len(input_queues[unique_full_op_id][source_unique_full_op_id]) for source_unique_full_op_id in source_unique_full_op_ids)
+                agg_op_not_ready = isinstance(operator, AggregateOp) and not self._upstream_ops_finished(plan, topo_idx, operator, input_queues)
+                join_op_not_ready = isinstance(operator, JoinOp) and not self._upstream_ops_finished(plan, topo_idx, operator, input_queues)
+                if num_inputs == 0 or agg_op_not_ready or join_op_not_ready:
                     continue
 
                 # create empty lists for records and execution stats generated by executing this operator on its next input(s)
```
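`_any_queue_not_empty` is widened to accept either the flat or the nested queue layout; the check reduces to "some leaf list still holds records". A standalone copy of that logic, exercised on a toy nested dict (ids fabricated for illustration):

```python
# Standalone copy of the emptiness check from the hunk above.
def any_queue_not_empty(queues: dict[str, list] | dict[str, dict[str, list]]) -> bool:
    for _, value in queues.items():
        if isinstance(value, dict):
            if any(len(subqueue) > 0 for subqueue in value.values()):
                return True
        elif len(value) > 0:
            return True
    return False

queues = {"2-join_e5f6": {"0-scan_a1b2": [], "1-scan_c3d4": ["rec"]}}
assert any_queue_not_empty(queues)      # one leaf queue still has work
queues["2-join_e5f6"]["1-scan_c3d4"].clear()
assert not any_queue_not_empty(queues)  # all leaves drained
```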
```diff
@@ -171,49 +202,63 @@ class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
 
                 # if the next operator is an aggregate, process all the records in the input_queue
                 if isinstance(operator, AggregateOp):
-
+                    source_unique_full_op_id = source_unique_full_op_ids[0]
+                    input_records = [input_queues[unique_full_op_id][source_unique_full_op_id].pop(0) for _ in range(num_inputs)]
                     record_set = operator(candidates=input_records)
                     records = record_set.data_records
                     record_op_stats = record_set.record_op_stats
                     num_outputs = sum(record.passed_operator for record in records)
 
                     # update the progress manager
-                    self.progress_manager.incr(
+                    self.progress_manager.incr(unique_full_op_id, num_inputs=1, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
+
+                # if this operator is a join, process all pairs of records from the two input queues
+                elif isinstance(operator, JoinOp):
+                    left_full_source_op_id = source_unique_full_op_ids[0]
+                    left_num_inputs = len(input_queues[unique_full_op_id][left_full_source_op_id])
+                    left_input_records = [input_queues[unique_full_op_id][left_full_source_op_id].pop(0) for _ in range(left_num_inputs)]
+
+                    right_full_source_op_id = source_unique_full_op_ids[1]
+                    right_num_inputs = len(input_queues[unique_full_op_id][right_full_source_op_id])
+                    right_input_records = [input_queues[unique_full_op_id][right_full_source_op_id].pop(0) for _ in range(right_num_inputs)]
+
+                    record_set, num_inputs_processed = operator(left_input_records, right_input_records)
+                    records = record_set.data_records
+                    record_op_stats = record_set.record_op_stats
+                    num_outputs = sum(record.passed_operator for record in records)
+
+                    # update the progress manager
+                    self.progress_manager.incr(unique_full_op_id, num_inputs=num_inputs_processed, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
 
                 # otherwise, process the next record in the input queue for this operator
                 else:
-
+                    source_unique_full_op_id = source_unique_full_op_ids[0]
+                    input_record = input_queues[unique_full_op_id][source_unique_full_op_id].pop(0)
                     record_set = operator(input_record)
                     records = record_set.data_records
                     record_op_stats = record_set.record_op_stats
                     num_outputs = sum(record.passed_operator for record in records)
 
                     # update the progress manager
-                    self.progress_manager.incr(
+                    self.progress_manager.incr(unique_full_op_id, num_inputs=1, num_outputs=num_outputs, total_cost=record_set.get_total_cost())
 
                 # update plan stats
-                plan_stats.add_record_op_stats(record_op_stats)
-
-                # add records to the cache
-                self._add_records_to_cache(operator.target_cache_id, records)
+                plan_stats.add_record_op_stats(unique_full_op_id, record_op_stats)
 
                 # update next input_queue or final_output_records
-                output_records = [record for record in records if record.passed_operator]
-
-
-                input_queues[
+                output_records = [record for record in records if record.passed_operator]
+                next_unique_full_op_id = plan.get_next_unique_full_op_id(topo_idx, operator)
+                if next_unique_full_op_id is not None:
+                    input_queues[next_unique_full_op_id][unique_full_op_id].extend(output_records)
                 else:
                     final_output_records.extend(output_records)
 
-                logger.info(f"Finished processing operator {operator.op_name()} ({
+                logger.info(f"Finished processing operator {operator.op_name()} ({unique_full_op_id}) on {num_inputs} records")
 
                 # break out of loop if the final operator is a LimitScanOp and we've reached its limit
-                if isinstance(plan.
+                if isinstance(plan.operator, LimitScanOp) and len(final_output_records) == plan.operator.limit:
                     break
 
-        # close the cache
-        self._close_cache([op.target_cache_id for op in plan.operators])
-
         # finalize plan stats
         plan_stats.finish()
 
```
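In both strategies the join path drains the left and right queues with `pop(0)`, sizing each batch before the comprehension so a fixed count is popped, then calls `operator(left_input_records, right_input_records)`, which returns a record set together with the number of inputs it processed. A toy stand-in reproducing just that calling convention (the real `JoinOp.__call__` in `palimpzest/query/operators/join.py` does much more):

```python
# Toy stand-in for the JoinOp calling convention used above; it illustrates
# the drain-then-call pattern and the (record_set, num_inputs_processed)
# return shape, not JoinOp itself.
def toy_join(left_records: list, right_records: list) -> tuple[list, int]:
    outputs = [(left, right) for left in left_records for right in right_records]
    return outputs, len(left_records) + len(right_records)

left_queue, right_queue = ["l0", "l1"], ["r0"]

# size the batch before popping, exactly like the hunks above
left_num_inputs = len(left_queue)
left_batch = [left_queue.pop(0) for _ in range(left_num_inputs)]
right_num_inputs = len(right_queue)
right_batch = [right_queue.pop(0) for _ in range(right_num_inputs)]

record_set, num_inputs_processed = toy_join(left_batch, right_batch)
print(record_set)            # [('l0', 'r0'), ('l1', 'r0')]
print(num_inputs_processed)  # 3
```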
```diff
@@ -221,8 +266,6 @@ class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
 
     def execute_plan(self, plan: PhysicalPlan):
         """Initialize the stats and execute the plan."""
-        # for now, assert that the first operator in the plan is a ScanPhysicalOp
-        assert isinstance(plan.operators[0], ScanPhysicalOp), "First operator in physical plan must be a ScanPhysicalOp"
         logger.info(f"Executing plan {plan.plan_id} with {self.max_workers} workers")
         logger.info(f"Plan Details: {plan}")
 
@@ -237,7 +280,7 @@ class PipelinedSingleThreadExecutionStrategy(ExecutionStrategy):
         self.progress_manager = create_progress_manager(plan, self.num_samples, self.progress)
         self.progress_manager.start()
 
-        # NOTE: we must handle progress manager outside of
+        # NOTE: we must handle progress manager outside of _execute_plan to ensure that it is shut down correctly;
         # if we don't have the `finally:` branch, then program crashes can cause future program runs to fail
         # because the progress manager cannot get a handle to the console
         try:
```