palimpzest 0.7.20__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. palimpzest/__init__.py +37 -6
  2. palimpzest/agents/__init__.py +0 -0
  3. palimpzest/agents/compute_agents.py +0 -0
  4. palimpzest/agents/search_agents.py +637 -0
  5. palimpzest/constants.py +259 -197
  6. palimpzest/core/data/context.py +393 -0
  7. palimpzest/core/data/context_manager.py +163 -0
  8. palimpzest/core/data/dataset.py +634 -0
  9. palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
  10. palimpzest/core/elements/groupbysig.py +16 -13
  11. palimpzest/core/elements/records.py +166 -75
  12. palimpzest/core/lib/schemas.py +152 -390
  13. palimpzest/core/{data/dataclasses.py → models.py} +306 -170
  14. palimpzest/policy.py +2 -27
  15. palimpzest/prompts/__init__.py +35 -5
  16. palimpzest/prompts/agent_prompts.py +357 -0
  17. palimpzest/prompts/context_search.py +9 -0
  18. palimpzest/prompts/convert_prompts.py +61 -5
  19. palimpzest/prompts/filter_prompts.py +50 -5
  20. palimpzest/prompts/join_prompts.py +163 -0
  21. palimpzest/prompts/moa_proposer_convert_prompts.py +5 -5
  22. palimpzest/prompts/prompt_factory.py +358 -46
  23. palimpzest/prompts/validator.py +239 -0
  24. palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
  25. palimpzest/query/execution/execution_strategy.py +210 -317
  26. palimpzest/query/execution/execution_strategy_type.py +5 -7
  27. palimpzest/query/execution/mab_execution_strategy.py +249 -136
  28. palimpzest/query/execution/parallel_execution_strategy.py +153 -244
  29. palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
  30. palimpzest/query/generators/generators.py +157 -330
  31. palimpzest/query/operators/__init__.py +15 -5
  32. palimpzest/query/operators/aggregate.py +50 -33
  33. palimpzest/query/operators/compute.py +201 -0
  34. palimpzest/query/operators/convert.py +27 -21
  35. palimpzest/query/operators/critique_and_refine_convert.py +7 -5
  36. palimpzest/query/operators/distinct.py +62 -0
  37. palimpzest/query/operators/filter.py +22 -13
  38. palimpzest/query/operators/join.py +402 -0
  39. palimpzest/query/operators/limit.py +3 -3
  40. palimpzest/query/operators/logical.py +198 -80
  41. palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
  42. palimpzest/query/operators/physical.py +27 -21
  43. palimpzest/query/operators/project.py +3 -3
  44. palimpzest/query/operators/rag_convert.py +7 -7
  45. palimpzest/query/operators/retrieve.py +9 -9
  46. palimpzest/query/operators/scan.py +81 -42
  47. palimpzest/query/operators/search.py +524 -0
  48. palimpzest/query/operators/split_convert.py +10 -8
  49. palimpzest/query/optimizer/__init__.py +7 -9
  50. palimpzest/query/optimizer/cost_model.py +108 -441
  51. palimpzest/query/optimizer/optimizer.py +123 -181
  52. palimpzest/query/optimizer/optimizer_strategy.py +66 -61
  53. palimpzest/query/optimizer/plan.py +352 -67
  54. palimpzest/query/optimizer/primitives.py +43 -19
  55. palimpzest/query/optimizer/rules.py +484 -646
  56. palimpzest/query/optimizer/tasks.py +127 -58
  57. palimpzest/query/processor/config.py +41 -76
  58. palimpzest/query/processor/query_processor.py +73 -18
  59. palimpzest/query/processor/query_processor_factory.py +46 -38
  60. palimpzest/schemabuilder/schema_builder.py +15 -28
  61. palimpzest/utils/model_helpers.py +27 -77
  62. palimpzest/utils/progress.py +114 -102
  63. palimpzest/validator/__init__.py +0 -0
  64. palimpzest/validator/validator.py +306 -0
  65. {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/METADATA +6 -1
  66. palimpzest-0.8.0.dist-info/RECORD +95 -0
  67. palimpzest/core/lib/fields.py +0 -141
  68. palimpzest/prompts/code_synthesis_prompts.py +0 -28
  69. palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
  70. palimpzest/query/generators/api_client_factory.py +0 -30
  71. palimpzest/query/operators/code_synthesis_convert.py +0 -488
  72. palimpzest/query/operators/map.py +0 -130
  73. palimpzest/query/processor/nosentinel_processor.py +0 -33
  74. palimpzest/query/processor/processing_strategy_type.py +0 -28
  75. palimpzest/query/processor/sentinel_processor.py +0 -88
  76. palimpzest/query/processor/streaming_processor.py +0 -149
  77. palimpzest/sets.py +0 -405
  78. palimpzest/utils/datareader_helpers.py +0 -61
  79. palimpzest/utils/demo_helpers.py +0 -75
  80. palimpzest/utils/field_helpers.py +0 -69
  81. palimpzest/utils/generation_helpers.py +0 -69
  82. palimpzest/utils/sandbox.py +0 -183
  83. palimpzest-0.7.20.dist-info/RECORD +0 -95
  84. /palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
  85. {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/WHEEL +0 -0
  86. {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/licenses/LICENSE +0 -0
  87. {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/top_level.txt +0 -0
@@ -21,6 +21,7 @@ from rich.table import Table
21
21
  from palimpzest.query.operators.aggregate import AggregateOp
22
22
  from palimpzest.query.operators.convert import LLMConvert
23
23
  from palimpzest.query.operators.filter import LLMFilter
24
+ from palimpzest.query.operators.join import JoinOp
24
25
  from palimpzest.query.operators.limit import LimitScanOp
25
26
  from palimpzest.query.operators.physical import PhysicalOperator
26
27
  from palimpzest.query.operators.retrieve import RetrieveOp
@@ -57,7 +58,7 @@ class ProgressManager(ABC):
57
58
  Initialize the progress manager for the given plan. This function takes in a plan,
58
59
  the number of samples to process (if specified).
59
60
 
60
- If `num_samples` is None, then the entire DataReader will be scanned.
61
+ If `num_samples` is None, then the entire Dataset will be scanned.
61
62
 
62
63
  For each operator which is not an `AggregateOp` or `LimitScanOp`, we set its task `total`
63
64
  to the number of inputs to be processed by the plan. As intermediate operators process
@@ -81,51 +82,50 @@ class ProgressManager(ABC):
81
82
  expand=True, # Use full width
82
83
  )
83
84
 
84
- # initialize mapping from full_op_id --> ProgressStats
85
- self.full_op_id_to_stats: dict[str, ProgressStats] = {}
85
+ # initialize mapping from unique_full_op_id --> ProgressStats
86
+ self.unique_full_op_id_to_stats: dict[str, ProgressStats] = {}
86
87
 
87
- # initialize mapping from full_op_id --> task
88
- self.full_op_id_to_task = {}
88
+ # initialize mapping from unique_full_op_id --> task
89
+ self.unique_full_op_id_to_task = {}
89
90
 
90
91
  # initialize start time
91
92
  self.start_time = None
92
93
 
93
- # create mapping from full_op_id --> next_op
94
- self.full_op_id_to_next_op: dict[str, PhysicalOperator] = {}
95
- for op_idx, op in enumerate(plan.operators):
96
- full_op_id = op.get_full_op_id()
97
- next_op = plan.operators[op_idx + 1] if op_idx + 1 < len(plan.operators) else None
98
- self.full_op_id_to_next_op[full_op_id] = next_op
99
-
100
- # compute the total number of inputs to be processed by the plan
101
- datareader_len = len(plan.operators[0].datareader)
102
- total = datareader_len if num_samples is None else min(num_samples, datareader_len)
94
+ # TODO: store plan and use its methods within incr()
95
+ # create mapping from unique_full_op_id --> input unique_full_op_ids
96
+ self.unique_full_op_id_to_input_unique_full_op_ids: dict[str, list[str]] = {}
97
+ for topo_idx, op in enumerate(plan):
98
+ unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
99
+ input_unique_full_op_ids = plan.get_source_unique_full_op_ids(topo_idx, op)
100
+ self.unique_full_op_id_to_input_unique_full_op_ids[unique_full_op_id] = input_unique_full_op_ids
101
+
102
+ # create mapping from unique_full_op_id --> next_op
103
+ self.unique_full_op_id_to_next_op_and_id: dict[str, tuple[PhysicalOperator, str]] = {}
104
+ for topo_idx, op in enumerate(plan):
105
+ unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
106
+ next_op, next_unique_full_op_id = plan.get_next_unique_full_op_and_id(topo_idx, op)
107
+ self.unique_full_op_id_to_next_op_and_id[unique_full_op_id] = (next_op, next_unique_full_op_id)
103
108
 
104
109
  # add a task to the progress manager for each operator in the plan
105
- for op in plan.operators:
110
+ est_total_outputs, _ = plan.get_est_total_outputs(num_samples)
111
+ for topo_idx, op in enumerate(plan):
106
112
  # get the op id and a short string representation of the op; (str(op) is too long)
107
113
  op_str = f"{op.op_name()} ({op.get_op_id()})"
114
+ unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
115
+ self.add_task(unique_full_op_id, op_str, est_total_outputs[unique_full_op_id])
108
116
 
109
- # update the `total` if we encounter an AggregateOp or LimitScanOp
110
- if isinstance(op, AggregateOp):
111
- total = 1
112
- elif isinstance(op, LimitScanOp):
113
- total = op.limit
114
-
115
- self.add_task(op.get_full_op_id(), op_str, total)
116
-
117
- def get_task_total(self, full_op_id: str) -> int:
117
+ def get_task_total(self, unique_full_op_id: str) -> int:
118
118
  """Return the current total value for the given task."""
119
- task = self.full_op_id_to_task[full_op_id]
119
+ task = self.unique_full_op_id_to_task[unique_full_op_id]
120
120
  return self.progress._tasks[task].total
121
121
 
122
- def get_task_description(self, full_op_id: str) -> str:
122
+ def get_task_description(self, unique_full_op_id: str) -> str:
123
123
  """Return the current description for the given task."""
124
- task = self.full_op_id_to_task[full_op_id]
124
+ task = self.unique_full_op_id_to_task[unique_full_op_id]
125
125
  return self.progress._tasks[task].description
126
126
 
127
127
  @abstractmethod
128
- def add_task(self, full_op_id: str, op_str: str, total: int):
128
+ def add_task(self, unique_full_op_id: str, op_str: str, total: int):
129
129
  """Initialize progress tracking for operator execution with total items"""
130
130
  pass
131
131
 
@@ -135,18 +135,16 @@ class ProgressManager(ABC):
135
135
  pass
136
136
 
137
137
  @abstractmethod
138
- def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
138
+ def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
139
139
  """
140
- Advance the progress bar for the given operator by one. Modify the downstream operators'
140
+ Advance the progress bar for the given operator. Modify the downstream operators'
141
141
  progress bar `total` to reflect the number of outputs produced by this operator.
142
142
 
143
- NOTE: The semantics of this function are that every time it is executed we advance the
144
- progress bar by 1. This is because the progress bar represents what fraction of the inputs
145
- have been processed by the operator. `num_outputs` specifies how many outputs were generated
146
- by the operator when processing the input for which `incr()` was called. E.g. a filter which
147
- filters an input record will advance its progress bar by 1, but the next operator will now
148
- have 1 fewer inputs to process. Alternatively, a convert which generates 3 `num_outputs` will
149
- increase the inputs for the next operator by `delta = num_outputs - 1 = 2`.
143
+ NOTE: `num_outputs` specifies how many outputs were generated by the operator when processing
144
+ the `num_inputs` inputs for which `incr()` was called. E.g. a filter which filters one input record
145
+ will advance its progress bar by 1, but the next operator will now have 1 fewer inputs to process.
146
+ Alternatively, a convert which generates 3 `num_outputs` for 2 `num_inputs` will increase the inputs
147
+ for the next operator by `delta = num_outputs - num_inputs = 3 - 2 = 1`.
150
148
  """
151
149
  pass
152
150
 
@@ -162,13 +160,13 @@ class MockProgressManager(ProgressManager):
162
160
  def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int | None = None):
163
161
  pass
164
162
 
165
- def add_task(self, full_op_id: str, op_str: str, total: int):
163
+ def add_task(self, unique_full_op_id: str, op_str: str, total: int):
166
164
  pass
167
165
 
168
166
  def start(self):
169
167
  pass
170
168
 
171
- def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
169
+ def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
172
170
  pass
173
171
 
174
172
  def finish(self):
@@ -181,7 +179,7 @@ class PZProgressManager(ProgressManager):
181
179
  super().__init__(plan, num_samples)
182
180
  self.console = Console()
183
181
 
184
- def add_task(self, full_op_id: str, op_str: str, total: int):
182
+ def add_task(self, unique_full_op_id: str, op_str: str, total: int):
185
183
  """Add a new task to the progress bar"""
186
184
  task = self.progress.add_task(
187
185
  f"[blue]{op_str}",
@@ -194,10 +192,10 @@ class PZProgressManager(ProgressManager):
194
192
  )
195
193
 
196
194
  # store the mapping of operator ID to task ID
197
- self.full_op_id_to_task[full_op_id] = task
195
+ self.unique_full_op_id_to_task[unique_full_op_id] = task
198
196
 
199
197
  # initialize the stats for this operation
200
- self.full_op_id_to_stats[full_op_id] = ProgressStats(start_time=time.time())
198
+ self.unique_full_op_id_to_stats[unique_full_op_id] = ProgressStats(start_time=time.time())
201
199
 
202
200
  def start(self):
203
201
  # print a newline before starting to separate from previous output
@@ -209,41 +207,53 @@ class PZProgressManager(ProgressManager):
209
207
  # start progress bar
210
208
  self.progress.start()
211
209
 
212
- def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
210
+ def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
213
211
  # get the task for the given operation
214
- task = self.full_op_id_to_task.get(full_op_id)
212
+ task = self.unique_full_op_id_to_task.get(unique_full_op_id)
215
213
 
216
214
  # update statistics with any additional keyword arguments
217
215
  if kwargs != {}:
218
- self.update_stats(full_op_id, **kwargs)
216
+ self.update_stats(unique_full_op_id, **kwargs)
219
217
 
220
218
  # update progress bar and recent text in one update
221
219
  if display_text is not None:
222
- self.full_op_id_to_stats[full_op_id].recent_text = display_text
220
+ self.unique_full_op_id_to_stats[unique_full_op_id].recent_text = display_text
223
221
 
224
- # if num_outputs is not 1, update the downstream operators' progress bar total for any
225
- # operator which is not an AggregateOp or LimitScanOp
226
- delta = num_outputs - 1
222
+ # update the downstream operators' progress bar total for any operator which is not an AggregateOp or LimitScanOp
223
+ delta = num_outputs - num_inputs
227
224
  if delta != 0:
228
- next_op = self.full_op_id_to_next_op[full_op_id]
225
+ current_unique_full_op_id = unique_full_op_id
226
+ next_op, next_unique_full_op_id = self.unique_full_op_id_to_next_op_and_id[unique_full_op_id]
229
227
  while next_op is not None:
230
228
  if not isinstance(next_op, (AggregateOp, LimitScanOp)):
231
- next_full_op_id = next_op.get_full_op_id()
232
- next_task = self.full_op_id_to_task[next_full_op_id]
233
- self.progress.update(next_task, total=self.get_task_total(next_full_op_id) + delta)
234
-
235
- next_op = self.full_op_id_to_next_op[next_full_op_id]
229
+ next_task = self.unique_full_op_id_to_task[next_unique_full_op_id]
230
+ multiplier = 1
231
+ if isinstance(next_op, JoinOp):
232
+ # for joins, scale the delta by the number of inputs from the other side of the join
233
+ left_input_unique_full_op_id, right_input_unique_input_op_id = self.unique_full_op_id_to_input_unique_full_op_ids[next_unique_full_op_id]
234
+ if current_unique_full_op_id == left_input_unique_full_op_id:
235
+ multiplier = self.get_task_total(right_input_unique_input_op_id)
236
+ elif current_unique_full_op_id == right_input_unique_input_op_id:
237
+ multiplier = self.get_task_total(left_input_unique_full_op_id)
238
+ else:
239
+ raise ValueError(f"Current op ID {current_unique_full_op_id} not found in join inputs {left_input_unique_full_op_id}, {right_input_unique_input_op_id}")
240
+ delta_adjusted = delta * multiplier
241
+ self.progress.update(next_task, total=self.get_task_total(next_unique_full_op_id) + delta_adjusted)
242
+
243
+ # move to the next operator in the plan
244
+ current_unique_full_op_id = next_unique_full_op_id
245
+ next_op, next_unique_full_op_id = self.unique_full_op_id_to_next_op_and_id[next_unique_full_op_id]
236
246
 
237
247
  # advance the progress bar for this task
238
248
  self.progress.update(
239
249
  task,
240
- advance=1,
241
- description=f"[bold blue]{self.get_task_description(full_op_id)}",
242
- cost=self.full_op_id_to_stats[full_op_id].total_cost,
243
- success=self.full_op_id_to_stats[full_op_id].success_count,
244
- failed=self.full_op_id_to_stats[full_op_id].failure_count,
250
+ advance=num_inputs,
251
+ description=f"[bold blue]{self.get_task_description(unique_full_op_id)}",
252
+ cost=self.unique_full_op_id_to_stats[unique_full_op_id].total_cost,
253
+ success=self.unique_full_op_id_to_stats[unique_full_op_id].success_count,
254
+ failed=self.unique_full_op_id_to_stats[unique_full_op_id].failure_count,
245
255
  memory=get_memory_usage(),
246
- recent=f"{self.full_op_id_to_stats[full_op_id].recent_text}" if display_text is not None else "",
256
+ recent=f"{self.unique_full_op_id_to_stats[unique_full_op_id].recent_text}" if display_text is not None else "",
247
257
  refresh=True,
248
258
  )
249
259
 
@@ -251,24 +261,24 @@ class PZProgressManager(ProgressManager):
251
261
  self.progress.stop()
252
262
 
253
263
  # compute total cost, success, and failure
254
- total_cost = sum(stats.total_cost for stats in self.full_op_id_to_stats.values())
255
- # success_count = sum(stats.success_count for stats in self.full_op_id_to_stats.values())
256
- # failure_count = sum(stats.failure_count for stats in self.full_op_id_to_stats.values())
264
+ total_cost = sum(stats.total_cost for stats in self.unique_full_op_id_to_stats.values())
265
+ # success_count = sum(stats.success_count for stats in self.unique_full_op_id_to_stats.values())
266
+ # failure_count = sum(stats.failure_count for stats in self.unique_full_op_id_to_stats.values())
257
267
 
258
268
  # Print final stats on new lines after progress display
259
269
  print(f"Total time: {time.time() - self.start_time:.2f}s")
260
270
  print(f"Total cost: ${total_cost:.4f}")
261
271
  # print(f"Success rate: {success_count}/{success_count + failure_count}")
262
272
 
263
- def update_stats(self, full_op_id: str, **kwargs):
273
+ def update_stats(self, unique_full_op_id: str, **kwargs):
264
274
  """Update progress statistics"""
265
275
  for key, value in kwargs.items():
266
- if hasattr(self.full_op_id_to_stats[full_op_id], key):
276
+ if hasattr(self.unique_full_op_id_to_stats[unique_full_op_id], key):
267
277
  if key != "total_cost":
268
- setattr(self.full_op_id_to_stats[full_op_id], key, value)
278
+ setattr(self.unique_full_op_id_to_stats[unique_full_op_id], key, value)
269
279
  else:
270
- self.full_op_id_to_stats[full_op_id].total_cost += value
271
- self.full_op_id_to_stats[full_op_id].memory_usage_mb = get_memory_usage()
280
+ self.unique_full_op_id_to_stats[unique_full_op_id].total_cost += value
281
+ self.unique_full_op_id_to_stats[unique_full_op_id].memory_usage_mb = get_memory_usage()
272
282
 
273
283
  class PZSentinelProgressManager(ProgressManager):
274
284
  def __init__(self, plan: SentinelPlan, sample_budget: int):
@@ -313,24 +323,25 @@ class PZSentinelProgressManager(ProgressManager):
313
323
  )
314
324
  self.live_display = Live(self.progress_table, refresh_per_second=10)
315
325
 
316
- # initialize mapping from logical_op_id --> ProgressStats
317
- self.logical_op_id_to_stats: dict[str, ProgressStats] = {}
326
+ # initialize mapping from unique_logical_op_id --> ProgressStats
327
+ self.unique_logical_op_id_to_stats: dict[str, ProgressStats] = {}
318
328
 
319
- # initialize mapping from logical_op_id --> task
320
- self.logical_op_id_to_task = {}
329
+ # initialize mapping from unique_logical_op_id --> task
330
+ self.unique_logical_op_id_to_task = {}
321
331
 
322
332
  # initialize start time
323
333
  self.start_time = None
324
334
 
325
335
  # add a task to the progress manager for each operator in the plan
326
- for logical_op_id, op_set in plan:
336
+ for topo_idx, (logical_op_id, op_set) in enumerate(plan):
337
+ unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
327
338
  physical_op = op_set[0]
328
339
  is_llm_convert = isinstance(physical_op, LLMConvert)
329
340
  is_llm_filter = isinstance(physical_op, LLMFilter)
330
341
  op_name = "LLMConvert" if is_llm_convert else "LLMFilter" if is_llm_filter else physical_op.op_name()
331
- op_str = f"{op_name} ({logical_op_id})"
342
+ op_str = f"{op_name} ({unique_logical_op_id})"
332
343
  total = sample_budget if self._is_llm_op(op_set[0]) else 0
333
- self.add_task(logical_op_id, op_str, total)
344
+ self.add_task(unique_logical_op_id, op_str, total)
334
345
 
335
346
  self.console = Console()
336
347
 
@@ -338,14 +349,15 @@ class PZSentinelProgressManager(ProgressManager):
338
349
  is_llm_convert = isinstance(physical_op, LLMConvert)
339
350
  is_llm_filter = isinstance(physical_op, LLMFilter)
340
351
  is_llm_retrieve = isinstance(physical_op, RetrieveOp) and isinstance(physical_op.index, Collection)
341
- return is_llm_convert or is_llm_filter or is_llm_retrieve
352
+ is_llm_join = isinstance(physical_op, JoinOp)
353
+ return is_llm_convert or is_llm_filter or is_llm_retrieve or is_llm_join
342
354
 
343
- def get_task_description(self, logical_op_id: str) -> str:
355
+ def get_task_description(self, unique_logical_op_id: str) -> str:
344
356
  """Return the current description for the given task."""
345
- task = self.logical_op_id_to_task[logical_op_id]
357
+ task = self.unique_logical_op_id_to_task[unique_logical_op_id]
346
358
  return self.op_progress._tasks[task].description
347
359
 
348
- def add_task(self, logical_op_id: str, op_str: str, total: int):
360
+ def add_task(self, unique_logical_op_id: str, op_str: str, total: int):
349
361
  """Add a new task to the op progress bars"""
350
362
  task = self.op_progress.add_task(
351
363
  f"[blue]{op_str}",
@@ -358,10 +370,10 @@ class PZSentinelProgressManager(ProgressManager):
358
370
  )
359
371
 
360
372
  # store the mapping of operator ID to task ID
361
- self.logical_op_id_to_task[logical_op_id] = task
373
+ self.unique_logical_op_id_to_task[unique_logical_op_id] = task
362
374
 
363
375
  # initialize the stats for this operation
364
- self.logical_op_id_to_stats[logical_op_id] = ProgressStats(start_time=time.time())
376
+ self.unique_logical_op_id_to_stats[unique_logical_op_id] = ProgressStats(start_time=time.time())
365
377
 
366
378
  def start(self):
367
379
  # print a newline before starting to separate from previous output
@@ -373,29 +385,29 @@ class PZSentinelProgressManager(ProgressManager):
373
385
  # start progress bars
374
386
  self.live_display.start()
375
387
 
376
- def incr(self, logical_op_id: str, num_samples: int, display_text: str | None = None, **kwargs):
388
+ def incr(self, unique_logical_op_id: str, num_samples: int, display_text: str | None = None, **kwargs):
377
389
  # TODO: (above) organize progress bars into a Live / Table / Panel or something
378
390
  # get the task for the given operation
379
- task = self.logical_op_id_to_task.get(logical_op_id)
391
+ task = self.unique_logical_op_id_to_task.get(unique_logical_op_id)
380
392
 
381
393
  # update statistics with any additional keyword arguments
382
394
  if kwargs != {}:
383
- self.update_stats(logical_op_id, **kwargs)
395
+ self.update_stats(unique_logical_op_id, **kwargs)
384
396
 
385
397
  # update progress bar and recent text in one update
386
398
  if display_text is not None:
387
- self.logical_op_id_to_stats[logical_op_id].recent_text = display_text
399
+ self.unique_logical_op_id_to_stats[unique_logical_op_id].recent_text = display_text
388
400
 
389
- # advance the op progress bar for this logical_op_id
401
+ # advance the op progress bar for this unique_logical_op_id
390
402
  self.op_progress.update(
391
403
  task,
392
404
  advance=num_samples,
393
- description=f"[bold blue]{self.get_task_description(logical_op_id)}",
394
- cost=self.logical_op_id_to_stats[logical_op_id].total_cost,
395
- success=self.logical_op_id_to_stats[logical_op_id].success_count,
396
- failed=self.logical_op_id_to_stats[logical_op_id].failure_count,
405
+ description=f"[bold blue]{self.get_task_description(unique_logical_op_id)}",
406
+ cost=self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost,
407
+ success=self.unique_logical_op_id_to_stats[unique_logical_op_id].success_count,
408
+ failed=self.unique_logical_op_id_to_stats[unique_logical_op_id].failure_count,
397
409
  memory=get_memory_usage(),
398
- recent=f"{self.logical_op_id_to_stats[logical_op_id].recent_text}" if display_text is not None else "",
410
+ recent=f"{self.unique_logical_op_id_to_stats[unique_logical_op_id].recent_text}" if display_text is not None else "",
399
411
  refresh=True,
400
412
  )
401
413
 
@@ -403,7 +415,7 @@ class PZSentinelProgressManager(ProgressManager):
403
415
  self.overall_progress.update(
404
416
  self.overall_task_id,
405
417
  advance=num_samples,
406
- cost=sum(stats.total_cost for _, stats in self.logical_op_id_to_stats.items()),
418
+ cost=sum(stats.total_cost for _, stats in self.unique_logical_op_id_to_stats.items()),
407
419
  refresh=True,
408
420
  )
409
421
 
@@ -414,24 +426,24 @@ class PZSentinelProgressManager(ProgressManager):
414
426
  self.live_display.stop()
415
427
 
416
428
  # compute total cost, success, and failure
417
- total_cost = sum(stats.total_cost for stats in self.logical_op_id_to_stats.values())
418
- # success_count = sum(stats.success_count for stats in self.logical_op_id_to_stats.values())
419
- # failure_count = sum(stats.failure_count for stats in self.logical_op_id_to_stats.values())
429
+ total_cost = sum(stats.total_cost for stats in self.unique_logical_op_id_to_stats.values())
430
+ # success_count = sum(stats.success_count for stats in self.unique_logical_op_id_to_stats.values())
431
+ # failure_count = sum(stats.failure_count for stats in self.unique_logical_op_id_to_stats.values())
420
432
 
421
433
  # Print final stats on new lines after progress display
422
434
  print(f"Total opt. time: {time.time() - self.start_time:.2f}s")
423
435
  print(f"Total opt. cost: ${total_cost:.4f}")
424
436
  # print(f"Success rate: {success_count}/{success_count + failure_count}")
425
437
 
426
- def update_stats(self, logical_op_id: str, **kwargs):
438
+ def update_stats(self, unique_logical_op_id: str, **kwargs):
427
439
  """Update progress statistics"""
428
440
  for key, value in kwargs.items():
429
- if hasattr(self.logical_op_id_to_stats[logical_op_id], key):
441
+ if hasattr(self.unique_logical_op_id_to_stats[unique_logical_op_id], key):
430
442
  if key != "total_cost":
431
- setattr(self.logical_op_id_to_stats[logical_op_id], key, value)
443
+ setattr(self.unique_logical_op_id_to_stats[unique_logical_op_id], key, value)
432
444
  else:
433
- self.logical_op_id_to_stats[logical_op_id].total_cost += value
434
- self.logical_op_id_to_stats[logical_op_id].memory_usage_mb = get_memory_usage()
445
+ self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost += value
446
+ self.unique_logical_op_id_to_stats[unique_logical_op_id].memory_usage_mb = get_memory_usage()
435
447
 
436
448
  def create_progress_manager(
437
449
  plan: PhysicalPlan | SentinelPlan,
File without changes