PyPI - palimpzest - Versions diffs - 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

palimpzest 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

palimpzest/__init__.py +37 -6
palimpzest/agents/__init__.py +0 -0
palimpzest/agents/compute_agents.py +0 -0
palimpzest/agents/search_agents.py +637 -0
palimpzest/constants.py +343 -209
palimpzest/core/data/context.py +393 -0
palimpzest/core/data/context_manager.py +163 -0
palimpzest/core/data/dataset.py +639 -0
palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
palimpzest/core/elements/groupbysig.py +16 -13
palimpzest/core/elements/records.py +166 -75
palimpzest/core/lib/schemas.py +152 -390
palimpzest/core/{data/dataclasses.py → models.py} +306 -170
palimpzest/policy.py +2 -27
palimpzest/prompts/__init__.py +35 -5
palimpzest/prompts/agent_prompts.py +357 -0
palimpzest/prompts/context_search.py +9 -0
palimpzest/prompts/convert_prompts.py +62 -6
palimpzest/prompts/filter_prompts.py +51 -6
palimpzest/prompts/join_prompts.py +163 -0
palimpzest/prompts/moa_proposer_convert_prompts.py +6 -6
palimpzest/prompts/prompt_factory.py +375 -47
palimpzest/prompts/split_proposer_prompts.py +1 -1
palimpzest/prompts/util_phrases.py +5 -0
palimpzest/prompts/validator.py +239 -0
palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
palimpzest/query/execution/execution_strategy.py +210 -317
palimpzest/query/execution/execution_strategy_type.py +5 -7
palimpzest/query/execution/mab_execution_strategy.py +249 -136
palimpzest/query/execution/parallel_execution_strategy.py +153 -244
palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
palimpzest/query/generators/generators.py +160 -331
palimpzest/query/operators/__init__.py +15 -5
palimpzest/query/operators/aggregate.py +50 -33
palimpzest/query/operators/compute.py +201 -0
palimpzest/query/operators/convert.py +33 -19
palimpzest/query/operators/critique_and_refine_convert.py +7 -5
palimpzest/query/operators/distinct.py +62 -0
palimpzest/query/operators/filter.py +26 -16
palimpzest/query/operators/join.py +403 -0
palimpzest/query/operators/limit.py +3 -3
palimpzest/query/operators/logical.py +205 -77
palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
palimpzest/query/operators/physical.py +27 -21
palimpzest/query/operators/project.py +3 -3
palimpzest/query/operators/rag_convert.py +7 -7
palimpzest/query/operators/retrieve.py +9 -9
palimpzest/query/operators/scan.py +81 -42
palimpzest/query/operators/search.py +524 -0
palimpzest/query/operators/split_convert.py +10 -8
palimpzest/query/optimizer/__init__.py +7 -9
palimpzest/query/optimizer/cost_model.py +108 -441
palimpzest/query/optimizer/optimizer.py +123 -181
palimpzest/query/optimizer/optimizer_strategy.py +66 -61
palimpzest/query/optimizer/plan.py +352 -67
palimpzest/query/optimizer/primitives.py +43 -19
palimpzest/query/optimizer/rules.py +484 -646
palimpzest/query/optimizer/tasks.py +127 -58
palimpzest/query/processor/config.py +42 -76
palimpzest/query/processor/query_processor.py +73 -18
palimpzest/query/processor/query_processor_factory.py +46 -38
palimpzest/schemabuilder/schema_builder.py +15 -28
palimpzest/utils/model_helpers.py +32 -77
palimpzest/utils/progress.py +114 -102
palimpzest/validator/__init__.py +0 -0
palimpzest/validator/validator.py +306 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/METADATA +6 -1
palimpzest-0.8.1.dist-info/RECORD +95 -0
palimpzest/core/lib/fields.py +0 -141
palimpzest/prompts/code_synthesis_prompts.py +0 -28
palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
palimpzest/query/generators/api_client_factory.py +0 -30
palimpzest/query/operators/code_synthesis_convert.py +0 -488
palimpzest/query/operators/map.py +0 -130
palimpzest/query/processor/nosentinel_processor.py +0 -33
palimpzest/query/processor/processing_strategy_type.py +0 -28
palimpzest/query/processor/sentinel_processor.py +0 -88
palimpzest/query/processor/streaming_processor.py +0 -149
palimpzest/sets.py +0 -405
palimpzest/utils/datareader_helpers.py +0 -61
palimpzest/utils/demo_helpers.py +0 -75
palimpzest/utils/field_helpers.py +0 -69
palimpzest/utils/generation_helpers.py +0 -69
palimpzest/utils/sandbox.py +0 -183
palimpzest-0.7.21.dist-info/RECORD +0 -95
palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/WHEEL +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/top_level.txt +0 -0

palimpzest/utils/progress.py CHANGED Viewed

@@ -21,6 +21,7 @@ from rich.table import Table
 from palimpzest.query.operators.aggregate import AggregateOp
 from palimpzest.query.operators.convert import LLMConvert
 from palimpzest.query.operators.filter import LLMFilter
+from palimpzest.query.operators.join import JoinOp
 from palimpzest.query.operators.limit import LimitScanOp
 from palimpzest.query.operators.physical import PhysicalOperator
 from palimpzest.query.operators.retrieve import RetrieveOp
@@ -57,7 +58,7 @@ class ProgressManager(ABC):
         Initialize the progress manager for the given plan. This function takes in a plan,
         the number of samples to process (if specified).
-        If `num_samples` is None, then the entire DataReader will be scanned.
+        If `num_samples` is None, then the entire Dataset will be scanned.
         For each operator which is not an `AggregateOp` or `LimitScanOp`, we set its task `total`
         to the number of inputs to be processed by the plan. As intermediate operators process
@@ -81,51 +82,50 @@ class ProgressManager(ABC):
             expand=True,   # Use full width
         )
-        # initialize mapping from full_op_id --> ProgressStats
-        self.full_op_id_to_stats: dict[str, ProgressStats] = {}
+        # initialize mapping from unique_full_op_id --> ProgressStats
+        self.unique_full_op_id_to_stats: dict[str, ProgressStats] = {}
-        # initialize mapping from full_op_id --> task
-        self.full_op_id_to_task = {}
+        # initialize mapping from unique_full_op_id --> task
+        self.unique_full_op_id_to_task = {}
         # initialize start time
         self.start_time = None
-        # create mapping from full_op_id --> next_op
-        self.full_op_id_to_next_op: dict[str, PhysicalOperator] = {}
-        for op_idx, op in enumerate(plan.operators):
-            full_op_id = op.get_full_op_id()
-            next_op = plan.operators[op_idx + 1] if op_idx + 1 < len(plan.operators) else None
-            self.full_op_id_to_next_op[full_op_id] = next_op
-        # compute the total number of inputs to be processed by the plan
-        datareader_len = len(plan.operators[0].datareader)
-        total = datareader_len if num_samples is None else min(num_samples, datareader_len)
+        # TODO: store plan and use its methods within incr()
+        # create mapping from unique_full_op_id --> input unique_full_op_ids
+        self.unique_full_op_id_to_input_unique_full_op_ids: dict[str, list[str]] = {}
+        for topo_idx, op in enumerate(plan):
+            unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
+            input_unique_full_op_ids = plan.get_source_unique_full_op_ids(topo_idx, op)
+            self.unique_full_op_id_to_input_unique_full_op_ids[unique_full_op_id] = input_unique_full_op_ids
+        # create mapping from unique_full_op_id --> next_op
+        self.unique_full_op_id_to_next_op_and_id: dict[str, tuple[PhysicalOperator, str]] = {}
+        for topo_idx, op in enumerate(plan):
+            unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
+            next_op, next_unique_full_op_id = plan.get_next_unique_full_op_and_id(topo_idx, op)
+            self.unique_full_op_id_to_next_op_and_id[unique_full_op_id] = (next_op, next_unique_full_op_id)
         # add a task to the progress manager for each operator in the plan
-        for op in plan.operators:
+        est_total_outputs, _ = plan.get_est_total_outputs(num_samples)
+        for topo_idx, op in enumerate(plan):
             # get the op id and a short string representation of the op; (str(op) is too long)
             op_str = f"{op.op_name()} ({op.get_op_id()})"
+            unique_full_op_id = f"{topo_idx}-{op.get_full_op_id()}"
+            self.add_task(unique_full_op_id, op_str, est_total_outputs[unique_full_op_id])
-            # update the `total` if we encounter an AggregateOp or LimitScanOp
-            if isinstance(op, AggregateOp):
-                total = 1
-            elif isinstance(op, LimitScanOp):
-                total = op.limit
-            self.add_task(op.get_full_op_id(), op_str, total)
-    def get_task_total(self, full_op_id: str) -> int:
+    def get_task_total(self, unique_full_op_id: str) -> int:
         """Return the current total value for the given task."""
-        task = self.full_op_id_to_task[full_op_id]
+        task = self.unique_full_op_id_to_task[unique_full_op_id]
         return self.progress._tasks[task].total
-    def get_task_description(self, full_op_id: str) -> str:
+    def get_task_description(self, unique_full_op_id: str) -> str:
         """Return the current description for the given task."""
-        task = self.full_op_id_to_task[full_op_id]
+        task = self.unique_full_op_id_to_task[unique_full_op_id]
         return self.progress._tasks[task].description
     @abstractmethod
-    def add_task(self, full_op_id: str, op_str: str, total: int):
+    def add_task(self, unique_full_op_id: str, op_str: str, total: int):
         """Initialize progress tracking for operator execution with total items"""
         pass
@@ -135,18 +135,16 @@ class ProgressManager(ABC):
         pass
     @abstractmethod
-    def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
+    def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
         """
-        Advance the progress bar for the given operator by one. Modify the downstream operators'
+        Advance the progress bar for the given operator. Modify the downstream operators'
         progress bar `total` to reflect the number of outputs produced by this operator.
-        NOTE: The semantics of this function are that every time it is executed we advance the
-        progress bar by 1. This is because the progress bar represents what fraction of the inputs
-        have been processed by the operator. `num_outputs` specifies how many outputs were generated
-        by the operator when processing the input for which `incr()` was called. E.g. a filter which
-        filters an input record will advance its progress bar by 1, but the next operator will now
-        have 1 fewer inputs to process. Alternatively, a convert which generates 3 `num_outputs` will
-        increase the inputs for the next operator by `delta = num_outputs - 1 = 2`.
+        NOTE: `num_outputs` specifies how many outputs were generated by the operator when processing
+        the `num_inputs` inputs for which `incr()` was called. E.g. a filter which filters one input record
+        will advance its progress bar by 1, but the next operator will now have 1 fewer inputs to process.
+        Alternatively, a convert which generates 3 `num_outputs` for 2 `num_inputs` will increase the inputs
+        for the next operator by `delta = num_outputs - num_inputs = 3 - 2 = 1`.
         """
         pass
@@ -162,13 +160,13 @@ class MockProgressManager(ProgressManager):
     def __init__(self, plan: PhysicalPlan | SentinelPlan, num_samples: int | None = None):
         pass
-    def add_task(self, full_op_id: str, op_str: str, total: int):
+    def add_task(self, unique_full_op_id: str, op_str: str, total: int):
         pass
     def start(self):
         pass
-    def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
+    def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
         pass
     def finish(self):
@@ -181,7 +179,7 @@ class PZProgressManager(ProgressManager):
         super().__init__(plan, num_samples)
         self.console = Console()
-    def add_task(self, full_op_id: str, op_str: str, total: int):
+    def add_task(self, unique_full_op_id: str, op_str: str, total: int):
         """Add a new task to the progress bar"""
         task = self.progress.add_task(
             f"[blue]{op_str}",
@@ -194,10 +192,10 @@ class PZProgressManager(ProgressManager):
         )
         # store the mapping of operator ID to task ID
-        self.full_op_id_to_task[full_op_id] = task
+        self.unique_full_op_id_to_task[unique_full_op_id] = task
         # initialize the stats for this operation
-        self.full_op_id_to_stats[full_op_id] = ProgressStats(start_time=time.time())
+        self.unique_full_op_id_to_stats[unique_full_op_id] = ProgressStats(start_time=time.time())
     def start(self):
         # print a newline before starting to separate from previous output
@@ -209,41 +207,53 @@ class PZProgressManager(ProgressManager):
         # start progress bar
         self.progress.start()
-    def incr(self, full_op_id: str, num_outputs: int = 1, display_text: str | None = None, **kwargs):
+    def incr(self, unique_full_op_id: str, num_inputs: int = 1, num_outputs: int = 1, display_text: str | None = None, **kwargs):
         # get the task for the given operation
-        task = self.full_op_id_to_task.get(full_op_id)
+        task = self.unique_full_op_id_to_task.get(unique_full_op_id)
         # update statistics with any additional keyword arguments
         if kwargs != {}:
-            self.update_stats(full_op_id, **kwargs)
+            self.update_stats(unique_full_op_id, **kwargs)
         # update progress bar and recent text in one update
         if display_text is not None:
-            self.full_op_id_to_stats[full_op_id].recent_text = display_text
+            self.unique_full_op_id_to_stats[unique_full_op_id].recent_text = display_text
-        # if num_outputs is not 1, update the downstream operators' progress bar total for any
-        # operator which is not an AggregateOp or LimitScanOp
-        delta = num_outputs - 1
+        # update the downstream operators' progress bar total for any operator which is not an AggregateOp or LimitScanOp
+        delta = num_outputs - num_inputs
         if delta != 0:
-            next_op = self.full_op_id_to_next_op[full_op_id]
+            current_unique_full_op_id = unique_full_op_id
+            next_op, next_unique_full_op_id = self.unique_full_op_id_to_next_op_and_id[unique_full_op_id]
             while next_op is not None:
                 if not isinstance(next_op, (AggregateOp, LimitScanOp)):
-                    next_full_op_id = next_op.get_full_op_id()
-                    next_task = self.full_op_id_to_task[next_full_op_id]
-                    self.progress.update(next_task, total=self.get_task_total(next_full_op_id) + delta)
-                next_op = self.full_op_id_to_next_op[next_full_op_id]
+                    next_task = self.unique_full_op_id_to_task[next_unique_full_op_id]
+                    multiplier = 1
+                    if isinstance(next_op, JoinOp):
+                        # for joins, scale the delta by the number of inputs from the other side of the join
+                        left_input_unique_full_op_id, right_input_unique_input_op_id = self.unique_full_op_id_to_input_unique_full_op_ids[next_unique_full_op_id]
+                        if current_unique_full_op_id == left_input_unique_full_op_id:
+                            multiplier = self.get_task_total(right_input_unique_input_op_id)
+                        elif current_unique_full_op_id == right_input_unique_input_op_id:
+                            multiplier = self.get_task_total(left_input_unique_full_op_id)
+                        else:
+                            raise ValueError(f"Current op ID {current_unique_full_op_id} not found in join inputs {left_input_unique_full_op_id}, {right_input_unique_input_op_id}")
+                    delta_adjusted = delta * multiplier
+                    self.progress.update(next_task, total=self.get_task_total(next_unique_full_op_id) + delta_adjusted)
+                # move to the next operator in the plan
+                current_unique_full_op_id = next_unique_full_op_id
+                next_op, next_unique_full_op_id = self.unique_full_op_id_to_next_op_and_id[next_unique_full_op_id]
         # advance the progress bar for this task
         self.progress.update(
             task,
-            advance=1,
-            description=f"[bold blue]{self.get_task_description(full_op_id)}",
-            cost=self.full_op_id_to_stats[full_op_id].total_cost,
-            success=self.full_op_id_to_stats[full_op_id].success_count,
-            failed=self.full_op_id_to_stats[full_op_id].failure_count,
+            advance=num_inputs,
+            description=f"[bold blue]{self.get_task_description(unique_full_op_id)}",
+            cost=self.unique_full_op_id_to_stats[unique_full_op_id].total_cost,
+            success=self.unique_full_op_id_to_stats[unique_full_op_id].success_count,
+            failed=self.unique_full_op_id_to_stats[unique_full_op_id].failure_count,
             memory=get_memory_usage(),
-            recent=f"{self.full_op_id_to_stats[full_op_id].recent_text}" if display_text is not None else "",
+            recent=f"{self.unique_full_op_id_to_stats[unique_full_op_id].recent_text}" if display_text is not None else "",
             refresh=True,
         )
@@ -251,24 +261,24 @@ class PZProgressManager(ProgressManager):
         self.progress.stop()
         # compute total cost, success, and failure
-        total_cost = sum(stats.total_cost for stats in self.full_op_id_to_stats.values())
-        # success_count = sum(stats.success_count for stats in self.full_op_id_to_stats.values())
-        # failure_count = sum(stats.failure_count for stats in self.full_op_id_to_stats.values())
+        total_cost = sum(stats.total_cost for stats in self.unique_full_op_id_to_stats.values())
+        # success_count = sum(stats.success_count for stats in self.unique_full_op_id_to_stats.values())
+        # failure_count = sum(stats.failure_count for stats in self.unique_full_op_id_to_stats.values())
         # Print final stats on new lines after progress display
         print(f"Total time: {time.time() - self.start_time:.2f}s")
         print(f"Total cost: ${total_cost:.4f}")
         # print(f"Success rate: {success_count}/{success_count + failure_count}")
-    def update_stats(self, full_op_id: str, **kwargs):
+    def update_stats(self, unique_full_op_id: str, **kwargs):
         """Update progress statistics"""
         for key, value in kwargs.items():
-            if hasattr(self.full_op_id_to_stats[full_op_id], key):
+            if hasattr(self.unique_full_op_id_to_stats[unique_full_op_id], key):
                 if key != "total_cost":
-                    setattr(self.full_op_id_to_stats[full_op_id], key, value)
+                    setattr(self.unique_full_op_id_to_stats[unique_full_op_id], key, value)
                 else:
-                    self.full_op_id_to_stats[full_op_id].total_cost += value
-        self.full_op_id_to_stats[full_op_id].memory_usage_mb = get_memory_usage()
+                    self.unique_full_op_id_to_stats[unique_full_op_id].total_cost += value
+        self.unique_full_op_id_to_stats[unique_full_op_id].memory_usage_mb = get_memory_usage()
 class PZSentinelProgressManager(ProgressManager):
     def __init__(self, plan: SentinelPlan, sample_budget: int):
@@ -313,24 +323,25 @@ class PZSentinelProgressManager(ProgressManager):
         )
         self.live_display = Live(self.progress_table, refresh_per_second=10)
-        # initialize mapping from logical_op_id --> ProgressStats
-        self.logical_op_id_to_stats: dict[str, ProgressStats] = {}
+        # initialize mapping from unique_logical_op_id --> ProgressStats
+        self.unique_logical_op_id_to_stats: dict[str, ProgressStats] = {}
-        # initialize mapping from logical_op_id --> task
-        self.logical_op_id_to_task = {}
+        # initialize mapping from unique_logical_op_id --> task
+        self.unique_logical_op_id_to_task = {}
         # initialize start time
         self.start_time = None
         # add a task to the progress manager for each operator in the plan
-        for logical_op_id, op_set in plan:
+        for topo_idx, (logical_op_id, op_set) in enumerate(plan):
+            unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
             physical_op = op_set[0]
             is_llm_convert = isinstance(physical_op, LLMConvert)
             is_llm_filter = isinstance(physical_op, LLMFilter)
             op_name = "LLMConvert" if is_llm_convert else "LLMFilter" if is_llm_filter else physical_op.op_name()
-            op_str = f"{op_name} ({logical_op_id})"
+            op_str = f"{op_name} ({unique_logical_op_id})"
             total = sample_budget if self._is_llm_op(op_set[0]) else 0
-            self.add_task(logical_op_id, op_str, total)
+            self.add_task(unique_logical_op_id, op_str, total)
         self.console = Console()
@@ -338,14 +349,15 @@ class PZSentinelProgressManager(ProgressManager):
         is_llm_convert = isinstance(physical_op, LLMConvert)
         is_llm_filter = isinstance(physical_op, LLMFilter)
         is_llm_retrieve = isinstance(physical_op, RetrieveOp) and isinstance(physical_op.index, Collection)
-        return is_llm_convert or is_llm_filter or is_llm_retrieve
+        is_llm_join = isinstance(physical_op, JoinOp)
+        return is_llm_convert or is_llm_filter or is_llm_retrieve or is_llm_join
-    def get_task_description(self, logical_op_id: str) -> str:
+    def get_task_description(self, unique_logical_op_id: str) -> str:
         """Return the current description for the given task."""
-        task = self.logical_op_id_to_task[logical_op_id]
+        task = self.unique_logical_op_id_to_task[unique_logical_op_id]
         return self.op_progress._tasks[task].description
-    def add_task(self, logical_op_id: str, op_str: str, total: int):
+    def add_task(self, unique_logical_op_id: str, op_str: str, total: int):
         """Add a new task to the op progress bars"""
         task = self.op_progress.add_task(
             f"[blue]{op_str}",
@@ -358,10 +370,10 @@ class PZSentinelProgressManager(ProgressManager):
         )
         # store the mapping of operator ID to task ID
-        self.logical_op_id_to_task[logical_op_id] = task
+        self.unique_logical_op_id_to_task[unique_logical_op_id] = task
         # initialize the stats for this operation
-        self.logical_op_id_to_stats[logical_op_id] = ProgressStats(start_time=time.time())
+        self.unique_logical_op_id_to_stats[unique_logical_op_id] = ProgressStats(start_time=time.time())
     def start(self):
         # print a newline before starting to separate from previous output
@@ -373,29 +385,29 @@ class PZSentinelProgressManager(ProgressManager):
         # start progress bars
         self.live_display.start()
-    def incr(self, logical_op_id: str, num_samples: int, display_text: str | None = None, **kwargs):
+    def incr(self, unique_logical_op_id: str, num_samples: int, display_text: str | None = None, **kwargs):
         # TODO: (above) organize progress bars into a Live / Table / Panel or something
         # get the task for the given operation
-        task = self.logical_op_id_to_task.get(logical_op_id)
+        task = self.unique_logical_op_id_to_task.get(unique_logical_op_id)
         # update statistics with any additional keyword arguments
         if kwargs != {}:
-            self.update_stats(logical_op_id, **kwargs)
+            self.update_stats(unique_logical_op_id, **kwargs)
         # update progress bar and recent text in one update
         if display_text is not None:
-            self.logical_op_id_to_stats[logical_op_id].recent_text = display_text
+            self.unique_logical_op_id_to_stats[unique_logical_op_id].recent_text = display_text
-        # advance the op progress bar for this logical_op_id
+        # advance the op progress bar for this unique_logical_op_id
         self.op_progress.update(
             task,
             advance=num_samples,
-            description=f"[bold blue]{self.get_task_description(logical_op_id)}",
-            cost=self.logical_op_id_to_stats[logical_op_id].total_cost,
-            success=self.logical_op_id_to_stats[logical_op_id].success_count,
-            failed=self.logical_op_id_to_stats[logical_op_id].failure_count,
+            description=f"[bold blue]{self.get_task_description(unique_logical_op_id)}",
+            cost=self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost,
+            success=self.unique_logical_op_id_to_stats[unique_logical_op_id].success_count,
+            failed=self.unique_logical_op_id_to_stats[unique_logical_op_id].failure_count,
             memory=get_memory_usage(),
-            recent=f"{self.logical_op_id_to_stats[logical_op_id].recent_text}" if display_text is not None else "",
+            recent=f"{self.unique_logical_op_id_to_stats[unique_logical_op_id].recent_text}" if display_text is not None else "",
             refresh=True,
         )
@@ -403,7 +415,7 @@ class PZSentinelProgressManager(ProgressManager):
         self.overall_progress.update(
             self.overall_task_id,
             advance=num_samples,
-            cost=sum(stats.total_cost for _, stats in self.logical_op_id_to_stats.items()),
+            cost=sum(stats.total_cost for _, stats in self.unique_logical_op_id_to_stats.items()),
             refresh=True,
         )
@@ -414,24 +426,24 @@ class PZSentinelProgressManager(ProgressManager):
         self.live_display.stop()
         # compute total cost, success, and failure
-        total_cost = sum(stats.total_cost for stats in self.logical_op_id_to_stats.values())
-        # success_count = sum(stats.success_count for stats in self.logical_op_id_to_stats.values())
-        # failure_count = sum(stats.failure_count for stats in self.logical_op_id_to_stats.values())
+        total_cost = sum(stats.total_cost for stats in self.unique_logical_op_id_to_stats.values())
+        # success_count = sum(stats.success_count for stats in self.unique_logical_op_id_to_stats.values())
+        # failure_count = sum(stats.failure_count for stats in self.unique_logical_op_id_to_stats.values())
         # Print final stats on new lines after progress display
         print(f"Total opt. time: {time.time() - self.start_time:.2f}s")
         print(f"Total opt. cost: ${total_cost:.4f}")
         # print(f"Success rate: {success_count}/{success_count + failure_count}")
-    def update_stats(self, logical_op_id: str, **kwargs):
+    def update_stats(self, unique_logical_op_id: str, **kwargs):
         """Update progress statistics"""
         for key, value in kwargs.items():
-            if hasattr(self.logical_op_id_to_stats[logical_op_id], key):
+            if hasattr(self.unique_logical_op_id_to_stats[unique_logical_op_id], key):
                 if key != "total_cost":
-                    setattr(self.logical_op_id_to_stats[logical_op_id], key, value)
+                    setattr(self.unique_logical_op_id_to_stats[unique_logical_op_id], key, value)
                 else:
-                    self.logical_op_id_to_stats[logical_op_id].total_cost += value
-        self.logical_op_id_to_stats[logical_op_id].memory_usage_mb = get_memory_usage()
+                    self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost += value
+        self.unique_logical_op_id_to_stats[unique_logical_op_id].memory_usage_mb = get_memory_usage()
 def create_progress_manager(
     plan: PhysicalPlan | SentinelPlan,

palimpzest/validator/__init__.py ADDED Viewed

File without changes

palimpzest 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl

palimpzest 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl