PyPI - palimpzest - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl - Mend

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

palimpzest/constants.py +38 -62
palimpzest/core/data/dataset.py +1 -1
palimpzest/core/data/iter_dataset.py +5 -5
palimpzest/core/elements/groupbysig.py +1 -1
palimpzest/core/elements/records.py +91 -109
palimpzest/core/lib/schemas.py +23 -0
palimpzest/core/models.py +3 -3
palimpzest/prompts/__init__.py +2 -6
palimpzest/prompts/convert_prompts.py +10 -66
palimpzest/prompts/critique_and_refine_prompts.py +66 -0
palimpzest/prompts/filter_prompts.py +8 -46
palimpzest/prompts/join_prompts.py +12 -75
palimpzest/prompts/{moa_aggregator_convert_prompts.py → moa_aggregator_prompts.py} +51 -2
palimpzest/prompts/moa_proposer_prompts.py +87 -0
palimpzest/prompts/prompt_factory.py +351 -479
palimpzest/prompts/split_merge_prompts.py +51 -2
palimpzest/prompts/split_proposer_prompts.py +48 -16
palimpzest/prompts/utils.py +109 -0
palimpzest/query/execution/all_sample_execution_strategy.py +1 -1
palimpzest/query/execution/execution_strategy.py +4 -4
palimpzest/query/execution/mab_execution_strategy.py +47 -23
palimpzest/query/execution/parallel_execution_strategy.py +3 -3
palimpzest/query/execution/single_threaded_execution_strategy.py +8 -8
palimpzest/query/generators/generators.py +31 -17
palimpzest/query/operators/__init__.py +15 -2
palimpzest/query/operators/aggregate.py +21 -19
palimpzest/query/operators/compute.py +6 -8
palimpzest/query/operators/convert.py +12 -37
palimpzest/query/operators/critique_and_refine.py +194 -0
palimpzest/query/operators/distinct.py +7 -7
palimpzest/query/operators/filter.py +13 -25
palimpzest/query/operators/join.py +321 -192
palimpzest/query/operators/limit.py +4 -4
palimpzest/query/operators/mixture_of_agents.py +246 -0
palimpzest/query/operators/physical.py +25 -2
palimpzest/query/operators/project.py +4 -4
palimpzest/query/operators/{rag_convert.py → rag.py} +202 -5
palimpzest/query/operators/retrieve.py +10 -9
palimpzest/query/operators/scan.py +9 -10
palimpzest/query/operators/search.py +18 -24
palimpzest/query/operators/split.py +321 -0
palimpzest/query/optimizer/__init__.py +12 -8
palimpzest/query/optimizer/optimizer.py +12 -10
palimpzest/query/optimizer/rules.py +201 -108
palimpzest/query/optimizer/tasks.py +18 -6
palimpzest/query/processor/config.py +2 -2
palimpzest/query/processor/query_processor.py +2 -2
palimpzest/query/processor/query_processor_factory.py +9 -5
palimpzest/validator/validator.py +7 -9
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/METADATA +3 -8
palimpzest-0.8.3.dist-info/RECORD +95 -0
palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -216
palimpzest/prompts/moa_proposer_convert_prompts.py +0 -75
palimpzest/prompts/util_phrases.py +0 -19
palimpzest/query/operators/critique_and_refine_convert.py +0 -113
palimpzest/query/operators/mixture_of_agents_convert.py +0 -140
palimpzest/query/operators/split_convert.py +0 -170
palimpzest-0.8.1.dist-info/RECORD +0 -95
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/WHEEL +0 -0
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/top_level.txt +0 -0

palimpzest/query/operators/search.py CHANGED Viewed

@@ -91,17 +91,15 @@ class SmolAgentsSearch(PhysicalOperator):
         Given an input DataRecord and a determination of whether it passed the filter or not,
         construct the resulting RecordSet.
         """
-        # create new DataRecord and set passed_operator attribute
-        dr = DataRecord.from_parent(self.output_schema, parent_record=candidate)
-        for field in self.output_schema.model_fields:
-            if field in answer:
-                dr[field] = answer[field]
+        # create new DataRecord
+        data_item = {field: answer[field] for field in self.output_schema.model_fields if field in answer}
+        dr = DataRecord.from_parent(self.output_schema, data_item, parent_record=candidate)
         # create RecordOpStats object
         record_op_stats = RecordOpStats(
-            record_id=dr.id,
-            record_parent_ids=dr.parent_ids,
-            record_source_indices=dr.source_indices,
+            record_id=dr._id,
+            record_parent_ids=dr._parent_ids,
+            record_source_indices=dr._source_indices,
             record_state=dr.to_dict(include_bytes=False),
             full_op_id=self.get_full_op_id(),
             logical_op_id=self.logical_op_id,
@@ -248,17 +246,15 @@ class SmolAgentsSearch(PhysicalOperator):
 #         Given an input DataRecord and a determination of whether it passed the filter or not,
 #         construct the resulting RecordSet.
 #         """
-#         # create new DataRecord and set passed_operator attribute
-#         dr = DataRecord.from_parent(self.output_schema, parent_record=candidate)
-#         for field in self.output_schema.model_fields:
-#             if field in answer:
-#                 dr[field] = answer[field]
+#         # create new DataRecord
+#         data_item = {field: answer[field] for field in self.output_schema.model_fields if field in answer}
+#         dr = DataRecord.from_parent(self.output_schema, data_item, parent_record=candidate)
         # # create RecordOpStats object
         # record_op_stats = RecordOpStats(
-        #     record_id=dr.id,
-        #     record_parent_ids=dr.parent_ids,
-        #     record_source_indices=dr.source_indices,
+        #     record_id=dr._id,
+        #     record_parent_ids=dr._parent_ids,
+        #     record_source_indices=dr._source_indices,
         #     record_state=dr.to_dict(include_bytes=False),
         #     full_op_id=self.get_full_op_id(),
         #     logical_op_id=self.logical_op_id,
@@ -440,17 +436,15 @@ class SmolAgentsSearch(PhysicalOperator):
 #         Given an input DataRecord and a determination of whether it passed the filter or not,
 #         construct the resulting RecordSet.
 #         """
-#         # create new DataRecord and set passed_operator attribute
-#         dr = DataRecord.from_parent(self.output_schema, parent_record=candidate)
-#         for field in self.output_schema.model_fields:
-#             if field in answer:
-#                 dr[field] = answer[field]
+#         # create new DataRecord
+#         data_item = {field: answer[field] for field in self.output_schema.model_fields if field in answer}
+#         dr = DataRecord.from_parent(self.output_schema, data_item, parent_record=candidate)
 #         # create RecordOpStats object
 #         record_op_stats = RecordOpStats(
-#             record_id=dr.id,
-#             record_parent_ids=dr.parent_ids,
-#             record_source_indices=dr.source_indices,
+#             record_id=dr._id,
+#             record_parent_ids=dr._parent_ids,
+#             record_source_indices=dr._source_indices,
 #             record_state=dr.to_dict(include_bytes=False),
 #             full_op_id=self.get_full_op_id(),
 #             logical_op_id=self.logical_op_id,

palimpzest/query/operators/split.py ADDED Viewed

@@ -0,0 +1,321 @@
+from __future__ import annotations
+import math
+from pydantic.fields import FieldInfo
+from palimpzest.constants import (
+    MODEL_CARDS,
+    NAIVE_EST_NUM_INPUT_TOKENS,
+    NAIVE_EST_NUM_OUTPUT_TOKENS,
+    Cardinality,
+    PromptStrategy,
+)
+from palimpzest.core.elements.records import DataRecord
+from palimpzest.core.models import GenerationStats, OperatorCostEstimates
+from palimpzest.query.generators.generators import Generator
+from palimpzest.query.operators.convert import LLMConvert
+from palimpzest.query.operators.filter import LLMFilter
+class SplitConvert(LLMConvert):
+    """
+    LLMConvert variant that splits long text input fields into chunks, runs a "proposer"
+    generation (PromptStrategy.MAP_SPLIT_PROPOSER) on each chunk, and then runs a "merger"
+    generation (PromptStrategy.MAP_SPLIT_MERGER) over the per-chunk outputs to produce the
+    final field answers.
+    """
+    def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000, *args, **kwargs):
+        """
+        Args:
+            num_chunks: number of chunks to split each sufficiently long text field into.
+            min_size_to_chunk: text fields shorter than this many characters are not split.
+            *args, **kwargs: forwarded to LLMConvert. Any `prompt_strategy` kwarg is overwritten
+                with None because this operator builds its own proposer and merger generators.
+        """
+        kwargs["prompt_strategy"] = None
+        super().__init__(*args, **kwargs)
+        self.num_chunks = num_chunks
+        self.min_size_to_chunk = min_size_to_chunk
+        self.split_generator = Generator(self.model, PromptStrategy.MAP_SPLIT_PROPOSER, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
+        self.split_merge_generator = Generator(self.model, PromptStrategy.MAP_SPLIT_MERGER, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
+        # crude adjustment factor for naive estimation in unoptimized setting
+        self.naive_quality_adjustment = 0.6
+    def __str__(self):
+        """Return the parent's description followed by this operator's chunking parameters."""
+        op = super().__str__()
+        # the value printed here is the number of chunks, so label it as such (was "Chunk Size")
+        op += f"    Num Chunks: {self.num_chunks}\n"
+        op += f"    Min Size to Chunk: {self.min_size_to_chunk}\n"
+        return op
+    def get_id_params(self):
+        """Return the parent's id params plus the chunking parameters (parent values win on key collisions)."""
+        id_params = super().get_id_params()
+        return {"num_chunks": self.num_chunks, "min_size_to_chunk": self.min_size_to_chunk, **id_params}
+    def get_op_params(self):
+        """Return the parent's op params plus the chunking parameters (parent values win on key collisions)."""
+        op_params = super().get_op_params()
+        return {"num_chunks": self.num_chunks, "min_size_to_chunk": self.min_size_to_chunk, **op_params}
+    def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates) -> OperatorCostEstimates:
+        """
+        Start from LLMConvert's naive estimates, then overwrite the cost per record with a value
+        computed from the naive input / output token constants and this model's per-token prices,
+        and scale the quality estimate by `self.naive_quality_adjustment`. Lower and upper bounds
+        are collapsed onto the point estimates.
+        """
+        # get naive cost estimates from LLMConvert
+        naive_op_cost_estimates = super().naive_cost_estimates(source_op_cost_estimates)
+        # NOTE(review): the token counts below are the unscaled naive constants; nothing here accounts
+        # for the chunk size or for the extra per-chunk and merger calls -- confirm this is intended
+        est_num_input_tokens = NAIVE_EST_NUM_INPUT_TOKENS
+        est_num_output_tokens = NAIVE_EST_NUM_OUTPUT_TOKENS
+        model_card = MODEL_CARDS[self.model.value]
+        model_conversion_usd_per_record = (
+            model_card["usd_per_input_token"] * est_num_input_tokens
+            + model_card["usd_per_output_token"] * est_num_output_tokens
+        )
+        # set refined estimate of cost per record
+        naive_op_cost_estimates.cost_per_record = model_conversion_usd_per_record
+        naive_op_cost_estimates.cost_per_record_lower_bound = naive_op_cost_estimates.cost_per_record
+        naive_op_cost_estimates.cost_per_record_upper_bound = naive_op_cost_estimates.cost_per_record
+        naive_op_cost_estimates.quality = naive_op_cost_estimates.quality * self.naive_quality_adjustment
+        naive_op_cost_estimates.quality_lower_bound = naive_op_cost_estimates.quality
+        naive_op_cost_estimates.quality_upper_bound = naive_op_cost_estimates.quality
+        return naive_op_cost_estimates
+    def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
+        """
+        Split `text` into contiguous substrings of ceil(len(text) / num_chunks) characters each
+        (the last one may be shorter). The result has at most `num_chunks` entries; it has fewer
+        when the text is short relative to `num_chunks`, and is empty for an empty `text`.
+        Raises:
+            ValueError: if `num_chunks` is less than 1 (previously a ZeroDivisionError for 0 and
+                a non-terminating loop for negative values).
+        """
+        if num_chunks < 1:
+            raise ValueError(f"num_chunks must be >= 1, got {num_chunks}")
+        chunk_size = math.ceil(len(text) / num_chunks)
+        # an empty text has chunk size 0; there is nothing to slice (and range() rejects a zero step)
+        if chunk_size == 0:
+            return []
+        return [text[idx : idx + chunk_size] for idx in range(0, len(text), chunk_size)]
+    def get_chunked_candidate(self, candidate: DataRecord, input_fields: list[str]) -> list[DataRecord]:
+        """
+        Build one copy of `candidate` per chunk. Fields annotated as `str` or `list[str]` (or their
+        `| None` variants) whose text is at least `self.min_size_to_chunk` characters long are split
+        with `get_text_chunks`; every other field is copied unchanged into every chunked record.
+        List-of-string fields are first flattened into a single "[a, b, ...]" string.
+        Always returns at least one record.
+        """
+        # compute mapping from each field to its chunked content
+        field_name_to_chunked_content = {}
+        for field_name in input_fields:
+            field = candidate.get_field_type(field_name)
+            content = candidate[field_name]
+            is_string_field = field.annotation in [str, str | None]
+            is_list_string_field = field.annotation in [list[str], list[str] | None]
+            # do not chunk this field if it is not a string or a list of strings, or if an optional
+            # field holds None (joining or measuring None would raise a TypeError)
+            if content is None or not (is_string_field or is_list_string_field):
+                field_name_to_chunked_content[field_name] = [content]
+                continue
+            # if this is a list of strings, join the strings
+            if is_list_string_field:
+                content = "[" + ", ".join(content) + "]"
+            # skip this field if its length is less than the min size to chunk
+            if len(content) < self.min_size_to_chunk:
+                field_name_to_chunked_content[field_name] = [content]
+                continue
+            # chunk the content; fall back to the whole content if chunking produced nothing (empty text)
+            field_name_to_chunked_content[field_name] = self.get_text_chunks(content, self.num_chunks) or [content]
+        # compute the true number of chunks (1 if no field was chunked or there are no input fields)
+        num_chunks = max((len(chunks) for chunks in field_name_to_chunked_content.values()), default=1)
+        # create the chunked candidates
+        candidates = []
+        for chunk_idx in range(num_chunks):
+            candidate_copy = candidate.copy()
+            for field_name, field_chunks in field_name_to_chunked_content.items():
+                if len(field_chunks) == 1:
+                    # un-chunked field: repeat it in every chunked record
+                    candidate_copy[field_name] = field_chunks[0]
+                elif chunk_idx < len(field_chunks):
+                    candidate_copy[field_name] = field_chunks[chunk_idx]
+                else:
+                    # this field produced fewer chunks than the widest field and is exhausted
+                    candidate_copy[field_name] = ""
+            candidates.append(candidate_copy)
+        return candidates
+    def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo]) -> tuple[dict[str, list], GenerationStats]:
+        """
+        Chunk the candidate's input fields, call the proposer generator once per chunked record,
+        then call the merger generator with the collected per-chunk outputs.
+        Returns:
+            The merger's field answers and the combined generation stats of all calls.
+        """
+        # get the set of input fields to use for the convert operation
+        input_fields = self.get_input_fields()
+        # split the input fields into chunks, working from a copy of the caller's record
+        candidate_copy = candidate.copy()
+        chunked_candidates = self.get_chunked_candidate(candidate_copy, input_fields)
+        # construct kwargs for generation
+        gen_kwargs = {"project_cols": input_fields, "output_schema": self.output_schema}
+        # generate outputs for each chunk separately
+        chunk_outputs, chunk_generation_stats_lst = [], []
+        for chunk_candidate in chunked_candidates:
+            _, reasoning, chunk_generation_stats, _ = self.split_generator(chunk_candidate, fields, json_output=False, **gen_kwargs)
+            chunk_outputs.append(reasoning)
+            chunk_generation_stats_lst.append(chunk_generation_stats)
+        # NOTE(review): the loop above used to rebind the name `candidate`, so the merger has always
+        # been given the LAST chunked record rather than the full input record. That behaviour is
+        # kept (now explicitly); confirm whether the un-chunked record was intended instead.
+        merger_candidate = chunked_candidates[-1]
+        # call the merger
+        merger_kwargs = {**gen_kwargs, "chunk_outputs": chunk_outputs}
+        field_answers, _, merger_gen_stats, _ = self.split_merge_generator(merger_candidate, fields, **merger_kwargs)
+        # compute the total generation stats; sum() starts from 0, so this presumably relies on
+        # GenerationStats supporting `0 + stats` -- confirm against palimpzest.core.models
+        generation_stats = sum(chunk_generation_stats_lst) + merger_gen_stats
+        return field_answers, generation_stats
+class SplitFilter(LLMFilter):
+    """
+    LLMFilter variant that splits long text input fields into chunks, runs a "proposer"
+    generation (PromptStrategy.FILTER_SPLIT_PROPOSER) on each chunk, and then runs a "merger"
+    generation (PromptStrategy.FILTER_SPLIT_MERGER) over the per-chunk outputs to decide
+    whether the record passes the filter.
+    """
+    def __init__(self, num_chunks: int = 2, min_size_to_chunk: int = 1000, *args, **kwargs):
+        """
+        Args:
+            num_chunks: number of chunks to split each sufficiently long text field into.
+            min_size_to_chunk: text fields shorter than this many characters are not split.
+            *args, **kwargs: forwarded to LLMFilter. Any `prompt_strategy` kwarg is overwritten
+                with None because this operator builds its own proposer and merger generators.
+        """
+        kwargs["prompt_strategy"] = None
+        super().__init__(*args, **kwargs)
+        self.num_chunks = num_chunks
+        self.min_size_to_chunk = min_size_to_chunk
+        self.split_generator = Generator(self.model, PromptStrategy.FILTER_SPLIT_PROPOSER, self.reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
+        self.split_merge_generator = Generator(self.model, PromptStrategy.FILTER_SPLIT_MERGER, self.reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
+        # crude adjustment factor for naive estimation in no-sentinel setting
+        self.naive_quality_adjustment = 0.6
+    def __str__(self):
+        """Return the parent's description followed by this operator's chunking parameters."""
+        op = super().__str__()
+        # the value printed here is the number of chunks, so label it as such (was "Chunk Size")
+        op += f"    Num Chunks: {self.num_chunks}\n"
+        op += f"    Min Size to Chunk: {self.min_size_to_chunk}\n"
+        return op
+    def get_id_params(self):
+        """Return the parent's id params plus the chunking parameters (parent values win on key collisions)."""
+        id_params = super().get_id_params()
+        return {"num_chunks": self.num_chunks, "min_size_to_chunk": self.min_size_to_chunk, **id_params}
+    def get_op_params(self):
+        """Return the parent's op params plus the chunking parameters (parent values win on key collisions)."""
+        op_params = super().get_op_params()
+        return {"num_chunks": self.num_chunks, "min_size_to_chunk": self.min_size_to_chunk, **op_params}
+    def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates) -> OperatorCostEstimates:
+        """
+        Start from LLMFilter's naive estimates, then overwrite the cost per record with a value
+        computed from the naive input / output token constants and this model's per-token prices,
+        and scale the quality estimate by `self.naive_quality_adjustment`. Lower and upper bounds
+        are collapsed onto the point estimates.
+        """
+        # get naive cost estimates from LLMFilter
+        naive_op_cost_estimates = super().naive_cost_estimates(source_op_cost_estimates)
+        # NOTE(review): the token counts below are the unscaled naive constants; nothing here accounts
+        # for the chunk size or for the extra per-chunk and merger calls -- confirm this is intended
+        est_num_input_tokens = NAIVE_EST_NUM_INPUT_TOKENS
+        est_num_output_tokens = NAIVE_EST_NUM_OUTPUT_TOKENS
+        model_card = MODEL_CARDS[self.model.value]
+        model_conversion_usd_per_record = (
+            model_card["usd_per_input_token"] * est_num_input_tokens
+            + model_card["usd_per_output_token"] * est_num_output_tokens
+        )
+        # set refined estimate of cost per record
+        naive_op_cost_estimates.cost_per_record = model_conversion_usd_per_record
+        naive_op_cost_estimates.cost_per_record_lower_bound = naive_op_cost_estimates.cost_per_record
+        naive_op_cost_estimates.cost_per_record_upper_bound = naive_op_cost_estimates.cost_per_record
+        naive_op_cost_estimates.quality = naive_op_cost_estimates.quality * self.naive_quality_adjustment
+        naive_op_cost_estimates.quality_lower_bound = naive_op_cost_estimates.quality
+        naive_op_cost_estimates.quality_upper_bound = naive_op_cost_estimates.quality
+        return naive_op_cost_estimates
+    def get_text_chunks(self, text: str, num_chunks: int) -> list[str]:
+        """
+        Split `text` into contiguous substrings of ceil(len(text) / num_chunks) characters each
+        (the last one may be shorter). The result has at most `num_chunks` entries; it has fewer
+        when the text is short relative to `num_chunks`, and is empty for an empty `text`.
+        Raises:
+            ValueError: if `num_chunks` is less than 1 (previously a ZeroDivisionError for 0 and
+                a non-terminating loop for negative values).
+        """
+        if num_chunks < 1:
+            raise ValueError(f"num_chunks must be >= 1, got {num_chunks}")
+        chunk_size = math.ceil(len(text) / num_chunks)
+        # an empty text has chunk size 0; there is nothing to slice (and range() rejects a zero step)
+        if chunk_size == 0:
+            return []
+        return [text[idx : idx + chunk_size] for idx in range(0, len(text), chunk_size)]
+    def get_chunked_candidate(self, candidate: DataRecord, input_fields: list[str]) -> list[DataRecord]:
+        """
+        Build one copy of `candidate` per chunk. Fields annotated as `str` or `list[str]` (or their
+        `| None` variants) whose text is at least `self.min_size_to_chunk` characters long are split
+        with `get_text_chunks`; every other field is copied unchanged into every chunked record.
+        List-of-string fields are first flattened into a single "[a, b, ...]" string.
+        Always returns at least one record.
+        """
+        # compute mapping from each field to its chunked content
+        field_name_to_chunked_content = {}
+        for field_name in input_fields:
+            field = candidate.get_field_type(field_name)
+            content = candidate[field_name]
+            is_string_field = field.annotation in [str, str | None]
+            is_list_string_field = field.annotation in [list[str], list[str] | None]
+            # do not chunk this field if it is not a string or a list of strings, or if an optional
+            # field holds None (joining or measuring None would raise a TypeError)
+            if content is None or not (is_string_field or is_list_string_field):
+                field_name_to_chunked_content[field_name] = [content]
+                continue
+            # if this is a list of strings, join the strings
+            if is_list_string_field:
+                content = "[" + ", ".join(content) + "]"
+            # skip this field if its length is less than the min size to chunk
+            if len(content) < self.min_size_to_chunk:
+                field_name_to_chunked_content[field_name] = [content]
+                continue
+            # chunk the content; fall back to the whole content if chunking produced nothing (empty text)
+            field_name_to_chunked_content[field_name] = self.get_text_chunks(content, self.num_chunks) or [content]
+        # compute the true number of chunks (1 if no field was chunked or there are no input fields)
+        num_chunks = max((len(chunks) for chunks in field_name_to_chunked_content.values()), default=1)
+        # create the chunked candidates
+        candidates = []
+        for chunk_idx in range(num_chunks):
+            candidate_copy = candidate.copy()
+            for field_name, field_chunks in field_name_to_chunked_content.items():
+                if len(field_chunks) == 1:
+                    # un-chunked field: repeat it in every chunked record
+                    candidate_copy[field_name] = field_chunks[0]
+                elif chunk_idx < len(field_chunks):
+                    candidate_copy[field_name] = field_chunks[chunk_idx]
+                else:
+                    # this field produced fewer chunks than the widest field and is exhausted
+                    candidate_copy[field_name] = ""
+            candidates.append(candidate_copy)
+        return candidates
+    def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], GenerationStats]:
+        """
+        Chunk the candidate's input fields, call the proposer generator once per chunked record,
+        then call the merger generator with the collected per-chunk outputs.
+        Returns:
+            The merger's field answers (requested with a single boolean `passed_operator` field)
+            and the combined generation stats of all calls.
+        """
+        # get the set of input fields to use for the filter operation
+        input_fields = self.get_input_fields()
+        # construct output fields
+        fields = {"passed_operator": FieldInfo(annotation=bool, description="Whether the record passed the filter operation")}
+        # split the input fields into chunks, working from a copy of the caller's record
+        candidate_copy = candidate.copy()
+        chunked_candidates = self.get_chunked_candidate(candidate_copy, input_fields)
+        # construct kwargs for generation
+        gen_kwargs = {"project_cols": input_fields, "filter_condition": self.filter_obj.filter_condition}
+        # generate outputs for each chunk separately
+        chunk_outputs, chunk_generation_stats_lst = [], []
+        for chunk_candidate in chunked_candidates:
+            _, reasoning, chunk_generation_stats, _ = self.split_generator(chunk_candidate, fields, json_output=False, **gen_kwargs)
+            chunk_outputs.append(reasoning)
+            chunk_generation_stats_lst.append(chunk_generation_stats)
+        # NOTE(review): the loop above used to rebind the name `candidate`, so the merger has always
+        # been given the LAST chunked record rather than the full input record. That behaviour is
+        # kept (now explicitly); confirm whether the un-chunked record was intended instead.
+        merger_candidate = chunked_candidates[-1]
+        # call the merger
+        merger_kwargs = {**gen_kwargs, "chunk_outputs": chunk_outputs}
+        field_answers, _, merger_gen_stats, _ = self.split_merge_generator(merger_candidate, fields, **merger_kwargs)
+        # compute the total generation stats; sum() starts from 0, so this presumably relies on
+        # GenerationStats supporting `0 + stats` -- confirm against palimpzest.core.models
+        generation_stats = sum(chunk_generation_stats_lst) + merger_gen_stats
+        return field_answers, generation_stats

palimpzest/query/optimizer/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@ from palimpzest.query.optimizer.rules import (
     BasicSubstitutionRule as _BasicSubstitutionRule,
 )
 from palimpzest.query.optimizer.rules import (
-    CriticAndRefineConvertRule as _CriticAndRefineConvertRule,
+    CritiqueAndRefineRule as _CritiqueAndRefineRule,
 )
 from palimpzest.query.optimizer.rules import (
     ImplementationRule as _ImplementationRule,
@@ -21,7 +21,7 @@ from palimpzest.query.optimizer.rules import (
     LLMJoinRule as _LLMJoinRule,
 )
 from palimpzest.query.optimizer.rules import (
-    MixtureOfAgentsConvertRule as _MixtureOfAgentsConvertRule,
+    MixtureOfAgentsRule as _MixtureOfAgentsRule,
 )
 from palimpzest.query.optimizer.rules import (
     NonLLMConvertRule as _NonLLMConvertRule,
@@ -33,7 +33,10 @@ from palimpzest.query.optimizer.rules import (
     PushDownFilter as _PushDownFilter,
 )
 from palimpzest.query.optimizer.rules import (
-    RAGConvertRule as _RAGConvertRule,
+    RAGRule as _RAGRule,
+)
+from palimpzest.query.optimizer.rules import (
+    ReorderConverts as _ReorderConverts,
 )
 from palimpzest.query.optimizer.rules import (
     RetrieveRule as _RetrieveRule,
@@ -42,7 +45,7 @@ from palimpzest.query.optimizer.rules import (
     Rule as _Rule,
 )
 from palimpzest.query.optimizer.rules import (
-    SplitConvertRule as _SplitConvertRule,
+    SplitRule as _SplitRule,
 )
 from palimpzest.query.optimizer.rules import (
     TransformationRule as _TransformationRule,
@@ -52,19 +55,20 @@ ALL_RULES = [
     _AddContextsBeforeComputeRule,
     _AggregateRule,
     _BasicSubstitutionRule,
-    _CriticAndRefineConvertRule,
+    _CritiqueAndRefineRule,
     _ImplementationRule,
     _LLMConvertBondedRule,
     _LLMFilterRule,
     _LLMJoinRule,
-    _MixtureOfAgentsConvertRule,
+    _MixtureOfAgentsRule,
     _NonLLMConvertRule,
     _NonLLMFilterRule,
     _PushDownFilter,
-    _RAGConvertRule,
+    _RAGRule,
+    _ReorderConverts,
     _RetrieveRule,
     _Rule,
-    _SplitConvertRule,
+    _SplitRule,
     _TransformationRule,
 ]

palimpzest/query/optimizer/optimizer.py CHANGED Viewed

@@ -29,15 +29,15 @@ from palimpzest.query.optimizer.optimizer_strategy_type import OptimizationStrat
 from palimpzest.query.optimizer.plan import PhysicalPlan
 from palimpzest.query.optimizer.primitives import Group, LogicalExpression
 from palimpzest.query.optimizer.rules import (
-    CriticAndRefineConvertRule,
+    CritiqueAndRefineRule,
     LLMConvertBondedRule,
-    MixtureOfAgentsConvertRule,
-    RAGConvertRule,
-    SplitConvertRule,
+    MixtureOfAgentsRule,
+    RAGRule,
+    SplitRule,
 )
 from palimpzest.query.optimizer.tasks import (
     ApplyRule,
-    ExpandGroup,
+    ExploreGroup,
     OptimizeGroup,
     OptimizeLogicalExpression,
     OptimizePhysicalExpression,
@@ -150,22 +150,22 @@ class Optimizer:
         if not self.allow_rag_reduction:
             self.implementation_rules = [
-                rule for rule in self.implementation_rules if not issubclass(rule, RAGConvertRule)
+                rule for rule in self.implementation_rules if not issubclass(rule, RAGRule)
             ]
         if not self.allow_mixtures:
             self.implementation_rules = [
-                rule for rule in self.implementation_rules if not issubclass(rule, MixtureOfAgentsConvertRule)
+                rule for rule in self.implementation_rules if not issubclass(rule, MixtureOfAgentsRule)
             ]
         if not self.allow_critic:
             self.implementation_rules = [
-                rule for rule in self.implementation_rules if not issubclass(rule, CriticAndRefineConvertRule)
+                rule for rule in self.implementation_rules if not issubclass(rule, CritiqueAndRefineRule)
             ]
         if not self.allow_split_merge:
             self.implementation_rules = [
-                rule for rule in self.implementation_rules if not issubclass(rule, SplitConvertRule)
+                rule for rule in self.implementation_rules if not issubclass(rule, SplitRule)
             ]
         logger.info(f"Initialized Optimizer with verbose={self.verbose}")
@@ -396,8 +396,9 @@ class Optimizer:
         # TODO: conditionally stop when X number of tasks have been executed to limit exhaustive search
         while len(self.tasks_stack) > 0:
             task = self.tasks_stack.pop(-1)
             new_tasks = []
-            if isinstance(task, (OptimizeGroup, ExpandGroup)):
+            if isinstance(task, (OptimizeGroup, ExploreGroup)):
                 new_tasks = task.perform(self.groups)
             elif isinstance(task, OptimizeLogicalExpression):
                 new_tasks = task.perform(self.transformation_rules, self.implementation_rules)
@@ -409,6 +410,7 @@ class Optimizer:
             elif isinstance(task, OptimizePhysicalExpression):
                 context = {"optimizer_strategy": self.optimizer_strategy, "execution_strategy": self.execution_strategy}
                 new_tasks = task.perform(self.cost_model, self.groups, self.policy, context=context)
             self.tasks_stack.extend(new_tasks)
         logger.debug(f"Done searching optimization space for group_id: {group_id}")

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl