PyPI - palimpzest - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl - Mend

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

palimpzest/constants.py +38 -62
palimpzest/core/data/dataset.py +1 -1
palimpzest/core/data/iter_dataset.py +5 -5
palimpzest/core/elements/groupbysig.py +1 -1
palimpzest/core/elements/records.py +91 -109
palimpzest/core/lib/schemas.py +23 -0
palimpzest/core/models.py +3 -3
palimpzest/prompts/__init__.py +2 -6
palimpzest/prompts/convert_prompts.py +10 -66
palimpzest/prompts/critique_and_refine_prompts.py +66 -0
palimpzest/prompts/filter_prompts.py +8 -46
palimpzest/prompts/join_prompts.py +12 -75
palimpzest/prompts/{moa_aggregator_convert_prompts.py → moa_aggregator_prompts.py} +51 -2
palimpzest/prompts/moa_proposer_prompts.py +87 -0
palimpzest/prompts/prompt_factory.py +351 -479
palimpzest/prompts/split_merge_prompts.py +51 -2
palimpzest/prompts/split_proposer_prompts.py +48 -16
palimpzest/prompts/utils.py +109 -0
palimpzest/query/execution/all_sample_execution_strategy.py +1 -1
palimpzest/query/execution/execution_strategy.py +4 -4
palimpzest/query/execution/mab_execution_strategy.py +47 -23
palimpzest/query/execution/parallel_execution_strategy.py +3 -3
palimpzest/query/execution/single_threaded_execution_strategy.py +8 -8
palimpzest/query/generators/generators.py +31 -17
palimpzest/query/operators/__init__.py +15 -2
palimpzest/query/operators/aggregate.py +21 -19
palimpzest/query/operators/compute.py +6 -8
palimpzest/query/operators/convert.py +12 -37
palimpzest/query/operators/critique_and_refine.py +194 -0
palimpzest/query/operators/distinct.py +7 -7
palimpzest/query/operators/filter.py +13 -25
palimpzest/query/operators/join.py +321 -192
palimpzest/query/operators/limit.py +4 -4
palimpzest/query/operators/mixture_of_agents.py +246 -0
palimpzest/query/operators/physical.py +25 -2
palimpzest/query/operators/project.py +4 -4
palimpzest/query/operators/{rag_convert.py → rag.py} +202 -5
palimpzest/query/operators/retrieve.py +10 -9
palimpzest/query/operators/scan.py +9 -10
palimpzest/query/operators/search.py +18 -24
palimpzest/query/operators/split.py +321 -0
palimpzest/query/optimizer/__init__.py +12 -8
palimpzest/query/optimizer/optimizer.py +12 -10
palimpzest/query/optimizer/rules.py +201 -108
palimpzest/query/optimizer/tasks.py +18 -6
palimpzest/query/processor/config.py +2 -2
palimpzest/query/processor/query_processor.py +2 -2
palimpzest/query/processor/query_processor_factory.py +9 -5
palimpzest/validator/validator.py +7 -9
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/METADATA +3 -8
palimpzest-0.8.3.dist-info/RECORD +95 -0
palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -216
palimpzest/prompts/moa_proposer_convert_prompts.py +0 -75
palimpzest/prompts/util_phrases.py +0 -19
palimpzest/query/operators/critique_and_refine_convert.py +0 -113
palimpzest/query/operators/mixture_of_agents_convert.py +0 -140
palimpzest/query/operators/split_convert.py +0 -170
palimpzest-0.8.1.dist-info/RECORD +0 -95
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/WHEEL +0 -0
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/top_level.txt +0 -0

palimpzest/query/operators/filter.py CHANGED Viewed

@@ -41,11 +41,6 @@ class FilterOp(PhysicalOperator, ABC):
         op_params = super().get_op_params()
         return {"filter": self.filter_obj, "desc": self.desc, **op_params}
-    @abstractmethod
-    def is_image_filter(self) -> bool:
-        """Return True if the filter operation processes an image, False otherwise."""
-        pass
     @abstractmethod
     def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], GenerationStats]:
         """
@@ -76,14 +71,14 @@ class FilterOp(PhysicalOperator, ABC):
         construct the resulting RecordSet.
         """
         # create new DataRecord and set passed_operator attribute
-        dr = DataRecord.from_parent(candidate.schema, parent_record=candidate)
-        dr.passed_operator = passed_operator
+        dr = DataRecord.from_parent(schema=candidate.schema, data_item={}, parent_record=candidate)
+        dr._passed_operator = passed_operator
         # create RecordOpStats object
         record_op_stats = RecordOpStats(
-            record_id=dr.id,
-            record_parent_ids=dr.parent_ids,
-            record_source_indices=dr.source_indices,
+            record_id=dr._id,
+            record_parent_ids=dr._parent_ids,
+            record_source_indices=dr._source_indices,
             record_state=dr.to_dict(include_bytes=False),
             full_op_id=self.get_full_op_id(),
             logical_op_id=self.logical_op_id,
@@ -102,7 +97,6 @@ class FilterOp(PhysicalOperator, ABC):
             total_embedding_llm_calls=generation_stats.total_embedding_llm_calls,
             answer=answer,
             passed_operator=passed_operator,
-            image_operation=self.is_image_filter(),
             op_details={k: str(v) for k, v in self.get_id_params().items()},
         )
@@ -127,10 +121,6 @@ class FilterOp(PhysicalOperator, ABC):
 class NonLLMFilter(FilterOp):
-    def is_image_filter(self) -> bool:
-        # NOTE: even if the UDF is processing an image, we do not consider this an image filter
-        # (the output of this function will be used by the CostModel in a way which does not apply to UDFs)
-        return False
     def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
         # estimate output cardinality using a constant assumption of the filter selectivity
@@ -174,7 +164,7 @@ class LLMFilter(FilterOp):
     def __init__(
         self,
         model: Model,
-        prompt_strategy: PromptStrategy = PromptStrategy.COT_BOOL,
+        prompt_strategy: PromptStrategy = PromptStrategy.FILTER,
         reasoning_effort: str | None = None,
         *args,
         **kwargs,
@@ -183,13 +173,14 @@ class LLMFilter(FilterOp):
         self.model = model
         self.prompt_strategy = prompt_strategy
         self.reasoning_effort = reasoning_effort
-        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
+        if model is not None:
+            self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
     def get_id_params(self):
         id_params = super().get_id_params()
         id_params = {
-            "model": self.model.value,
-            "prompt_strategy": self.prompt_strategy.value,
+            "model": None if self.model is None else self.model.value,
+            "prompt_strategy": None if self.prompt_strategy is None else self.prompt_strategy.value,
             "reasoning_effort": self.reasoning_effort,
             **id_params,
         }
@@ -208,15 +199,12 @@ class LLMFilter(FilterOp):
         return op_params
     def get_model_name(self):
-        return self.model.value
-    def is_image_filter(self) -> bool:
-        return self.prompt_strategy is PromptStrategy.COT_BOOL_IMAGE
+        return None if self.model is None else self.model.value
     def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
         # estimate number of input tokens from source
         est_num_input_tokens = NAIVE_EST_NUM_INPUT_TOKENS
-        if self.is_image_filter():
+        if self.is_image_op():
             est_num_input_tokens = 765 / 10  # 1024x1024 image is 765 tokens
         # NOTE: the output often generates an entire reasoning sentence, thus the true value may be higher
@@ -232,7 +220,7 @@ class LLMFilter(FilterOp):
         # get est. of conversion cost (in USD) per record from model card
         usd_per_input_token = (
             MODEL_CARDS[self.model.value]["usd_per_audio_input_token"]
-            if self.prompt_strategy.is_audio_prompt()
+            if self.is_audio_op()
             else MODEL_CARDS[self.model.value]["usd_per_input_token"]
         )
         model_conversion_usd_per_record = (

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl