PyPI - palimpzest - Versions diffs - 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl - Mend

palimpzest-0.8.4-py3-none-any.whl → palimpzest-0.8.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

palimpzest/core/data/dataset.py +16 -1
palimpzest/core/elements/records.py +3 -40
palimpzest/core/lib/schemas.py +9 -0
palimpzest/query/execution/execution_strategy.py +5 -0
palimpzest/query/execution/mab_execution_strategy.py +56 -23
palimpzest/query/operators/__init__.py +2 -1
palimpzest/query/operators/join.py +13 -11
palimpzest/query/optimizer/__init__.py +7 -3
palimpzest/query/optimizer/optimizer.py +8 -0
palimpzest/query/optimizer/optimizer_strategy.py +0 -3
palimpzest/query/optimizer/plan.py +5 -6
palimpzest/query/optimizer/rules.py +40 -6
palimpzest/query/optimizer/tasks.py +9 -1
palimpzest/query/processor/config.py +1 -0
palimpzest/query/processor/query_processor_factory.py +7 -0
palimpzest/validator/validator.py +14 -14
{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/METADATA +1 -1
{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/RECORD +21 -21
{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/WHEEL +0 -0
{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/top_level.txt +0 -0

palimpzest/core/data/dataset.py CHANGED Viewed

@@ -10,7 +10,7 @@ from pydantic import BaseModel
 from palimpzest.constants import AggFunc, Cardinality
 from palimpzest.core.elements.filters import Filter
 from palimpzest.core.elements.groupbysig import GroupBySig
-from palimpzest.core.lib.schemas import create_schema_from_fields, project, union_schemas
+from palimpzest.core.lib.schemas import create_schema_from_fields, project, relax_schema, union_schemas
 from palimpzest.policy import construct_policy_from_kwargs
 from palimpzest.query.operators.logical import (
     Aggregate,
@@ -193,6 +193,21 @@ class Dataset:
         return root_datasets
+    def relax_types(self) -> None:
+        """
+        Relax the types in this Dataset's schema and all upstream Datasets' schemas to be more permissive.
+        """
+        # relax the types in this dataset's schema
+        self._schema = relax_schema(self._schema)
+        # relax the types in dataset's operator's input and output schemas
+        self._operator.input_schema = None if self._operator.input_schema is None else relax_schema(self._operator.input_schema)
+        self._operator.output_schema = relax_schema(self._operator.output_schema)
+        # recursively relax the types in all upstream datasets
+        for source in self._sources:
+            source.relax_types()
     def get_upstream_datasets(self) -> list[Dataset]:
         """
         Get the list of all upstream datasets that are sources to this dataset.

palimpzest/core/elements/records.py CHANGED Viewed

@@ -16,12 +16,11 @@ from palimpzest.core.lib.schemas import (
     ImageBase64,
     ImageFilepath,
     ImageURL,
-    create_schema_from_df,
     project,
     union_schemas,
 )
 from palimpzest.core.models import ExecutionStats, PlanStats, RecordOpStats
-from palimpzest.utils.hash_helpers import hash_for_id, hash_for_serialized_dict
+from palimpzest.utils.hash_helpers import hash_for_id
 class DataRecord:
@@ -93,10 +92,8 @@ class DataRecord:
     def __getattr__(self, name: str) -> Any:
-        field = getattr(self._data_item, name, None)
-        if field is not None:
-            return field
-        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
+        return getattr(self._data_item, name)
     def __getitem__(self, field: str) -> Any:
         return getattr(self._data_item, field)
@@ -266,40 +263,6 @@ class DataRecord:
         return new_dr
-    # TODO: unused outside of unit tests
-    @staticmethod
-    def from_df(df: pd.DataFrame, schema: type[BaseModel] | None = None) -> list[DataRecord]:
-        """Create a list of DataRecords from a pandas DataFrame
-        Args:
-            df (pd.DataFrame): Input DataFrame
-            schema (BaseModel, optional): Schema for the DataRecords. If None, will be derived from DataFrame
-        Returns:
-            list[DataRecord]: List of DataRecord instances
-        """
-        if df is None:
-            raise ValueError("DataFrame is None!")
-        # create schema if one isn't provided
-        if schema is None:
-            schema = create_schema_from_df(df)
-        # create an id for the dataset from the schema
-        dataset_id = hash_for_serialized_dict({
-            k: {"annotation": str(v.annotation), "default": str(v.default), "description": v.description}
-            for k, v in schema.model_fields.items()
-        })
-        # create records
-        records = []
-        for idx, row in df.iterrows():
-            row_dict = row.to_dict()
-            record = DataRecord(schema(**row_dict), source_indices=[f"{dataset_id}-{idx}"])
-            records.append(record)
-        return records
     @staticmethod
     def to_df(records: list[DataRecord], project_cols: list[str] | None = None) -> pd.DataFrame:
         if len(records) == 0:

palimpzest/core/lib/schemas.py CHANGED Viewed

@@ -80,6 +80,15 @@ def _create_pickleable_model(fields: dict[str, tuple[type, FieldInfo]]) -> type[
     return new_model
+def relax_schema(model: type[BaseModel]) -> type[BaseModel]:
+    """Updates the type annotation for every field in the BaseModel to include typing.Any"""
+    fields = {}
+    for field_name, field in model.model_fields.items():
+        fields[field_name] = (field.annotation | Any, field)
+    return _create_pickleable_model(fields)
 def project(model: type[BaseModel], project_fields: list[str]) -> type[BaseModel]:
     """Project a Pydantic model to only the specified columns."""
     # make sure projection column names are shortened

palimpzest/query/execution/execution_strategy.py CHANGED Viewed

@@ -314,6 +314,11 @@ class SentinelExecutionStrategy(BaseExecutionStrategy, ABC):
             for future in as_completed(futures):
                 # update output record sets
                 record_set, operator, source_indices, input = future.result()
+                # if the operator is a join, get record_set from tuple output
+                if isinstance(operator, JoinOp):
+                    record_set = record_set[0]
                 output_record_sets.append((record_set, operator, source_indices, input))
                 # update cache

palimpzest/query/execution/mab_execution_strategy.py CHANGED Viewed

@@ -11,7 +11,7 @@ from palimpzest.policy import Policy
 from palimpzest.query.execution.execution_strategy import SentinelExecutionStrategy
 from palimpzest.query.operators.aggregate import AggregateOp
 from palimpzest.query.operators.convert import LLMConvert
-from palimpzest.query.operators.filter import FilterOp, LLMFilter
+from palimpzest.query.operators.filter import FilterOp, LLMFilter, NonLLMFilter
 from palimpzest.query.operators.join import JoinOp
 from palimpzest.query.operators.physical import PhysicalOperator
 from palimpzest.query.operators.retrieve import RetrieveOp
@@ -351,7 +351,7 @@ class OpFrontier:
         return op_inputs
-    def update_frontier(self, unique_logical_op_id: str, plan_stats: SentinelPlanStats) -> None:
+    def update_frontier(self, unique_logical_op_id: str, plan_stats: SentinelPlanStats, full_op_id_to_source_indices_processed: dict[str, set[list]]) -> None:
         """
         Update the set of frontier operators, pulling in new ones from the reservoir as needed.
         This function will:
@@ -383,22 +383,14 @@ class OpFrontier:
             # compute final list of record op stats
             full_op_id_to_record_op_stats[full_op_id] = list(record_id_to_max_quality_record_op_stats.values())
-        # compute mapping of physical op to num samples and total samples drawn;
-        # also update the set of source indices which have been processed by each physical operator
-        full_op_id_to_num_samples, total_num_samples = {}, 0
-        for full_op_id, record_op_stats_lst in full_op_id_to_record_op_stats.items():
+        # update the set of source indices processed by each physical operator
+        for full_op_id, source_indices_processed in full_op_id_to_source_indices_processed.items():
             # update the set of source indices processed
-            source_indices_processed = set()
-            for record_op_stats in record_op_stats_lst:
-                source_indices = record_op_stats.record_source_indices
-                if len(source_indices) == 1:
-                    source_indices = source_indices[0]
-                elif self.is_llm_join or self.is_aggregate_op:
-                    source_indices = tuple(source_indices)
+            for source_indices in source_indices_processed:
+                source_indices = source_indices[0] if len(source_indices) == 1 else tuple(source_indices)
                 self.full_op_id_to_sources_processed[full_op_id].add(source_indices)
-                source_indices_processed.add(source_indices)
+                if source_indices in self.full_op_id_to_sources_not_processed[full_op_id]:
+                    self.full_op_id_to_sources_not_processed[full_op_id].remove(source_indices)
             # update the set of source indices not processed
             self.full_op_id_to_sources_not_processed[full_op_id] = [
@@ -406,8 +398,11 @@ class OpFrontier:
                 if indices not in source_indices_processed
             ]
-            # compute the number of samples as the number of source indices processed
-            num_samples = len(self.full_op_id_to_sources_processed[full_op_id])
+        # compute mapping of physical op to num samples and total samples drawn
+        full_op_id_to_num_samples, total_num_samples = {}, 0
+        for full_op_id, record_op_stats_lst in full_op_id_to_record_op_stats.items():
+            # compute the number of samples as the length of the record_op_stats_lst
+            num_samples = len(record_op_stats_lst)
             full_op_id_to_num_samples[full_op_id] = num_samples
             total_num_samples += num_samples
@@ -620,6 +615,28 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
     calls does not perfectly match the sample_budget. This may cause some minor discrepancies with
     the progress manager as a result.
     """
+    def _remove_filtered_records_from_downstream_ops(self, topo_idx: int, plan: SentinelPlan, op_frontiers: dict[str, OpFrontier], source_indices_to_all_record_sets: dict[int, list[DataRecordSet]]) -> None:
+        """Remove records which were filtered out by a NonLLMFilter from all downstream operators."""
+        filtered_source_indices = set()
+        # NonLLMFilter will have one record_set per source_indices with a single record
+        for source_indices, record_sets in source_indices_to_all_record_sets.items():
+            record: DataRecord = record_sets[0][0]
+            if not record._passed_operator:
+                filtered_source_indices.add(source_indices)
+        # remove filtered source indices from all downstream operators
+        if len(filtered_source_indices) > 0:
+            for downstream_topo_idx in range(topo_idx + 1, len(plan)):
+                downstream_logical_op_id = plan[downstream_topo_idx][0]
+                downstream_unique_logical_op_id = f"{downstream_topo_idx}-{downstream_logical_op_id}"
+                downstream_op_frontier = op_frontiers[downstream_unique_logical_op_id]
+                for full_op_id in downstream_op_frontier.full_op_id_to_sources_not_processed:
+                    downstream_op_frontier.full_op_id_to_sources_not_processed[full_op_id] = [
+                        indices for indices in downstream_op_frontier.full_op_id_to_sources_not_processed[full_op_id]
+                        if indices not in filtered_source_indices
+                    ]
     def _get_max_quality_op(self, unique_logical_op_id: str, op_frontiers: dict[str, OpFrontier], plan_stats: SentinelPlanStats) -> PhysicalOperator:
         """
         Returns the operator in the frontier with the highest (estimated) quality.
@@ -639,7 +656,11 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
         for op in frontier_ops:
             op_quality_stats = []
             if op.get_full_op_id() in full_op_id_to_record_op_stats:
-                op_quality_stats = [record_op_stats.quality for record_op_stats in full_op_id_to_record_op_stats[op.get_full_op_id()]]
+                op_quality_stats = [
+                    record_op_stats.quality
+                    for record_op_stats in full_op_id_to_record_op_stats[op.get_full_op_id()]
+                    if record_op_stats.quality is not None
+                ]
             avg_op_quality = sum(op_quality_stats) / len(op_quality_stats) if len(op_quality_stats) > 0 else 0.0
             if max_avg_quality is None or avg_op_quality > max_avg_quality:
                 max_quality_op = op
@@ -664,7 +685,7 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
                 source_indices_to_sample.update(source_indices)
             # execute operator sets in sequence
-            for topo_idx, (logical_op_id, _) in enumerate(plan):
+            for topo_idx, (logical_op_id, op_set) in enumerate(plan):
                 # compute unique logical op id within plan
                 unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
@@ -672,8 +693,10 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
                 max_quality_op = self._get_max_quality_op(unique_logical_op_id, op_frontiers, plan_stats)
                 # get frontier ops and their next input
+                def is_filtered_out(tup: tuple) -> bool:
+                    return tup[-1] is None or isinstance(tup[-1], list) and all([record is None for record in tup[-1]])
                 frontier_op_inputs = op_frontiers[unique_logical_op_id].get_frontier_op_inputs(source_indices_to_sample, max_quality_op)
-                frontier_op_inputs = list(filter(lambda tup: tup[-1] is not None, frontier_op_inputs))
+                frontier_op_inputs = list(filter(lambda tup: not is_filtered_out(tup), frontier_op_inputs))
                 # break out of the loop if frontier_op_inputs is empty, as this means all records have been filtered out
                 if len(frontier_op_inputs) == 0:
@@ -711,7 +734,18 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
                     op_frontiers[next_unique_logical_op_id].update_inputs(unique_logical_op_id, source_indices_to_all_record_sets)
                 # update the (pareto) frontier for each set of operators
-                op_frontiers[unique_logical_op_id].update_frontier(unique_logical_op_id, plan_stats)
+                full_op_id_to_source_indices_processed = {}
+                for source_indices, record_set_tuples in source_indices_to_record_set_tuples.items():
+                    for _, op, _ in record_set_tuples:
+                        if op.get_full_op_id() not in full_op_id_to_source_indices_processed:
+                            full_op_id_to_source_indices_processed[op.get_full_op_id()] = set()
+                        full_op_id_to_source_indices_processed[op.get_full_op_id()].add(source_indices)
+                op_frontiers[unique_logical_op_id].update_frontier(unique_logical_op_id, plan_stats, full_op_id_to_source_indices_processed)
+                # if the operator is a non-llm filter which has filtered out records, remove those records from
+                # all downstream operators' full_op_id_to_sources_not_processed
+                if isinstance(op_set[0], NonLLMFilter):
+                    self._remove_filtered_records_from_downstream_ops(topo_idx, plan, op_frontiers, source_indices_to_all_record_sets)
         # finalize plan stats
         plan_stats.finish()
@@ -721,7 +755,6 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
     def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dict[str, Dataset], validator: Validator) -> SentinelPlanStats:
         logger.info(f"Executing plan {plan.plan_id} with {self.max_workers} workers")
-        logger.info(f"Plan Details: {plan}")
         # initialize plan stats
         plan_stats = SentinelPlanStats.from_plan(plan)

palimpzest/query/operators/__init__.py CHANGED Viewed

@@ -12,6 +12,7 @@ from palimpzest.query.operators.distinct import DistinctOp as _DistinctOp
 from palimpzest.query.operators.filter import FilterOp as _FilterOp
 from palimpzest.query.operators.filter import LLMFilter as _LLMFilter
 from palimpzest.query.operators.filter import NonLLMFilter as _NonLLMFilter
+from palimpzest.query.operators.join import EmbeddingJoin as _EmbeddingJoin
 from palimpzest.query.operators.join import JoinOp as _JoinOp
 from palimpzest.query.operators.join import NestedLoopsJoin as _NestedLoopsJoin
 from palimpzest.query.operators.limit import LimitScanOp as _LimitScanOp
@@ -88,7 +89,7 @@ PHYSICAL_OPERATORS = (
     # filter
     + [_FilterOp, _NonLLMFilter, _LLMFilter]
     # join
-    + [_JoinOp, _NestedLoopsJoin]
+    + [_EmbeddingJoin, _JoinOp, _NestedLoopsJoin]
     # limit
     + [_LimitScanOp]
     # mixture-of-agents

palimpzest/query/operators/join.py CHANGED Viewed

@@ -41,6 +41,7 @@ class JoinOp(PhysicalOperator, ABC):
         prompt_strategy: PromptStrategy = PromptStrategy.JOIN,
         join_parallelism: int = 64,
         reasoning_effort: str | None = None,
+        retain_inputs: bool = True,
         desc: str | None = None,
         *args,
         **kwargs,
@@ -52,6 +53,7 @@ class JoinOp(PhysicalOperator, ABC):
         self.prompt_strategy = prompt_strategy
         self.join_parallelism = join_parallelism
         self.reasoning_effort = reasoning_effort
+        self.retain_inputs = retain_inputs
         self.desc = desc
         self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
         self.join_idx = 0
@@ -82,10 +84,11 @@ class JoinOp(PhysicalOperator, ABC):
         op_params = super().get_op_params()
         op_params = {
             "condition": self.condition,
-            "model": self.model.value,
-            "prompt_strategy": self.prompt_strategy.value,
+            "model": self.model,
+            "prompt_strategy": self.prompt_strategy,
             "join_parallelism": self.join_parallelism,
             "reasoning_effort": self.reasoning_effort,
+            "retain_inputs": self.retain_inputs,
             "desc": self.desc,
             **op_params,
         }
@@ -227,8 +230,9 @@ class NestedLoopsJoin(JoinOp):
         num_inputs_processed = len(join_candidates)
         # store input records to join with new records added later
-        self._left_input_records.extend(left_candidates)
-        self._right_input_records.extend(right_candidates)
+        if self.retain_inputs:
+            self._left_input_records.extend(left_candidates)
+            self._right_input_records.extend(right_candidates)
         # return empty DataRecordSet if no output records were produced
         if len(output_records) == 0:
@@ -242,7 +246,7 @@ class EmbeddingJoin(JoinOp):
     # specialized use cases (e.g., speech-to-text) with strict requirements on things like e.g. sample rate
     def __init__(
         self,
-        num_samples: int = 100,
+        num_samples: int = 10,
         *args,
         **kwargs,
     ):
@@ -307,10 +311,7 @@ class EmbeddingJoin(JoinOp):
         )
         # get est. of conversion cost (in USD) per record from model card
-        model_conversion_usd_per_record = (
-            MODEL_CARDS[self.embedding_model.value]["usd_per_input_token"] * est_num_input_tokens
-            + MODEL_CARDS[self.embedding_model.value]["usd_per_output_token"] * est_num_output_tokens
-        )
+        model_conversion_usd_per_record = MODEL_CARDS[self.embedding_model.value]["usd_per_input_token"] * est_num_input_tokens
         # estimate output cardinality using a constant assumption of the filter selectivity
         selectivity = NAIVE_EST_JOIN_SELECTIVITY
@@ -521,8 +522,9 @@ class EmbeddingJoin(JoinOp):
             record_op_stats.total_embedding_cost = amortized_embedding_cost
         # store input records to join with new records added later
-        self._left_input_records.extend(zip(left_candidates, left_embeddings))
-        self._right_input_records.extend(zip(right_candidates, right_embeddings))
+        if self.retain_inputs:
+            self._left_input_records.extend(zip(left_candidates, left_embeddings))
+            self._right_input_records.extend(zip(right_candidates, right_embeddings))
         # return empty DataRecordSet if no output records were produced
         if len(output_records) == 0:

palimpzest/query/optimizer/__init__.py CHANGED Viewed

@@ -8,6 +8,9 @@ from palimpzest.query.optimizer.rules import (
 from palimpzest.query.optimizer.rules import (
     CritiqueAndRefineRule as _CritiqueAndRefineRule,
 )
+from palimpzest.query.optimizer.rules import (
+    EmbeddingJoinRule as _EmbeddingJoinRule,
+)
 from palimpzest.query.optimizer.rules import (
     ImplementationRule as _ImplementationRule,
 )
@@ -18,10 +21,10 @@ from palimpzest.query.optimizer.rules import (
     LLMFilterRule as _LLMFilterRule,
 )
 from palimpzest.query.optimizer.rules import (
-    LLMJoinRule as _LLMJoinRule,
+    MixtureOfAgentsRule as _MixtureOfAgentsRule,
 )
 from palimpzest.query.optimizer.rules import (
-    MixtureOfAgentsRule as _MixtureOfAgentsRule,
+    NestedLoopsJoinRule as _NestedLoopsJoinRule,
 )
 from palimpzest.query.optimizer.rules import (
     NonLLMConvertRule as _NonLLMConvertRule,
@@ -56,10 +59,11 @@ ALL_RULES = [
     _AggregateRule,
     _BasicSubstitutionRule,
     _CritiqueAndRefineRule,
+    _EmbeddingJoinRule,
     _ImplementationRule,
     _LLMConvertBondedRule,
     _LLMFilterRule,
-    _LLMJoinRule,
+    _NestedLoopsJoinRule,
     _MixtureOfAgentsRule,
     _NonLLMConvertRule,
     _NonLLMFilterRule,

palimpzest/query/optimizer/optimizer.py CHANGED Viewed

@@ -181,6 +181,7 @@ class Optimizer:
             "join_parallelism": self.join_parallelism,
             "reasoning_effort": self.reasoning_effort,
             "api_base": self.api_base,
+            "is_validation": self.optimizer_strategy == OptimizationStrategyType.SENTINEL,
         }
     def deepcopy_clean(self):
@@ -204,10 +205,17 @@ class Optimizer:
         return optimizer
     def update_strategy(self, optimizer_strategy: OptimizationStrategyType):
+        # set the optimizer_strategy
         self.optimizer_strategy = optimizer_strategy
+        # get the strategy class associated with the optimizer strategy
         optimizer_strategy_cls = optimizer_strategy.value
         self.strategy = optimizer_strategy_cls()
+        # remove transformation rules for optimization strategies which do not require them
+        if optimizer_strategy.no_transformation():
+            self.transformation_rules = []
     def construct_group_tree(self, dataset: Dataset) -> tuple[int, dict[str, FieldInfo], dict[str, set[str]]]:
         logger.debug(f"Constructing group tree for dataset: {dataset}")
         ### convert node --> Group ###

palimpzest/query/optimizer/optimizer_strategy.py CHANGED Viewed

@@ -58,7 +58,6 @@ class GreedyStrategy(OptimizationStrategy):
     def get_optimal_plans(self, groups: dict, final_group_id: int, policy: Policy, use_final_op_quality: bool) -> list[PhysicalPlan]:
         logger.info(f"Getting greedy optimal plans for final group id: {final_group_id}")
         plans = [self._get_greedy_physical_plan(groups, final_group_id)]
-        logger.info(f"Greedy optimal plans: {plans}")
         logger.info(f"Done getting greedy optimal plans for final group id: {final_group_id}")
         return plans
@@ -137,7 +136,6 @@ class ParetoStrategy(OptimizationStrategy):
             optimal_plan = optimal_plan if policy.choose(optimal_plan.plan_cost, plan.plan_cost) else plan
         plans = [optimal_plan]
-        logger.info(f"Pareto optimal plans: {plans}")
         logger.info(f"Done getting pareto optimal plans for final group id: {final_group_id}")
         return plans
@@ -174,7 +172,6 @@ class SentinelStrategy(OptimizationStrategy):
     def get_optimal_plans(self, groups: dict, final_group_id: int, policy: Policy, use_final_op_quality: bool) -> list[SentinelPlan]:
         logger.info(f"Getting sentinel optimal plans for final group id: {final_group_id}")
         plans = [self._get_sentinel_plan(groups, final_group_id)]
-        logger.info(f"Sentinel optimal plans: {plans}")
         logger.info(f"Done getting sentinel optimal plans for final group id: {final_group_id}")
         return plans

palimpzest/query/optimizer/plan.py CHANGED Viewed

@@ -330,12 +330,11 @@ class SentinelPlan(Plan):
     def _get_str(self, idx: int = 0, indent: int = 0) -> str:
         indent_str = " " * (indent * 2)
-        plan_str = ""
-        for inner_idx, operator in enumerate(self.operator_set):
-            inner_idx_str = "" if len(self.operator_set) == 1 else f"{inner_idx + 1}."
-            plan_str += f"{indent_str}{idx}.{inner_idx_str} {str(operator)}\n"
-            for subplan in self.subplans:
-                plan_str += subplan._get_str(idx=idx + 1, indent=indent + 1)
+        operator = self.operator_set[0]
+        inner_idx_str = "" if len(self.operator_set) == 1 else f"1 - {len(self.operator_set)}."
+        plan_str = f"{indent_str}{idx}.{inner_idx_str} {str(operator)}\n"
+        for subplan in self.subplans:
+            plan_str += subplan._get_str(idx=idx + 1, indent=indent + 1)
         return plan_str

palimpzest/query/optimizer/rules.py CHANGED Viewed

@@ -18,7 +18,7 @@ from palimpzest.query.operators.convert import LLMConvertBonded, NonLLMConvert
 from palimpzest.query.operators.critique_and_refine import CritiqueAndRefineConvert, CritiqueAndRefineFilter
 from palimpzest.query.operators.distinct import DistinctOp
 from palimpzest.query.operators.filter import LLMFilter, NonLLMFilter
-from palimpzest.query.operators.join import NestedLoopsJoin
+from palimpzest.query.operators.join import EmbeddingJoin, NestedLoopsJoin
 from palimpzest.query.operators.limit import LimitScanOp
 from palimpzest.query.operators.logical import (
     Aggregate,
@@ -761,8 +761,8 @@ class SplitRule(ImplementationRule):
     @classmethod
     def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
         logical_op = logical_expression.operator
-        is_map_match = isinstance(logical_op, ConvertScan) and cls._is_text_only_operation() and logical_op.udf is None
-        is_filter_match = isinstance(logical_op, FilteredScan) and cls._is_text_only_operation() and logical_op.filter.filter_fn is None
+        is_map_match = isinstance(logical_op, ConvertScan) and cls._is_text_only_operation(logical_expression) and logical_op.udf is None
+        is_filter_match = isinstance(logical_op, FilteredScan) and cls._is_text_only_operation(logical_expression) and logical_op.filter.filter_fn is None
         logger.debug(f"SplitRule matches_pattern: {is_map_match or is_filter_match} for {logical_expression}")
         return is_map_match or is_filter_match
@@ -860,7 +860,7 @@ class LLMFilterRule(ImplementationRule):
         return cls._perform_substitution(logical_expression, LLMFilter, runtime_kwargs, variable_op_kwargs)
-class LLMJoinRule(ImplementationRule):
+class NestedLoopsJoinRule(ImplementationRule):
     """
     Substitute a logical expression for a JoinOp with an (LLM) NestedLoopsJoin physical implementation.
     """
@@ -868,12 +868,12 @@ class LLMJoinRule(ImplementationRule):
     @classmethod
     def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
         is_match = isinstance(logical_expression.operator, JoinOp)
-        logger.debug(f"LLMJoinRule matches_pattern: {is_match} for {logical_expression}")
+        logger.debug(f"NestedLoopsJoinRule matches_pattern: {is_match} for {logical_expression}")
         return is_match
     @classmethod
     def substitute(cls, logical_expression: LogicalExpression, **runtime_kwargs) -> set[PhysicalExpression]:
-        logger.debug(f"Substituting LLMJoinRule for {logical_expression}")
+        logger.debug(f"Substituting NestedLoopsJoinRule for {logical_expression}")
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
@@ -884,6 +884,7 @@ class LLMJoinRule(ImplementationRule):
                 "prompt_strategy": PromptStrategy.JOIN_NO_REASONING if model.is_reasoning_model() and no_reasoning else PromptStrategy.JOIN,
                 "join_parallelism": runtime_kwargs["join_parallelism"],
                 "reasoning_effort": runtime_kwargs["reasoning_effort"],
+                "retain_inputs": not runtime_kwargs["is_validation"],
             }
             for model in models
         ]
@@ -891,6 +892,39 @@ class LLMJoinRule(ImplementationRule):
         return cls._perform_substitution(logical_expression, NestedLoopsJoin, runtime_kwargs, variable_op_kwargs)
+class EmbeddingJoinRule(ImplementationRule):
+    """
+    Substitute a logical expression for a JoinOp with an EmbeddingJoin physical implementation.
+    """
+    @classmethod
+    def matches_pattern(cls, logical_expression: LogicalExpression) -> bool:
+        is_match = isinstance(logical_expression.operator, JoinOp) and not cls._is_audio_operation(logical_expression)
+        logger.debug(f"EmbeddingJoinRule matches_pattern: {is_match} for {logical_expression}")
+        return is_match
+    @classmethod
+    def substitute(cls, logical_expression: LogicalExpression, **runtime_kwargs) -> set[PhysicalExpression]:
+        logger.debug(f"Substituting EmbeddingJoinRule for {logical_expression}")
+        # create variable physical operator kwargs for each model which can implement this logical_expression
+        models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
+        no_reasoning = runtime_kwargs["reasoning_effort"] in [None, "minimal", "low"]
+        variable_op_kwargs = [
+            {
+                "model": model,
+                "prompt_strategy": PromptStrategy.JOIN_NO_REASONING if model.is_reasoning_model() and no_reasoning else PromptStrategy.JOIN,
+                "join_parallelism": runtime_kwargs["join_parallelism"],
+                "reasoning_effort": runtime_kwargs["reasoning_effort"],
+                "retain_inputs": not runtime_kwargs["is_validation"],
+                "num_samples": 10, # TODO: iterate over different choices of num_samples
+            }
+            for model in models
+        ]
+        return cls._perform_substitution(logical_expression, EmbeddingJoin, runtime_kwargs, variable_op_kwargs)
 class AggregateRule(ImplementationRule):
     """
     Substitute the logical expression for an aggregate with its physical counterpart.

palimpzest/query/optimizer/tasks.py CHANGED Viewed

@@ -247,8 +247,16 @@ class ApplyRule(Task):
             # apply implementation rule
             new_expressions = self.rule.substitute(self.logical_expression, **physical_op_params)
             new_expressions = [expr for expr in new_expressions if expr.expr_id not in expressions]
+            # get the costed_full_op_ids from the context (if provided) and compute whether this
+            # logical expression has physical operators which have been costed
             costed_full_op_ids = context['costed_full_op_ids']
-            if costed_full_op_ids is not None:
+            logical_op_has_been_costed = costed_full_op_ids is not None and any([
+                op_id.split("-")[0] == self.logical_expression.operator.get_logical_op_id()
+                for op_id in costed_full_op_ids
+            ])
+            if logical_op_has_been_costed:
                 new_expressions = [expr for expr in new_expressions if expr.operator.get_full_op_id() in costed_full_op_ids]
             expressions.update({expr.expr_id: expr for expr in new_expressions})
             group.physical_expressions.update(new_expressions)

palimpzest/query/processor/config.py CHANGED Viewed

@@ -16,6 +16,7 @@ class QueryProcessorConfig(BaseModel):
     # general execution flags
     policy: Policy = Field(default_factory=MaxQuality)
+    enforce_types: bool = Field(default=False)
     scan_start_idx: int = Field(default=0)
     num_samples: int = Field(default=None)
     verbose: bool = Field(default=False)

palimpzest/query/processor/query_processor_factory.py CHANGED Viewed

@@ -149,6 +149,13 @@ class QueryProcessorFactory:
         # apply any additional keyword arguments to the config and validate its contents
         config, validator = cls._config_validation_and_normalization(config, train_dataset, validator)
+        # update the dataset's types if we're not enforcing types
+        if not config.enforce_types:
+            dataset.relax_types()
+            if train_dataset is not None:
+                for _, ds in train_dataset.items():
+                    ds.relax_types()
         # create the optimizer, execution strateg(ies), and processor
         optimizer = cls._create_optimizer(config)
         config.execution_strategy = cls._create_execution_strategy(dataset, config)

palimpzest/validator/validator.py CHANGED Viewed

@@ -79,7 +79,7 @@ class Validator:
         Compute the quality of the generated output for the given fields and input_record.
         """
         # create prompt factory
-        factory = PromptFactory(PromptStrategy.MAP, Model.o4_MINI, Cardinality.ONE_TO_ONE)
+        factory = PromptFactory(PromptStrategy.MAP, self.model, Cardinality.ONE_TO_ONE)
         # get the input messages; strip out the system message(s)
         msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
@@ -95,14 +95,14 @@ class Validator:
             start_time = time.time()
             validator_prompt = MAP_IMAGE_VALIDATOR_PROMPT if op.is_image_op() else MAP_VALIDATOR_PROMPT
             val_messages = [{"role": "system", "content": validator_prompt}] + input_messages + [{"role": "user", "content": output_message}]
-            completion = litellm.completion(model="openai/o4-mini", messages=val_messages)
+            completion = litellm.completion(model=self.model.value, messages=val_messages)
             completion_text = completion.choices[0].message.content
             gen_stats = self._get_gen_stats_from_completion(completion, start_time)
             print(f"INPUT:\n{input_str}")
             print(Fore.GREEN + f"{completion_text}\n" + Style.RESET_ALL)
             # parse the evaluation
-            eval_dict: dict = get_json_from_answer(completion_text, Model.o4_MINI, Cardinality.ONE_TO_ONE)
+            eval_dict: dict = get_json_from_answer(completion_text, self.model, Cardinality.ONE_TO_ONE)
             score = sum(eval_dict.values()) / len(eval_dict)
         except Exception:
@@ -115,7 +115,7 @@ class Validator:
         Compute the quality for each record_op_stats object in the given record_set.
         """
         # create prompt factory
-        factory = PromptFactory(PromptStrategy.MAP, Model.o4_MINI, Cardinality.ONE_TO_MANY)
+        factory = PromptFactory(PromptStrategy.MAP, self.model, Cardinality.ONE_TO_MANY)
         # get the input messages; strip out the system message(s)
         msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
@@ -138,7 +138,7 @@ class Validator:
             # print(Fore.GREEN + f"{completion_text}\n" + Style.RESET_ALL)
             # parse the evaluation
-            eval_dicts: list[dict] = get_json_from_answer(completion_text, Model.o4_MINI, Cardinality.ONE_TO_MANY)
+            eval_dicts: list[dict] = get_json_from_answer(completion_text, self.model, Cardinality.ONE_TO_MANY)
             all_qualities = []
             for record_eval_dict in eval_dicts:
                 all_qualities.extend(record_eval_dict.values())
@@ -158,12 +158,12 @@ class Validator:
         label = self.filter_cache.get(filter_input_hash, None)
         if label is None:
             validator_op: LLMFilter = op.copy()
-            validator_op.model = Model.o4_MINI
+            validator_op.model = self.model
             try:
                 target_record_set = validator_op(input_record)
                 label = target_record_set[0]._passed_operator
                 self.filter_cache[filter_input_hash] = label
-                score = label == output
+                score = float(label == output)
                 record_op_stats = target_record_set.record_op_stats[0]
                 gen_stats = GenerationStats(
                     model_name=self.model.value,
@@ -181,7 +181,7 @@ class Validator:
                 pass
         else:
-            score = label == output
+            score = float(label == output)
         return score, gen_stats
@@ -191,12 +191,12 @@ class Validator:
         label = self.join_cache.get(join_input_hash, None)
         if label is None:
             validator_op: JoinOp = op.copy()
-            validator_op.model = Model.o4_MINI
+            validator_op.model = self.model
             try:
-                target_record_set = validator_op([left_input_record], [right_input_record])
+                target_record_set, _ = validator_op([left_input_record], [right_input_record])
                 label = target_record_set[0]._passed_operator
                 self.join_cache[join_input_hash] = label
-                score = label == output
+                score = float(label == output)
                 record_op_stats = target_record_set.record_op_stats[0]
                 gen_stats = GenerationStats(
                     model_name=self.model.value,
@@ -214,7 +214,7 @@ class Validator:
                 pass
         else:
-            score = label == output
+            score = float(label == output)
         return score, gen_stats
@@ -225,7 +225,7 @@ class Validator:
         # TODO: retrieve k=25; score each item based on relevance; compute F1
         # TODO: support retrieval over images
         # create prompt factory
-        factory = PromptFactory(PromptStrategy.MAP, Model.o4_MINI, Cardinality.ONE_TO_ONE)
+        factory = PromptFactory(PromptStrategy.MAP, self.model, Cardinality.ONE_TO_ONE)
         # get the input messages; strip out the system message(s)
         msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
@@ -249,7 +249,7 @@ class Validator:
             print(Fore.GREEN + f"{completion_text}\n" + Style.RESET_ALL)
             # parse the evaluation
-            eval_dict: dict = get_json_from_answer(completion_text, Model.o4_MINI, Cardinality.ONE_TO_ONE)
+            eval_dict: dict = get_json_from_answer(completion_text, self.model, Cardinality.ONE_TO_ONE)
             score = sum(eval_dict.values()) / len(eval_dict)
         except Exception:

{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: palimpzest
-Version: 0.8.4
+Version: 0.8.6
 Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
 Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
 Project-URL: homepage, https://palimpzest.org

{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/RECORD RENAMED Viewed

@@ -9,15 +9,15 @@ palimpzest/core/models.py,sha256=VNi49i9xn_FxekyYrGPS1-_C_PaGXL8dz-dqjrIOk8g,424
 palimpzest/core/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/core/data/context.py,sha256=x1xYyu9qW65dvtK_XayIfv_CgsCEPW6Qe0DTiSf9sjU,16207
 palimpzest/core/data/context_manager.py,sha256=8hAKWD2jhFZgghTu7AYgjkvKDsJUPVxq8g4nG0HWvfo,6150
-palimpzest/core/data/dataset.py,sha256=M7SxPXzHsfj-ljy_P3ckcJNqGf4RwNxtZI02q_tmL2M,28178
+palimpzest/core/data/dataset.py,sha256=0IMmV5_rheNb9ON8wZTy-h1VwWX9mRGkwgc93WGo73E,28881
 palimpzest/core/data/index_dataset.py,sha256=adO67DgzHhA4lBME0-h4SjXfdz9UcNMSDGXTpUdKbgE,1929
 palimpzest/core/data/iter_dataset.py,sha256=K47ajOXsCZV3WhOuDkw3xfiHzn8mXPU976uN3SjaP2U,20507
 palimpzest/core/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/core/elements/filters.py,sha256=fU2x0eWDwfP52_5fUmqJXTuhs4H0vvHtPZLdA3IIw8I,1642
 palimpzest/core/elements/groupbysig.py,sha256=oFH5UkZzcR0msAgfQiRQOOvyJ3HaW4Dwr03h7tVOcrM,2324
-palimpzest/core/elements/records.py,sha256=KNY55cN9OuV9Q6apUaUq9W_WyfNremqFlQjClyWR1PU,18518
+palimpzest/core/elements/records.py,sha256=pqtuSgc-Jm5N57d6jtUXmQx0D-khqjOIQAFZjS1XmNM,17075
 palimpzest/core/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-palimpzest/core/lib/schemas.py,sha256=eFH_Lw3UoXLPk_-5Pds5NPRUXKhILPtCkErB26FyKMo,8983
+palimpzest/core/lib/schemas.py,sha256=2fzbTZBTssKTl9CFGDEQneXasOwo-PLP2lCqHZn2eng,9318
 palimpzest/prompts/__init__.py,sha256=942kdENfPU5mFjIxYm-FusL0FD6LNhoj6cYoSGiUsCI,1628
 palimpzest/prompts/agent_prompts.py,sha256=CUzBVLBiPSw8OShtKp4VTpQwtrNMtcMglo-IZHMvuDM,17459
 palimpzest/prompts/context_search.py,sha256=s3pti4XNRiIyiWzjVNL_NqmqEc31jzSKMF2SlN0Aaf8,357
@@ -35,21 +35,21 @@ palimpzest/prompts/validator.py,sha256=pJTZjlt_OiFM3IFOgsJ0jQdayra8iRVrpqENlXI9t
 palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/execution/all_sample_execution_strategy.py,sha256=8a8-eKsndo_edCwIamNgcISLQmTzVSv5vmD6Ogl8a6k,14367
-palimpzest/query/execution/execution_strategy.py,sha256=Lg2anGM8I4WBetqduWwnPFE67sfihHJwsu3fQ_sYYJk,18909
+palimpzest/query/execution/execution_strategy.py,sha256=XoRVNlJSAgON-NWis9SecFr0B7DlJIm-25u1v5rjvu8,19085
 palimpzest/query/execution/execution_strategy_type.py,sha256=vRQBPCQN5_aoyD3TLIeW3VPo15mqF-5RBvEXkENz9FE,987
-palimpzest/query/execution/mab_execution_strategy.py,sha256=LKAi1RWGsIK7ppjWmH8sbAbHjyLA12y4tBn1ycOt35Q,43554
+palimpzest/query/execution/mab_execution_strategy.py,sha256=YjUZ2qBGvQMVUxi7rQCSU8JKP1RtqhG8Owik8hKB_UU,46292
 palimpzest/query/execution/parallel_execution_strategy.py,sha256=roZZy7wLcmAwm_ecYvqSJanRaiox3OoNPuXxvRZ5TXg,15710
 palimpzest/query/execution/single_threaded_execution_strategy.py,sha256=sESji79ytKxth9Tpm02c34Mltw0YiFn4GL5h0MI5Noo,16255
 palimpzest/query/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/generators/generators.py,sha256=UldCUEwaiBfpvQDieA-h7SiC8KM76gCABPj-mvqAJus,21500
-palimpzest/query/operators/__init__.py,sha256=yjOdbx3Sm5uVTR5R7fw66F602qBBkguH1ktv48T_JJ8,4264
+palimpzest/query/operators/__init__.py,sha256=T-OFUqWfbL_xqW1n7nkXCWu0JLRePEkMwVCEAl3JNeM,4356
 palimpzest/query/operators/aggregate.py,sha256=NZ_rmi0YrbNFivbBgKtonrCrK6fZw4h9Pm4lMMI5XVc,11376
 palimpzest/query/operators/compute.py,sha256=X_pWN45smg8L4dV54nOae7dldQGL1nJVlVyJ3ULWSmI,8432
 palimpzest/query/operators/convert.py,sha256=VfrWUFyuZC8fPf7LR7mMfpOjqSfxAuTLUxw-S-pn7hk,16123
 palimpzest/query/operators/critique_and_refine.py,sha256=Q-NhasVoD9meX7g36RPrv3q4R48_8XEU4d3TE46hRJI,8979
 palimpzest/query/operators/distinct.py,sha256=ZTXlIS7IaFRTsWv9RemzCo1JLz25vEma-TB42CV5fJQ,2614
 palimpzest/query/operators/filter.py,sha256=ufREsO2-8CBk4u4fabDBYpEvb806E11EOyW-wuRs4vw,10356
-palimpzest/query/operators/join.py,sha256=79pdA4gEgyXcWirkIRYdWNljrKjDvjtApGXKsroYElA,25591
+palimpzest/query/operators/join.py,sha256=A0f7d4Nmi-MRp80HD3BrglYZPbFzp5X2vA-X-5XxaGE,25658
 palimpzest/query/operators/limit.py,sha256=pdo7WfWY97SW3c-WqZ4SIPw7lHIVbaXPEWqHyK8qkF8,2130
 palimpzest/query/operators/logical.py,sha256=K_dRlNKkda35kQ7gYGsrW9PoFuDPzexpjtDq_FYdhVw,20223
 palimpzest/query/operators/mixture_of_agents.py,sha256=TWdg6XEg2u4TQM4d94gmbYqnK15wC7Q4Cyefp8SA4i8,11547
@@ -60,19 +60,19 @@ palimpzest/query/operators/retrieve.py,sha256=-OvEWmxwbepGz0w40FpHbqcOHZQ4Bp-MdX
 palimpzest/query/operators/scan.py,sha256=OqCiPRTvTY7SbauNMyFvGT5nRVeRzVsGYSrkoN1Ib_w,7407
 palimpzest/query/operators/search.py,sha256=cQin-Qc9FT7V0Gv3-pxMLbVMjqE6ALe99V0OrQhA6CI,22711
 palimpzest/query/operators/split.py,sha256=oLzwnYb8TNf3XA9TMKEAIw7EIA12wHneaD42BNLIHiI,15043
-palimpzest/query/optimizer/__init__.py,sha256=XsWk_qDh4lvPGhQgsya6D3ZWWHk6UXRUEzHuPHDifx8,2418
+palimpzest/query/optimizer/__init__.py,sha256=COn-okHtnYEyoNFRt3o3SA7jI5Wssx9BUEgfOfP4dOE,2560
 palimpzest/query/optimizer/cost_model.py,sha256=OldPy-TJdfsQbYRoKlb3yWeKbi15jcldTIUS6BTi9T8,12678
-palimpzest/query/optimizer/optimizer.py,sha256=bsH4RhdDncbrGLA9Pq6kZ5CoqjetQm_5Vyl7l48jKpQ,19578
-palimpzest/query/optimizer/optimizer_strategy.py,sha256=9YlNGkqwgX0WaV6y8tKOOHVN8kC8GjDI3DttvGW5SYY,10206
+palimpzest/query/optimizer/optimizer.py,sha256=BrhljITlFC5S5euA01pv4dzlqxrtKNEt_0DmhRtcMTk,19966
+palimpzest/query/optimizer/optimizer_strategy.py,sha256=0foDaBHqQehK_zz6IlDEbNIw-44wxY6LO5H1anJi56Y,10042
 palimpzest/query/optimizer/optimizer_strategy_type.py,sha256=V-MMHvJdnfZKoUX1xxxwh66q1RjN2FL35IsiT1C62c8,1084
-palimpzest/query/optimizer/plan.py,sha256=VIhN7tWT7EoRE9BKYa1qvvOhX7dEaM-aiobByX0qjzg,22900
+palimpzest/query/optimizer/plan.py,sha256=NoCUS_lyZ7LFj15_qpZ_cOFHVkCFMcIn8A7EsNeD57c,22849
 palimpzest/query/optimizer/primitives.py,sha256=jMMVq37y1tWiPU1lSSKQP9OP-mzkpSxSmUeDajRYYOQ,5445
-palimpzest/query/optimizer/rules.py,sha256=rVWIsygEMQmT1_rdHUzsOXC1GYCzLDhiuGjwkszyl-Y,48591
-palimpzest/query/optimizer/tasks.py,sha256=SAoErqxHwoE7nIhLwyH-2YN2nHYG3gR4m4NV7p0U7AE,30028
+palimpzest/query/optimizer/rules.py,sha256=er8K47L-qdRn0hCra-2PaqxhQEvDwJ7IVzNEszWHJ48,50452
+palimpzest/query/optimizer/tasks.py,sha256=GCRA4rK6Q8dBGj2FnsRJUk3IdKthNQgiK5lFEu7v0mI,30439
 palimpzest/query/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-palimpzest/query/processor/config.py,sha256=vHVsgeBnKigacO0QA7bLf5q8pJhFWA2j9-p_no2bmYo,2366
+palimpzest/query/processor/config.py,sha256=kr9UHQ947SJmI77wqomy310mSaKNIMPxh-5k9frMVII,2413
 palimpzest/query/processor/query_processor.py,sha256=T4ffPbnOX23G8FDITzmM7Iw7DUEDWIHnwl8XLYllgjg,6240
-palimpzest/query/processor/query_processor_factory.py,sha256=6w9R1Y8AOV22X8MUf7g2G5Qb15BGEZAXQKbCQJafWJ0,8048
+palimpzest/query/processor/query_processor_factory.py,sha256=i9L9StqlUi7m1AqZMuYQWhunqOJi3nLK47skhxq9tIA,8317
 palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/schemabuilder/schema_builder.py,sha256=QraGp66dcD-ej6Y2mER40o86G9JqlBkL7swkJzjUAIY,7968
 palimpzest/tools/README.md,sha256=56_6LPG80uc0CLVhTBP6I1wgIffNv9cyTr0TmVZqmrM,483
@@ -87,9 +87,9 @@ palimpzest/utils/model_helpers.py,sha256=X6SlMgD5I5Aj_cxaFaoGaaNvOOqTNZVmjj6zbfn
 palimpzest/utils/progress.py,sha256=7gucyZr82udMDZitrrkAOSKHZVljE3R2wv9nf5gA5TM,20807
 palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
 palimpzest/validator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-palimpzest/validator/validator.py,sha256=dbqpSnis-3u6fpVmRMNwBlx5owwyAXH-sktN-eFoZVU,15995
-palimpzest-0.8.4.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
-palimpzest-0.8.4.dist-info/METADATA,sha256=N_FIla7hJQuAAFML7TwdP7Jr2JdCdh3qnRAE5C_qUaM,7048
-palimpzest-0.8.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-palimpzest-0.8.4.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
-palimpzest-0.8.4.dist-info/RECORD,,
+palimpzest/validator/validator.py,sha256=vasnvAzEv9tDNLGz2X7MpMJBpn8MqSNelQSXk3X6MBs,16002
+palimpzest-0.8.6.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
+palimpzest-0.8.6.dist-info/METADATA,sha256=NuqbbYGwNa5VlbFP3d59-1KdXA1LjrfChElaSTkmZBk,7048
+palimpzest-0.8.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+palimpzest-0.8.6.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
+palimpzest-0.8.6.dist-info/RECORD,,

{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{palimpzest-0.8.4.dist-info → palimpzest-0.8.6.dist-info}/top_level.txt RENAMED Viewed

File without changes

palimpzest 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

palimpzest 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl