PyPI - palimpzest - Versions diffs - 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl - Mend

palimpzest-0.8.0-py3-none-any.whl → palimpzest-0.8.2-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (27)

palimpzest/constants.py +107 -35
palimpzest/core/data/dataset.py +13 -8
palimpzest/prompts/convert_prompts.py +2 -2
palimpzest/prompts/filter_prompts.py +2 -2
palimpzest/prompts/join_prompts.py +2 -2
palimpzest/prompts/moa_proposer_convert_prompts.py +1 -1
palimpzest/prompts/prompt_factory.py +17 -1
palimpzest/prompts/split_proposer_prompts.py +1 -1
palimpzest/prompts/util_phrases.py +5 -0
palimpzest/query/execution/mab_execution_strategy.py +46 -21
palimpzest/query/generators/generators.py +3 -1
palimpzest/query/operators/convert.py +10 -2
palimpzest/query/operators/critique_and_refine_convert.py +2 -2
palimpzest/query/operators/filter.py +5 -4
palimpzest/query/operators/join.py +6 -5
palimpzest/query/operators/logical.py +12 -2
palimpzest/query/operators/mixture_of_agents_convert.py +2 -2
palimpzest/query/operators/split_convert.py +2 -2
palimpzest/query/processor/config.py +3 -2
palimpzest/query/processor/query_processor.py +2 -2
palimpzest/query/processor/query_processor_factory.py +10 -6
palimpzest/utils/model_helpers.py +7 -2
{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/METADATA +1 -1
{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/RECORD +27 -27
{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/WHEEL +0 -0
{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/top_level.txt +0 -0

palimpzest/constants.py CHANGED Viewed

@@ -18,8 +18,12 @@ class Model(str, Enum):
     DEEPSEEK_R1_DISTILL_QWEN_1_5B = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
     GPT_4o = "openai/gpt-4o-2024-08-06"
     GPT_4o_MINI = "openai/gpt-4o-mini-2024-07-18"
-    GPT_5 = "openai/gpt-5"
-    GPT_5_MINI = "openai/gpt-5-mini"
+    GPT_4_1 = "openai/gpt-4.1-2025-04-14"
+    GPT_4_1_MINI = "openai/gpt-4.1-mini-2025-04-14"
+    GPT_4_1_NANO = "openai/gpt-4.1-nano-2025-04-14"
+    GPT_5 = "openai/gpt-5-2025-08-07"
+    GPT_5_MINI = "openai/gpt-5-mini-2025-08-07"
+    GPT_5_NANO = "openai/gpt-5-nano-2025-08-07"
     o4_MINI = "openai/o4-mini-2025-04-16"  # noqa: N815
     TEXT_EMBEDDING_3_SMALL = "text-embedding-3-small"
     CLIP_VIT_B_32 = "clip-ViT-B-32"
@@ -29,6 +33,9 @@ class Model(str, Enum):
     GEMINI_2_0_FLASH = "vertex_ai/gemini-2.0-flash"
     GEMINI_2_5_FLASH = "vertex_ai/gemini-2.5-flash"
     GEMINI_2_5_PRO = "vertex_ai/gemini-2.5-pro"
+    GOOGLE_GEMINI_2_5_FLASH = "google/gemini-2.5-flash"
+    GOOGLE_GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite"
+    GOOGLE_GEMINI_2_5_PRO = "google/gemini-2.5-pro"
     LLAMA_4_MAVERICK = "vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas"
     GPT_4o_AUDIO_PREVIEW = "openai/gpt-4o-audio-preview"
     GPT_4o_MINI_AUDIO_PREVIEW = "openai/gpt-4o-mini-audio-preview"
@@ -54,7 +61,7 @@ class Model(str, Enum):
         return self in [Model.o4_MINI]
     def is_gpt_5_model(self):
-        return self in [Model.GPT_5, Model.GPT_5_MINI]
+        return self in [Model.GPT_5, Model.GPT_5_MINI, Model.GPT_5_NANO]
     def is_openai_model(self):
         return "openai" in self.value.lower() or self.is_text_embedding_model()
@@ -65,13 +72,17 @@ class Model(str, Enum):
     def is_vertex_model(self):
         return "vertex_ai" in self.value.lower()
+    def is_google_model(self):
+        return "google" in self.value.lower()
     def is_vllm_model(self):
         return "hosted_vllm" in self.value.lower()
     def is_reasoning_model(self):
         reasoning_models = [
-            Model.GPT_5, Model.GPT_5_MINI, Model.o4_MINI,
+            Model.GPT_5, Model.GPT_5_MINI, Model.GPT_5_NANO, Model.o4_MINI,
             Model.GEMINI_2_5_PRO, Model.GEMINI_2_5_FLASH,
+            Model.GOOGLE_GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_FLASH, Model.GOOGLE_GEMINI_2_5_FLASH_LITE,
             Model.CLAUDE_3_7_SONNET,
         ]
         return self in reasoning_models
@@ -88,27 +99,31 @@ class Model(str, Enum):
     def is_vision_model(self):
         return self in [
             Model.LLAMA3_2_90B_V, Model.LLAMA_4_MAVERICK,
-            Model.GPT_4o, Model.GPT_4o_MINI, Model.o4_MINI, Model.GPT_5, Model.GPT_5_MINI,
+            Model.GPT_4o, Model.GPT_4o_MINI, Model.GPT_4_1, Model.GPT_4_1_MINI, Model.GPT_4_1_NANO, Model.o4_MINI, Model.GPT_5, Model.GPT_5_MINI, Model.GPT_5_NANO,
             Model.GEMINI_2_0_FLASH, Model.GEMINI_2_5_FLASH, Model.GEMINI_2_5_PRO,
+            Model.GOOGLE_GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_FLASH, Model.GOOGLE_GEMINI_2_5_FLASH_LITE,
         ]
     def is_audio_model(self):
         return self in [
             Model.GPT_4o_AUDIO_PREVIEW, Model.GPT_4o_MINI_AUDIO_PREVIEW,
             Model.GEMINI_2_0_FLASH, Model.GEMINI_2_5_FLASH, Model.GEMINI_2_5_PRO,
+            Model.GOOGLE_GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_FLASH, Model.GOOGLE_GEMINI_2_5_FLASH_LITE,
         ]
     def is_text_image_multimodal_model(self):
         return self in [
             Model.LLAMA_4_MAVERICK,
-            Model.GPT_4o, Model.GPT_4o_MINI, Model.o4_MINI, Model.GPT_5, Model.GPT_5_MINI,
+            Model.GPT_4o, Model.GPT_4o_MINI, Model.GPT_4_1, Model.GPT_4_1_MINI, Model.GPT_4_1_NANO, Model.o4_MINI, Model.GPT_5, Model.GPT_5_MINI, Model.GPT_5_NANO,
             Model.GEMINI_2_0_FLASH, Model.GEMINI_2_5_FLASH, Model.GEMINI_2_5_PRO,
+            Model.GOOGLE_GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_FLASH, Model.GOOGLE_GEMINI_2_5_FLASH_LITE,
         ]
     def is_text_audio_multimodal_model(self):
         return self in [
             Model.GPT_4o_AUDIO_PREVIEW, Model.GPT_4o_MINI_AUDIO_PREVIEW,
             Model.GEMINI_2_0_FLASH, Model.GEMINI_2_5_FLASH, Model.GEMINI_2_5_PRO,
+            Model.GOOGLE_GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_FLASH, Model.GOOGLE_GEMINI_2_5_FLASH_LITE,
         ]
     def is_embedding_model(self):
@@ -327,7 +342,7 @@ LLAMA3_2_3B_INSTRUCT_MODEL_CARD = {
     "usd_per_input_token": 0.06 / 1e6,
     "usd_per_output_token": 0.06 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0064,
+    "seconds_per_output_token": 0.0079,
     ##### Agg. Benchmark #####
     "overall": 36.50, # https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/discussions/13
 }
@@ -336,7 +351,7 @@ LLAMA3_1_8B_INSTRUCT_MODEL_CARD = {
     "usd_per_input_token": 0.18 / 1e6,
     "usd_per_output_token": 0.18 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0059,
+    "seconds_per_output_token": 0.0050,
     ##### Agg. Benchmark #####
     "overall": 44.25,
 }
@@ -345,7 +360,7 @@ LLAMA3_3_70B_INSTRUCT_MODEL_CARD = {
     "usd_per_input_token": 0.88 / 1e6,
     "usd_per_output_token": 0.88 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0139,
+    "seconds_per_output_token": 0.0122,
     ##### Agg. Benchmark #####
     "overall": 69.9,
 }
@@ -354,7 +369,7 @@ LLAMA3_2_90B_V_MODEL_CARD = {
     "usd_per_input_token": 1.2 / 1e6,
     "usd_per_output_token": 1.2 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0222,
+    "seconds_per_output_token": 0.0303,
     ##### Agg. Benchmark #####
     "overall": 65.00, # set to be slightly higher than gpt-4o-mini
 }
@@ -363,7 +378,7 @@ DEEPSEEK_V3_MODEL_CARD = {
     "usd_per_input_token": 1.25 / 1E6,
     "usd_per_output_token": 1.25 / 1E6,
     ##### Time #####
-    "seconds_per_output_token": 0.0769,
+    "seconds_per_output_token": 0.0114,
     ##### Agg. Benchmark #####
     "overall": 73.8,
 }
@@ -372,7 +387,7 @@ DEEPSEEK_R1_DISTILL_QWEN_1_5B_MODEL_CARD = {
     "usd_per_input_token": 0.18 / 1E6,
     "usd_per_output_token": 0.18 / 1E6,
     ##### Time #####
-    "seconds_per_output_token": 0.0026,
+    "seconds_per_output_token": 0.0050, # NOTE: copied to be same as LLAMA3_1_8B_INSTRUCT_MODEL_CARD; need to update when we have data
     ##### Agg. Benchmark #####
     "overall": 39.90, # https://www.reddit.com/r/LocalLLaMA/comments/1iserf9/deepseek_r1_distilled_models_mmlu_pro_benchmarks/
 }
@@ -382,7 +397,7 @@ GPT_4o_AUDIO_PREVIEW_MODEL_CARD = {
     "usd_per_audio_input_token": 2.5 / 1e6,
     "usd_per_output_token": 10.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0079,
+    "seconds_per_output_token": 0.0080,
     ##### Agg. Benchmark #####
     "overall": 74.1,
 }
@@ -392,7 +407,7 @@ GPT_4o_MINI_AUDIO_PREVIEW_MODEL_CARD = {
     "usd_per_audio_input_token": 0.15 / 1e6,
     "usd_per_output_token": 0.6 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0098,
+    "seconds_per_output_token": 0.0159,
     ##### Agg. Benchmark #####
     "overall": 62.7,
 }
@@ -402,7 +417,7 @@ GPT_4o_MODEL_CARD = {
     "usd_per_input_token": 2.5 / 1e6,
     "usd_per_output_token": 10.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0079,
+    "seconds_per_output_token": 0.0080,
     ##### Agg. Benchmark #####
     "overall": 74.1,
 }
@@ -412,17 +427,47 @@ GPT_4o_MINI_MODEL_CARD = {
     "usd_per_input_token": 0.15 / 1e6,
     "usd_per_output_token": 0.6 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0098,
+    "seconds_per_output_token": 0.0159,
     ##### Agg. Benchmark #####
     "overall": 62.7,
 }
+GPT_4_1_MODEL_CARD = {
+    # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
+    ##### Cost in USD #####
+    "usd_per_input_token": 2.0 / 1e6,
+    "usd_per_output_token": 8.0 / 1e6,
+    ##### Time #####
+    "seconds_per_output_token": 0.0076,
+    ##### Agg. Benchmark #####
+    "overall": 80.5,
+}
+GPT_4_1_MINI_MODEL_CARD = {
+    # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
+    ##### Cost in USD #####
+    "usd_per_input_token": 0.4 / 1e6,
+    "usd_per_output_token": 1.6 / 1e6,
+    ##### Time #####
+    "seconds_per_output_token": 0.0161,
+    ##### Agg. Benchmark #####
+    "overall": 77.2,
+}
+GPT_4_1_NANO_MODEL_CARD = {
+    # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
+    ##### Cost in USD #####
+    "usd_per_input_token": 0.1 / 1e6,
+    "usd_per_output_token": 0.4 / 1e6,
+    ##### Time #####
+    "seconds_per_output_token": 0.0060,
+    ##### Agg. Benchmark #####
+    "overall": 62.3,
+}
 GPT_5_MODEL_CARD = {
     # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
     ##### Cost in USD #####
     "usd_per_input_token": 1.25 / 1e6,
     "usd_per_output_token": 10.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0139,
+    "seconds_per_output_token": 0.0060,
     ##### Agg. Benchmark #####
     "overall": 87.00,
 }
@@ -432,30 +477,40 @@ GPT_5_MINI_MODEL_CARD = {
     "usd_per_input_token": 0.25 / 1e6,
     "usd_per_output_token": 2.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0094,
+    "seconds_per_output_token": 0.0135,
     ##### Agg. Benchmark #####
     "overall": 82.50,
 }
-o4_MINI_MODEL_CARD = {  # noqa: N816
+GPT_5_NANO_MODEL_CARD = {
     # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
     ##### Cost in USD #####
-    "usd_per_input_token": 1.1 / 1e6,
-    "usd_per_output_token": 4.4 / 1e6,
+    "usd_per_input_token": 0.05 / 1e6,
+    "usd_per_output_token": 0.4 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0093,
+    "seconds_per_output_token": 0.0055,
     ##### Agg. Benchmark #####
-    "overall": 80.6,  # using number reported for o3-mini; true number is likely higher
+    "overall": 77.9,
 }
-o1_MODEL_CARD = {  # noqa: N816
+o4_MINI_MODEL_CARD = {  # noqa: N816
     # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
     ##### Cost in USD #####
-    "usd_per_input_token": 15 / 1e6,
-    "usd_per_output_token": 60 / 1e6,
+    "usd_per_input_token": 1.1 / 1e6,
+    "usd_per_output_token": 4.4 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0110,
+    "seconds_per_output_token": 0.0092,
     ##### Agg. Benchmark #####
-    "overall": 83.50,
+    "overall": 80.6,  # using number reported for o3-mini; true number is likely higher
 }
+# o1_MODEL_CARD = {  # noqa: N816
+#     # NOTE: it is unclear if the same ($ / token) costs can be applied for vision, or if we have to calculate this ourselves
+#     ##### Cost in USD #####
+#     "usd_per_input_token": 15 / 1e6,
+#     "usd_per_output_token": 60 / 1e6,
+#     ##### Time #####
+#     "seconds_per_output_token": 0.0110,
+#     ##### Agg. Benchmark #####
+#     "overall": 83.50,
+# }
 TEXT_EMBEDDING_3_SMALL_MODEL_CARD = {
     ##### Cost in USD #####
     "usd_per_input_token": 0.02 / 1e6,
@@ -479,7 +534,7 @@ CLAUDE_3_5_SONNET_MODEL_CARD = {
     "usd_per_input_token": 3.0 / 1e6,
     "usd_per_output_token": 15.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0127,
+    "seconds_per_output_token": 0.0154,
     ##### Agg. Benchmark #####
     "overall": 78.4,
 }
@@ -488,7 +543,7 @@ CLAUDE_3_7_SONNET_MODEL_CARD = {
     "usd_per_input_token": 3.0 / 1e6,
     "usd_per_output_token": 15.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0130,
+    "seconds_per_output_token": 0.0156,
     ##### Agg. Benchmark #####
     "overall": 80.7,
 }
@@ -497,7 +552,7 @@ CLAUDE_3_5_HAIKU_MODEL_CARD = {
     "usd_per_input_token": 0.8 / 1e6,
     "usd_per_output_token": 4.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0152,
+    "seconds_per_output_token": 0.0189,
     ##### Agg. Benchmark #####
     "overall": 64.1,
 }
@@ -507,17 +562,27 @@ GEMINI_2_0_FLASH_MODEL_CARD = {
     "usd_per_output_token": 0.6 / 1e6,
     "usd_per_audio_input_token": 1.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0049,
+    "seconds_per_output_token": 0.0054,
     ##### Agg. Benchmark #####
     "overall": 77.40,
 }
+GEMINI_2_5_FLASH_LITE_MODEL_CARD = {
+    ##### Cost in USD #####
+    "usd_per_input_token": 0.1 / 1e6,
+    "usd_per_output_token": 0.4 / 1e6,
+    "usd_per_audio_input_token": 0.3 / 1e6,
+    ##### Time #####
+    "seconds_per_output_token": 0.0034,
+    ##### Agg. Benchmark #####
+    "overall": 79.1, # NOTE: interpolated between gemini 2.5 flash and gemini 2.0 flash
+}
 GEMINI_2_5_FLASH_MODEL_CARD = {
     ##### Cost in USD #####
     "usd_per_input_token": 0.30 / 1e6,
     "usd_per_output_token": 2.5 / 1e6,
     "usd_per_audio_input_token": 1.0 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0039,
+    "seconds_per_output_token": 0.0044,
     ##### Agg. Benchmark #####
     "overall": 80.75, # NOTE: interpolated between gemini 2.0 flash and gemini 2.5 pro
 }
@@ -527,7 +592,7 @@ GEMINI_2_5_PRO_MODEL_CARD = {
     "usd_per_output_token": 10.0 / 1e6,
     "usd_per_audio_input_token": 1.25 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0070,
+    "seconds_per_output_token": 0.0072,
     ##### Agg. Benchmark #####
     "overall": 84.10,
 }
@@ -536,7 +601,7 @@ LLAMA_4_MAVERICK_MODEL_CARD = {
     "usd_per_input_token": 0.35 / 1e6,
     "usd_per_output_token": 1.15 / 1e6,
     ##### Time #####
-    "seconds_per_output_token": 0.0058,
+    "seconds_per_output_token": 0.0122,
     ##### Agg. Benchmark #####
     "overall": 79.4,
 }
@@ -561,8 +626,12 @@ MODEL_CARDS = {
     Model.GPT_4o_MINI.value: GPT_4o_MINI_MODEL_CARD,
     Model.GPT_4o_AUDIO_PREVIEW.value: GPT_4o_AUDIO_PREVIEW_MODEL_CARD,
     Model.GPT_4o_MINI_AUDIO_PREVIEW.value: GPT_4o_MINI_AUDIO_PREVIEW_MODEL_CARD,
+    Model.GPT_4_1.value: GPT_4_1_MODEL_CARD,
+    Model.GPT_4_1_MINI.value: GPT_4_1_MINI_MODEL_CARD,
+    Model.GPT_4_1_NANO.value: GPT_4_1_NANO_MODEL_CARD,
     Model.GPT_5.value: GPT_5_MODEL_CARD,
     Model.GPT_5_MINI.value: GPT_5_MINI_MODEL_CARD,
+    Model.GPT_5_NANO.value: GPT_5_NANO_MODEL_CARD,
     Model.o4_MINI.value: o4_MINI_MODEL_CARD,
     # Model.o1.value: o1_MODEL_CARD,
     Model.TEXT_EMBEDDING_3_SMALL.value: TEXT_EMBEDDING_3_SMALL_MODEL_CARD,
@@ -573,6 +642,9 @@ MODEL_CARDS = {
     Model.GEMINI_2_0_FLASH.value: GEMINI_2_0_FLASH_MODEL_CARD,
     Model.GEMINI_2_5_FLASH.value: GEMINI_2_5_FLASH_MODEL_CARD,
     Model.GEMINI_2_5_PRO.value: GEMINI_2_5_PRO_MODEL_CARD,
+    Model.GOOGLE_GEMINI_2_5_FLASH.value: GEMINI_2_5_FLASH_MODEL_CARD,
+    Model.GOOGLE_GEMINI_2_5_FLASH_LITE.value: GEMINI_2_5_FLASH_LITE_MODEL_CARD,
+    Model.GOOGLE_GEMINI_2_5_PRO.value: GEMINI_2_5_PRO_MODEL_CARD,
     Model.LLAMA_4_MAVERICK.value: LLAMA_4_MAVERICK_MODEL_CARD,
     Model.VLLM_QWEN_1_5_0_5B_CHAT.value: VLLM_QWEN_1_5_0_5B_CHAT_MODEL_CARD,
 }

palimpzest/core/data/dataset.py CHANGED Viewed

@@ -228,7 +228,7 @@ class Dataset:
             id=self.id,
         )
-    def sem_join(self, other: Dataset, condition: str, depends_on: str | list[str] | None = None) -> Dataset:
+    def sem_join(self, other: Dataset, condition: str, desc: str | None = None, depends_on: str | list[str] | None = None) -> Dataset:
         """
         Perform a semantic (inner) join on the specified join predicate
         """
@@ -244,6 +244,7 @@ class Dataset:
             input_schema=combined_schema,
             output_schema=combined_schema,
             condition=condition,
+            desc=desc,
             depends_on=depends_on,
         )
@@ -277,6 +278,7 @@ class Dataset:
     def sem_filter(
         self,
         filter: str,
+        desc: str | None = None,
         depends_on: str | list[str] | None = None,
     ) -> Dataset:
         """Add a natural language description of a filter to the Set. This filter will possibly restrict the items that are returned later."""
@@ -292,12 +294,13 @@ class Dataset:
             depends_on = [depends_on]
         # construct logical operator
-        operator = FilteredScan(input_schema=self.schema, output_schema=self.schema, filter=f, depends_on=depends_on)
+        operator = FilteredScan(input_schema=self.schema, output_schema=self.schema, filter=f, desc=desc, depends_on=depends_on)
         return Dataset(sources=[self], operator=operator, schema=self.schema)
     def _sem_map(self, cols: list[dict] | type[BaseModel] | None,
                  cardinality: Cardinality,
+                 desc: str | None = None,
                  depends_on: str | list[str] | None = None) -> Dataset:
         """Execute the semantic map operation with the appropriate cardinality."""
         # construct new output schema
@@ -322,6 +325,7 @@ class Dataset:
             output_schema=new_output_schema,
             cardinality=cardinality,
             udf=None,
+            desc=desc,
             depends_on=depends_on,
         )
@@ -330,6 +334,7 @@ class Dataset:
     def sem_add_columns(self, cols: list[dict] | type[BaseModel],
                         cardinality: Cardinality = Cardinality.ONE_TO_ONE,
+                        desc: str | None = None,
                         depends_on: str | list[str] | None = None) -> Dataset:
         """
         NOTE: we are renaming this function to `sem_map` and deprecating `sem_add_columns` in the next
@@ -354,9 +359,9 @@ class Dataset:
             stacklevel=2
         )
-        return self._sem_map(cols, cardinality, depends_on)
+        return self._sem_map(cols, cardinality, desc, depends_on)
-    def sem_map(self, cols: list[dict] | type[BaseModel], depends_on: str | list[str] | None = None) -> Dataset:
+    def sem_map(self, cols: list[dict] | type[BaseModel], desc: str | None = None, depends_on: str | list[str] | None = None) -> Dataset:
         """
         Compute new field(s) by specifying their names, descriptions, and types. For each input there will
         be one output. The field(s) will be computed during the execution of the Dataset.
@@ -368,9 +373,9 @@ class Dataset:
                  {'name': 'full_name', 'desc': 'The name of the person', 'type': str}]
             )
         """
-        return self._sem_map(cols, Cardinality.ONE_TO_ONE, depends_on)
+        return self._sem_map(cols, Cardinality.ONE_TO_ONE, desc, depends_on)
-    def sem_flat_map(self, cols: list[dict] | type[BaseModel], depends_on: str | list[str] | None = None) -> Dataset:
+    def sem_flat_map(self, cols: list[dict] | type[BaseModel], desc: str | None = None, depends_on: str | list[str] | None = None) -> Dataset:
         """
         Compute new field(s) by specifying their names, descriptions, and types. For each input there will
         be one or more output(s). The field(s) will be computed during the execution of the Dataset.
@@ -384,7 +389,7 @@ class Dataset:
                 ]
             )
         """
-        return self._sem_map(cols, Cardinality.ONE_TO_MANY, depends_on)
+        return self._sem_map(cols, Cardinality.ONE_TO_MANY, desc, depends_on)
     def _map(self, udf: Callable,
             cols: list[dict] | type[BaseModel] | None,
@@ -590,7 +595,7 @@ class Dataset:
         return QueryProcessorFactory.create_and_run_processor(self, config)
-    def optimize_and_run(self, train_dataset: dict[str, Dataset] | Dataset | None = None, validator: Validator | None = None, config: QueryProcessorConfig | None = None, **kwargs):
+    def optimize_and_run(self, config: QueryProcessorConfig | None = None, train_dataset: dict[str, Dataset] | Dataset | None = None, validator: Validator | None = None, **kwargs):
         """Optimize the PZ program using the train_dataset and validator before running the optimized plan."""
         # TODO: this import currently needs to be here to avoid a circular import; we should fix this in a subsequent PR
         from palimpzest.query.processor.query_processor_factory import QueryProcessorFactory

palimpzest/prompts/convert_prompts.py CHANGED Viewed

@@ -53,7 +53,7 @@ ANSWER:
 COT_QA_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
 You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
+{desc_section}
 {output_format_instruction} Finish your response with a newline character followed by ---
 ---
 INPUT FIELDS:
@@ -72,7 +72,7 @@ REASONING: """
 COT_QA_NO_REASONING_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
 You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
+{desc_section}
 {output_format_instruction} Finish your response with a newline character followed by ---
 ---
 INPUT FIELDS:

palimpzest/prompts/filter_prompts.py CHANGED Viewed

@@ -45,7 +45,7 @@ ANSWER: TRUE
 COT_BOOL_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a filter condition. Output TRUE if the context satisfies the filter condition, and FALSE otherwise.
+{desc_section}
 Remember, your answer must be TRUE or FALSE. Finish your response with a newline character followed by ---
 ---
 INPUT FIELDS:
@@ -62,7 +62,7 @@ REASONING: """
 COT_BOOL_NO_REASONING_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a filter condition. Output TRUE if the context satisfies the filter condition, and FALSE otherwise.
+{desc_section}
 Remember, your answer must be TRUE or FALSE. Finish your response with a newline character followed by ---
 ---
 INPUT FIELDS:

palimpzest/prompts/join_prompts.py CHANGED Viewed

@@ -57,7 +57,7 @@ ANSWER: TRUE
 COT_JOIN_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with two data records and a join condition. Output TRUE if the two data records satisfy the join condition, and FALSE otherwise.
+{desc_section}
 Remember, your answer must be TRUE or FALSE. Finish your response with a newline character followed by ---
 ---
 LEFT INPUT FIELDS:
@@ -80,7 +80,7 @@ REASONING: """
 COT_JOIN_NO_REASONING_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with two data records and a join condition. Output TRUE if the two data records satisfy the join condition, and FALSE otherwise.
+{desc_section}
 Remember, your answer must be TRUE or FALSE. Finish your response with a newline character followed by ---
 ---
 LEFT INPUT FIELDS:

palimpzest/prompts/moa_proposer_convert_prompts.py CHANGED Viewed

@@ -27,7 +27,7 @@ ANSWER: {example_answer}
 COT_MOA_PROPOSER_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a set of output fields to generate. Your task is to generate a paragraph or two which describes what you believe is the correct value for each output field.
 Be sure to cite information from the context as evidence of why your answers are correct. Do not hallucinate evidence.
+{desc_section}
 You will be provided with a description of each input field and each output field.
 ---
 INPUT FIELDS:

palimpzest/prompts/prompt_factory.py CHANGED Viewed

@@ -138,6 +138,7 @@ from palimpzest.prompts.split_proposer_prompts import (
     SPLIT_PROPOSER_JOB_INSTRUCTION,
 )
 from palimpzest.prompts.util_phrases import (
+    DESC_SECTION,
     ONE_TO_MANY_OUTPUT_FORMAT_INSTRUCTION,
     ONE_TO_ONE_OUTPUT_FORMAT_INSTRUCTION,
 )
@@ -205,10 +206,11 @@ class PromptFactory:
         PromptStrategy.SPLIT_MERGER: COT_SPLIT_MERGER_BASE_USER_PROMPT,
     }
-    def __init__(self, prompt_strategy: PromptStrategy, model: Model, cardinality: Cardinality) -> None:
+    def __init__(self, prompt_strategy: PromptStrategy, model: Model, cardinality: Cardinality, desc: str | None = None) -> None:
         self.prompt_strategy = prompt_strategy
         self.model = model
         self.cardinality = cardinality
+        self.desc = desc
     def _get_context(self, candidate: DataRecord, input_fields: list[str]) -> str:
         """
@@ -446,6 +448,19 @@ class PromptFactory:
         }
         return prompt_strategy_to_job_instruction.get(self.prompt_strategy)
+    def _get_desc_section(self) -> str:
+        """
+        Returns the description section for the prompt.
+        Returns:
+            str: The description section (if applicable).
+        """
+        desc_section = ""
+        if self.desc is not None:
+            desc_section = DESC_SECTION.format(desc=self.desc)
+        return desc_section
     def _get_critique_criteria(self) -> str | None:
         """
         Returns the critique criteria for the critique operation.
@@ -758,6 +773,7 @@ class PromptFactory:
         prompt_strategy_format_kwargs = {
             "output_format_instruction": self._get_output_format_instruction(),
             "job_instruction": self._get_job_instruction(),
+            "desc_section": self._get_desc_section(),
             "critique_criteria": self._get_critique_criteria(),
             "refinement_criteria": self._get_refinement_criteria(),
             "finish_instruction": self._get_finish_instruction(),

palimpzest/prompts/split_proposer_prompts.py CHANGED Viewed

@@ -27,7 +27,7 @@ ANSWER: {example_answer}
 COT_SPLIT_PROPOSER_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
 You will be presented with a context and a set of output fields to generate. Your task is to generate a paragraph or two which describes what you believe is the correct value for each output field.
 Be sure to cite information from the context as evidence of why your answers are correct. Do not hallucinate evidence.
+{desc_section}
 You will be provided with a description of each input field and each output field.
 ---
 INPUT FIELDS:

palimpzest/prompts/util_phrases.py CHANGED Viewed

@@ -12,3 +12,8 @@ REASONING: """
 COT_ANSWER_INSTRUCTION = """Let's think step-by-step in order to answer the question.
 ANSWER: """
+DESC_SECTION = """
+The user has additionally provided you with this description of the task you need to perform:
+{desc}
+"""

palimpzest/query/execution/mab_execution_strategy.py CHANGED Viewed

@@ -2,16 +2,19 @@
 import logging
 import numpy as np
+from chromadb.api.models.Collection import Collection
 from palimpzest.core.data.dataset import Dataset
 from palimpzest.core.elements.records import DataRecord, DataRecordSet
-from palimpzest.core.models import OperatorStats, RecordOpStats, SentinelPlanStats
+from palimpzest.core.models import OperatorCostEstimates, OperatorStats, RecordOpStats, SentinelPlanStats
 from palimpzest.policy import Policy
 from palimpzest.query.execution.execution_strategy import SentinelExecutionStrategy
 from palimpzest.query.operators.aggregate import AggregateOp
-from palimpzest.query.operators.filter import FilterOp
+from palimpzest.query.operators.convert import LLMConvert
+from palimpzest.query.operators.filter import FilterOp, LLMFilter
 from palimpzest.query.operators.join import JoinOp
 from palimpzest.query.operators.physical import PhysicalOperator
+from palimpzest.query.operators.retrieve import RetrieveOp
 from palimpzest.query.operators.scan import ContextScanOp, ScanPhysicalOp
 from palimpzest.query.optimizer.plan import SentinelPlan
 from palimpzest.utils.progress import create_progress_manager
@@ -55,6 +58,17 @@ class OpFrontier:
         # store the prior beliefs on operator performance (if provided)
         self.priors = priors
+        # boolean indication of the type of operator in this OpFrontier
+        sample_op = op_set[0]
+        self.is_scan_op = isinstance(sample_op, (ScanPhysicalOp, ContextScanOp))
+        self.is_filter_op = isinstance(sample_op, FilterOp)
+        self.is_aggregate_op = isinstance(sample_op, AggregateOp)
+        self.is_llm_join = isinstance(sample_op, JoinOp)
+        is_llm_convert = isinstance(sample_op, LLMConvert)
+        is_llm_filter = isinstance(sample_op, LLMFilter)
+        is_llm_retrieve = isinstance(sample_op, RetrieveOp) and isinstance(sample_op.index, Collection)
+        self.is_llm_op = is_llm_convert or is_llm_filter or is_llm_retrieve or self.is_llm_join
         # get order in which we will sample physical operators for this logical operator
         sample_op_indices = self._get_op_index_order(op_set, seed)
@@ -68,13 +82,6 @@ class OpFrontier:
         self.full_op_id_to_sources_not_processed = {op.get_full_op_id(): source_indices for op in op_set}
         self.max_inputs = len(source_indices)
-        # boolean indication of the type of operator in this OpFrontier
-        sample_op = op_set[0]
-        self.is_scan_op = isinstance(sample_op, (ScanPhysicalOp, ContextScanOp))
-        self.is_filter_op = isinstance(sample_op, FilterOp)
-        self.is_aggregate_op = isinstance(sample_op, AggregateOp)
-        self.is_llm_join = isinstance(sample_op, JoinOp)
         # set the initial inputs for this logical operator; we maintain a mapping from source_unique_logical_op_id --> source_indices --> input;
         # for each unique source and (tuple of) source indices, we store its output, which is an input to this operator
         # for scan operators, we use the default name "source" since these operators have no source
@@ -149,16 +156,44 @@ class OpFrontier:
         return op_id_to_pareto_distance
+    def _compute_naive_priors(self, op_set: list[PhysicalOperator]) -> dict[str, dict[str, float]]:
+        naive_priors = {}
+        for op in op_set:
+            # use naive cost estimates with dummy source estimates to compute priors
+            source_op_estimates = OperatorCostEstimates(quality=1.0, cost_per_record=0.0, time_per_record=0.0, cardinality=100)
+            op_estimates = (
+                op.naive_cost_estimates(source_op_estimates, source_op_estimates)
+                if self.is_llm_join
+                else op.naive_cost_estimates(source_op_estimates)
+            )
+            # get op_id for this operator
+            op_id = op.get_op_id()
+            # set the naive quality, cost, and time priors for this operator
+            naive_priors[op_id] = {
+                "quality": op_estimates.quality,
+                "cost": op_estimates.cost_per_record,
+                "time": op_estimates.time_per_record,
+            }
+        return naive_priors
     def _get_op_index_order(self, op_set: list[PhysicalOperator], seed: int) -> list[int]:
         """
         Returns a list of indices for the operators in the op_set.
         """
-        if self.priors is None or any([op_id not in self.priors for op_id in map(lambda op: op.get_op_id(), op_set)]):
+        # if this is not an llm-operator, we simply return the indices in random order
+        if not self.is_llm_op:
             rng = np.random.default_rng(seed=seed)
             op_indices = np.arange(len(op_set))
             rng.shuffle(op_indices)
             return op_indices
+        # if this is an llm-operator, but we do not have priors, we first compute naive priors
+        if self.priors is None or any([op_id not in self.priors for op_id in map(lambda op: op.get_op_id(), op_set)]):
+            self.priors = self._compute_naive_priors(op_set)
         # NOTE: self.priors is a dictionary with format:
         # {op_id: {"quality": quality, "cost": cost, "time": time}}
@@ -215,7 +250,7 @@ class OpFrontier:
         op_source_indices_pairs = []
         # if this operator is not being optimized: we don't request inputs, but simply process what we are given / told to (in the case of scans)
-        if not self.is_llm_join and len(self.frontier_ops) == 1:
+        if not self.is_llm_op and len(self.frontier_ops) == 1:
             return [(self.frontier_ops[0], None)]
         # otherwise, sample (operator, source_indices) pairs
@@ -255,16 +290,6 @@ class OpFrontier:
                     all_inputs.extend(inputs)
             return [(op, tuple(), all_inputs)]
-        # if this is an un-optimized (non-scan, non-join) operator, flatten inputs and run on each one
-        elif not self.is_scan_op and not self.is_llm_join and len(self.frontier_ops) == 1:
-            op_inputs = []
-            op = self.frontier_ops[0]
-            for _, source_indices_to_inputs in self.source_indices_to_inputs.items():
-                for source_indices, inputs in source_indices_to_inputs.items():
-                    for input in inputs:
-                        op_inputs.append((op, source_indices, input))
-            return op_inputs
         ### for optimized operators
         # get the list of (op, source_indices) pairs which this operator needs to execute
         op_source_indices_pairs = self._get_op_source_indices_pairs()

palimpzest/query/generators/generators.py CHANGED Viewed

@@ -111,6 +111,7 @@ class Generator(Generic[ContextType, InputType]):
         reasoning_effort: str | None = None,
         api_base: str | None = None,
         cardinality: Cardinality = Cardinality.ONE_TO_ONE,
+        desc: str | None = None,
         verbose: bool = False,
     ):
         self.model = model
@@ -119,8 +120,9 @@ class Generator(Generic[ContextType, InputType]):
         self.prompt_strategy = prompt_strategy
         self.reasoning_effort = reasoning_effort
         self.api_base = api_base
+        self.desc = desc
         self.verbose = verbose
-        self.prompt_factory = PromptFactory(prompt_strategy, model, cardinality)
+        self.prompt_factory = PromptFactory(prompt_strategy, model, cardinality, desc)
     def _parse_reasoning(self, completion_text: str, **kwargs) -> str:
         """Extract the reasoning for the generated output from the completion object."""

palimpzest/query/operators/convert.py CHANGED Viewed

@@ -26,18 +26,21 @@ class ConvertOp(PhysicalOperator, ABC):
         self,
         cardinality: Cardinality = Cardinality.ONE_TO_ONE,
         udf: Callable | None = None,
+        desc: str | None = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self.cardinality = cardinality
         self.udf = udf
+        self.desc = desc
     def get_id_params(self):
         id_params = super().get_id_params()
         id_params = {
             "cardinality": self.cardinality.value,
             "udf": self.udf,
+            "desc": self.desc,
             **id_params,
         }
@@ -45,7 +48,12 @@ class ConvertOp(PhysicalOperator, ABC):
     def get_op_params(self):
         op_params = super().get_op_params()
-        op_params = {"cardinality": self.cardinality, "udf": self.udf, **op_params}
+        op_params = {
+            "cardinality": self.cardinality,
+            "udf": self.udf,
+            "desc": self.desc,
+            **op_params,
+        }
         return op_params
@@ -289,7 +297,7 @@ class LLMConvert(ConvertOp):
         self.prompt_strategy = prompt_strategy
         self.reasoning_effort = reasoning_effort
         if model is not None:
-            self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, self.cardinality, self.verbose)
+            self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
     def __str__(self):
         op = super().__str__()

palimpzest/query/operators/critique_and_refine_convert.py CHANGED Viewed

@@ -37,8 +37,8 @@ class CriticAndRefineConvert(LLMConvert):
             raise ValueError(f"Unsupported prompt strategy: {self.prompt_strategy}")
         # create generators
-        self.critic_generator = Generator(self.critic_model, self.critic_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
-        self.refine_generator = Generator(self.refine_model, self.refinement_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
+        self.critic_generator = Generator(self.critic_model, self.critic_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
+        self.refine_generator = Generator(self.refine_model, self.refinement_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
     def __str__(self):
         op = super().__str__()

palimpzest/query/operators/filter.py CHANGED Viewed

@@ -22,10 +22,11 @@ from palimpzest.query.operators.physical import PhysicalOperator
 class FilterOp(PhysicalOperator, ABC):
-    def __init__(self, filter: Filter, *args, **kwargs):
+    def __init__(self, filter: Filter, desc: str | None = None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert self.input_schema == self.output_schema, "Input and output schemas must match for FilterOp"
         self.filter_obj = filter
+        self.desc = desc
     def __str__(self):
         op = super().__str__()
@@ -34,11 +35,11 @@ class FilterOp(PhysicalOperator, ABC):
     def get_id_params(self):
         id_params = super().get_id_params()
-        return {"filter": str(self.filter_obj), **id_params}
+        return {"filter": str(self.filter_obj), "desc": self.desc, **id_params}
     def get_op_params(self):
         op_params = super().get_op_params()
-        return {"filter": self.filter_obj, **op_params}
+        return {"filter": self.filter_obj, "desc": self.desc, **op_params}
     @abstractmethod
     def is_image_filter(self) -> bool:
@@ -182,7 +183,7 @@ class LLMFilter(FilterOp):
         self.model = model
         self.prompt_strategy = prompt_strategy
         self.reasoning_effort = reasoning_effort
-        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.verbose)
+        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
     def get_id_params(self):
         id_params = super().get_id_params()

palimpzest/query/operators/join.py CHANGED Viewed

@@ -21,10 +21,11 @@ from palimpzest.query.operators.physical import PhysicalOperator
 class JoinOp(PhysicalOperator, ABC):
-    def __init__(self, condition: str, *args, **kwargs):
+    def __init__(self, condition: str, desc: str | None = None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert self.input_schema == self.output_schema, "Input and output schemas must match for JoinOp"
         self.condition = condition
+        self.desc = desc
     def __str__(self):
         op = super().__str__()
@@ -33,11 +34,11 @@ class JoinOp(PhysicalOperator, ABC):
     def get_id_params(self):
         id_params = super().get_id_params()
-        return {"condition": self.condition, **id_params}
+        return {"condition": self.condition, "desc": self.desc, **id_params}
     def get_op_params(self):
         op_params = super().get_op_params()
-        return {"condition": self.condition, **op_params}
+        return {"condition": self.condition, "desc": self.desc, **op_params}
     @abstractmethod
     def is_image_join(self) -> bool:
@@ -64,7 +65,7 @@ class BlockingNestedLoopsJoin(JoinOp):
         self.prompt_strategy = prompt_strategy
         self.join_parallelism = join_parallelism
         self.reasoning_effort = reasoning_effort
-        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.verbose)
+        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
         self.join_idx = 0
     def get_id_params(self):
@@ -228,7 +229,7 @@ class NestedLoopsJoin(JoinOp):
         self.prompt_strategy = prompt_strategy
         self.join_parallelism = join_parallelism
         self.reasoning_effort = reasoning_effort
-        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.verbose)
+        self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
         self.join_idx = 0
         # maintain list(s) of input records for the join

palimpzest/query/operators/logical.py CHANGED Viewed

@@ -256,12 +256,14 @@ class ConvertScan(LogicalOperator):
         self,
         cardinality: Cardinality = Cardinality.ONE_TO_ONE,
         udf: Callable | None = None,
+        desc: str | None = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self.cardinality = cardinality
         self.udf = udf
+        self.desc = desc
     def __str__(self):
         return f"ConvertScan({self.input_schema} -> {str(self.output_schema)})"
@@ -271,6 +273,7 @@ class ConvertScan(LogicalOperator):
         logical_id_params = {
             "cardinality": self.cardinality,
             "udf": self.udf,
+            "desc": self.desc,
             **logical_id_params,
         }
@@ -281,6 +284,7 @@ class ConvertScan(LogicalOperator):
         logical_op_params = {
             "cardinality": self.cardinality,
             "udf": self.udf,
+            "desc": self.desc,
             **logical_op_params,
         }
@@ -327,11 +331,13 @@ class FilteredScan(LogicalOperator):
     def __init__(
         self,
         filter: Filter,
+        desc: str | None = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self.filter = filter
+        self.desc = desc
     def __str__(self):
         return f"FilteredScan({str(self.output_schema)}, {str(self.filter)})"
@@ -340,6 +346,7 @@ class FilteredScan(LogicalOperator):
         logical_id_params = super().get_logical_id_params()
         logical_id_params = {
             "filter": self.filter,
+            "desc": self.desc,
             **logical_id_params,
         }
@@ -349,6 +356,7 @@ class FilteredScan(LogicalOperator):
         logical_op_params = super().get_logical_op_params()
         logical_op_params = {
             "filter": self.filter,
+            "desc": self.desc,
             **logical_op_params,
         }
@@ -390,16 +398,17 @@ class GroupByAggregate(LogicalOperator):
 class JoinOp(LogicalOperator):
-    def __init__(self, condition: str, *args, **kwargs):
+    def __init__(self, condition: str, desc: str | None = None, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.condition = condition
+        self.desc = desc
     def __str__(self):
         return f"Join(condition={self.condition})"
     def get_logical_id_params(self) -> dict:
         logical_id_params = super().get_logical_id_params()
-        logical_id_params = {"condition": self.condition, **logical_id_params}
+        logical_id_params = {"condition": self.condition, "desc": self.desc, **logical_id_params}
         return logical_id_params
@@ -407,6 +416,7 @@ class JoinOp(LogicalOperator):
         logical_op_params = super().get_logical_op_params()
         logical_op_params = {
             "condition": self.condition,
+            "desc": self.desc,
             **logical_op_params,
         }

palimpzest/query/operators/mixture_of_agents_convert.py CHANGED Viewed

@@ -36,10 +36,10 @@ class MixtureOfAgentsConvert(LLMConvert):
         # create generators
         self.proposer_generators = [
-            Generator(model, self.proposer_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
+            Generator(model, self.proposer_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
             for model in proposer_models
         ]
-        self.aggregator_generator = Generator(aggregator_model, self.aggregator_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
+        self.aggregator_generator = Generator(aggregator_model, self.aggregator_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
     def __str__(self):
         op = super().__str__()

palimpzest/query/operators/split_convert.py CHANGED Viewed

@@ -22,8 +22,8 @@ class SplitConvert(LLMConvert):
         super().__init__(*args, **kwargs)
         self.num_chunks = num_chunks
         self.min_size_to_chunk = min_size_to_chunk
-        self.split_generator = Generator(self.model, PromptStrategy.SPLIT_PROPOSER, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
-        self.split_merge_generator = Generator(self.model, PromptStrategy.SPLIT_MERGER, self.reasoning_effort, self.api_base, self.cardinality, self.verbose)
+        self.split_generator = Generator(self.model, PromptStrategy.SPLIT_PROPOSER, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
+        self.split_merge_generator = Generator(self.model, PromptStrategy.SPLIT_MERGER, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
         # crude adjustment factor for naive estimation in no-sentinel setting
         self.naive_quality_adjustment = 0.6

palimpzest/query/processor/config.py CHANGED Viewed

@@ -26,6 +26,7 @@ class QueryProcessorConfig(BaseModel):
     join_parallelism: int = Field(default=64)
     batch_size: int | None = Field(default=None)
     reasoning_effort: str | None = Field(default=None)  # Gemini: "disable", "low", "medium", "high"
+    use_vertex: bool = Field(default=True)  # Whether to use Vertex models for Gemini or Google models
     gemini_credentials_path: str | None = Field(default=None)  # Path to Gemini credentials file
     api_base: str | None = Field(default=None)  # API base URL for vLLM
@@ -39,8 +40,8 @@ class QueryProcessorConfig(BaseModel):
     use_final_op_quality: bool = Field(default=False)
     # sentinel optimization flags
-    k: int = Field(default=5)
-    j: int = Field(default=5)
+    k: int = Field(default=6)
+    j: int = Field(default=4)
     sample_budget: int = Field(default=100)
     seed: int = Field(default=42)
     exp_name: str | None = Field(default=None)

palimpzest/query/processor/query_processor.py CHANGED Viewed

@@ -114,8 +114,8 @@ class QueryProcessor:
         execution_stats = ExecutionStats(execution_id=self.execution_id())
         execution_stats.start()
-        # if the user provides a train_dataset or validator, we perform optimization
-        if self.train_dataset is not None or self.validator is not None:
+        # if the user provides a validator, we perform optimization
+        if self.validator is not None:
             # create sentinel plan
             sentinel_plan = self._create_sentinel_plan(self.train_dataset)

palimpzest/query/processor/query_processor_factory.py CHANGED Viewed

@@ -62,13 +62,17 @@ class QueryProcessorFactory:
             print("WARNING: Both `progress` and `verbose` are set to True, but only one can be True at a time; defaulting to `progress=True`")
             config.verbose = False
+        # if the user provides a training dataset, but no validator, create a default validator
+        if train_dataset is not None and validator is None:
+            validator = Validator()
+            logger.info("No validator provided; using default Validator")
         # boolean flag for whether we're performing optimization or not
-        optimization = train_dataset is not None or validator is not None
-        val_based_opt = train_dataset is None and validator is not None
+        optimization = validator is not None
         # handle "auto" default for sentinel execution strategies
         if config.sentinel_execution_strategy == "auto":
-            config.sentinel_execution_strategy = ("validator" if val_based_opt else "mab") if optimization else None
+            config.sentinel_execution_strategy = "mab" if optimization else None
         # convert the config values for processing, execution, and optimization strategies to enums
         config = cls._normalize_strategies(config)
@@ -76,7 +80,7 @@ class QueryProcessorFactory:
         # get available models
         available_models = getattr(config, 'available_models', [])
         if available_models is None or len(available_models) == 0:
-            available_models = get_models(gemini_credentials_path=config.gemini_credentials_path, api_base=config.api_base)
+            available_models = get_models(use_vertex=config.use_vertex, gemini_credentials_path=config.gemini_credentials_path, api_base=config.api_base)
         # remove any models specified in the config
         remove_models = getattr(config, 'remove_models', [])
@@ -87,7 +91,7 @@ class QueryProcessorFactory:
         # set the final set of available models in the config
         config.available_models = available_models
-        return config
+        return config, validator
     @classmethod
     def _create_optimizer(cls, config: QueryProcessorConfig) -> Optimizer:
@@ -143,7 +147,7 @@ class QueryProcessorFactory:
             config = QueryProcessorConfig()
         # apply any additional keyword arguments to the config and validate its contents
-        config = cls._config_validation_and_normalization(config, train_dataset, validator)
+        config, validator = cls._config_validation_and_normalization(config, train_dataset, validator)
         # create the optimizer, execution strateg(ies), and processor
         optimizer = cls._create_optimizer(config)

palimpzest/utils/model_helpers.py CHANGED Viewed

@@ -3,7 +3,8 @@ import os
 from palimpzest.constants import Model
-def get_models(include_embedding: bool = False, gemini_credentials_path: str | None = None, api_base: str | None = None) -> list[Model]:
+# TODO: better handle vertex vs. google for gemini models
+def get_models(include_embedding: bool = False, use_vertex: bool = True, gemini_credentials_path: str | None = None, api_base: str | None = None) -> list[Model]:
     """
     Return the set of models which the system has access to based on the set environment variables.
     """
@@ -39,11 +40,15 @@ def get_models(include_embedding: bool = False, gemini_credentials_path: str | N
     )
     if os.getenv("GEMINI_API_KEY") is not None or os.path.exists(gemini_credentials_path):
         vertex_models = [model for model in Model if model.is_vertex_model()]
+        google_models = [model for model in Model if model.is_google_model()]
         if not include_embedding:
             vertex_models = [
                 model for model in vertex_models if not model.is_embedding_model()
             ]
-        models.extend(vertex_models)
+        if use_vertex:
+            models.extend(vertex_models)
+        else:
+            models.extend(google_models)
     if api_base is not None:
         vllm_models = [model for model in Model if model.is_vllm_model()]

{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: palimpzest
-Version: 0.8.0
+Version: 0.8.2
 Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
 Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
 Project-URL: homepage, https://palimpzest.org

{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 palimpzest/__init__.py,sha256=1PzadDDOVMQJKNEYUH0_tw8tQKUYTT31M0vuzTr2Rqk,1694
-palimpzest/constants.py,sha256=1xGydUfkuVtaeoQ_Ku6P5PDLAelQKAVouivdXkva-zE,21109
+palimpzest/constants.py,sha256=GagsbJl1xCAjgt6Biw27KnHSZgiramxhnerhmYe3P_k,24690
 palimpzest/policy.py,sha256=lIvw_C_rmwCH4LZaeNkAuixl8zw9RAW_JcSWSHPjKyc,11628
 palimpzest/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/agents/compute_agents.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -9,7 +9,7 @@ palimpzest/core/models.py,sha256=fLO4T7x0njNeEbUpbhJm9cdnBva0y0Zw5WGBGdzdS_I,424
 palimpzest/core/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/core/data/context.py,sha256=x1xYyu9qW65dvtK_XayIfv_CgsCEPW6Qe0DTiSf9sjU,16207
 palimpzest/core/data/context_manager.py,sha256=8hAKWD2jhFZgghTu7AYgjkvKDsJUPVxq8g4nG0HWvfo,6150
-palimpzest/core/data/dataset.py,sha256=vqEEMxaG157jdyzUxM_tLt5Xq_49Yq-0dVGhS0ZUiHA,27904
+palimpzest/core/data/dataset.py,sha256=M7SxPXzHsfj-ljy_P3ckcJNqGf4RwNxtZI02q_tmL2M,28178
 palimpzest/core/data/index_dataset.py,sha256=adO67DgzHhA4lBME0-h4SjXfdz9UcNMSDGXTpUdKbgE,1929
 palimpzest/core/data/iter_dataset.py,sha256=u7eZNWWT84rH_D8LNIuq0NAnm2roX81ifKTYp-hwY7g,20512
 palimpzest/core/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,45 +21,45 @@ palimpzest/core/lib/schemas.py,sha256=0qauaG3uW5tCJXNAo1i0G0UgbTaQLSLT6GoNDX8494
 palimpzest/prompts/__init__.py,sha256=sdZbC8RWi_IGjFuzKQMdRjS2Ih4zQnkyzFoJ6Q3Ce70,1764
 palimpzest/prompts/agent_prompts.py,sha256=CUzBVLBiPSw8OShtKp4VTpQwtrNMtcMglo-IZHMvuDM,17459
 palimpzest/prompts/context_search.py,sha256=s3pti4XNRiIyiWzjVNL_NqmqEc31jzSKMF2SlN0Aaf8,357
-palimpzest/prompts/convert_prompts.py,sha256=FR_zUADuOWxMqZED4S0lyO9VNgKPNiVpSZv6ND7a0v4,6009
+palimpzest/prompts/convert_prompts.py,sha256=quoIcdIrP4FoPBXlHKeOPjH5lVn8GH30f1tPiu3Xsyw,6037
 palimpzest/prompts/critique_and_refine_convert_prompts.py,sha256=WoXExBxQ7twswd9VCCST26c-2ehZtpD2iQoBi7sqDnQ,7814
-palimpzest/prompts/filter_prompts.py,sha256=lYQFrpAKhOMUQDOVbRBHh7IjuUNMCmBnAqHwDuptQHI,4232
-palimpzest/prompts/join_prompts.py,sha256=viQVvOpa2l9PYM34ua_jPNZnUOU_eCTMIoabBkF5cVc,5929
+palimpzest/prompts/filter_prompts.py,sha256=drTivlA_WnWkAIzY9GjqO_hfwdy432nMtsV-OYa-mlE,4260
+palimpzest/prompts/join_prompts.py,sha256=fEGZY_zn_dvOJCeUFYrHdg5P3h_H6Fo3FMmdnXwp2l4,5957
 palimpzest/prompts/moa_aggregator_convert_prompts.py,sha256=BQRrtGdr53PTqvXzmFh8kfQ_w9KoKw-zTtmdo-8RFjo,2887
-palimpzest/prompts/moa_proposer_convert_prompts.py,sha256=35pxtR2hnjLkv_10VEetRR9qUCR-zD85NZF3BaAANDk,3462
-palimpzest/prompts/prompt_factory.py,sha256=FDBoVdJ_khT7t6T6WAiK6RgC7HqB3efmRkwMam3AIhM,51262
+palimpzest/prompts/moa_proposer_convert_prompts.py,sha256=8vhq0bnikbCzS4CDV5IskFPWF0TC7VZGjeGvpOyIBV8,3476
+palimpzest/prompts/prompt_factory.py,sha256=MpEYoyPXY3gfFGG60O9rlw5A5UejC3CTRHcO6KRhyww,51733
 palimpzest/prompts/split_merge_prompts.py,sha256=0mTZeJhxtvlmv-ro0KwQpxlGgSTwyUhGRHJ-uHk2Zlw,3146
-palimpzest/prompts/split_proposer_prompts.py,sha256=TBHLGaM_ycHjGHrp1JziJoJDw4S5_F4afKSAdt2McKk,2624
-palimpzest/prompts/util_phrases.py,sha256=NWrcHfjJyiOY16Jyt7R50moVnlJDyvSBZ9kBqyX2WQo,751
+palimpzest/prompts/split_proposer_prompts.py,sha256=X3hufHPAiQyytZ_TFe2wJkVPgJtClZ9fVgz2zNk2Z5Q,2638
+palimpzest/prompts/util_phrases.py,sha256=ajxzj-B2gE56IENKVKElqw1xKWOF5IahOOqq026Pr00,876
 palimpzest/prompts/validator.py,sha256=pJTZjlt_OiFM3IFOgsJ0jQdayra8iRVrpqENlXI9tQQ,10532
 palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/execution/all_sample_execution_strategy.py,sha256=3n2hl8m-WFWIu-a8DiSVsGkz4ej3yB7mSdFR0jsiwAU,14366
 palimpzest/query/execution/execution_strategy.py,sha256=KwBJbWOBOOPBiWRm3ypHcAQiWbCsvtW6UnVU4tHkYz8,18905
 palimpzest/query/execution/execution_strategy_type.py,sha256=vRQBPCQN5_aoyD3TLIeW3VPo15mqF-5RBvEXkENz9FE,987
-palimpzest/query/execution/mab_execution_strategy.py,sha256=LY1JlbYMsnJHCtYjaJ6iklojBqXc2B4KS62lobPFNz0,42341
+palimpzest/query/execution/mab_execution_strategy.py,sha256=paVfB8lqNyUuISqfhkTd6RqOZqpyVty1EAN1sZz7erA,43554
 palimpzest/query/execution/parallel_execution_strategy.py,sha256=Gn5hB5XddX2jCkxx6d7O-DmitK6fbuwBFnnyKhnGYEw,15706
 palimpzest/query/execution/single_threaded_execution_strategy.py,sha256=1eo-Z9G3u92_PjoSX8HmO3D3phYgA8f0Actbgd1-oKY,16247
 palimpzest/query/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-palimpzest/query/generators/generators.py,sha256=pi6gTCzQYs-z93IFNGKyoskIcdYCSnOwyaj-DvSlkb4,20877
+palimpzest/query/generators/generators.py,sha256=WYMcff7axgDEmYqXvy1A-C5FA4s6lI393CcCx-TKTgM,20941
 palimpzest/query/operators/__init__.py,sha256=j-yh0P5tzXGa0JU_g8aNn54wCJDXPCMbmtOmazXXEts,3459
 palimpzest/query/operators/aggregate.py,sha256=QvWr4C1arFSZWVqPSF5F5On6Ise5OF3VVWNGSq6Gfjk,11230
 palimpzest/query/operators/compute.py,sha256=bxMKLRU_o7v603daKeR0FayDZ_V6NLI1fGzgu6E-sac,8473
-palimpzest/query/operators/convert.py,sha256=teesuAeYl20ULwm6LIA277SZremdHedD2N2GYDUjb5E,17156
-palimpzest/query/operators/critique_and_refine_convert.py,sha256=nJOQf7RLJR5Acg7fPssb0tTmtsCipG8hHu9PRquM9RE,5271
+palimpzest/query/operators/convert.py,sha256=mpXXYdgcH6zPqypzh3SjTqHjq7PDEbkfdZ1_XEml-nw,17334
+palimpzest/query/operators/critique_and_refine_convert.py,sha256=PbtKva6e3fh3yeUMGlkcpacPD003bFBzgBsw_yy-8fw,5293
 palimpzest/query/operators/distinct.py,sha256=MuF3NlC0QMTSGs0_fe2oly0I5Ow0hfOa7h8BFGhHiCs,2594
-palimpzest/query/operators/filter.py,sha256=Wm1PaxURE1ZY5j7E1AitGdJfb_IKJoC_3qQW8aF0XC4,10703
-palimpzest/query/operators/join.py,sha256=z1bzhdazTEq1BjoUSwV6j_DQ84TJ3uaSZJpCzSP61nc,17727
+palimpzest/query/operators/filter.py,sha256=jmSGV7xZ8uxXzH-Oko7l8ZPZxNf_qJNkYVAYgiSHl9g,10802
+palimpzest/query/operators/join.py,sha256=rs9_Y59082dlnSJu9rpRDEuv7jDPItKSpYsC8FCMFDM,17837
 palimpzest/query/operators/limit.py,sha256=upJ775cGkxjFHRJm8GpSvtJN1cspg2FVYLN_MrIfUo4,2113
-palimpzest/query/operators/logical.py,sha256=rh3XBUVO1JAEijw9AHjU35uf5ag01-KONdpCHJXRs3M,19883
-palimpzest/query/operators/mixture_of_agents_convert.py,sha256=Y6O9-zL_6BPwl5Yix3SyYhI_68wiejOtJ3xuFcn_dbs,6731
+palimpzest/query/operators/logical.py,sha256=K_dRlNKkda35kQ7gYGsrW9PoFuDPzexpjtDq_FYdhVw,20223
+palimpzest/query/operators/mixture_of_agents_convert.py,sha256=4v2V612NqdVD0RmcJ5VSgTiVliObku-t-A79SXVnpk0,6753
 palimpzest/query/operators/physical.py,sha256=buPZjtP4HKNVfOCNWdBtDnRS217dSsIG74gqZ1jmoyo,8320
 palimpzest/query/operators/project.py,sha256=RX5SbHFRwHcMfiQRofIPQr-AHgIDYm68ifiFZAPu7Fo,2094
 palimpzest/query/operators/rag_convert.py,sha256=1QQGrE22-Ec3-MNbnaU3k4TGHdpi2qZqZR9MHUniEM4,10691
 palimpzest/query/operators/retrieve.py,sha256=v1FTFsSctqH4B37aWgBXYIxgOMJwRWQ2kwwXu1huwaQ,13106
 palimpzest/query/operators/scan.py,sha256=Da_EZUrArzlAameHYCmtqo-xbPOFvbTYSktrUcUEUSc,7398
 palimpzest/query/operators/search.py,sha256=xydO5Kni0RArpvLSn2ajzD4TcH442VjpP2x9NakjzaA,22842
-palimpzest/query/operators/split_convert.py,sha256=SgtkwGWnIFlQTk96NsgckRx5q15KaGpsF3Si0FzHEGo,7765
+palimpzest/query/operators/split_convert.py,sha256=acCPlkrUfqHhGD7bU2AXQAhIEeAEIh0itamuCOm4KBk,7787
 palimpzest/query/optimizer/__init__.py,sha256=L2E1rOA-8O9oH6JL56wLI1qUVxXBLubJEG1IHMH-HU4,2384
 palimpzest/query/optimizer/cost_model.py,sha256=OldPy-TJdfsQbYRoKlb3yWeKbi15jcldTIUS6BTi9T8,12678
 palimpzest/query/optimizer/optimizer.py,sha256=mgM6c0d_voGNun2hMzqjfumJVieACtcHsNnBP4LyXAA,19626
@@ -70,9 +70,9 @@ palimpzest/query/optimizer/primitives.py,sha256=jMMVq37y1tWiPU1lSSKQP9OP-mzkpSxS
 palimpzest/query/optimizer/rules.py,sha256=9AsuVjhiZUc0snQPNhIqeyKpmqFsSv7e-v6BEbp9CDw,43315
 palimpzest/query/optimizer/tasks.py,sha256=DJcKDNbVJox61rnTW0HgT1PtxGx2P_NiLvNroXie-Lg,29509
 palimpzest/query/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-palimpzest/query/processor/config.py,sha256=b_EQOqOXoRP6AziOw1iLqb8tlSWP-D1_el3mmrnBDAk,2263
-palimpzest/query/processor/query_processor.py,sha256=W01-2FocN1Jsv58gmEo5ALTIcpLt7D0dmI8kghSCdBk,6291
-palimpzest/query/processor/query_processor_factory.py,sha256=H_2pkcN_aVbNDuMLsvZP2PXARLF9MwoHGAzEWkSNNYM,7866
+palimpzest/query/processor/config.py,sha256=vHVsgeBnKigacO0QA7bLf5q8pJhFWA2j9-p_no2bmYo,2366
+palimpzest/query/processor/query_processor.py,sha256=T4ffPbnOX23G8FDITzmM7Iw7DUEDWIHnwl8XLYllgjg,6240
+palimpzest/query/processor/query_processor_factory.py,sha256=6w9R1Y8AOV22X8MUf7g2G5Qb15BGEZAXQKbCQJafWJ0,8048
 palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/schemabuilder/schema_builder.py,sha256=QraGp66dcD-ej6Y2mER40o86G9JqlBkL7swkJzjUAIY,7968
 palimpzest/tools/README.md,sha256=56_6LPG80uc0CLVhTBP6I1wgIffNv9cyTr0TmVZqmrM,483
@@ -83,13 +83,13 @@ palimpzest/tools/skema_tools.py,sha256=HXUFpjMhbVxZwKKkATeK-FwtlTCawaCbeP-uHntI1
 palimpzest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/utils/env_helpers.py,sha256=n81KzoJ459pRxo7QmJA7duazwWsfoMGTHc71D2LatFk,334
 palimpzest/utils/hash_helpers.py,sha256=3A8dA7SbXTwnnvZvPVNqqMLlVRhCKyKF_bjNNAu3Exk,334
-palimpzest/utils/model_helpers.py,sha256=Vlu3KIvbc4Usg4iSI2KMFSc-qcdAubWN2CSjZod2czY,2233
+palimpzest/utils/model_helpers.py,sha256=X6SlMgD5I5Aj_cxaFaoGaaNvOOqTNZVmjj6zbfn63Yk,2476
 palimpzest/utils/progress.py,sha256=7gucyZr82udMDZitrrkAOSKHZVljE3R2wv9nf5gA5TM,20807
 palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
 palimpzest/validator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/validator/validator.py,sha256=J2tGvJqfg6v5lOQDYYaqAa9d37uVHBrqkNs-a8d1Ic0,16365
-palimpzest-0.8.0.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
-palimpzest-0.8.0.dist-info/METADATA,sha256=MUkUorsKFMVGPmCeAZOBruvKP8shJ1kbF5kulxPnSHc,7286
-palimpzest-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-palimpzest-0.8.0.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
-palimpzest-0.8.0.dist-info/RECORD,,
+palimpzest-0.8.2.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
+palimpzest-0.8.2.dist-info/METADATA,sha256=bDa2zFfJr_v4Ef6fzq3SCALSoXoXc0uPnefnmVbAzTA,7286
+palimpzest-0.8.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+palimpzest-0.8.2.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
+palimpzest-0.8.2.dist-info/RECORD,,

{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{palimpzest-0.8.0.dist-info → palimpzest-0.8.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

palimpzest 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

palimpzest 0.8.0py3-none-any.whl → 0.8.2py3-none-any.whl