PyPI - palimpzest - Versions diffs - 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

palimpzest-0.7.21-py3-none-any.whl → palimpzest-0.8.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89)

palimpzest/__init__.py +37 -6
palimpzest/agents/__init__.py +0 -0
palimpzest/agents/compute_agents.py +0 -0
palimpzest/agents/search_agents.py +637 -0
palimpzest/constants.py +343 -209
palimpzest/core/data/context.py +393 -0
palimpzest/core/data/context_manager.py +163 -0
palimpzest/core/data/dataset.py +639 -0
palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
palimpzest/core/elements/groupbysig.py +16 -13
palimpzest/core/elements/records.py +166 -75
palimpzest/core/lib/schemas.py +152 -390
palimpzest/core/{data/dataclasses.py → models.py} +306 -170
palimpzest/policy.py +2 -27
palimpzest/prompts/__init__.py +35 -5
palimpzest/prompts/agent_prompts.py +357 -0
palimpzest/prompts/context_search.py +9 -0
palimpzest/prompts/convert_prompts.py +62 -6
palimpzest/prompts/filter_prompts.py +51 -6
palimpzest/prompts/join_prompts.py +163 -0
palimpzest/prompts/moa_proposer_convert_prompts.py +6 -6
palimpzest/prompts/prompt_factory.py +375 -47
palimpzest/prompts/split_proposer_prompts.py +1 -1
palimpzest/prompts/util_phrases.py +5 -0
palimpzest/prompts/validator.py +239 -0
palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
palimpzest/query/execution/execution_strategy.py +210 -317
palimpzest/query/execution/execution_strategy_type.py +5 -7
palimpzest/query/execution/mab_execution_strategy.py +249 -136
palimpzest/query/execution/parallel_execution_strategy.py +153 -244
palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
palimpzest/query/generators/generators.py +160 -331
palimpzest/query/operators/__init__.py +15 -5
palimpzest/query/operators/aggregate.py +50 -33
palimpzest/query/operators/compute.py +201 -0
palimpzest/query/operators/convert.py +33 -19
palimpzest/query/operators/critique_and_refine_convert.py +7 -5
palimpzest/query/operators/distinct.py +62 -0
palimpzest/query/operators/filter.py +26 -16
palimpzest/query/operators/join.py +403 -0
palimpzest/query/operators/limit.py +3 -3
palimpzest/query/operators/logical.py +205 -77
palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
palimpzest/query/operators/physical.py +27 -21
palimpzest/query/operators/project.py +3 -3
palimpzest/query/operators/rag_convert.py +7 -7
palimpzest/query/operators/retrieve.py +9 -9
palimpzest/query/operators/scan.py +81 -42
palimpzest/query/operators/search.py +524 -0
palimpzest/query/operators/split_convert.py +10 -8
palimpzest/query/optimizer/__init__.py +7 -9
palimpzest/query/optimizer/cost_model.py +108 -441
palimpzest/query/optimizer/optimizer.py +123 -181
palimpzest/query/optimizer/optimizer_strategy.py +66 -61
palimpzest/query/optimizer/plan.py +352 -67
palimpzest/query/optimizer/primitives.py +43 -19
palimpzest/query/optimizer/rules.py +484 -646
palimpzest/query/optimizer/tasks.py +127 -58
palimpzest/query/processor/config.py +42 -76
palimpzest/query/processor/query_processor.py +73 -18
palimpzest/query/processor/query_processor_factory.py +46 -38
palimpzest/schemabuilder/schema_builder.py +15 -28
palimpzest/utils/model_helpers.py +32 -77
palimpzest/utils/progress.py +114 -102
palimpzest/validator/__init__.py +0 -0
palimpzest/validator/validator.py +306 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/METADATA +6 -1
palimpzest-0.8.1.dist-info/RECORD +95 -0
palimpzest/core/lib/fields.py +0 -141
palimpzest/prompts/code_synthesis_prompts.py +0 -28
palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
palimpzest/query/generators/api_client_factory.py +0 -30
palimpzest/query/operators/code_synthesis_convert.py +0 -488
palimpzest/query/operators/map.py +0 -130
palimpzest/query/processor/nosentinel_processor.py +0 -33
palimpzest/query/processor/processing_strategy_type.py +0 -28
palimpzest/query/processor/sentinel_processor.py +0 -88
palimpzest/query/processor/streaming_processor.py +0 -149
palimpzest/sets.py +0 -405
palimpzest/utils/datareader_helpers.py +0 -61
palimpzest/utils/demo_helpers.py +0 -75
palimpzest/utils/field_helpers.py +0 -69
palimpzest/utils/generation_helpers.py +0 -69
palimpzest/utils/sandbox.py +0 -183
palimpzest-0.7.21.dist-info/RECORD +0 -95
palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/WHEEL +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/licenses/LICENSE +0 -0
{palimpzest-0.7.21.dist-info → palimpzest-0.8.1.dist-info}/top_level.txt +0 -0

palimpzest/prompts/prompt_factory.py CHANGED Viewed

@@ -4,17 +4,25 @@ import base64
 import json
 from string import Formatter
+from pydantic import BaseModel
 from palimpzest.constants import (
-    MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT,
+    LLAMA_CONTEXT_TOKENS_LIMIT,
     TOKENS_PER_CHARACTER,
     Cardinality,
     Model,
     PromptStrategy,
 )
 from palimpzest.core.elements.records import DataRecord
-from palimpzest.core.lib.fields import BytesField, ImageBase64Field, ImageFilepathField, ImageURLField
-from palimpzest.core.lib.schemas import Schema
+from palimpzest.core.lib.schemas import AudioBase64, AudioFilepath, ImageBase64, ImageFilepath, ImageURL
 from palimpzest.prompts.convert_prompts import (
+    COT_QA_AUDIO_DISCLAIMER,
+    COT_QA_AUDIO_EXAMPLE_ANSWER,
+    COT_QA_AUDIO_EXAMPLE_CONTEXT,
+    COT_QA_AUDIO_EXAMPLE_INPUT_FIELDS,
+    COT_QA_AUDIO_EXAMPLE_OUTPUT_FIELDS,
+    COT_QA_AUDIO_EXAMPLE_REASONING,
+    COT_QA_AUDIO_JOB_INSTRUCTION,
     COT_QA_BASE_SYSTEM_PROMPT,
     COT_QA_BASE_USER_PROMPT,
     COT_QA_EXAMPLE_ANSWER,
@@ -30,6 +38,8 @@ from palimpzest.prompts.convert_prompts import (
     COT_QA_IMAGE_EXAMPLE_REASONING,
     COT_QA_IMAGE_JOB_INSTRUCTION,
     COT_QA_JOB_INSTRUCTION,
+    COT_QA_NO_REASONING_BASE_SYSTEM_PROMPT,
+    COT_QA_NO_REASONING_BASE_USER_PROMPT,
 )
 from palimpzest.prompts.critique_and_refine_convert_prompts import (
     BASE_CRITIQUE_PROMPT,
@@ -42,6 +52,12 @@ from palimpzest.prompts.critique_and_refine_convert_prompts import (
     COT_QA_REFINEMENT_FINISH_INSTRUCTION,
 )
 from palimpzest.prompts.filter_prompts import (
+    COT_BOOL_AUDIO_DISCLAIMER,
+    COT_BOOL_AUDIO_EXAMPLE_CONTEXT,
+    COT_BOOL_AUDIO_EXAMPLE_FILTER_CONDITION,
+    COT_BOOL_AUDIO_EXAMPLE_INPUT_FIELDS,
+    COT_BOOL_AUDIO_EXAMPLE_REASONING,
+    COT_BOOL_AUDIO_JOB_INSTRUCTION,
     COT_BOOL_BASE_SYSTEM_PROMPT,
     COT_BOOL_BASE_USER_PROMPT,
     COT_BOOL_EXAMPLE_CONTEXT,
@@ -55,6 +71,39 @@ from palimpzest.prompts.filter_prompts import (
     COT_BOOL_IMAGE_EXAMPLE_REASONING,
     COT_BOOL_IMAGE_JOB_INSTRUCTION,
     COT_BOOL_JOB_INSTRUCTION,
+    COT_BOOL_NO_REASONING_BASE_SYSTEM_PROMPT,
+    COT_BOOL_NO_REASONING_BASE_USER_PROMPT,
+)
+from palimpzest.prompts.join_prompts import (
+    COT_JOIN_AUDIO_DISCLAIMER,
+    COT_JOIN_AUDIO_EXAMPLE_CONTEXT,
+    COT_JOIN_AUDIO_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_AUDIO_EXAMPLE_JOIN_CONDITION,
+    COT_JOIN_AUDIO_EXAMPLE_REASONING,
+    COT_JOIN_AUDIO_JOB_INSTRUCTION,
+    COT_JOIN_AUDIO_RIGHT_EXAMPLE_CONTEXT,
+    COT_JOIN_AUDIO_RIGHT_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_BASE_SYSTEM_PROMPT,
+    COT_JOIN_BASE_USER_PROMPT,
+    COT_JOIN_EXAMPLE_CONTEXT,
+    COT_JOIN_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_EXAMPLE_JOIN_CONDITION,
+    COT_JOIN_EXAMPLE_REASONING,
+    COT_JOIN_IMAGE_DISCLAIMER,
+    COT_JOIN_IMAGE_EXAMPLE_CONTEXT,
+    COT_JOIN_IMAGE_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_IMAGE_EXAMPLE_JOIN_CONDITION,
+    COT_JOIN_IMAGE_EXAMPLE_REASONING,
+    COT_JOIN_IMAGE_JOB_INSTRUCTION,
+    COT_JOIN_IMAGE_RIGHT_EXAMPLE_CONTEXT,
+    COT_JOIN_IMAGE_RIGHT_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_JOB_INSTRUCTION,
+    COT_JOIN_NO_REASONING_BASE_SYSTEM_PROMPT,
+    COT_JOIN_NO_REASONING_BASE_USER_PROMPT,
+    COT_JOIN_RIGHT_AUDIO_DISCLAIMER,
+    COT_JOIN_RIGHT_EXAMPLE_CONTEXT,
+    COT_JOIN_RIGHT_EXAMPLE_INPUT_FIELDS,
+    COT_JOIN_RIGHT_IMAGE_DISCLAIMER,
 )
 from palimpzest.prompts.moa_aggregator_convert_prompts import (
     COT_MOA_AGG_BASE_SYSTEM_PROMPT,
@@ -89,6 +138,7 @@ from palimpzest.prompts.split_proposer_prompts import (
     SPLIT_PROPOSER_JOB_INSTRUCTION,
 )
 from palimpzest.prompts.util_phrases import (
+    DESC_SECTION,
     ONE_TO_MANY_OUTPUT_FORMAT_INSTRUCTION,
     ONE_TO_ONE_OUTPUT_FORMAT_INSTRUCTION,
 )
@@ -99,11 +149,25 @@ class PromptFactory:
     BASE_SYSTEM_PROMPT_MAP = {
         PromptStrategy.COT_BOOL: COT_BOOL_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_BOOL_NO_REASONING: COT_BOOL_NO_REASONING_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_BOOL_AUDIO_NO_REASONING: COT_BOOL_NO_REASONING_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_BOOL_IMAGE_NO_REASONING: COT_BOOL_NO_REASONING_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN: COT_JOIN_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN_NO_REASONING: COT_JOIN_NO_REASONING_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN_AUDIO_NO_REASONING: COT_JOIN_NO_REASONING_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_JOIN_IMAGE_NO_REASONING: COT_JOIN_NO_REASONING_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_QA: COT_QA_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_QA_NO_REASONING: COT_QA_NO_REASONING_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_QA_AUDIO: COT_QA_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_QA_AUDIO_NO_REASONING: COT_QA_NO_REASONING_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_QA_CRITIC: None,
         PromptStrategy.COT_QA_REFINE: None,
         PromptStrategy.COT_QA_IMAGE: COT_QA_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_QA_IMAGE_NO_REASONING: COT_QA_NO_REASONING_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_QA_IMAGE_CRITIC: None,
         PromptStrategy.COT_QA_IMAGE_REFINE: None,
         PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT,
@@ -114,11 +178,25 @@ class PromptFactory:
     }
     BASE_USER_PROMPT_MAP = {
         PromptStrategy.COT_BOOL: COT_BOOL_BASE_USER_PROMPT,
+        PromptStrategy.COT_BOOL_NO_REASONING: COT_BOOL_NO_REASONING_BASE_USER_PROMPT,
+        PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_BASE_USER_PROMPT,
+        PromptStrategy.COT_BOOL_AUDIO_NO_REASONING: COT_BOOL_NO_REASONING_BASE_USER_PROMPT,
         PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_BASE_USER_PROMPT,
+        PromptStrategy.COT_BOOL_IMAGE_NO_REASONING: COT_BOOL_NO_REASONING_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN: COT_JOIN_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN_NO_REASONING: COT_JOIN_NO_REASONING_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN_AUDIO_NO_REASONING: COT_JOIN_NO_REASONING_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_BASE_USER_PROMPT,
+        PromptStrategy.COT_JOIN_IMAGE_NO_REASONING: COT_JOIN_NO_REASONING_BASE_USER_PROMPT,
         PromptStrategy.COT_QA: COT_QA_BASE_USER_PROMPT,
+        PromptStrategy.COT_QA_NO_REASONING: COT_QA_NO_REASONING_BASE_USER_PROMPT,
+        PromptStrategy.COT_QA_AUDIO: COT_QA_BASE_USER_PROMPT,
+        PromptStrategy.COT_QA_AUDIO_NO_REASONING: COT_QA_NO_REASONING_BASE_USER_PROMPT,
         PromptStrategy.COT_QA_CRITIC: BASE_CRITIQUE_PROMPT,
         PromptStrategy.COT_QA_REFINE: BASE_REFINEMENT_PROMPT,
         PromptStrategy.COT_QA_IMAGE: COT_QA_BASE_USER_PROMPT,
+        PromptStrategy.COT_QA_IMAGE_NO_REASONING: COT_QA_NO_REASONING_BASE_USER_PROMPT,
         PromptStrategy.COT_QA_IMAGE_CRITIC: BASE_CRITIQUE_PROMPT,
         PromptStrategy.COT_QA_IMAGE_REFINE: BASE_REFINEMENT_PROMPT,
         PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_BASE_USER_PROMPT,
@@ -128,10 +206,11 @@ class PromptFactory:
         PromptStrategy.SPLIT_MERGER: COT_SPLIT_MERGER_BASE_USER_PROMPT,
     }
-    def __init__(self, prompt_strategy: PromptStrategy, model: Model, cardinality: Cardinality) -> None:
+    def __init__(self, prompt_strategy: PromptStrategy, model: Model, cardinality: Cardinality, desc: str | None = None) -> None:
         self.prompt_strategy = prompt_strategy
         self.model = model
         self.cardinality = cardinality
+        self.desc = desc
     def _get_context(self, candidate: DataRecord, input_fields: list[str]) -> str:
         """
@@ -144,8 +223,9 @@ class PromptFactory:
         Returns:
             str: The context.
         """
+        # TODO: remove mask_filepaths=True after SemBench evaluation
         # get context from input record (project_cols will be None if not provided in kwargs)
-        context: dict = candidate.to_dict(include_bytes=False, project_cols=input_fields)
+        context: dict = candidate.to_dict(include_bytes=False, project_cols=input_fields, mask_filepaths=True)
         # TODO: MOVE THIS LOGIC INTO A CHUNKING / CONTEXT MANAGEMENT CLASS
         #   - this class should be able to:
@@ -155,12 +235,12 @@ class PromptFactory:
         # TODO: this does not work for image prompts
         # TODO: this ignores the size of the `orignal_messages` in critique and refine prompts
         # cut down on context based on window length
-        if self.model.is_llama_model() or self.model.is_mixtral_model():
+        if self.model.is_llama_model():
             total_context_len = len(json.dumps(context, indent=2))
             # sort fields by length and progressively strip from the longest field until it is short enough;
-            # NOTE: MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT is a rough estimate which leaves room for the rest of the prompt text
-            while total_context_len * TOKENS_PER_CHARACTER > MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT:
+            # NOTE: LLAMA_CONTEXT_TOKENS_LIMIT is a rough estimate which leaves room for the rest of the prompt text
+            while total_context_len * TOKENS_PER_CHARACTER > LLAMA_CONTEXT_TOKENS_LIMIT:
                 # sort fields by length
                 field_lengths = [(field, len(value) if value is not None else 0) for field, value in context.items()]
                 sorted_fields = sorted(field_lengths, key=lambda item: item[1], reverse=True)
@@ -169,7 +249,7 @@ class PromptFactory:
                 longest_field_name, longest_field_length = sorted_fields[0]
                 # trim the field
-                context_factor = MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT / (total_context_len * TOKENS_PER_CHARACTER)
+                context_factor = LLAMA_CONTEXT_TOKENS_LIMIT / (total_context_len * TOKENS_PER_CHARACTER)
                 keep_frac_idx = int(longest_field_length * context_factor)
                 context[longest_field_name] = context[longest_field_name][:keep_frac_idx]
@@ -191,7 +271,11 @@ class PromptFactory:
         Returns:
             list[str]: The list of input field names.
         """
-        return kwargs.get("project_cols", candidate.get_field_names())
+        # NOTE: joins will include left and right input fields in project_cols, so we have to check
+        #       if the field is in the candidate record
+        input_fields = kwargs.get("project_cols", candidate.get_field_names())
+        input_fields = [field for field in input_fields if field in candidate.get_field_names()]
+        return input_fields
     def _get_input_fields_desc(self, candidate: DataRecord, input_fields: list[str]) -> str:
         """
@@ -205,7 +289,7 @@ class PromptFactory:
         """
         input_fields_desc = ""
         for field_name in input_fields:
-            input_fields_desc += f"- {field_name}: {candidate.get_field_type(field_name)._desc}\n"
+            input_fields_desc += f"- {field_name}: {candidate.get_field_type(field_name).description}\n"
         return input_fields_desc[:-1]
@@ -221,13 +305,13 @@ class PromptFactory:
             str: The output fields description.
         """
         output_fields_desc = ""
-        output_schema: Schema = kwargs.get("output_schema")
+        output_schema: BaseModel = kwargs.get("output_schema")
         if self.prompt_strategy.is_convert_prompt():
             assert output_schema is not None, "Output schema must be provided for convert prompts."
-            field_desc_map = output_schema.field_desc_map()
-            for field_name in output_fields:
-                output_fields_desc += f"- {field_name}: {field_desc_map[field_name]}\n"
+            for field_name in sorted(output_fields):
+                desc = output_schema.model_fields[field_name].description
+                output_fields_desc += f"- {field_name}: {'no description available' if desc is None else desc}\n"
         # strip the last newline characters from the field descriptions and return
         return output_fields_desc[:-1]
@@ -245,6 +329,19 @@ class PromptFactory:
         return filter_condition
+    def _get_join_condition(self, **kwargs) -> str | None:
+        """
+        Returns the join condition for the join operation.
+        Returns:
+            str | None: The join condition (if applicable).
+        """
+        join_condition = kwargs.get("join_condition")
+        if self.prompt_strategy.is_join_prompt():
+            assert join_condition is not None, "Join condition must be provided for join operations."
+        return join_condition
     def _get_original_output(self, **kwargs) -> str | None:
         """
         Returns the original output from a previous model generation for the critique and refinement operations.
@@ -337,8 +434,13 @@ class PromptFactory:
         """
         prompt_strategy_to_job_instruction = {
             PromptStrategy.COT_BOOL: COT_BOOL_JOB_INSTRUCTION,
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_JOB_INSTRUCTION,
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_JOB_INSTRUCTION,
+            PromptStrategy.COT_JOIN: COT_JOIN_JOB_INSTRUCTION,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_JOB_INSTRUCTION,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_JOB_INSTRUCTION,
             PromptStrategy.COT_QA: COT_QA_JOB_INSTRUCTION,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_JOB_INSTRUCTION,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_JOB_INSTRUCTION,
             PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_JOB_INSTRUCTION,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_JOB_INSTRUCTION,
@@ -346,6 +448,19 @@ class PromptFactory:
         }
         return prompt_strategy_to_job_instruction.get(self.prompt_strategy)
+    def _get_desc_section(self) -> str:
+        """
+        Returns the description section for the prompt.
+        Returns:
+            str: The description section (if applicable).
+        """
+        desc_section = ""
+        if self.desc is not None:
+            desc_section = DESC_SECTION.format(desc=self.desc)
+        return desc_section
     def _get_critique_criteria(self) -> str | None:
         """
         Returns the critique criteria for the critique operation.
@@ -402,8 +517,13 @@ class PromptFactory:
         """
         prompt_strategy_to_example_input_fields = {
             PromptStrategy.COT_BOOL: COT_BOOL_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_EXAMPLE_INPUT_FIELDS,
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_JOIN: COT_JOIN_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_EXAMPLE_INPUT_FIELDS,
             PromptStrategy.COT_QA: COT_QA_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_EXAMPLE_INPUT_FIELDS,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_EXAMPLE_INPUT_FIELDS,
             PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_EXAMPLE_INPUT_FIELDS,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_EXAMPLE_INPUT_FIELDS,
@@ -412,6 +532,21 @@ class PromptFactory:
         return prompt_strategy_to_example_input_fields.get(self.prompt_strategy)
+    def _get_right_example_input_fields(self) -> str | None:
+        """
+        Returns the example right input fields for the join prompt.
+        Returns:
+            str | None: The example right input fields (if applicable).
+        """
+        prompt_strategy_to_right_example_input_fields = {
+            PromptStrategy.COT_JOIN: COT_JOIN_RIGHT_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_RIGHT_EXAMPLE_INPUT_FIELDS,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_RIGHT_EXAMPLE_INPUT_FIELDS,
+        }
+        return prompt_strategy_to_right_example_input_fields.get(self.prompt_strategy)
     def _get_example_output_fields(self) -> str | None:
         """
         Returns the example output fields for the prompt.
@@ -421,6 +556,7 @@ class PromptFactory:
         """
         prompt_strategy_to_example_output_fields = {
             PromptStrategy.COT_QA: COT_QA_EXAMPLE_OUTPUT_FIELDS,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_EXAMPLE_OUTPUT_FIELDS,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_EXAMPLE_OUTPUT_FIELDS,
             PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_EXAMPLE_OUTPUT_FIELDS,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_EXAMPLE_OUTPUT_FIELDS,
@@ -438,8 +574,13 @@ class PromptFactory:
         """
         prompt_strategy_to_example_context = {
             PromptStrategy.COT_BOOL: COT_BOOL_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_EXAMPLE_CONTEXT,
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_JOIN: COT_JOIN_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_EXAMPLE_CONTEXT,
             PromptStrategy.COT_QA: COT_QA_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_EXAMPLE_CONTEXT,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_EXAMPLE_CONTEXT,
             PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_EXAMPLE_CONTEXT,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_EXAMPLE_CONTEXT,
@@ -448,6 +589,21 @@ class PromptFactory:
         return prompt_strategy_to_example_context.get(self.prompt_strategy)
+    def _get_right_example_context(self) -> str | None:
+        """
+        Returns the right example context for the join prompt.
+        Returns:
+            str | None: The right example context (if applicable).
+        """
+        prompt_strategy_to_right_example_context = {
+            PromptStrategy.COT_JOIN: COT_JOIN_RIGHT_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_RIGHT_EXAMPLE_CONTEXT,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_RIGHT_EXAMPLE_CONTEXT,
+        }
+        return prompt_strategy_to_right_example_context.get(self.prompt_strategy)
     def _get_image_disclaimer(self) -> str:
         """
         Returns the image disclaimer for the prompt. The disclaimer must be an empty string
@@ -458,12 +614,57 @@ class PromptFactory:
         """
         prompt_strategy_to_image_disclaimer = {
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_DISCLAIMER,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_DISCLAIMER,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_DISCLAIMER,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_DISCLAIMER,
         }
         return prompt_strategy_to_image_disclaimer.get(self.prompt_strategy, "")
+    def _get_audio_disclaimer(self) -> str:
+        """
+        Returns the audio disclaimer for the prompt. The disclaimer must be an empty string
+        for text prompts.
+        Returns:
+            str: The audio disclaimer. If this is a text prompt then it is an empty string.
+        """
+        prompt_strategy_to_audio_disclaimer = {
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_DISCLAIMER,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_DISCLAIMER,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_DISCLAIMER,
+        }
+        return prompt_strategy_to_audio_disclaimer.get(self.prompt_strategy, "")
+    def _get_right_image_disclaimer(self) -> str:
+        """
+        Returns the right image disclaimer for the prompt. The disclaimer must be an empty string
+        for text prompts.
+        Returns:
+            str: The right image disclaimer. If this is a text prompt then it is an empty string.
+        """
+        prompt_strategy_to_image_disclaimer = {
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_RIGHT_IMAGE_DISCLAIMER,
+        }
+        return prompt_strategy_to_image_disclaimer.get(self.prompt_strategy, "")
+    def _get_right_audio_disclaimer(self) -> str:
+        """
+        Returns the right audio disclaimer for the prompt. The disclaimer must be an empty string
+        for text prompts.
+        Returns:
+            str: The right audio disclaimer. If this is a text prompt then it is an empty string.
+        """
+        prompt_strategy_to_audio_disclaimer = {
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_RIGHT_AUDIO_DISCLAIMER,
+        }
+        return prompt_strategy_to_audio_disclaimer.get(self.prompt_strategy, "")
     def _get_example_filter_condition(self) -> str | None:
         """
         Returns the example filter condition for the prompt.
@@ -473,11 +674,27 @@ class PromptFactory:
         """
         prompt_strategy_to_example_filter_condition = {
             PromptStrategy.COT_BOOL: COT_BOOL_EXAMPLE_FILTER_CONDITION,
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_EXAMPLE_FILTER_CONDITION,
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_EXAMPLE_FILTER_CONDITION,
         }
         return prompt_strategy_to_example_filter_condition.get(self.prompt_strategy)
+    def _get_example_join_condition(self) -> str | None:
+        """
+        Returns the example join condition for the prompt.
+        Returns:
+            str | None: The example join condition (if applicable).
+        """
+        prompt_strategy_to_example_join_condition = {
+            PromptStrategy.COT_JOIN: COT_JOIN_EXAMPLE_JOIN_CONDITION,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_EXAMPLE_JOIN_CONDITION,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_EXAMPLE_JOIN_CONDITION,
+        }
+        return prompt_strategy_to_example_join_condition.get(self.prompt_strategy)
     def _get_example_reasoning(self) -> str | None:
         """
         Returns the example reasoning for the prompt.
@@ -487,8 +704,13 @@ class PromptFactory:
         """
         prompt_strategy_to_example_reasoning = {
             PromptStrategy.COT_BOOL: COT_BOOL_EXAMPLE_REASONING,
+            PromptStrategy.COT_BOOL_AUDIO: COT_BOOL_AUDIO_EXAMPLE_REASONING,
             PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_IMAGE_EXAMPLE_REASONING,
+            PromptStrategy.COT_JOIN: COT_JOIN_EXAMPLE_REASONING,
+            PromptStrategy.COT_JOIN_AUDIO: COT_JOIN_AUDIO_EXAMPLE_REASONING,
+            PromptStrategy.COT_JOIN_IMAGE: COT_JOIN_IMAGE_EXAMPLE_REASONING,
             PromptStrategy.COT_QA: COT_QA_EXAMPLE_REASONING,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_EXAMPLE_REASONING,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_EXAMPLE_REASONING,
         }
@@ -503,6 +725,7 @@ class PromptFactory:
         """
         prompt_strategy_to_example_answer = {
             PromptStrategy.COT_QA: COT_QA_EXAMPLE_ANSWER,
+            PromptStrategy.COT_QA_AUDIO: COT_QA_AUDIO_EXAMPLE_ANSWER,
             PromptStrategy.COT_QA_IMAGE: COT_QA_IMAGE_EXAMPLE_ANSWER,
             PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_EXAMPLE_ANSWER,
             PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_IMAGE_EXAMPLE_ANSWER,
@@ -512,7 +735,7 @@ class PromptFactory:
         return prompt_strategy_to_example_answer.get(self.prompt_strategy)
     def _get_all_format_kwargs(
-        self, candidate: DataRecord, input_fields: list[str], output_fields: list[str], **kwargs
+        self, candidate: DataRecord, input_fields: list[str], output_fields: list[str], right_candidate: DataRecord | None, right_input_fields: list[str], **kwargs
     ) -> dict:
         """
         Returns a dictionary containing all the format kwargs for templating the prompts.
@@ -532,24 +755,39 @@ class PromptFactory:
             "input_fields_desc": self._get_input_fields_desc(candidate, input_fields),
             "output_fields_desc": self._get_output_fields_desc(output_fields, **kwargs),
             "filter_condition": self._get_filter_condition(**kwargs),
+            "join_condition": self._get_join_condition(**kwargs),
             "original_output": self._get_original_output(**kwargs),
             "critique_output": self._get_critique_output(**kwargs),
             "model_responses": self._get_model_responses(**kwargs),
             "chunk_outputs": self._get_chunk_outputs(**kwargs),
         }
+        # if a right candidate is provided, we also get the context and input field descriptions for the right candidate
+        if right_candidate is not None:
+            input_format_kwargs.update({
+                "right_context": self._get_context(right_candidate, right_input_fields),
+                "right_input_fields_desc": self._get_input_fields_desc(right_candidate, right_input_fields),
+            })
         # get format kwargs which depend on the prompt strategy
         prompt_strategy_format_kwargs = {
             "output_format_instruction": self._get_output_format_instruction(),
             "job_instruction": self._get_job_instruction(),
+            "desc_section": self._get_desc_section(),
             "critique_criteria": self._get_critique_criteria(),
             "refinement_criteria": self._get_refinement_criteria(),
             "finish_instruction": self._get_finish_instruction(),
             "example_input_fields": self._get_example_input_fields(),
+            "right_example_input_fields": self._get_right_example_input_fields(),
             "example_output_fields": self._get_example_output_fields(),
             "example_context": self._get_example_context(),
+            "right_example_context": self._get_right_example_context(),
             "image_disclaimer": self._get_image_disclaimer(),
+            "audio_disclaimer": self._get_audio_disclaimer(),
+            "right_image_disclaimer": self._get_right_image_disclaimer(),
+            "right_audio_disclaimer": self._get_right_audio_disclaimer(),
             "example_filter_condition": self._get_example_filter_condition(),
+            "example_join_condition": self._get_example_join_condition(),
             "example_reasoning": self._get_example_reasoning(),
             "example_answer": self._get_example_answer(),
         }
@@ -557,6 +795,53 @@ class PromptFactory:
         # return all format kwargs
         return {**input_format_kwargs, **prompt_strategy_format_kwargs}
+    def _create_audio_messages(self, candidate: DataRecord, input_fields: list[str]) -> list[dict]:
+        """
+        Collects the audio held in the candidate's audio-typed input fields and wraps it
+        in a single user message for the chat payload.
+
+        Each field's type annotation is compared against four audio shapes (each also
+        accepted in its `| None` form): `AudioFilepath`, `list[AudioFilepath]`,
+        `AudioBase64` and `list[AudioBase64]`. Filepaths are opened in binary mode and
+        base64-encoded; `AudioBase64` values are forwarded unchanged. Fields whose
+        annotation matches none of these shapes are ignored.
+
+        NOTE: every content entry is emitted with `"format": "wav"`; this method does not
+        inspect the file extension or the audio bytes to determine the real format.
+
+        Args:
+            candidate (DataRecord): The input record.
+            input_fields (list[str]): The names of the candidate's fields to scan for audio.
+
+        Returns:
+            list[dict]: A one-element list holding a message of the form
+                `{"role": "user", "type": "input_audio", "content": [...]}` whose content
+                has one `input_audio` entry per audio recording, in field order; or an
+                empty list if no audio was found.
+
+        Raises:
+            OSError: Propagated from `open()` if an audio filepath cannot be read.
+        """
+        # accumulate one `input_audio` content entry per audio recording found in the input fields
+        audio_content = []
+        for field_name in input_fields:
+            field_value = candidate[field_name]
+            field_type = candidate.get_field_type(field_name)
+            # audio filepath (or list of audio filepaths): read the file and base64-encode its bytes
+            if field_type.annotation in [AudioFilepath, AudioFilepath | None]:
+                with open(field_value, "rb") as f:
+                    base64_audio_str = base64.b64encode(f.read()).decode("utf-8")
+                audio_content.append(
+                    {"type": "input_audio", "input_audio": {"data": base64_audio_str, "format": "wav"}}
+                )
+            elif field_type.annotation in [list[AudioFilepath], list[AudioFilepath] | None]:
+                for audio_filepath in field_value:
+                    with open(audio_filepath, "rb") as f:
+                        base64_audio_str = base64.b64encode(f.read()).decode("utf-8")
+                    audio_content.append(
+                        {"type": "input_audio", "input_audio": {"data": base64_audio_str, "format": "wav"}}
+                    )
+            # pre-encoded audio (or list of pre-encoded audio): the value is used as the payload data as-is
+            elif field_type.annotation in [AudioBase64, AudioBase64 | None]:
+                audio_content.append(
+                    {"type": "input_audio", "input_audio": {"data": field_value, "format": "wav"}}
+                )
+            elif field_type.annotation in [list[AudioBase64], list[AudioBase64] | None]:
+                for base64_audio in field_value:
+                    audio_content.append(
+                        {"type": "input_audio", "input_audio": {"data": base64_audio, "format": "wav"}}
+                    )
+        return [{"role": "user", "type": "input_audio", "content": audio_content}] if len(audio_content) > 0 else []
     def _create_image_messages(self, candidate: DataRecord, input_fields: list[str]) -> list[dict]:
         """
         Parses the candidate record and returns the image messages for the chat payload.
@@ -569,50 +854,48 @@ class PromptFactory:
             list[dict]: The image messages for the chat payload.
         """
         # create a message for each image in an input field with an image (or list of image) type
-        image_messages = []
+        image_content = []
         for field_name in input_fields:
             field_value = candidate[field_name]
             field_type = candidate.get_field_type(field_name)
             # image filepath (or list of image filepaths)
-            if isinstance(field_type, ImageFilepathField):
+            if field_type.annotation in [ImageFilepath, ImageFilepath | None]:
                 with open(field_value, "rb") as f:
                     base64_image_str = base64.b64encode(f.read()).decode("utf-8")
-                image_messages.append(
-                    {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                image_content.append(
+                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image_str}"}}
                 )
-            elif hasattr(field_type, "element_type") and issubclass(field_type.element_type, ImageFilepathField):
+            elif field_type.annotation in [list[ImageFilepath], list[ImageFilepath] | None]:
                 for image_filepath in field_value:
                     with open(image_filepath, "rb") as f:
                         base64_image_str = base64.b64encode(f.read()).decode("utf-8")
-                    image_messages.append(
-                        {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                    image_content.append(
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image_str}"}}
                     )
             # image url (or list of image urls)
-            elif isinstance(field_type, ImageURLField):
-                image_messages.append({"role": "user", "type": "image", "content": field_value})
+            elif field_type.annotation in [ImageURL, ImageURL | None]:
+                image_content.append({"type": "image_url", "image_url": {"url": field_value}})
-            elif hasattr(field_type, "element_type") and issubclass(field_type.element_type, ImageURLField):
+            elif field_type.annotation in [list[ImageURL], list[ImageURL] | None]:
                 for image_url in field_value:
-                    image_messages.append({"role": "user", "type": "image", "content": image_url})
+                    image_content.append({"type": "image_url", "image_url": {"url": image_url}})
             # pre-encoded images (or list of pre-encoded images)
-            elif isinstance(field_type, ImageBase64Field):
-                base64_image_str = field_value.decode("utf-8")
-                image_messages.append(
-                    {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+            elif field_type.annotation in [ImageBase64, ImageBase64 | None]:
+                image_content.append(
+                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{field_value}"}}
                 )
-            elif hasattr(field_type, "element_type") and issubclass(field_type.element_type, ImageBase64Field):
+            elif field_type.annotation in [list[ImageBase64], list[ImageBase64] | None]:
                 for base64_image in field_value:
-                    base64_image_str = base64_image.decode("utf-8")
-                    image_messages.append(
-                        {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                    image_content.append(
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                     )
-        return image_messages
+        return [{"role": "user", "type": "image", "content": image_content}] if len(image_content) > 0 else []
     def _get_system_prompt(self, **format_kwargs) -> str | None:
         """
@@ -631,7 +914,7 @@ class PromptFactory:
         return base_prompt.format(**format_kwargs)
-    def _get_user_messages(self, candidate: DataRecord, input_fields: list[str], **kwargs) -> str:
+    def _get_user_messages(self, candidate: DataRecord, input_fields: list[str], right_candidate: DataRecord | None, right_input_fields: list[str], **kwargs) -> str:
         """
         Returns a list of messages for the chat payload based on the prompt strategy.
@@ -648,10 +931,18 @@ class PromptFactory:
         # get the base prompt template
         base_prompt = self.BASE_USER_PROMPT_MAP.get(self.prompt_strategy)
-        # get any image messages for the chat payload (will be an empty list if this is not an image prompt)
-        image_messages = (
-            self._create_image_messages(candidate, input_fields) if self.prompt_strategy.is_image_prompt() else []
-        )
+        # get any image messages for the chat payload (will be an empty list if no image fields exist)
+        image_messages = self._create_image_messages(candidate, input_fields)
+        # get any audio messages for the chat payload (will be an empty list if no audio fields exist)
+        audio_messages = self._create_audio_messages(candidate, input_fields)
+        # get any right image and audio messages for the chat payload (both will be empty lists if this is not a join prompt)
+        right_image_messages, right_audio_messages = [], []
+        if self.prompt_strategy.is_join_prompt():
+            assert right_candidate is not None, "Right candidate must be provided for join prompts."
+            right_image_messages = self._create_image_messages(right_candidate, right_input_fields)
+            right_audio_messages = self._create_audio_messages(right_candidate, right_input_fields)
         # get any original messages for critique and refinement operations
         original_messages = kwargs.get("original_messages")
@@ -660,6 +951,8 @@ class PromptFactory:
                 "Original messages must be provided for critique and refinement operations."
             )
+        # TODO: in the future if we support many modalities (e.g. images and audio) in the same prompt,
+        #       then we will need to streamline this logic to handle the many different cases
         # construct the user messages based on the prompt strategy
         user_messages = []
         if self.prompt_strategy.is_critic_prompt() or self.prompt_strategy.is_refine_prompt():
@@ -670,14 +963,47 @@ class PromptFactory:
             user_messages.extend(original_messages)
             user_messages.append({"role": "user", "type": "text", "content": base_prompt_end.format(**kwargs)})
-        elif self.prompt_strategy.is_image_prompt():
-            base_prompt_start, base_prompt_end = base_prompt.split("<<image-placeholder>>\n")
+        # image not join
+        elif self.prompt_strategy.is_image_prompt() and not self.prompt_strategy.is_join_prompt():
+            base_prompt = base_prompt.replace("<<audio-placeholder>>", "")
+            base_prompt_start, base_prompt_end = base_prompt.split("<<image-placeholder>>")
             user_messages.append({"role": "user", "type": "text", "content": base_prompt_start.format(**kwargs)})
             user_messages.extend(image_messages)
             user_messages.append({"role": "user", "type": "text", "content": base_prompt_end.format(**kwargs)})
+        # image join
+        elif self.prompt_strategy.is_image_prompt() and self.prompt_strategy.is_join_prompt():
+            # for join image prompts, we may have two sets of images (one from the left candidate and one from the right candidate)
+            base_prompt = base_prompt.replace("<<audio-placeholder>>", "")
+            base_prompt_start, base_prompt_mid, base_prompt_end = base_prompt.split("<<image-placeholder>>")
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_start.format(**kwargs)})
+            user_messages.extend(image_messages)
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_mid.format(**kwargs)})
+            user_messages.extend(right_image_messages)
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_end.format(**kwargs)})
+        # audio not join
+        elif self.prompt_strategy.is_audio_prompt() and not self.prompt_strategy.is_join_prompt():
+            base_prompt = base_prompt.replace("<<image-placeholder>>", "")
+            base_prompt_start, base_prompt_end = base_prompt.split("<<audio-placeholder>>")
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_start.format(**kwargs)})
+            user_messages.extend(audio_messages)
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_end.format(**kwargs)})
+        # audio join
+        elif self.prompt_strategy.is_audio_prompt() and self.prompt_strategy.is_join_prompt():
+            # for join audio prompts, we may have two sets of audio recordings (one from the left candidate and one from the right candidate)
+            base_prompt = base_prompt.replace("<<image-placeholder>>", "")
+            base_prompt_start, base_prompt_mid, base_prompt_end = base_prompt.split("<<audio-placeholder>>")
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_start.format(**kwargs)})
+            user_messages.extend(audio_messages)
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_mid.format(**kwargs)})
+            user_messages.extend(right_audio_messages)
+            user_messages.append({"role": "user", "type": "text", "content": base_prompt_end.format(**kwargs)})
         else:
             base_prompt = base_prompt.replace("<<image-placeholder>>", "")
+            base_prompt = base_prompt.replace("<<audio-placeholder>>", "")
             user_messages.append({"role": "user", "type": "text", "content": base_prompt.format(**kwargs)})
         return user_messages
@@ -720,7 +1046,7 @@ class PromptFactory:
         # build set of format kwargs
         format_kwargs = {
             field_name: "<bytes>"
-            if isinstance(candidate.get_field_type(field_name), BytesField)
+            if candidate.get_field_type(field_name).annotation in [bytes, bytes | None]
             else candidate[field_name]
             for field_name in input_fields
         }
@@ -740,7 +1066,7 @@ class PromptFactory:
         return messages
-    def create_messages(self, candidate: DataRecord, output_fields: list[str], **kwargs) -> list[dict]:
+    def create_messages(self, candidate: DataRecord, output_fields: list[str], right_candidate: DataRecord | None = None, **kwargs) -> list[dict]:
         """
         Creates the messages for the chat payload based on the prompt strategy.
@@ -754,6 +1080,7 @@ class PromptFactory:
         Args:
             candidate (DataRecord): The input record.
             output_fields (list[str]): The output fields.
+            right_candidate (DataRecord | None): The other join input record (only provided for joins).
             kwargs: The keyword arguments provided by the user.
         Returns:
@@ -761,6 +1088,7 @@ class PromptFactory:
         """
         # compute the set of input fields
         input_fields = self._get_input_fields(candidate, **kwargs)
+        right_input_fields = [] if right_candidate is None else self._get_input_fields(right_candidate, **kwargs)
         # if the user provides a prompt, we process that prompt into messages and return them
         if "prompt" in kwargs:
@@ -774,7 +1102,7 @@ class PromptFactory:
         messages = []
         # compute the full dictionary of format kwargs and add to kwargs
-        format_kwargs = self._get_all_format_kwargs(candidate, input_fields, output_fields, **kwargs)
+        format_kwargs = self._get_all_format_kwargs(candidate, input_fields, output_fields, right_candidate, right_input_fields, **kwargs)
         kwargs = {**kwargs, **format_kwargs}
         # generate system message (if applicable)
@@ -783,7 +1111,7 @@ class PromptFactory:
             messages.append({"role": "system", "type": "text", "content": system_prompt})
         # generate user messages and add to messages
-        user_messages = self._get_user_messages(candidate, input_fields, **kwargs)
+        user_messages = self._get_user_messages(candidate, input_fields, right_candidate, right_input_fields, **kwargs)
         messages.extend(user_messages)
         return messages

palimpzest 0.7.21__py3-none-any.whl → 0.8.1__py3-none-any.whl

palimpzest 0.7.21py3-none-any.whl → 0.8.1py3-none-any.whl