palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. palimpzest/constants.py +38 -62
  2. palimpzest/core/data/dataset.py +1 -1
  3. palimpzest/core/data/iter_dataset.py +5 -5
  4. palimpzest/core/elements/groupbysig.py +1 -1
  5. palimpzest/core/elements/records.py +91 -109
  6. palimpzest/core/lib/schemas.py +23 -0
  7. palimpzest/core/models.py +3 -3
  8. palimpzest/prompts/__init__.py +2 -6
  9. palimpzest/prompts/convert_prompts.py +10 -66
  10. palimpzest/prompts/critique_and_refine_prompts.py +66 -0
  11. palimpzest/prompts/filter_prompts.py +8 -46
  12. palimpzest/prompts/join_prompts.py +12 -75
  13. palimpzest/prompts/{moa_aggregator_convert_prompts.py → moa_aggregator_prompts.py} +51 -2
  14. palimpzest/prompts/moa_proposer_prompts.py +87 -0
  15. palimpzest/prompts/prompt_factory.py +351 -479
  16. palimpzest/prompts/split_merge_prompts.py +51 -2
  17. palimpzest/prompts/split_proposer_prompts.py +48 -16
  18. palimpzest/prompts/utils.py +109 -0
  19. palimpzest/query/execution/all_sample_execution_strategy.py +1 -1
  20. palimpzest/query/execution/execution_strategy.py +4 -4
  21. palimpzest/query/execution/mab_execution_strategy.py +47 -23
  22. palimpzest/query/execution/parallel_execution_strategy.py +3 -3
  23. palimpzest/query/execution/single_threaded_execution_strategy.py +8 -8
  24. palimpzest/query/generators/generators.py +31 -17
  25. palimpzest/query/operators/__init__.py +15 -2
  26. palimpzest/query/operators/aggregate.py +21 -19
  27. palimpzest/query/operators/compute.py +6 -8
  28. palimpzest/query/operators/convert.py +12 -37
  29. palimpzest/query/operators/critique_and_refine.py +194 -0
  30. palimpzest/query/operators/distinct.py +7 -7
  31. palimpzest/query/operators/filter.py +13 -25
  32. palimpzest/query/operators/join.py +321 -192
  33. palimpzest/query/operators/limit.py +4 -4
  34. palimpzest/query/operators/mixture_of_agents.py +246 -0
  35. palimpzest/query/operators/physical.py +25 -2
  36. palimpzest/query/operators/project.py +4 -4
  37. palimpzest/query/operators/{rag_convert.py → rag.py} +202 -5
  38. palimpzest/query/operators/retrieve.py +10 -9
  39. palimpzest/query/operators/scan.py +9 -10
  40. palimpzest/query/operators/search.py +18 -24
  41. palimpzest/query/operators/split.py +321 -0
  42. palimpzest/query/optimizer/__init__.py +12 -8
  43. palimpzest/query/optimizer/optimizer.py +12 -10
  44. palimpzest/query/optimizer/rules.py +201 -108
  45. palimpzest/query/optimizer/tasks.py +18 -6
  46. palimpzest/query/processor/config.py +2 -2
  47. palimpzest/query/processor/query_processor.py +2 -2
  48. palimpzest/query/processor/query_processor_factory.py +9 -5
  49. palimpzest/validator/validator.py +7 -9
  50. {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/METADATA +3 -8
  51. palimpzest-0.8.3.dist-info/RECORD +95 -0
  52. palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -216
  53. palimpzest/prompts/moa_proposer_convert_prompts.py +0 -75
  54. palimpzest/prompts/util_phrases.py +0 -19
  55. palimpzest/query/operators/critique_and_refine_convert.py +0 -113
  56. palimpzest/query/operators/mixture_of_agents_convert.py +0 -140
  57. palimpzest/query/operators/split_convert.py +0 -170
  58. palimpzest-0.8.1.dist-info/RECORD +0 -95
  59. {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/WHEEL +0 -0
  60. {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/licenses/LICENSE +0 -0
  61. {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/top_level.txt +0 -0
@@ -41,11 +41,6 @@ class FilterOp(PhysicalOperator, ABC):
41
41
  op_params = super().get_op_params()
42
42
  return {"filter": self.filter_obj, "desc": self.desc, **op_params}
43
43
 
44
- @abstractmethod
45
- def is_image_filter(self) -> bool:
46
- """Return True if the filter operation processes an image, False otherwise."""
47
- pass
48
-
49
44
  @abstractmethod
50
45
  def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], GenerationStats]:
51
46
  """
@@ -76,14 +71,14 @@ class FilterOp(PhysicalOperator, ABC):
76
71
  construct the resulting RecordSet.
77
72
  """
78
73
  # create new DataRecord and set passed_operator attribute
79
- dr = DataRecord.from_parent(candidate.schema, parent_record=candidate)
80
- dr.passed_operator = passed_operator
74
+ dr = DataRecord.from_parent(schema=candidate.schema, data_item={}, parent_record=candidate)
75
+ dr._passed_operator = passed_operator
81
76
 
82
77
  # create RecordOpStats object
83
78
  record_op_stats = RecordOpStats(
84
- record_id=dr.id,
85
- record_parent_ids=dr.parent_ids,
86
- record_source_indices=dr.source_indices,
79
+ record_id=dr._id,
80
+ record_parent_ids=dr._parent_ids,
81
+ record_source_indices=dr._source_indices,
87
82
  record_state=dr.to_dict(include_bytes=False),
88
83
  full_op_id=self.get_full_op_id(),
89
84
  logical_op_id=self.logical_op_id,
@@ -102,7 +97,6 @@ class FilterOp(PhysicalOperator, ABC):
102
97
  total_embedding_llm_calls=generation_stats.total_embedding_llm_calls,
103
98
  answer=answer,
104
99
  passed_operator=passed_operator,
105
- image_operation=self.is_image_filter(),
106
100
  op_details={k: str(v) for k, v in self.get_id_params().items()},
107
101
  )
108
102
 
@@ -127,10 +121,6 @@ class FilterOp(PhysicalOperator, ABC):
127
121
 
128
122
 
129
123
  class NonLLMFilter(FilterOp):
130
- def is_image_filter(self) -> bool:
131
- # NOTE: even if the UDF is processing an image, we do not consider this an image filter
132
- # (the output of this function will be used by the CostModel in a way which does not apply to UDFs)
133
- return False
134
124
 
135
125
  def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
136
126
  # estimate output cardinality using a constant assumption of the filter selectivity
@@ -174,7 +164,7 @@ class LLMFilter(FilterOp):
174
164
  def __init__(
175
165
  self,
176
166
  model: Model,
177
- prompt_strategy: PromptStrategy = PromptStrategy.COT_BOOL,
167
+ prompt_strategy: PromptStrategy = PromptStrategy.FILTER,
178
168
  reasoning_effort: str | None = None,
179
169
  *args,
180
170
  **kwargs,
@@ -183,13 +173,14 @@ class LLMFilter(FilterOp):
183
173
  self.model = model
184
174
  self.prompt_strategy = prompt_strategy
185
175
  self.reasoning_effort = reasoning_effort
186
- self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
176
+ if model is not None:
177
+ self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
187
178
 
188
179
  def get_id_params(self):
189
180
  id_params = super().get_id_params()
190
181
  id_params = {
191
- "model": self.model.value,
192
- "prompt_strategy": self.prompt_strategy.value,
182
+ "model": None if self.model is None else self.model.value,
183
+ "prompt_strategy": None if self.prompt_strategy is None else self.prompt_strategy.value,
193
184
  "reasoning_effort": self.reasoning_effort,
194
185
  **id_params,
195
186
  }
@@ -208,15 +199,12 @@ class LLMFilter(FilterOp):
208
199
  return op_params
209
200
 
210
201
  def get_model_name(self):
211
- return self.model.value
212
-
213
- def is_image_filter(self) -> bool:
214
- return self.prompt_strategy is PromptStrategy.COT_BOOL_IMAGE
202
+ return None if self.model is None else self.model.value
215
203
 
216
204
  def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
217
205
  # estimate number of input tokens from source
218
206
  est_num_input_tokens = NAIVE_EST_NUM_INPUT_TOKENS
219
- if self.is_image_filter():
207
+ if self.is_image_op():
220
208
  est_num_input_tokens = 765 / 10 # 1024x1024 image is 765 tokens
221
209
 
222
210
  # NOTE: the output often generates an entire reasoning sentence, thus the true value may be higher
@@ -232,7 +220,7 @@ class LLMFilter(FilterOp):
232
220
  # get est. of conversion cost (in USD) per record from model card
233
221
  usd_per_input_token = (
234
222
  MODEL_CARDS[self.model.value]["usd_per_audio_input_token"]
235
- if self.prompt_strategy.is_audio_prompt()
223
+ if self.is_audio_op()
236
224
  else MODEL_CARDS[self.model.value]["usd_per_input_token"]
237
225
  )
238
226
  model_conversion_usd_per_record = (