palimpzest 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/constants.py +38 -62
- palimpzest/core/data/dataset.py +1 -1
- palimpzest/core/data/iter_dataset.py +5 -5
- palimpzest/core/elements/groupbysig.py +1 -1
- palimpzest/core/elements/records.py +91 -109
- palimpzest/core/lib/schemas.py +23 -0
- palimpzest/core/models.py +3 -3
- palimpzest/prompts/__init__.py +2 -6
- palimpzest/prompts/convert_prompts.py +10 -66
- palimpzest/prompts/critique_and_refine_prompts.py +66 -0
- palimpzest/prompts/filter_prompts.py +8 -46
- palimpzest/prompts/join_prompts.py +12 -75
- palimpzest/prompts/{moa_aggregator_convert_prompts.py → moa_aggregator_prompts.py} +51 -2
- palimpzest/prompts/moa_proposer_prompts.py +87 -0
- palimpzest/prompts/prompt_factory.py +351 -479
- palimpzest/prompts/split_merge_prompts.py +51 -2
- palimpzest/prompts/split_proposer_prompts.py +48 -16
- palimpzest/prompts/utils.py +109 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +1 -1
- palimpzest/query/execution/execution_strategy.py +4 -4
- palimpzest/query/execution/mab_execution_strategy.py +47 -23
- palimpzest/query/execution/parallel_execution_strategy.py +3 -3
- palimpzest/query/execution/single_threaded_execution_strategy.py +8 -8
- palimpzest/query/generators/generators.py +31 -17
- palimpzest/query/operators/__init__.py +15 -2
- palimpzest/query/operators/aggregate.py +21 -19
- palimpzest/query/operators/compute.py +6 -8
- palimpzest/query/operators/convert.py +12 -37
- palimpzest/query/operators/critique_and_refine.py +194 -0
- palimpzest/query/operators/distinct.py +7 -7
- palimpzest/query/operators/filter.py +13 -25
- palimpzest/query/operators/join.py +321 -192
- palimpzest/query/operators/limit.py +4 -4
- palimpzest/query/operators/mixture_of_agents.py +246 -0
- palimpzest/query/operators/physical.py +25 -2
- palimpzest/query/operators/project.py +4 -4
- palimpzest/query/operators/{rag_convert.py → rag.py} +202 -5
- palimpzest/query/operators/retrieve.py +10 -9
- palimpzest/query/operators/scan.py +9 -10
- palimpzest/query/operators/search.py +18 -24
- palimpzest/query/operators/split.py +321 -0
- palimpzest/query/optimizer/__init__.py +12 -8
- palimpzest/query/optimizer/optimizer.py +12 -10
- palimpzest/query/optimizer/rules.py +201 -108
- palimpzest/query/optimizer/tasks.py +18 -6
- palimpzest/query/processor/config.py +2 -2
- palimpzest/query/processor/query_processor.py +2 -2
- palimpzest/query/processor/query_processor_factory.py +9 -5
- palimpzest/validator/validator.py +7 -9
- {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/METADATA +3 -8
- palimpzest-0.8.3.dist-info/RECORD +95 -0
- palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -216
- palimpzest/prompts/moa_proposer_convert_prompts.py +0 -75
- palimpzest/prompts/util_phrases.py +0 -19
- palimpzest/query/operators/critique_and_refine_convert.py +0 -113
- palimpzest/query/operators/mixture_of_agents_convert.py +0 -140
- palimpzest/query/operators/split_convert.py +0 -170
- palimpzest-0.8.1.dist-info/RECORD +0 -95
- {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/WHEEL +0 -0
- {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.8.1.dist-info → palimpzest-0.8.3.dist-info}/top_level.txt +0 -0
|
@@ -41,11 +41,6 @@ class FilterOp(PhysicalOperator, ABC):
|
|
|
41
41
|
op_params = super().get_op_params()
|
|
42
42
|
return {"filter": self.filter_obj, "desc": self.desc, **op_params}
|
|
43
43
|
|
|
44
|
-
@abstractmethod
|
|
45
|
-
def is_image_filter(self) -> bool:
|
|
46
|
-
"""Return True if the filter operation processes an image, False otherwise."""
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
44
|
@abstractmethod
|
|
50
45
|
def filter(self, candidate: DataRecord) -> tuple[dict[str, bool], GenerationStats]:
|
|
51
46
|
"""
|
|
@@ -76,14 +71,14 @@ class FilterOp(PhysicalOperator, ABC):
|
|
|
76
71
|
construct the resulting RecordSet.
|
|
77
72
|
"""
|
|
78
73
|
# create new DataRecord and set passed_operator attribute
|
|
79
|
-
dr = DataRecord.from_parent(candidate.schema, parent_record=candidate)
|
|
80
|
-
dr.
|
|
74
|
+
dr = DataRecord.from_parent(schema=candidate.schema, data_item={}, parent_record=candidate)
|
|
75
|
+
dr._passed_operator = passed_operator
|
|
81
76
|
|
|
82
77
|
# create RecordOpStats object
|
|
83
78
|
record_op_stats = RecordOpStats(
|
|
84
|
-
record_id=dr.
|
|
85
|
-
record_parent_ids=dr.
|
|
86
|
-
record_source_indices=dr.
|
|
79
|
+
record_id=dr._id,
|
|
80
|
+
record_parent_ids=dr._parent_ids,
|
|
81
|
+
record_source_indices=dr._source_indices,
|
|
87
82
|
record_state=dr.to_dict(include_bytes=False),
|
|
88
83
|
full_op_id=self.get_full_op_id(),
|
|
89
84
|
logical_op_id=self.logical_op_id,
|
|
@@ -102,7 +97,6 @@ class FilterOp(PhysicalOperator, ABC):
|
|
|
102
97
|
total_embedding_llm_calls=generation_stats.total_embedding_llm_calls,
|
|
103
98
|
answer=answer,
|
|
104
99
|
passed_operator=passed_operator,
|
|
105
|
-
image_operation=self.is_image_filter(),
|
|
106
100
|
op_details={k: str(v) for k, v in self.get_id_params().items()},
|
|
107
101
|
)
|
|
108
102
|
|
|
@@ -127,10 +121,6 @@ class FilterOp(PhysicalOperator, ABC):
|
|
|
127
121
|
|
|
128
122
|
|
|
129
123
|
class NonLLMFilter(FilterOp):
|
|
130
|
-
def is_image_filter(self) -> bool:
|
|
131
|
-
# NOTE: even if the UDF is processing an image, we do not consider this an image filter
|
|
132
|
-
# (the output of this function will be used by the CostModel in a way which does not apply to UDFs)
|
|
133
|
-
return False
|
|
134
124
|
|
|
135
125
|
def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
|
|
136
126
|
# estimate output cardinality using a constant assumption of the filter selectivity
|
|
@@ -174,7 +164,7 @@ class LLMFilter(FilterOp):
|
|
|
174
164
|
def __init__(
|
|
175
165
|
self,
|
|
176
166
|
model: Model,
|
|
177
|
-
prompt_strategy: PromptStrategy = PromptStrategy.
|
|
167
|
+
prompt_strategy: PromptStrategy = PromptStrategy.FILTER,
|
|
178
168
|
reasoning_effort: str | None = None,
|
|
179
169
|
*args,
|
|
180
170
|
**kwargs,
|
|
@@ -183,13 +173,14 @@ class LLMFilter(FilterOp):
|
|
|
183
173
|
self.model = model
|
|
184
174
|
self.prompt_strategy = prompt_strategy
|
|
185
175
|
self.reasoning_effort = reasoning_effort
|
|
186
|
-
|
|
176
|
+
if model is not None:
|
|
177
|
+
self.generator = Generator(model, prompt_strategy, reasoning_effort, self.api_base, Cardinality.ONE_TO_ONE, self.desc, self.verbose)
|
|
187
178
|
|
|
188
179
|
def get_id_params(self):
|
|
189
180
|
id_params = super().get_id_params()
|
|
190
181
|
id_params = {
|
|
191
|
-
"model": self.model.value,
|
|
192
|
-
"prompt_strategy": self.prompt_strategy.value,
|
|
182
|
+
"model": None if self.model is None else self.model.value,
|
|
183
|
+
"prompt_strategy": None if self.prompt_strategy is None else self.prompt_strategy.value,
|
|
193
184
|
"reasoning_effort": self.reasoning_effort,
|
|
194
185
|
**id_params,
|
|
195
186
|
}
|
|
@@ -208,15 +199,12 @@ class LLMFilter(FilterOp):
|
|
|
208
199
|
return op_params
|
|
209
200
|
|
|
210
201
|
def get_model_name(self):
|
|
211
|
-
return self.model.value
|
|
212
|
-
|
|
213
|
-
def is_image_filter(self) -> bool:
|
|
214
|
-
return self.prompt_strategy is PromptStrategy.COT_BOOL_IMAGE
|
|
202
|
+
return None if self.model is None else self.model.value
|
|
215
203
|
|
|
216
204
|
def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates):
|
|
217
205
|
# estimate number of input tokens from source
|
|
218
206
|
est_num_input_tokens = NAIVE_EST_NUM_INPUT_TOKENS
|
|
219
|
-
if self.
|
|
207
|
+
if self.is_image_op():
|
|
220
208
|
est_num_input_tokens = 765 / 10 # 1024x1024 image is 765 tokens
|
|
221
209
|
|
|
222
210
|
# NOTE: the output often generates an entire reasoning sentence, thus the true value may be higher
|
|
@@ -232,7 +220,7 @@ class LLMFilter(FilterOp):
|
|
|
232
220
|
# get est. of conversion cost (in USD) per record from model card
|
|
233
221
|
usd_per_input_token = (
|
|
234
222
|
MODEL_CARDS[self.model.value]["usd_per_audio_input_token"]
|
|
235
|
-
if self.
|
|
223
|
+
if self.is_audio_op()
|
|
236
224
|
else MODEL_CARDS[self.model.value]["usd_per_input_token"]
|
|
237
225
|
)
|
|
238
226
|
model_conversion_usd_per_record = (
|