palimpzest 1.3.0__tar.gz → 1.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {palimpzest-1.3.0/src/palimpzest.egg-info → palimpzest-1.3.2}/PKG-INFO +2 -2
- {palimpzest-1.3.0 → palimpzest-1.3.2}/pyproject.toml +2 -2
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/generators/generators.py +2 -13
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/optimizer.py +1 -1
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/rules.py +107 -76
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/config.py +8 -2
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/query_processor_factory.py +3 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/model_helpers.py +28 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2/src/palimpzest.egg-info}/PKG-INFO +2 -2
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest.egg-info/requires.txt +1 -1
- {palimpzest-1.3.0 → palimpzest-1.3.2}/LICENSE +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/README.md +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/setup.cfg +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/agents/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/agents/compute_agents.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/agents/search_agents.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/constants.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/context.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/context_manager.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/dataset.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/index_dataset.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/data/iter_dataset.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/elements/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/elements/filters.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/elements/groupbysig.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/elements/records.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/lib/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/lib/schemas.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/core/models.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/policy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/agent_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/aggregate_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/context_search.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/convert_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/critique_and_refine_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/filter_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/join_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/moa_aggregator_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/moa_proposer_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/prompt_factory.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/split_merge_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/split_proposer_prompts.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/utils.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/prompts/validator.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/all_sample_execution_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/execution_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/execution_strategy_type.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/mab_execution_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/parallel_execution_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/execution/single_threaded_execution_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/generators/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/aggregate.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/compute.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/convert.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/critique_and_refine.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/distinct.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/filter.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/join.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/limit.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/logical.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/mixture_of_agents.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/physical.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/project.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/rag.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/scan.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/search.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/split.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/operators/topk.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/cost_model.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/optimizer_strategy.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/optimizer_strategy_type.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/plan.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/primitives.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/tasks.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/query_processor.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/schemabuilder/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/schemabuilder/schema_builder.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/tools/README.md +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/tools/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/tools/allenpdf.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/tools/pdfparser.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/tools/skema_tools.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/env_helpers.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/hash_helpers.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/progress.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/udfs.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/validator/__init__.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/validator/validator.py +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest.egg-info/SOURCES.txt +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest.egg-info/dependency_links.txt +0 -0
- {palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest.egg-info/top_level.txt +0 -0

{palimpzest-1.3.0/src/palimpzest.egg-info → palimpzest-1.3.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: palimpzest
-Version: 1.3.0
+Version: 1.3.2
 Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
 Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
 Project-URL: homepage, https://palimpzest.org
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.8
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: anthropic>=0.
+Requires-Dist: anthropic>=0.46.0
 Requires-Dist: beautifulsoup4>=4.13.4
 Requires-Dist: chromadb>=1.0.15
 Requires-Dist: colorama>=0.4.6

{palimpzest-1.3.0 → palimpzest-1.3.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "palimpzest"
-version = "1.3.0"
+version = "1.3.2"
 description = "Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -9,7 +9,7 @@ authors = [
     {name="MIT DSG Semantic Management Lab", email="michjc@csail.mit.edu"},
 ]
 dependencies = [
-    "anthropic>=0.
+    "anthropic>=0.46.0",
     "beautifulsoup4>=4.13.4",
     "chromadb>=1.0.15",
     "colorama>=0.4.6",

{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/generators/generators.py

@@ -108,7 +108,7 @@ class Generator(Generic[ContextType, InputType]):
         self,
         model: Model,
         prompt_strategy: PromptStrategy,
-        reasoning_effort: str | None
+        reasoning_effort: str | None,
         api_base: str | None = None,
         cardinality: Cardinality = Cardinality.ONE_TO_ONE,
         desc: str | None = None,
@@ -325,18 +325,7 @@ class Generator(Generic[ContextType, InputType]):
         if is_audio_op:
             completion_kwargs = {"modalities": ["text"], **completion_kwargs}
         if self.model.is_reasoning_model():
-
-            reasoning_effort = self.reasoning_effort
-            if self.reasoning_effort is None and self.model == Model.GEMINI_2_5_PRO:
-                reasoning_effort = "low"
-            elif self.reasoning_effort is None:
-                reasoning_effort = "disable"
-            completion_kwargs = {"reasoning_effort": reasoning_effort, **completion_kwargs}
-        elif self.model.is_anthropic_model() and self.reasoning_effort is not None:
-            completion_kwargs = {"reasoning_effort": self.reasoning_effort, **completion_kwargs}
-        elif self.model.is_openai_model():
-            reasoning_effort = "minimal" if self.reasoning_effort is None else self.reasoning_effort
-            completion_kwargs = {"reasoning_effort": reasoning_effort, **completion_kwargs}
+            completion_kwargs = {"reasoning_effort": self.reasoning_effort, **completion_kwargs}
         if self.model.is_vllm_model():
             completion_kwargs = {"api_base": self.api_base, "api_key": os.environ.get("VLLM_API_KEY", "fake-api-key"), **completion_kwargs}
         completion = litellm.completion(model=self.model_name, messages=messages, **completion_kwargs)

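The upshot of this change: all provider-specific translation of reasoning_effort moves out of the generator, which now forwards the value it was constructed with. A minimal sketch of the resulting control flow, assuming litellm-style completion kwargs (build_completion_kwargs is an illustrative name, not a function in the package):

import os

def build_completion_kwargs(model, reasoning_effort, api_base, **completion_kwargs):
    # reasoning_effort arrives pre-resolved (see resolve_reasoning_settings in
    # the model_helpers.py diff below), so there is no per-provider branching here
    if model.is_reasoning_model():
        completion_kwargs = {"reasoning_effort": reasoning_effort, **completion_kwargs}
    if model.is_vllm_model():
        completion_kwargs = {
            "api_base": api_base,
            "api_key": os.environ.get("VLLM_API_KEY", "fake-api-key"),
            **completion_kwargs,
        }
    return completion_kwargs

The resolution logic that used to live in the deleted branches now runs once, upstream in the optimizer.
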
{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/optimizer.py

@@ -75,7 +75,7 @@ class Optimizer:
         cost_model: BaseCostModel,
         available_models: list[Model],
         join_parallelism: int = 64,
-        reasoning_effort: str | None =
+        reasoning_effort: str | None = "default",
         api_base: str | None = None,
         verbose: bool = False,
         allow_bonded_query: bool = True,

{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/optimizer/rules.py

@@ -54,6 +54,7 @@ from palimpzest.query.operators.search import (
 from palimpzest.query.operators.split import SplitConvert, SplitFilter
 from palimpzest.query.operators.topk import TopKOp
 from palimpzest.query.optimizer.primitives import Expression, Group, LogicalExpression, PhysicalExpression
+from palimpzest.utils.model_helpers import resolve_reasoning_settings
 
 logger = logging.getLogger(__name__)
 
@@ -628,15 +629,17 @@ class LLMConvertBondedRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        … (9 deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = PromptStrategy.MAP if use_reasoning_prompt else PromptStrategy.MAP_NO_REASONING
+            variable_op_kwargs.append(
+                {
+                    "model": model,
+                    "prompt_strategy": prompt_strategy,
+                    "reasoning_effort": reasoning_effort,
+                }
+            )
 
         return cls._perform_substitution(logical_expression, LLMConvertBonded, runtime_kwargs, variable_op_kwargs)
 
@@ -665,18 +668,27 @@ class RAGRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        variable_op_kwargs = [
-        … (11 more deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = (
+                PromptStrategy.MAP if use_reasoning_prompt else PromptStrategy.MAP_NO_REASONING
+                if phys_op_cls is RAGConvert
+                else PromptStrategy.FILTER if use_reasoning_prompt else PromptStrategy.FILTER_NO_REASONING
+            )
+            variable_op_kwargs.extend(
+                [
+                    {
+                        "model": model,
+                        "prompt_strategy": prompt_strategy,
+                        "num_chunks_per_field": num_chunks_per_field,
+                        "chunk_size": chunk_size,
+                        "reasoning_effort": reasoning_effort,
+                    }
+                    for num_chunks_per_field in cls.num_chunks_per_fields
+                    for chunk_size in cls.chunk_sizes
+                ]
+            )
 
         return cls._perform_substitution(logical_expression, phys_op_cls, runtime_kwargs, variable_op_kwargs)
 
@@ -704,6 +716,7 @@ class MixtureOfAgentsRule(ImplementationRule):
         phys_op_cls = MixtureOfAgentsConvert if isinstance(logical_expression.operator, ConvertScan) else MixtureOfAgentsFilter
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
+        _, reasoning_effort = resolve_reasoning_settings(None, runtime_kwargs["reasoning_effort"])
         proposer_model_set = {model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)}
         aggregator_model_set = {model for model in runtime_kwargs["available_models"] if model.is_text_model()}
         variable_op_kwargs = [
@@ -711,7 +724,7 @@ class MixtureOfAgentsRule(ImplementationRule):
                 "proposer_models": list(proposer_models),
                 "temperatures": [temp] * len(proposer_models),
                 "aggregator_model": aggregator_model,
-                "reasoning_effort":
+                "reasoning_effort": reasoning_effort,
             }
             for k in cls.num_proposer_models
             for temp in cls.temperatures
@@ -743,18 +756,27 @@ class CritiqueAndRefineRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        variable_op_kwargs = [
-        … (11 more deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = (
+                PromptStrategy.MAP if use_reasoning_prompt else PromptStrategy.MAP_NO_REASONING
+                if phys_op_cls is CritiqueAndRefineConvert
+                else PromptStrategy.FILTER if use_reasoning_prompt else PromptStrategy.FILTER_NO_REASONING
+            )
+            variable_op_kwargs.extend(
+                [
+                    {
+                        "model": model,
+                        "critic_model": critic_model,
+                        "refine_model": refine_model,
+                        "prompt_strategy": prompt_strategy,
+                        "reasoning_effort": reasoning_effort,
+                    }
+                    for critic_model in models
+                    for refine_model in models
+                ]
+            )
 
         return cls._perform_substitution(logical_expression, phys_op_cls, runtime_kwargs, variable_op_kwargs)
 
@@ -782,12 +804,13 @@ class SplitRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
+        _, reasoning_effort = resolve_reasoning_settings(None, runtime_kwargs["reasoning_effort"])
         variable_op_kwargs = [
             {
                 "model": model,
                 "min_size_to_chunk": min_size_to_chunk,
                 "num_chunks": num_chunks,
-                "reasoning_effort":
+                "reasoning_effort": reasoning_effort,
             }
             for model in models
             for min_size_to_chunk in cls.min_size_to_chunk
@@ -855,15 +878,17 @@ class LLMFilterRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        … (9 deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = PromptStrategy.FILTER if use_reasoning_prompt else PromptStrategy.FILTER_NO_REASONING
+            variable_op_kwargs.append(
+                {
+                    "model": model,
+                    "prompt_strategy": prompt_strategy,
+                    "reasoning_effort": reasoning_effort,
+                }
+            )
 
         return cls._perform_substitution(logical_expression, LLMFilter, runtime_kwargs, variable_op_kwargs)
 
@@ -902,17 +927,19 @@ class NestedLoopsJoinRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        … (11 deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = PromptStrategy.JOIN if use_reasoning_prompt else PromptStrategy.JOIN_NO_REASONING
+            variable_op_kwargs.append(
+                {
+                    "model": model,
+                    "prompt_strategy": prompt_strategy,
+                    "join_parallelism": runtime_kwargs["join_parallelism"],
+                    "reasoning_effort": reasoning_effort,
+                    "retain_inputs": not runtime_kwargs["is_validation"],
+                }
+            )
 
         return cls._perform_substitution(logical_expression, NestedLoopsJoin, runtime_kwargs, variable_op_kwargs)
 
@@ -934,18 +961,20 @@ class EmbeddingJoinRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression)]
-        … (12 deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = PromptStrategy.JOIN if use_reasoning_prompt else PromptStrategy.JOIN_NO_REASONING
+            variable_op_kwargs.append(
+                {
+                    "model": model,
+                    "prompt_strategy": prompt_strategy,
+                    "join_parallelism": runtime_kwargs["join_parallelism"],
+                    "reasoning_effort": reasoning_effort,
+                    "retain_inputs": not runtime_kwargs["is_validation"],
+                    "num_samples": 10,  # TODO: iterate over different choices of num_samples
+                }
+            )
 
         return cls._perform_substitution(logical_expression, EmbeddingJoin, runtime_kwargs, variable_op_kwargs)
 
@@ -966,15 +995,17 @@ class SemanticAggregateRule(ImplementationRule):
 
         # create variable physical operator kwargs for each model which can implement this logical_expression
         models = [model for model in runtime_kwargs["available_models"] if cls._model_matches_input(model, logical_expression) and not model.is_llama_model()]
-        … (9 deleted lines not preserved in this diff view)
+        variable_op_kwargs = []
+        for model in models:
+            use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(model, runtime_kwargs["reasoning_effort"])
+            prompt_strategy = PromptStrategy.AGG if use_reasoning_prompt else PromptStrategy.AGG_NO_REASONING
+            variable_op_kwargs.append(
+                {
+                    "model": model,
+                    "prompt_strategy": prompt_strategy,
+                    "reasoning_effort": reasoning_effort,
+                }
+            )
 
         return cls._perform_substitution(logical_expression, SemanticAggregate, runtime_kwargs, variable_op_kwargs)

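All of the rewritten rules above share one construction pattern and differ only in the prompt-strategy pair and any extra per-operator kwargs. A condensed sketch of that shared loop (make_variable_op_kwargs is an illustrative helper, not a function defined in rules.py):

from palimpzest.utils.model_helpers import resolve_reasoning_settings

def make_variable_op_kwargs(models, runtime_kwargs, reasoning_strategy, plain_strategy, **extra_kwargs):
    # one kwargs dict per candidate model, mirroring LLMConvertBondedRule,
    # LLMFilterRule, NestedLoopsJoinRule, etc.
    variable_op_kwargs = []
    for model in models:
        use_reasoning_prompt, reasoning_effort = resolve_reasoning_settings(
            model, runtime_kwargs["reasoning_effort"]
        )
        variable_op_kwargs.append(
            {
                "model": model,
                "prompt_strategy": reasoning_strategy if use_reasoning_prompt else plain_strategy,
                "reasoning_effort": reasoning_effort,
                **extra_kwargs,
            }
        )
    return variable_op_kwargs

Passing PromptStrategy.FILTER and PromptStrategy.FILTER_NO_REASONING reproduces the LLMFilterRule body; supplying join_parallelism and retain_inputs through extra_kwargs gives the join rules.
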
{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/config.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from pydantic import BaseModel, ConfigDict, Field
 
 from palimpzest.constants import Model
@@ -18,7 +20,7 @@ class QueryProcessorConfig(BaseModel):
     policy: Policy = Field(default_factory=MaxQuality)
     enforce_types: bool = Field(default=False)
     scan_start_idx: int = Field(default=0)
-    num_samples: int = Field(default=None)
+    num_samples: int | None = Field(default=None)
     verbose: bool = Field(default=False)
     progress: bool = Field(default=True)
     available_models: list[Model] | None = Field(default=None)
@@ -26,7 +28,7 @@ class QueryProcessorConfig(BaseModel):
     max_workers: int | None = Field(default=64)
     join_parallelism: int = Field(default=64)
     batch_size: int | None = Field(default=None)
-    reasoning_effort: str | None = Field(default=
+    reasoning_effort: str | None = Field(default="default")  # Gemini: "disable", "low", "medium", "high"
     use_vertex: bool = Field(default=False)  # Whether to use Vertex models for Gemini or Google models
     gemini_credentials_path: str | None = Field(default=None)  # Path to Gemini credentials file
     api_base: str | None = Field(default=None)  # API base URL for vLLM
@@ -53,3 +55,7 @@ class QueryProcessorConfig(BaseModel):
     def to_dict(self) -> dict:
         """Convert the config to a dict representation."""
         return self.model_dump()
+
+    def copy(self) -> QueryProcessorConfig:
+        """Create a copy of the config."""
+        return QueryProcessorConfig(**self.to_dict())

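The new copy() round-trips the config through model_dump(), and the from __future__ import annotations line is what lets the method annotate its own class as the return type. A small sketch of the isolation it provides (field values are arbitrary; this assumes pydantic's default mutable models):

from palimpzest.query.processor.config import QueryProcessorConfig

config = QueryProcessorConfig(num_samples=5)
copied = config.copy()
copied.num_samples = 100        # mutate only the copy

assert copied is not config
assert config.num_samples == 5  # original is unchanged

The query_processor_factory.py change below relies on exactly this property.
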
{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/query/processor/query_processor_factory.py

@@ -170,6 +170,9 @@ class QueryProcessorFactory:
         if config is None:
             config = QueryProcessorConfig()
 
+        # make a copy of the config to avoid modifying the original
+        config = config.copy()
+
         # apply any additional keyword arguments to the config and validate its contents
         config, validator = cls._config_validation_and_normalization(config, train_dataset, validator)
 
{palimpzest-1.3.0 → palimpzest-1.3.2}/src/palimpzest/utils/model_helpers.py

@@ -58,3 +58,31 @@ def get_models(include_embedding: bool = False, use_vertex: bool = False, gemini
         models.extend(vllm_models)
 
     return models
+
+
+def resolve_reasoning_settings(model: Model | None, reasoning_effort: str | None) -> tuple[bool, str]:
+    """
+    Resolve the reasoning settings based on the model and provided reasoning effort.
+    Returns a tuple indicating whether reasoning prompt should be used and the reasoning effort level.
+    By default, we use the reasoning prompt everywhere while setting the model reasoning effort to None (or minimal).
+    If a user explicitly provides a reasoning_effort, we pass that through to the model.
+    If the user explicitly disables reasoning_effort, we disable the reasoning prompt as well.
+    """
+    # turn off reasoning prompt if reasoning_effort is in [None, "disable", "minimal", "low"]
+    use_reasoning_prompt = reasoning_effort not in [None, "disable", "minimal", "low"]
+
+    # if reasoning_effort is set to "default", set it to None to use model defaults
+    if reasoning_effort == "default":
+        reasoning_effort = None
+
+    # translate reasoning_effort into model-specific settings
+    if model is not None and model.is_reasoning_model():
+        if model.is_vertex_model() or model.is_google_ai_studio_model():
+            if reasoning_effort is None and model in [Model.GEMINI_2_5_PRO, Model.GOOGLE_GEMINI_2_5_PRO]:
+                reasoning_effort = "low"
+            elif reasoning_effort is None:
+                reasoning_effort = "disable"
+        elif model.is_openai_model():
+            reasoning_effort = "minimal" if reasoning_effort in [None, "disable", "minimal", "low"] else reasoning_effort
+
+    return use_reasoning_prompt, reasoning_effort

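Tracing resolve_reasoning_settings on a few representative inputs shows the contract the optimizer rules rely on. The expected results in the comments follow directly from the body above; the last line assumes Model.GEMINI_2_5_PRO reports itself as a reasoning-capable Vertex/AI Studio model, as the branch it exercises implies:

from palimpzest.constants import Model
from palimpzest.utils.model_helpers import resolve_reasoning_settings

# "default" keeps the reasoning prompt and defers effort to the model/provider
print(resolve_reasoning_settings(None, "default"))   # (True, None)

# explicitly disabling effort also turns the reasoning prompt off
print(resolve_reasoning_settings(None, "disable"))   # (False, "disable")

# an explicit "high" passes straight through
print(resolve_reasoning_settings(None, "high"))      # (True, "high")

# Gemini 2.5 Pro maps an unresolved effort to "low" instead of "disable"
print(resolve_reasoning_settings(Model.GEMINI_2_5_PRO, "default"))  # (True, "low")
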
{palimpzest-1.3.0 → palimpzest-1.3.2/src/palimpzest.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: palimpzest
-Version: 1.3.0
+Version: 1.3.2
 Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
 Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
 Project-URL: homepage, https://palimpzest.org
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.8
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: anthropic>=0.
+Requires-Dist: anthropic>=0.46.0
 Requires-Dist: beautifulsoup4>=4.13.4
 Requires-Dist: chromadb>=1.0.15
 Requires-Dist: colorama>=0.4.6