palimpzest 0.8.1__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {palimpzest-0.8.1/src/palimpzest.egg-info → palimpzest-0.8.2}/PKG-INFO +1 -1
- {palimpzest-0.8.1 → palimpzest-0.8.2}/pyproject.toml +1 -1
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/dataset.py +1 -1
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/mab_execution_strategy.py +46 -21
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/processor/config.py +2 -2
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/processor/query_processor.py +2 -2
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/processor/query_processor_factory.py +9 -5
- {palimpzest-0.8.1 → palimpzest-0.8.2/src/palimpzest.egg-info}/PKG-INFO +1 -1
- {palimpzest-0.8.1 → palimpzest-0.8.2}/LICENSE +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/README.md +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/setup.cfg +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/agents/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/agents/compute_agents.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/agents/search_agents.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/constants.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/context.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/context_manager.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/index_dataset.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/data/iter_dataset.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/elements/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/elements/filters.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/elements/groupbysig.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/elements/records.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/lib/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/lib/schemas.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/core/models.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/policy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/agent_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/context_search.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/convert_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/filter_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/join_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/moa_aggregator_convert_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/moa_proposer_convert_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/prompt_factory.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/split_merge_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/split_proposer_prompts.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/util_phrases.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/validator.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/all_sample_execution_strategy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/execution_strategy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/execution_strategy_type.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/parallel_execution_strategy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/single_threaded_execution_strategy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/generators/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/generators/generators.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/aggregate.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/compute.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/convert.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/critique_and_refine_convert.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/distinct.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/filter.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/join.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/limit.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/logical.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/mixture_of_agents_convert.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/physical.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/project.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/rag_convert.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/retrieve.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/scan.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/search.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/split_convert.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/cost_model.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/optimizer.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/optimizer_strategy.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/optimizer_strategy_type.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/plan.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/primitives.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/rules.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/tasks.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/processor/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/schemabuilder/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/schemabuilder/schema_builder.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/tools/README.md +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/tools/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/tools/allenpdf.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/tools/pdfparser.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/tools/skema_tools.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/env_helpers.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/hash_helpers.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/model_helpers.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/progress.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/utils/udfs.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/validator/__init__.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/validator/validator.py +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest.egg-info/SOURCES.txt +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest.egg-info/dependency_links.txt +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest.egg-info/requires.txt +0 -0
- {palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: palimpzest
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
|
|
5
5
|
Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
|
|
6
6
|
Project-URL: homepage, https://palimpzest.org
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "palimpzest"
|
|
3
|
-
version = "0.8.1"
|
|
3
|
+
version = "0.8.2"
|
|
4
4
|
description = "Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.8"
|
|
@@ -595,7 +595,7 @@ class Dataset:
|
|
|
595
595
|
|
|
596
596
|
return QueryProcessorFactory.create_and_run_processor(self, config)
|
|
597
597
|
|
|
598
|
-
def optimize_and_run(self, train_dataset: dict[str, Dataset] | Dataset | None = None, validator: Validator | None = None,
|
|
598
|
+
def optimize_and_run(self, config: QueryProcessorConfig | None = None, train_dataset: dict[str, Dataset] | Dataset | None = None, validator: Validator | None = None, **kwargs):
|
|
599
599
|
"""Optimize the PZ program using the train_dataset and validator before running the optimized plan."""
|
|
600
600
|
# TODO: this import currently needs to be here to avoid a circular import; we should fix this in a subsequent PR
|
|
601
601
|
from palimpzest.query.processor.query_processor_factory import QueryProcessorFactory
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/mab_execution_strategy.py
RENAMED
|
@@ -2,16 +2,19 @@
|
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
|
+
from chromadb.api.models.Collection import Collection
|
|
5
6
|
|
|
6
7
|
from palimpzest.core.data.dataset import Dataset
|
|
7
8
|
from palimpzest.core.elements.records import DataRecord, DataRecordSet
|
|
8
|
-
from palimpzest.core.models import OperatorStats, RecordOpStats, SentinelPlanStats
|
|
9
|
+
from palimpzest.core.models import OperatorCostEstimates, OperatorStats, RecordOpStats, SentinelPlanStats
|
|
9
10
|
from palimpzest.policy import Policy
|
|
10
11
|
from palimpzest.query.execution.execution_strategy import SentinelExecutionStrategy
|
|
11
12
|
from palimpzest.query.operators.aggregate import AggregateOp
|
|
12
|
-
from palimpzest.query.operators.filter import FilterOp
|
|
13
|
+
from palimpzest.query.operators.convert import LLMConvert
|
|
14
|
+
from palimpzest.query.operators.filter import FilterOp, LLMFilter
|
|
13
15
|
from palimpzest.query.operators.join import JoinOp
|
|
14
16
|
from palimpzest.query.operators.physical import PhysicalOperator
|
|
17
|
+
from palimpzest.query.operators.retrieve import RetrieveOp
|
|
15
18
|
from palimpzest.query.operators.scan import ContextScanOp, ScanPhysicalOp
|
|
16
19
|
from palimpzest.query.optimizer.plan import SentinelPlan
|
|
17
20
|
from palimpzest.utils.progress import create_progress_manager
|
|
@@ -55,6 +58,17 @@ class OpFrontier:
|
|
|
55
58
|
# store the prior beliefs on operator performance (if provided)
|
|
56
59
|
self.priors = priors
|
|
57
60
|
|
|
61
|
+
# boolean indication of the type of operator in this OpFrontier
|
|
62
|
+
sample_op = op_set[0]
|
|
63
|
+
self.is_scan_op = isinstance(sample_op, (ScanPhysicalOp, ContextScanOp))
|
|
64
|
+
self.is_filter_op = isinstance(sample_op, FilterOp)
|
|
65
|
+
self.is_aggregate_op = isinstance(sample_op, AggregateOp)
|
|
66
|
+
self.is_llm_join = isinstance(sample_op, JoinOp)
|
|
67
|
+
is_llm_convert = isinstance(sample_op, LLMConvert)
|
|
68
|
+
is_llm_filter = isinstance(sample_op, LLMFilter)
|
|
69
|
+
is_llm_retrieve = isinstance(sample_op, RetrieveOp) and isinstance(sample_op.index, Collection)
|
|
70
|
+
self.is_llm_op = is_llm_convert or is_llm_filter or is_llm_retrieve or self.is_llm_join
|
|
71
|
+
|
|
58
72
|
# get order in which we will sample physical operators for this logical operator
|
|
59
73
|
sample_op_indices = self._get_op_index_order(op_set, seed)
|
|
60
74
|
|
|
@@ -68,13 +82,6 @@ class OpFrontier:
|
|
|
68
82
|
self.full_op_id_to_sources_not_processed = {op.get_full_op_id(): source_indices for op in op_set}
|
|
69
83
|
self.max_inputs = len(source_indices)
|
|
70
84
|
|
|
71
|
-
# boolean indication of the type of operator in this OpFrontier
|
|
72
|
-
sample_op = op_set[0]
|
|
73
|
-
self.is_scan_op = isinstance(sample_op, (ScanPhysicalOp, ContextScanOp))
|
|
74
|
-
self.is_filter_op = isinstance(sample_op, FilterOp)
|
|
75
|
-
self.is_aggregate_op = isinstance(sample_op, AggregateOp)
|
|
76
|
-
self.is_llm_join = isinstance(sample_op, JoinOp)
|
|
77
|
-
|
|
78
85
|
# set the initial inputs for this logical operator; we maintain a mapping from source_unique_logical_op_id --> source_indices --> input;
|
|
79
86
|
# for each unique source and (tuple of) source indices, we store its output, which is an input to this operator
|
|
80
87
|
# for scan operators, we use the default name "source" since these operators have no source
|
|
@@ -149,16 +156,44 @@ class OpFrontier:
|
|
|
149
156
|
|
|
150
157
|
return op_id_to_pareto_distance
|
|
151
158
|
|
|
159
|
+
def _compute_naive_priors(self, op_set: list[PhysicalOperator]) -> dict[str, dict[str, float]]:
|
|
160
|
+
naive_priors = {}
|
|
161
|
+
for op in op_set:
|
|
162
|
+
# use naive cost estimates with dummy source estimates to compute priors
|
|
163
|
+
source_op_estimates = OperatorCostEstimates(quality=1.0, cost_per_record=0.0, time_per_record=0.0, cardinality=100)
|
|
164
|
+
op_estimates = (
|
|
165
|
+
op.naive_cost_estimates(source_op_estimates, source_op_estimates)
|
|
166
|
+
if self.is_llm_join
|
|
167
|
+
else op.naive_cost_estimates(source_op_estimates)
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# get op_id for this operator
|
|
171
|
+
op_id = op.get_op_id()
|
|
172
|
+
|
|
173
|
+
# set the naive quality, cost, and time priors for this operator
|
|
174
|
+
naive_priors[op_id] = {
|
|
175
|
+
"quality": op_estimates.quality,
|
|
176
|
+
"cost": op_estimates.cost_per_record,
|
|
177
|
+
"time": op_estimates.time_per_record,
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return naive_priors
|
|
181
|
+
|
|
152
182
|
def _get_op_index_order(self, op_set: list[PhysicalOperator], seed: int) -> list[int]:
|
|
153
183
|
"""
|
|
154
184
|
Returns a list of indices for the operators in the op_set.
|
|
155
185
|
"""
|
|
156
|
-
if
|
|
186
|
+
# if this is not an llm-operator, we simply return the indices in random order
|
|
187
|
+
if not self.is_llm_op:
|
|
157
188
|
rng = np.random.default_rng(seed=seed)
|
|
158
189
|
op_indices = np.arange(len(op_set))
|
|
159
190
|
rng.shuffle(op_indices)
|
|
160
191
|
return op_indices
|
|
161
192
|
|
|
193
|
+
# if this is an llm-operator, but we do not have priors, we first compute naive priors
|
|
194
|
+
if self.priors is None or any([op_id not in self.priors for op_id in map(lambda op: op.get_op_id(), op_set)]):
|
|
195
|
+
self.priors = self._compute_naive_priors(op_set)
|
|
196
|
+
|
|
162
197
|
# NOTE: self.priors is a dictionary with format:
|
|
163
198
|
# {op_id: {"quality": quality, "cost": cost, "time": time}}
|
|
164
199
|
|
|
@@ -215,7 +250,7 @@ class OpFrontier:
|
|
|
215
250
|
op_source_indices_pairs = []
|
|
216
251
|
|
|
217
252
|
# if this operator is not being optimized: we don't request inputs, but simply process what we are given / told to (in the case of scans)
|
|
218
|
-
if not self.
|
|
253
|
+
if not self.is_llm_op and len(self.frontier_ops) == 1:
|
|
219
254
|
return [(self.frontier_ops[0], None)]
|
|
220
255
|
|
|
221
256
|
# otherwise, sample (operator, source_indices) pairs
|
|
@@ -255,16 +290,6 @@ class OpFrontier:
|
|
|
255
290
|
all_inputs.extend(inputs)
|
|
256
291
|
return [(op, tuple(), all_inputs)]
|
|
257
292
|
|
|
258
|
-
# if this is an un-optimized (non-scan, non-join) operator, flatten inputs and run on each one
|
|
259
|
-
elif not self.is_scan_op and not self.is_llm_join and len(self.frontier_ops) == 1:
|
|
260
|
-
op_inputs = []
|
|
261
|
-
op = self.frontier_ops[0]
|
|
262
|
-
for _, source_indices_to_inputs in self.source_indices_to_inputs.items():
|
|
263
|
-
for source_indices, inputs in source_indices_to_inputs.items():
|
|
264
|
-
for input in inputs:
|
|
265
|
-
op_inputs.append((op, source_indices, input))
|
|
266
|
-
return op_inputs
|
|
267
|
-
|
|
268
293
|
### for optimized operators
|
|
269
294
|
# get the list of (op, source_indices) pairs which this operator needs to execute
|
|
270
295
|
op_source_indices_pairs = self._get_op_source_indices_pairs()
|
|
@@ -40,8 +40,8 @@ class QueryProcessorConfig(BaseModel):
|
|
|
40
40
|
use_final_op_quality: bool = Field(default=False)
|
|
41
41
|
|
|
42
42
|
# sentinel optimization flags
|
|
43
|
-
k: int = Field(default=
|
|
44
|
-
j: int = Field(default=
|
|
43
|
+
k: int = Field(default=6)
|
|
44
|
+
j: int = Field(default=4)
|
|
45
45
|
sample_budget: int = Field(default=100)
|
|
46
46
|
seed: int = Field(default=42)
|
|
47
47
|
exp_name: str | None = Field(default=None)
|
|
@@ -114,8 +114,8 @@ class QueryProcessor:
|
|
|
114
114
|
execution_stats = ExecutionStats(execution_id=self.execution_id())
|
|
115
115
|
execution_stats.start()
|
|
116
116
|
|
|
117
|
-
# if the user provides a
|
|
118
|
-
if self.
|
|
117
|
+
# if the user provides a validator, we perform optimization
|
|
118
|
+
if self.validator is not None:
|
|
119
119
|
# create sentinel plan
|
|
120
120
|
sentinel_plan = self._create_sentinel_plan(self.train_dataset)
|
|
121
121
|
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/processor/query_processor_factory.py
RENAMED
|
@@ -62,13 +62,17 @@ class QueryProcessorFactory:
|
|
|
62
62
|
print("WARNING: Both `progress` and `verbose` are set to True, but only one can be True at a time; defaulting to `progress=True`")
|
|
63
63
|
config.verbose = False
|
|
64
64
|
|
|
65
|
+
# if the user provides a training dataset, but no validator, create a default validator
|
|
66
|
+
if train_dataset is not None and validator is None:
|
|
67
|
+
validator = Validator()
|
|
68
|
+
logger.info("No validator provided; using default Validator")
|
|
69
|
+
|
|
65
70
|
# boolean flag for whether we're performing optimization or not
|
|
66
|
-
optimization =
|
|
67
|
-
val_based_opt = train_dataset is None and validator is not None
|
|
71
|
+
optimization = validator is not None
|
|
68
72
|
|
|
69
73
|
# handle "auto" default for sentinel execution strategies
|
|
70
74
|
if config.sentinel_execution_strategy == "auto":
|
|
71
|
-
config.sentinel_execution_strategy =
|
|
75
|
+
config.sentinel_execution_strategy = "mab" if optimization else None
|
|
72
76
|
|
|
73
77
|
# convert the config values for processing, execution, and optimization strategies to enums
|
|
74
78
|
config = cls._normalize_strategies(config)
|
|
@@ -87,7 +91,7 @@ class QueryProcessorFactory:
|
|
|
87
91
|
# set the final set of available models in the config
|
|
88
92
|
config.available_models = available_models
|
|
89
93
|
|
|
90
|
-
return config
|
|
94
|
+
return config, validator
|
|
91
95
|
|
|
92
96
|
@classmethod
|
|
93
97
|
def _create_optimizer(cls, config: QueryProcessorConfig) -> Optimizer:
|
|
@@ -143,7 +147,7 @@ class QueryProcessorFactory:
|
|
|
143
147
|
config = QueryProcessorConfig()
|
|
144
148
|
|
|
145
149
|
# apply any additional keyword arguments to the config and validate its contents
|
|
146
|
-
config = cls._config_validation_and_normalization(config, train_dataset, validator)
|
|
150
|
+
config, validator = cls._config_validation_and_normalization(config, train_dataset, validator)
|
|
147
151
|
|
|
148
152
|
# create the optimizer, execution strateg(ies), and processor
|
|
149
153
|
optimizer = cls._create_optimizer(config)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: palimpzest
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
|
|
5
5
|
Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
|
|
6
6
|
Project-URL: homepage, https://palimpzest.org
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/critique_and_refine_convert_prompts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/moa_aggregator_convert_prompts.py
RENAMED
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/prompts/moa_proposer_convert_prompts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/execution_strategy_type.py
RENAMED
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/execution/parallel_execution_strategy.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/critique_and_refine_convert.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/operators/mixture_of_agents_convert.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-0.8.1 → palimpzest-0.8.2}/src/palimpzest/query/optimizer/optimizer_strategy_type.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|