palimpzest 1.0.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {palimpzest-1.0.0/src/palimpzest.egg-info → palimpzest-1.1.0}/PKG-INFO +1 -1
- {palimpzest-1.0.0 → palimpzest-1.1.0}/pyproject.toml +1 -1
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/elements/groupbysig.py +5 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/models.py +6 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/execution_strategy.py +5 -3
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/mab_execution_strategy.py +10 -3
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/generators/generators.py +1 -1
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/rag.py +5 -4
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/processor/config.py +1 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/progress.py +32 -6
- {palimpzest-1.0.0 → palimpzest-1.1.0/src/palimpzest.egg-info}/PKG-INFO +1 -1
- {palimpzest-1.0.0 → palimpzest-1.1.0}/LICENSE +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/README.md +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/setup.cfg +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/agents/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/agents/compute_agents.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/agents/search_agents.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/constants.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/context.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/context_manager.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/dataset.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/index_dataset.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/data/iter_dataset.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/elements/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/elements/filters.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/elements/records.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/lib/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/core/lib/schemas.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/policy.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/agent_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/aggregate_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/context_search.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/convert_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/critique_and_refine_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/filter_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/join_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/moa_aggregator_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/moa_proposer_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/prompt_factory.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/split_merge_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/split_proposer_prompts.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/utils.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/prompts/validator.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/all_sample_execution_strategy.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/execution_strategy_type.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/parallel_execution_strategy.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/single_threaded_execution_strategy.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/generators/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/aggregate.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/compute.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/convert.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/critique_and_refine.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/distinct.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/filter.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/join.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/limit.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/logical.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/mixture_of_agents.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/physical.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/project.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/scan.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/search.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/split.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/operators/topk.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/cost_model.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/optimizer.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/optimizer_strategy.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/optimizer_strategy_type.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/plan.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/primitives.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/rules.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/tasks.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/processor/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/processor/query_processor.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/processor/query_processor_factory.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/schemabuilder/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/schemabuilder/schema_builder.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/tools/README.md +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/tools/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/tools/allenpdf.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/tools/pdfparser.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/tools/skema_tools.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/env_helpers.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/hash_helpers.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/model_helpers.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/utils/udfs.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/validator/__init__.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/validator/validator.py +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest.egg-info/SOURCES.txt +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest.egg-info/dependency_links.txt +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest.egg-info/requires.txt +0 -0
- {palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: palimpzest
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
|
|
5
5
|
Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
|
|
6
6
|
Project-URL: homepage, https://palimpzest.org
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "palimpzest"
|
|
3
|
-
version = "1.0.0"
|
|
3
|
+
version = "1.1.0"
|
|
4
4
|
description = "Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -11,6 +11,11 @@ from palimpzest.core.lib.schemas import create_schema_from_fields
|
|
|
11
11
|
# - construct the correct output schema using the input schema and the group by and aggregation fields
|
|
12
12
|
# - remove/update all other references to GroupBySig in the codebase
|
|
13
13
|
|
|
14
|
+
# TODO:
|
|
15
|
+
# - move the arguments for group_by_fields, agg_funcs, and agg_fields into the Dataset.groupby() operator
|
|
16
|
+
# - construct the correct output schema using the input schema and the group by and aggregation fields
|
|
17
|
+
# - remove/update all other references to GroupBySig in the codebase
|
|
18
|
+
|
|
14
19
|
# signature for a group by aggregate that applies
|
|
15
20
|
# group and aggregation to an input tuple
|
|
16
21
|
class GroupBySig:
|
|
@@ -454,6 +454,12 @@ class BasePlanStats(BaseModel):
|
|
|
454
454
|
"""
|
|
455
455
|
return sum([gen_stats.total_output_tokens for _, gen_stats in self.validation_gen_stats.items()])
|
|
456
456
|
|
|
457
|
+
def get_total_cost_so_far(self) -> float:
|
|
458
|
+
"""
|
|
459
|
+
Get the total cost incurred so far in this plan execution.
|
|
460
|
+
"""
|
|
461
|
+
return self.sum_op_costs() + self.sum_validation_costs()
|
|
462
|
+
|
|
457
463
|
|
|
458
464
|
class PlanStats(BasePlanStats):
|
|
459
465
|
"""
|
|
@@ -82,10 +82,11 @@ class SentinelExecutionStrategy(BaseExecutionStrategy, ABC):
|
|
|
82
82
|
"""
|
|
83
83
|
def __init__(
|
|
84
84
|
self,
|
|
85
|
-
k: int,
|
|
86
|
-
j: int,
|
|
87
|
-
sample_budget: int,
|
|
88
85
|
policy: Policy,
|
|
86
|
+
k: int = 6,
|
|
87
|
+
j: int = 4,
|
|
88
|
+
sample_budget: int = 100,
|
|
89
|
+
sample_cost_budget: float | None = None,
|
|
89
90
|
priors: dict | None = None,
|
|
90
91
|
use_final_op_quality: bool = False,
|
|
91
92
|
seed: int = 42,
|
|
@@ -97,6 +98,7 @@ class SentinelExecutionStrategy(BaseExecutionStrategy, ABC):
|
|
|
97
98
|
self.k = k
|
|
98
99
|
self.j = j
|
|
99
100
|
self.sample_budget = sample_budget
|
|
101
|
+
self.sample_cost_budget = sample_cost_budget
|
|
100
102
|
self.policy = policy
|
|
101
103
|
self.priors = priors
|
|
102
104
|
self.use_final_op_quality = use_final_op_quality
|
{palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/mab_execution_strategy.py
RENAMED
|
@@ -680,6 +680,9 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
|
|
|
680
680
|
|
|
681
681
|
return max_quality_op
|
|
682
682
|
|
|
683
|
+
def _compute_termination_condition(self, samples_drawn: int, sampling_cost: float) -> bool:
|
|
684
|
+
return (samples_drawn >= self.sample_budget) if self.sample_cost_budget is None else (sampling_cost >= self.sample_cost_budget)
|
|
685
|
+
|
|
683
686
|
def _execute_sentinel_plan(
|
|
684
687
|
self,
|
|
685
688
|
plan: SentinelPlan,
|
|
@@ -688,8 +691,8 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
|
|
|
688
691
|
plan_stats: SentinelPlanStats,
|
|
689
692
|
) -> SentinelPlanStats:
|
|
690
693
|
# sample records and operators and update the frontiers
|
|
691
|
-
samples_drawn = 0
|
|
692
|
-
while samples_drawn < self.sample_budget:
|
|
694
|
+
samples_drawn, sampling_cost = 0, 0.0
|
|
695
|
+
while not self._compute_termination_condition(samples_drawn, sampling_cost):
|
|
693
696
|
# pre-compute the set of source indices which will need to be sampled
|
|
694
697
|
source_indices_to_sample = set()
|
|
695
698
|
for op_frontier in op_frontiers.values():
|
|
@@ -732,6 +735,9 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
|
|
|
732
735
|
}
|
|
733
736
|
source_indices_to_all_record_sets, val_gen_stats = self._score_quality(validator, source_indices_to_all_record_sets)
|
|
734
737
|
|
|
738
|
+
# update the progress manager with validation cost
|
|
739
|
+
self.progress_manager.incr_overall_progress_cost(val_gen_stats.cost_per_record)
|
|
740
|
+
|
|
735
741
|
# remove records that were read from the execution cache before adding to record op stats
|
|
736
742
|
new_record_op_stats = []
|
|
737
743
|
for _, record_set_tuples in source_indices_to_record_set_tuples.items():
|
|
@@ -742,6 +748,7 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
|
|
|
742
748
|
# update plan stats
|
|
743
749
|
plan_stats.add_record_op_stats(unique_logical_op_id, new_record_op_stats)
|
|
744
750
|
plan_stats.add_validation_gen_stats(unique_logical_op_id, val_gen_stats)
|
|
751
|
+
sampling_cost = plan_stats.get_total_cost_so_far()
|
|
745
752
|
|
|
746
753
|
# provide the best record sets as inputs to the next logical operator
|
|
747
754
|
next_unique_logical_op_id = plan.get_next_unique_logical_op_id(unique_logical_op_id)
|
|
@@ -813,7 +820,7 @@ class MABExecutionStrategy(SentinelExecutionStrategy):
|
|
|
813
820
|
op_frontiers[unique_logical_op_id] = OpFrontier(op_set, source_unique_logical_op_ids, root_dataset_ids, source_indices, self.k, self.j, self.seed, self.policy, self.priors)
|
|
814
821
|
|
|
815
822
|
# initialize and start the progress manager
|
|
816
|
-
self.progress_manager = create_progress_manager(plan, sample_budget=self.sample_budget, progress=self.progress)
|
|
823
|
+
self.progress_manager = create_progress_manager(plan, sample_budget=self.sample_budget, sample_cost_budget=self.sample_cost_budget, progress=self.progress)
|
|
817
824
|
self.progress_manager.start()
|
|
818
825
|
|
|
819
826
|
# NOTE: we must handle progress manager outside of _execute_sentinel_plan to ensure that it is shut down correctly;
|
|
@@ -338,7 +338,7 @@ class Generator(Generic[ContextType, InputType]):
|
|
|
338
338
|
reasoning_effort = "minimal" if self.reasoning_effort is None else self.reasoning_effort
|
|
339
339
|
completion_kwargs = {"reasoning_effort": reasoning_effort, **completion_kwargs}
|
|
340
340
|
if self.model.is_vllm_model():
|
|
341
|
-
completion_kwargs = {"api_base": self.api_base, "api_key": os.environ.get("VLLM_API_KEY", "fake-api-key") **completion_kwargs}
|
|
341
|
+
completion_kwargs = {"api_base": self.api_base, "api_key": os.environ.get("VLLM_API_KEY", "fake-api-key"), **completion_kwargs}
|
|
342
342
|
completion = litellm.completion(model=self.model_name, messages=messages, **completion_kwargs)
|
|
343
343
|
end_time = time.time()
|
|
344
344
|
logger.debug(f"Generated completion in {end_time - start_time:.2f} seconds")
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import time
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
from numpy import dot
|
|
6
7
|
from numpy.linalg import norm
|
|
@@ -153,8 +154,8 @@ class RAGConvert(LLMConvert):
|
|
|
153
154
|
field = candidate.get_field_type(field_name)
|
|
154
155
|
|
|
155
156
|
# skip this field if it is not a string or a list of strings
|
|
156
|
-
is_string_field = field.annotation in [str, str | None]
|
|
157
|
-
is_list_string_field = field.annotation in [list[str], list[str] | None]
|
|
157
|
+
is_string_field = field.annotation in [str, str | None, str | Any]
|
|
158
|
+
is_list_string_field = field.annotation in [list[str], list[str] | None, list[str] | Any]
|
|
158
159
|
if not (is_string_field or is_list_string_field):
|
|
159
160
|
continue
|
|
160
161
|
|
|
@@ -358,8 +359,8 @@ class RAGFilter(LLMFilter):
|
|
|
358
359
|
field = candidate.get_field_type(field_name)
|
|
359
360
|
|
|
360
361
|
# skip this field if it is not a string or a list of strings
|
|
361
|
-
is_string_field = field.annotation in [str, str | None]
|
|
362
|
-
is_list_string_field = field.annotation in [list[str], list[str] | None]
|
|
362
|
+
is_string_field = field.annotation in [str, str | None, str | Any]
|
|
363
|
+
is_list_string_field = field.annotation in [list[str], list[str] | None, list[str] | Any]
|
|
363
364
|
if not (is_string_field or is_list_string_field):
|
|
364
365
|
continue
|
|
365
366
|
|
|
@@ -44,6 +44,7 @@ class QueryProcessorConfig(BaseModel):
|
|
|
44
44
|
k: int = Field(default=6)
|
|
45
45
|
j: int = Field(default=4)
|
|
46
46
|
sample_budget: int = Field(default=100)
|
|
47
|
+
sample_cost_budget: float | None = Field(default=None)
|
|
47
48
|
seed: int = Field(default=42)
|
|
48
49
|
exp_name: str | None = Field(default=None)
|
|
49
50
|
priors: dict | None = Field(default=None)
|
|
@@ -283,7 +283,7 @@ class PZProgressManager(ProgressManager):
|
|
|
283
283
|
self.unique_full_op_id_to_stats[unique_full_op_id].memory_usage_mb = get_memory_usage()
|
|
284
284
|
|
|
285
285
|
class PZSentinelProgressManager(ProgressManager):
|
|
286
|
-
def __init__(self, plan: SentinelPlan, sample_budget: int):
|
|
286
|
+
def __init__(self, plan: SentinelPlan, sample_budget: int | None, sample_cost_budget: float | None):
|
|
287
287
|
# overall progress bar
|
|
288
288
|
self.overall_progress = RichProgress(
|
|
289
289
|
SpinnerColumn(),
|
|
@@ -298,7 +298,9 @@ class PZSentinelProgressManager(ProgressManager):
|
|
|
298
298
|
refresh_per_second=10,
|
|
299
299
|
expand=True, # Use full width
|
|
300
300
|
)
|
|
301
|
-
self.overall_task_id = self.overall_progress.add_task("", total=sample_budget, cost=0.0, recent="")
|
|
301
|
+
self.use_cost_budget = sample_cost_budget is not None
|
|
302
|
+
total = sample_cost_budget if self.use_cost_budget else sample_budget
|
|
303
|
+
self.overall_task_id = self.overall_progress.add_task("", total=total, cost=0.0, recent="")
|
|
302
304
|
|
|
303
305
|
# logical operator progress bars
|
|
304
306
|
self.op_progress = RichProgress(
|
|
@@ -334,6 +336,9 @@ class PZSentinelProgressManager(ProgressManager):
|
|
|
334
336
|
# initialize start time
|
|
335
337
|
self.start_time = None
|
|
336
338
|
|
|
339
|
+
# initialize validation cost
|
|
340
|
+
self.validation_cost = 0.0
|
|
341
|
+
|
|
337
342
|
# add a task to the progress manager for each operator in the plan
|
|
338
343
|
for topo_idx, (logical_op_id, op_set) in enumerate(plan):
|
|
339
344
|
unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
|
|
@@ -387,15 +392,34 @@ class PZSentinelProgressManager(ProgressManager):
|
|
|
387
392
|
# start progress bars
|
|
388
393
|
self.live_display.start()
|
|
389
394
|
|
|
395
|
+
def incr_overall_progress_cost(self, cost_delta: float):
|
|
396
|
+
"""Advance the overall progress bar by the given cost delta"""
|
|
397
|
+
self.validation_cost += cost_delta
|
|
398
|
+
self.overall_progress.update(
|
|
399
|
+
self.overall_task_id,
|
|
400
|
+
advance=cost_delta,
|
|
401
|
+
cost=sum(stats.total_cost for _, stats in self.unique_logical_op_id_to_stats.items()) + self.validation_cost,
|
|
402
|
+
refresh=True,
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
# force the live display to refresh
|
|
406
|
+
self.live_display.refresh()
|
|
407
|
+
|
|
390
408
|
def incr(self, unique_logical_op_id: str, num_samples: int, display_text: str | None = None, **kwargs):
|
|
391
409
|
# TODO: (above) organize progress bars into a Live / Table / Panel or something
|
|
392
410
|
# get the task for the given operation
|
|
393
411
|
task = self.unique_logical_op_id_to_task.get(unique_logical_op_id)
|
|
394
412
|
|
|
413
|
+
# store the cost before updating stats
|
|
414
|
+
previous_total_cost = self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost
|
|
415
|
+
|
|
395
416
|
# update statistics with any additional keyword arguments
|
|
396
417
|
if kwargs != {}:
|
|
397
418
|
self.update_stats(unique_logical_op_id, **kwargs)
|
|
398
419
|
|
|
420
|
+
# compute the cost delta
|
|
421
|
+
cost_delta = self.unique_logical_op_id_to_stats[unique_logical_op_id].total_cost - previous_total_cost
|
|
422
|
+
|
|
399
423
|
# update progress bar and recent text in one update
|
|
400
424
|
if display_text is not None:
|
|
401
425
|
self.unique_logical_op_id_to_stats[unique_logical_op_id].recent_text = display_text
|
|
@@ -414,10 +438,11 @@ class PZSentinelProgressManager(ProgressManager):
|
|
|
414
438
|
)
|
|
415
439
|
|
|
416
440
|
# advance the overall progress bar
|
|
441
|
+
advance = cost_delta if self.use_cost_budget else num_samples
|
|
417
442
|
self.overall_progress.update(
|
|
418
443
|
self.overall_task_id,
|
|
419
|
-
advance=num_samples,
|
|
420
|
-
cost=sum(stats.total_cost for _, stats in self.unique_logical_op_id_to_stats.items()),
|
|
444
|
+
advance=advance,
|
|
445
|
+
cost=sum(stats.total_cost for _, stats in self.unique_logical_op_id_to_stats.items()) + self.validation_cost,
|
|
421
446
|
refresh=True,
|
|
422
447
|
)
|
|
423
448
|
|
|
@@ -451,6 +476,7 @@ def create_progress_manager(
|
|
|
451
476
|
plan: PhysicalPlan | SentinelPlan,
|
|
452
477
|
num_samples: int | None = None,
|
|
453
478
|
sample_budget: int | None = None,
|
|
479
|
+
sample_cost_budget: float | None = None,
|
|
454
480
|
progress: bool = True,
|
|
455
481
|
) -> ProgressManager:
|
|
456
482
|
"""Factory function to create appropriate progress manager based on environment"""
|
|
@@ -458,7 +484,7 @@ def create_progress_manager(
|
|
|
458
484
|
return MockProgressManager(plan, num_samples)
|
|
459
485
|
|
|
460
486
|
if isinstance(plan, SentinelPlan):
|
|
461
|
-
assert sample_budget is not None, "Sample budget must be specified for SentinelPlan progress manager"
|
|
462
|
-
return PZSentinelProgressManager(plan, sample_budget)
|
|
487
|
+
assert sample_budget is not None or sample_cost_budget is not None, "Sample budget must be specified for SentinelPlan progress manager"
|
|
488
|
+
return PZSentinelProgressManager(plan, sample_budget, sample_cost_budget)
|
|
463
489
|
|
|
464
490
|
return PZProgressManager(plan, num_samples)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: palimpzest
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
|
|
5
5
|
Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
|
|
6
6
|
Project-URL: homepage, https://palimpzest.org
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/execution_strategy_type.py
RENAMED
|
File without changes
|
{palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/execution/parallel_execution_strategy.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/optimizer/optimizer_strategy_type.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{palimpzest-1.0.0 → palimpzest-1.1.0}/src/palimpzest/query/processor/query_processor_factory.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|