palimpzest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. palimpzest/__init__.py +5 -0
  2. palimpzest/constants.py +110 -43
  3. palimpzest/core/__init__.py +0 -78
  4. palimpzest/core/data/dataclasses.py +382 -44
  5. palimpzest/core/elements/filters.py +7 -3
  6. palimpzest/core/elements/index.py +70 -0
  7. palimpzest/core/elements/records.py +33 -11
  8. palimpzest/core/lib/fields.py +1 -0
  9. palimpzest/core/lib/schemas.py +4 -3
  10. palimpzest/prompts/moa_proposer_convert_prompts.py +0 -4
  11. palimpzest/prompts/prompt_factory.py +44 -7
  12. palimpzest/prompts/split_merge_prompts.py +56 -0
  13. palimpzest/prompts/split_proposer_prompts.py +55 -0
  14. palimpzest/query/execution/execution_strategy.py +435 -53
  15. palimpzest/query/execution/execution_strategy_type.py +20 -0
  16. palimpzest/query/execution/mab_execution_strategy.py +532 -0
  17. palimpzest/query/execution/parallel_execution_strategy.py +143 -172
  18. palimpzest/query/execution/random_sampling_execution_strategy.py +240 -0
  19. palimpzest/query/execution/single_threaded_execution_strategy.py +173 -203
  20. palimpzest/query/generators/api_client_factory.py +31 -0
  21. palimpzest/query/generators/generators.py +256 -76
  22. palimpzest/query/operators/__init__.py +1 -2
  23. palimpzest/query/operators/code_synthesis_convert.py +33 -18
  24. palimpzest/query/operators/convert.py +30 -97
  25. palimpzest/query/operators/critique_and_refine_convert.py +5 -6
  26. palimpzest/query/operators/filter.py +7 -10
  27. palimpzest/query/operators/logical.py +54 -10
  28. palimpzest/query/operators/map.py +130 -0
  29. palimpzest/query/operators/mixture_of_agents_convert.py +6 -6
  30. palimpzest/query/operators/physical.py +3 -12
  31. palimpzest/query/operators/rag_convert.py +66 -18
  32. palimpzest/query/operators/retrieve.py +230 -34
  33. palimpzest/query/operators/scan.py +5 -2
  34. palimpzest/query/operators/split_convert.py +169 -0
  35. palimpzest/query/operators/token_reduction_convert.py +8 -14
  36. palimpzest/query/optimizer/__init__.py +4 -16
  37. palimpzest/query/optimizer/cost_model.py +73 -266
  38. palimpzest/query/optimizer/optimizer.py +87 -58
  39. palimpzest/query/optimizer/optimizer_strategy.py +18 -97
  40. palimpzest/query/optimizer/optimizer_strategy_type.py +37 -0
  41. palimpzest/query/optimizer/plan.py +2 -3
  42. palimpzest/query/optimizer/primitives.py +5 -3
  43. palimpzest/query/optimizer/rules.py +336 -172
  44. palimpzest/query/optimizer/tasks.py +30 -100
  45. palimpzest/query/processor/config.py +38 -22
  46. palimpzest/query/processor/nosentinel_processor.py +16 -520
  47. palimpzest/query/processor/processing_strategy_type.py +28 -0
  48. palimpzest/query/processor/query_processor.py +38 -206
  49. palimpzest/query/processor/query_processor_factory.py +117 -130
  50. palimpzest/query/processor/sentinel_processor.py +90 -0
  51. palimpzest/query/processor/streaming_processor.py +25 -32
  52. palimpzest/sets.py +88 -41
  53. palimpzest/utils/model_helpers.py +8 -7
  54. palimpzest/utils/progress.py +368 -152
  55. palimpzest/utils/token_reduction_helpers.py +1 -3
  56. {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/METADATA +19 -9
  57. palimpzest-0.7.1.dist-info/RECORD +96 -0
  58. {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/WHEEL +1 -1
  59. palimpzest/query/processor/mab_sentinel_processor.py +0 -884
  60. palimpzest/query/processor/random_sampling_sentinel_processor.py +0 -639
  61. palimpzest/utils/index_helpers.py +0 -6
  62. palimpzest-0.6.4.dist-info/RECORD +0 -87
  63. {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info/licenses}/LICENSE +0 -0
  64. {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
3
4
  from typing import Any
4
5
 
5
6
  from palimpzest.core.data.dataclasses import PlanCost
6
7
  from palimpzest.policy import Policy
7
8
  from palimpzest.query.optimizer.cost_model import BaseCostModel
8
- from palimpzest.query.optimizer.optimizer_strategy import OptimizationStrategyType
9
+ from palimpzest.query.optimizer.optimizer_strategy_type import OptimizationStrategyType
9
10
  from palimpzest.query.optimizer.primitives import Expression, Group
10
11
  from palimpzest.query.optimizer.rules import ImplementationRule, Rule, TransformationRule
11
12
 
13
+ logger = logging.getLogger(__name__)
12
14
 
13
15
  class Task:
14
16
  """
@@ -41,6 +43,7 @@ class OptimizeGroup(Task):
41
43
  self.group_id = group_id
42
44
 
43
45
  def perform(self, groups: dict[int, Group], context: dict[str, Any] | None = None) -> list[Task]:
46
+ logger.debug(f"Optimizing group {self.group_id}")
44
47
  # get updated instance of the group to be optimized
45
48
  if context is None:
46
49
  context = {}
@@ -61,6 +64,8 @@ class OptimizeGroup(Task):
61
64
  task = OptimizePhysicalExpression(physical_expr)
62
65
  new_tasks.append(task)
63
66
 
67
+ logger.debug(f"Done optimizing group {self.group_id}")
68
+ logger.debug(f"New tasks: {len(new_tasks)}")
64
69
  return new_tasks
65
70
 
66
71
 
@@ -76,6 +81,8 @@ class ExpandGroup(Task):
76
81
  self.group_id = group_id
77
82
 
78
83
  def perform(self, groups: dict[int, Group], context: dict[str, Any] | None = None) -> list[Task]:
84
+ logger.debug(f"Expanding group {self.group_id}")
85
+
79
86
  # fetch group
80
87
  if context is None:
81
88
  context = {}
@@ -94,6 +101,8 @@ class ExpandGroup(Task):
94
101
  # mark the group as explored and return tasks
95
102
  group.set_explored()
96
103
 
104
+ logger.debug(f"Done expanding group {self.group_id}")
105
+ logger.debug(f"New tasks: {len(new_tasks)}")
97
106
  return new_tasks
98
107
 
99
108
 
@@ -115,6 +124,7 @@ class OptimizeLogicalExpression(Task):
115
124
  implementation_rules: list[ImplementationRule],
116
125
  context: dict[str, Any] | None = None,
117
126
  ) -> list[Task]:
127
+ logger.debug(f"Optimizing logical expression {self.logical_expression}")
118
128
  # if we're exploring, only apply transformation rules
119
129
  if context is None:
120
130
  context = {}
@@ -135,6 +145,8 @@ class OptimizeLogicalExpression(Task):
135
145
  apply_rule_task = ApplyRule(rule, self.logical_expression, self.exploring)
136
146
  new_tasks.append(apply_rule_task)
137
147
 
148
+ logger.debug(f"Done optimizing logical expression {self.logical_expression}")
149
+ logger.debug(f"New tasks: {len(new_tasks)}")
138
150
  return new_tasks
139
151
 
140
152
 
@@ -170,6 +182,8 @@ class ApplyRule(Task):
170
182
  context: dict[str, Any] | None = None,
171
183
  **physical_op_params,
172
184
  ) -> tuple[list[Task], int]:
185
+ logger.debug(f"Applying rule {self.rule} to logical expression {self.logical_expression}")
186
+
173
187
  # check if rule has already been applied to this logical expression; return [] if so
174
188
  if context is None:
175
189
  context = {}
@@ -235,6 +249,8 @@ class ApplyRule(Task):
235
249
  # mark that the rule has been applied to the logical expression
236
250
  self.logical_expression.add_applied_rule(self.rule)
237
251
 
252
+ logger.debug(f"Done applying rule {self.rule} to logical expression {self.logical_expression}")
253
+ logger.debug(f"New tasks: {len(new_tasks)}")
238
254
  return new_tasks
239
255
 
240
256
 
@@ -244,8 +260,8 @@ class OptimizePhysicalExpression(Task):
244
260
 
245
261
  This task computes the cost of input groups for the given physical expression (scheduling
246
262
  OptimizeGroup tasks if needed), computes the cost of the given expression, and then updates
247
- the expression's group depending on whether this expression is its best_physical_expression
248
- or in its ci_best_physical_expressions.
263
+ the expression's group depending on whether this expression is its `best_physical_expression`
264
+ or in its `pareto_optimal_physical_expressions`.
249
265
  """
250
266
 
251
267
  def __init__(self, physical_expression: Expression, exploring: bool = False):
@@ -384,74 +400,6 @@ class OptimizePhysicalExpression(Task):
384
400
 
385
401
  return group
386
402
 
387
-
388
- def update_ci_best_physical_expressions(self, group: Group, policy: Policy) -> Group:
389
- """
390
- Update the CI best physical expressions for the given group and policy (if necessary).
391
- """
392
- # get the primary metric for the policy
393
- policy_metric = policy.get_primary_metric()
394
-
395
- # get the PlanCost for this physical expression
396
- expr_plan_cost = self.physical_expression.plan_cost
397
-
398
- # pre-compute whether or not this physical expression satisfies the policy constraint
399
- expr_satisfies_constraint = policy.constraint(expr_plan_cost)
400
-
401
- # attribute names for lower and upper bounds
402
- lower_bound = f"{policy_metric}_lower_bound"
403
- upper_bound = f"{policy_metric}_upper_bound"
404
-
405
- # get the expression and plan's upper and lower bounds on the metric of interest
406
- expr_lower_bound = getattr(expr_plan_cost, lower_bound)
407
- expr_upper_bound = getattr(expr_plan_cost, upper_bound)
408
- group_lower_bound = getattr(group, lower_bound)
409
-
410
- # if either of the following is true:
411
- # 1) the CI best physical expressions are empty
412
- # 2) the group does not satisfy the constrant but this physical expression does
413
- # set the CI best physical expressions to be this expression
414
- if (
415
- group.ci_best_physical_expressions == []
416
- or (not group.satisfies_constraint and expr_satisfies_constraint)
417
- ):
418
- group.ci_best_physical_expressions = [self.physical_expression]
419
- group.satisfies_constraint = expr_satisfies_constraint
420
- setattr(group, lower_bound, expr_lower_bound)
421
- setattr(group, upper_bound, expr_upper_bound)
422
-
423
- # otherwise, if this expression and the group both satisfy the constraint (or both do not satisfy the constraint),
424
- # then update the CI best physical expressions if this expression also has an upper bound on the policy metric
425
- # above the group's lower bound on the policy metric
426
- elif (
427
- (group.satisfies_constraint == expr_satisfies_constraint)
428
- and expr_upper_bound > group_lower_bound
429
- ):
430
- # filter out any current best expressions whose upper bound is below the lower bound of this expression
431
- group.ci_best_physical_expressions = [
432
- curr_expr
433
- for curr_expr in group.ci_best_physical_expressions
434
- if not getattr(curr_expr, upper_bound) < expr_lower_bound
435
- ]
436
-
437
- # add this expression to the CI best physical expressions
438
- group.ci_best_physical_expressions.append(self.physical_expression)
439
-
440
- # compute the upper and lower bounds for the group
441
- new_group_upper_bound = max(
442
- map(lambda expr: getattr(expr, upper_bound), group.ci_best_physical_expressions)
443
- )
444
- new_group_lower_bound = max(
445
- map(lambda expr: getattr(expr, lower_bound), group.ci_best_physical_expressions)
446
- )
447
-
448
- # set the new upper and lower bounds for the group
449
- setattr(group, lower_bound, new_group_lower_bound)
450
- setattr(group, upper_bound, new_group_upper_bound)
451
-
452
- return group
453
-
454
-
455
403
  def perform(
456
404
  self,
457
405
  cost_model: BaseCostModel,
@@ -459,20 +407,19 @@ class OptimizePhysicalExpression(Task):
459
407
  policy: Policy,
460
408
  context: dict[str, Any] | None = None,
461
409
  ) -> list[Task]:
410
+ logger.debug(f"Optimizing physical expression {self.physical_expression}")
411
+
462
412
  if context is None:
463
413
  context = {}
464
414
 
415
+ # get the optimizer strategy (type) from the context
416
+ optimizer_strategy: OptimizationStrategyType = context['optimizer_strategy']
417
+
465
418
  # return if we've already computed the cost of this physical expression
466
- if ( # noqa: SIM114
467
- context['optimization_strategy_type'] in [OptimizationStrategyType.GREEDY, OptimizationStrategyType.SENTINEL, OptimizationStrategyType.NONE]
468
- and self.physical_expression.plan_cost is not None
469
- ):
419
+ if optimizer_strategy.is_pareto() and self.physical_expression.pareto_optimal_plan_costs is not None:
470
420
  return []
471
421
 
472
- elif (
473
- context['optimization_strategy_type'] == OptimizationStrategyType.PARETO
474
- and self.physical_expression.pareto_optimal_plan_costs is not None
475
- ):
422
+ if optimizer_strategy.is_not_pareto() and self.physical_expression.plan_cost is not None:
476
423
  return []
477
424
 
478
425
  # for expressions with an input group, compute the input plan cost(s)
@@ -485,24 +432,11 @@ class OptimizePhysicalExpression(Task):
485
432
 
486
433
  # compute the input plan cost or list of input plan costs
487
434
  new_tasks = []
488
- if (
489
- context['optimization_strategy_type'] in [OptimizationStrategyType.GREEDY, OptimizationStrategyType.SENTINEL, OptimizationStrategyType.NONE]
490
- and input_group.best_physical_expression is not None
491
- ):
435
+ if optimizer_strategy.is_not_pareto() and input_group.best_physical_expression is not None:
492
436
  # TODO: apply policy constraint here
493
437
  best_input_plan_cost = input_group.best_physical_expression.plan_cost
494
438
 
495
- elif (
496
- context['optimization_strategy_type'] == OptimizationStrategyType.CONFIDENCE_INTERVAL
497
- and input_group.ci_best_physical_expressions is not None
498
- ):
499
- # TODO: fix this to properly compute set of potential input plan costs
500
- raise Exception("NotImplementedError")
501
-
502
- elif (
503
- context['optimization_strategy_type'] == OptimizationStrategyType.PARETO
504
- and input_group.pareto_optimal_physical_expressions is not None
505
- ):
439
+ elif optimizer_strategy.is_pareto() and input_group.pareto_optimal_physical_expressions is not None:
506
440
  # TODO: apply policy constraint here
507
441
  input_plan_costs = []
508
442
  for pareto_physical_expression in input_group.pareto_optimal_physical_expressions:
@@ -524,12 +458,7 @@ class OptimizePhysicalExpression(Task):
524
458
  return [self] + new_tasks
525
459
 
526
460
  group = groups[self.physical_expression.group_id]
527
- if context['optimization_strategy_type'] == OptimizationStrategyType.CONFIDENCE_INTERVAL:
528
- # TODO: fix this to properly compute and update set of possible plan costs
529
- raise Exception("NotImplementedError")
530
- group = self.update_ci_best_physical_expressions(group, policy)
531
-
532
- elif context['optimization_strategy_type'] == OptimizationStrategyType.PARETO:
461
+ if optimizer_strategy.is_pareto():
533
462
  # compute all possible plan costs for this physical expression given the pareto optimal input plan costs
534
463
  all_possible_plan_costs = []
535
464
  for input_plan_cost in input_plan_costs:
@@ -583,4 +512,5 @@ class OptimizePhysicalExpression(Task):
583
512
  group.optimized = True
584
513
  groups[self.physical_expression.group_id] = group
585
514
 
515
+ logger.debug(f"Done optimizing physical expression {self.physical_expression}")
586
516
  return []
@@ -11,62 +11,78 @@ from palimpzest.policy import MaxQuality, Policy
11
11
  @dataclass
12
12
  class QueryProcessorConfig:
13
13
  """Shared context for query processors"""
14
- processing_strategy: str = field(default="no_sentinel")
15
- execution_strategy: str = field(default="sequential")
16
- optimizer_strategy: str = field(default="pareto")
14
+ processing_strategy: str = field(default="auto") # substituted with ProcessingStrategyType
15
+ execution_strategy: str = field(default="sequential") # substituted with ExecutionStrategyType
16
+ sentinel_execution_strategy: str | None = field(default="auto") # substituted with SentinelExecutionStrategyType
17
+ optimizer_strategy: str = field(default="pareto") # substituted with OptimizationStrategyType
17
18
 
18
19
  val_datasource: DataReader | None = field(default=None)
19
20
 
20
21
  policy: Policy = field(default_factory=MaxQuality)
21
22
  scan_start_idx: int = field(default=0)
22
- num_samples: int = field(default=float("inf"))
23
- nocache: bool = field(default=True) # NOTE: until we properly implement caching, let's set the default to True
24
- include_baselines: bool = field(default=False)
25
- min_plans: int | None = field(default=None)
23
+ num_samples: int = field(default=None)
24
+ cache: bool = field(default=False) # NOTE: until we properly implement caching, let's set the default to False
26
25
  verbose: bool = field(default=False)
26
+ progress: bool = field(default=True)
27
27
  available_models: list[Model] | None = field(default=None)
28
-
28
+
29
29
  max_workers: int | None = field(default=None)
30
- num_workers_per_plan: int = field(default=1)
31
30
 
32
31
  allow_bonded_query: bool = field(default=True)
33
- allow_conventional_query: bool = field(default=False)
34
32
  allow_model_selection: bool = field(default=True)
35
33
  allow_code_synth: bool = field(default=False)
36
34
  allow_token_reduction: bool = field(default=False)
37
- allow_rag_reduction: bool = field(default=False)
35
+ allow_rag_reduction: bool = field(default=True)
38
36
  allow_mixtures: bool = field(default=True)
39
- allow_critic: bool = field(default=False)
37
+ allow_critic: bool = field(default=True)
38
+ allow_split_merge: bool = field(default=False)
40
39
  use_final_op_quality: bool = field(default=False)
41
40
 
42
- def to_json_str(self):
43
- return json.dumps({
41
+ kwargs: dict = field(default_factory=dict)
42
+
43
+ def to_dict(self) -> dict:
44
+ """Convert the config to a dict representation."""
45
+ return {
44
46
  "processing_strategy": self.processing_strategy,
45
47
  "execution_strategy": self.execution_strategy,
48
+ "sentinel_execution_strategy": self.sentinel_execution_strategy,
46
49
  "optimizer_strategy": self.optimizer_strategy,
47
- "val_datasource": None if self.val_datasource is None else self.val_datasource.serialize(),
48
- "policy": self.policy.to_json_str(),
50
+ "val_datasource": self.val_datasource,
51
+ "policy": self.policy,
49
52
  "scan_start_idx": self.scan_start_idx,
50
53
  "num_samples": self.num_samples,
51
- "nocache": self.nocache,
52
- "include_baselines": self.include_baselines,
53
- "min_plans": self.min_plans,
54
+ "cache": self.cache,
54
55
  "verbose": self.verbose,
56
+ "progress": self.progress,
55
57
  "available_models": self.available_models,
56
58
  "max_workers": self.max_workers,
57
- "num_workers_per_plan": self.num_workers_per_plan,
58
59
  "allow_bonded_query": self.allow_bonded_query,
59
- "allow_conventional_query": self.allow_conventional_query,
60
60
  "allow_model_selection": self.allow_model_selection,
61
61
  "allow_code_synth": self.allow_code_synth,
62
62
  "allow_token_reduction": self.allow_token_reduction,
63
63
  "allow_rag_reduction": self.allow_rag_reduction,
64
64
  "allow_mixtures": self.allow_mixtures,
65
65
  "allow_critic": self.allow_critic,
66
+ "allow_split_merge": self.allow_split_merge,
66
67
  "use_final_op_quality": self.use_final_op_quality,
67
- }, indent=2)
68
+ **self.kwargs,
69
+ }
70
+
71
+ def to_json_str(self):
72
+ """Convert the config to a JSON string representation."""
73
+ config_dict = self.to_dict()
74
+ config_dict["val_datasource"] = (
75
+ None if self.val_datasource is None else self.val_datasource.serialize()
76
+ )
77
+ config_dict["policy"] = self.policy.to_json_str()
78
+ for strategy in ["processing_strategy", "execution_strategy", "sentinel_execution_strategy", "optimizer_strategy"]:
79
+ config_dict[strategy] = str(config_dict[strategy])
80
+
81
+ return json.dumps(config_dict, indent=2)
68
82
 
69
83
  def update(self, **kwargs) -> None:
70
84
  for key, value in kwargs.items():
71
85
  if hasattr(self, key):
72
86
  setattr(self, key, value)
87
+
88
+ self.kwargs.update(kwargs)