palimpzest 0.8.7__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
palimpzest/constants.py CHANGED
@@ -136,13 +136,17 @@ class PromptStrategy(str, Enum):
136
136
  performing some task with a specified Model.
137
137
  """
138
138
 
139
+ # aggregation prompt strategies
140
+ AGG = "aggregation"
141
+ AGG_NO_REASONING = "aggregation-no-reasoning"
142
+
139
143
  # filter prompt strategies
140
144
  FILTER = "filter"
141
145
  FILTER_NO_REASONING = "filter-no-reasoning"
142
146
  FILTER_CRITIC = "filter-critic"
143
147
  FILTER_REFINE = "filter-refine"
144
148
  FILTER_MOA_PROPOSER = "filter-mixture-of-agents-proposer"
145
- FILTER_MOA_AGG = "filter-mixture-of-agents-aggregation"
149
+ FILTER_MOA_AGG = "filter-mixture-of-agents-aggregator"
146
150
  FILTER_SPLIT_PROPOSER = "filter-split-proposer"
147
151
  FILTER_SPLIT_MERGER = "filter-split-merger"
148
152
 
@@ -156,10 +160,13 @@ class PromptStrategy(str, Enum):
156
160
  MAP_CRITIC = "map-critic"
157
161
  MAP_REFINE = "map-refine"
158
162
  MAP_MOA_PROPOSER = "map-mixture-of-agents-proposer"
159
- MAP_MOA_AGG = "map-mixture-of-agents-aggregation"
163
+ MAP_MOA_AGG = "map-mixture-of-agents-aggregator"
160
164
  MAP_SPLIT_PROPOSER = "map-split-proposer"
161
165
  MAP_SPLIT_MERGER = "map-split-merger"
162
166
 
167
+ def is_agg_prompt(self):
168
+ return "aggregation" in self.value
169
+
163
170
  def is_filter_prompt(self):
164
171
  return "filter" in self.value
165
172
 
@@ -179,7 +186,7 @@ class PromptStrategy(str, Enum):
179
186
  return "mixture-of-agents-proposer" in self.value
180
187
 
181
188
  def is_moa_aggregator_prompt(self):
182
- return "mixture-of-agents-aggregation" in self.value
189
+ return "mixture-of-agents-aggregator" in self.value
183
190
 
184
191
  def is_split_proposer_prompt(self):
185
192
  return "split-proposer" in self.value
@@ -200,7 +207,8 @@ class Modality(str, Enum):
200
207
  class AggFunc(str, Enum):
201
208
  COUNT = "count"
202
209
  AVERAGE = "average"
203
-
210
+ MIN = "min"
211
+ MAX = "max"
204
212
 
205
213
  class Cardinality(str, Enum):
206
214
  ONE_TO_ONE = "one-to-one"
@@ -534,11 +534,53 @@ class Dataset:
534
534
  operator = Aggregate(input_schema=self.schema, agg_func=AggFunc.AVERAGE)
535
535
  return Dataset(sources=[self], operator=operator, schema=operator.output_schema)
536
536
 
537
+ def min(self) -> Dataset:
538
+ """Apply a min operator to this set"""
539
+ operator = Aggregate(input_schema=self.schema, agg_func=AggFunc.MIN)
540
+ return Dataset(sources=[self], operator=operator, schema=operator.output_schema)
541
+
542
+ def max(self) -> Dataset:
543
+ """Apply a max operator to this set"""
544
+ operator = Aggregate(input_schema=self.schema, agg_func=AggFunc.MAX)
545
+ return Dataset(sources=[self], operator=operator, schema=operator.output_schema)
546
+
537
547
  def groupby(self, groupby: GroupBySig) -> Dataset:
538
548
  output_schema = groupby.output_schema()
539
549
  operator = GroupByAggregate(input_schema=self.schema, output_schema=output_schema, group_by_sig=groupby)
540
550
  return Dataset(sources=[self], operator=operator, schema=output_schema)
541
551
 
552
+ def sem_agg(self, col: dict | type[BaseModel], agg: str, depends_on: str | list[str] | None = None) -> Dataset:
553
+ """
554
+ Apply a semantic aggregation to this set. The `agg` string will be applied using an LLM
555
+ over the entire set of inputs' fields specified in `depends_on` to generate the output `col`.
556
+
557
+ Example:
558
+ sem_agg(
559
+ col={'name': 'overall_sentiment', 'desc': 'The overall sentiment of the reviews', 'type': str},
560
+ agg="Compute the overall sentiment of the reviews as POSITIVE or NEGATIVE.",
561
+ depends_on="review_text",
562
+ )
563
+ """
564
+ # construct new output schema
565
+ new_output_schema = None
566
+ if isinstance(col, dict):
567
+ col_schema = create_schema_from_fields([col])
568
+ new_output_schema = union_schemas([self.schema, col_schema])
569
+ elif issubclass(col, BaseModel):
570
+ assert len(col.model_fields) == 1, "For semantic aggregation, when passing a BaseModel to `col` it must have exactly one field."
571
+ new_output_schema = union_schemas([self.schema, col])
572
+ else:
573
+ raise ValueError("`col` must be a dictionary or a single-field BaseModel.")
574
+
575
+ # enforce type for depends_on
576
+ if isinstance(depends_on, str):
577
+ depends_on = [depends_on]
578
+
579
+ # construct logical operator
580
+ operator = Aggregate(input_schema=self.schema, output_schema=new_output_schema, agg_str=agg, depends_on=depends_on)
581
+
582
+ return Dataset(sources=[self], operator=operator, schema=operator.output_schema)
583
+
542
584
  def retrieve(
543
585
  self,
544
586
  index: Collection,
@@ -194,6 +194,12 @@ class Average(BaseModel):
194
194
  class Count(BaseModel):
195
195
  count: int = Field(description="The count of items in the dataset")
196
196
 
197
+ class Min(BaseModel):
198
+ min: int | float = Field(description="The minimum value of some items in the dataset")
199
+
200
+ class Max(BaseModel):
201
+ max: int | float = Field(description="The maximum value of some items in the dataset")
202
+
197
203
  class OperatorDerivedSchema(BaseModel):
198
204
  """Schema defined by an operator, e.g., a join or a group by"""
199
205
 
@@ -0,0 +1,99 @@
1
+ """This file contains prompts for aggregation operations."""
2
+
3
+ ### BASE PROMPTS ###
4
+ AGG_BASE_SYSTEM_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
5
+ You will be presented with a context and an output field to generate. Your task is to generate a JSON object which aggregates the input and fills in the output field with the correct value.
6
+ You will be provided with a description of each input field and each output field. The field in the output JSON object can be derived using information from the context.
7
+
8
+ {output_format_instruction} Finish your response with a newline character followed by ---
9
+
10
+ An example is shown below:
11
+ ---
12
+ INPUT FIELDS:
13
+ {example_input_fields}
14
+
15
+ OUTPUT FIELDS:
16
+ {example_output_fields}
17
+
18
+ CONTEXT:
19
+ {{{example_context}}}
20
+ {{{second_example_context}}}
21
+ {{{third_example_context}}}{image_disclaimer}{audio_disclaimer}
22
+
23
+ AGGREGATION INSTRUCTION: {example_agg_instruction}
24
+
25
+ Let's think step-by-step in order to answer the question.
26
+
27
+ REASONING: {example_reasoning}
28
+
29
+ ANSWER:
30
+ {{{example_answer}}}
31
+ ---
32
+ """
33
+
34
+ AGG_NO_REASONING_BASE_SYSTEM_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
35
+ You will be presented with a context and an output field to generate. Your task is to generate a JSON object which aggregates the input and fills in the output field with the correct value.
36
+ You will be provided with a description of each input field and each output field. The field in the output JSON object can be derived using information from the context.
37
+
38
+ {output_format_instruction} Finish your response with a newline character followed by ---
39
+
40
+ An example is shown below:
41
+ ---
42
+ INPUT FIELDS:
43
+ {example_input_fields}
44
+
45
+ OUTPUT FIELDS:
46
+ {example_output_fields}
47
+
48
+ CONTEXT:
49
+ {{{example_context}}}
50
+ {{{second_example_context}}}
51
+ {{{third_example_context}}}{image_disclaimer}{audio_disclaimer}
52
+
53
+ AGGREGATION INSTRUCTION: {example_agg_instruction}
54
+
55
+ ANSWER:
56
+ {{{example_answer}}}
57
+ ---
58
+ """
59
+
60
+
61
+ AGG_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
62
+ You will be presented with a context and an output field to generate. Your task is to generate a JSON object which aggregates the input and fills in the output field with the correct value.
63
+ You will be provided with a description of each input field and each output field. The field in the output JSON object can be derived using information from the context.
64
+ {desc_section}
65
+ {output_format_instruction} Finish your response with a newline character followed by ---
66
+ ---
67
+ INPUT FIELDS:
68
+ {input_fields_desc}
69
+
70
+ OUTPUT FIELDS:
71
+ {output_fields_desc}
72
+
73
+ CONTEXT:
74
+ {context}<<image-audio-placeholder>>
75
+
76
+ AGGREGATION INSTRUCTION: {agg_instruction}
77
+
78
+ Let's think step-by-step in order to answer the question.
79
+
80
+ REASONING: """
81
+
82
+ AGG_NO_REASONING_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
83
+ You will be presented with a context and an output field to generate. Your task is to generate a JSON object which aggregates the input and fills in the output field with the correct value.
84
+ You will be provided with a description of each input field and each output field. The field in the output JSON object can be derived using information from the context.
85
+ {desc_section}
86
+ {output_format_instruction} Finish your response with a newline character followed by ---
87
+ ---
88
+ INPUT FIELDS:
89
+ {input_fields_desc}
90
+
91
+ OUTPUT FIELDS:
92
+ {output_fields_desc}
93
+
94
+ CONTEXT:
95
+ {context}<<image-audio-placeholder>>
96
+
97
+ AGGREGATION INSTRUCTION: {agg_instruction}
98
+
99
+ ANSWER: """