palimpzest 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
palimpzest/prompts/prompt_factory.py

@@ -1,4 +1,5 @@
 """This file contains factory methods which return template prompts and return messages for chat payloads."""
+
 import base64
 import json
 from string import Formatter
@@ -82,6 +83,7 @@ from palimpzest.prompts.util_phrases import (
 
 class PromptFactory:
     """Factory class for generating prompts for the Generator given the input(s)."""
+
     BASE_SYSTEM_PROMPT_MAP = {
         PromptStrategy.COT_BOOL: COT_BOOL_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_BOOL_IMAGE: COT_BOOL_BASE_SYSTEM_PROMPT,
@@ -92,6 +94,7 @@ class PromptFactory:
         PromptStrategy.COT_QA_IMAGE_CRITIC: None,
         PromptStrategy.COT_QA_IMAGE_REFINE: None,
         PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT,
+        PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT,
         PromptStrategy.COT_MOA_AGG: COT_MOA_AGG_BASE_SYSTEM_PROMPT,
     }
     BASE_USER_PROMPT_MAP = {
@@ -104,6 +107,7 @@ class PromptFactory:
         PromptStrategy.COT_QA_IMAGE_CRITIC: BASE_CRITIQUE_PROMPT,
         PromptStrategy.COT_QA_IMAGE_REFINE: BASE_REFINEMENT_PROMPT,
         PromptStrategy.COT_MOA_PROPOSER: COT_MOA_PROPOSER_BASE_USER_PROMPT,
+        PromptStrategy.COT_MOA_PROPOSER_IMAGE: COT_MOA_PROPOSER_BASE_USER_PROMPT,
         PromptStrategy.COT_MOA_AGG: COT_MOA_AGG_BASE_USER_PROMPT,
     }
 
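Note: the two hunks above register the new COT_MOA_PROPOSER_IMAGE strategy in both prompt maps, reusing the text proposer templates. A minimal lookup sketch of the effect (the PromptStrategy import path is an assumption, not shown in this diff):

```python
# Sketch only: illustrates the effect of the new map entries shown above.
from palimpzest.constants import PromptStrategy  # import path assumed, not confirmed by this diff
from palimpzest.prompts.prompt_factory import PromptFactory

system_prompt = PromptFactory.BASE_SYSTEM_PROMPT_MAP[PromptStrategy.COT_MOA_PROPOSER_IMAGE]
user_prompt = PromptFactory.BASE_USER_PROMPT_MAP[PromptStrategy.COT_MOA_PROPOSER_IMAGE]

# Both lookups resolve to the same templates as COT_MOA_PROPOSER; the image content is
# presumably attached later as separate image messages rather than via a different prompt.
```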
@@ -148,13 +152,13 @@ class PromptFactory:
             longest_field_name, longest_field_length = sorted_fields[0]
 
             # trim the field
-            context_factor = MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT / (total_context_len * TOKENS_PER_CHARACTER)
+            context_factor = MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT / (total_context_len * TOKENS_PER_CHARACTER)
             keep_frac_idx = int(longest_field_length * context_factor)
             context[longest_field_name] = context[longest_field_name][:keep_frac_idx]
 
             # update total context length
             total_context_len = len(json.dumps(context, indent=2))
-
+
         return json.dumps(context, indent=2)
 
     def _get_input_fields(self, candidate: DataRecord, **kwargs) -> list[str]:
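Aside: the trimming loop above (only reformatted in this release) scales the longest field by the ratio of the model's token budget to the estimated token count of the serialized context. A rough worked sketch with hypothetical constant values (not the constants palimpzest actually defines):

```python
import json

# Hypothetical values for illustration only.
MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT = 32_000
TOKENS_PER_CHARACTER = 0.25

context = {"title": "short", "body": "x" * 300_000}  # one oversized field dominates
total_context_len = len(json.dumps(context, indent=2))

# Estimated tokens ~= 300k chars * 0.25 = ~75k, well over the 32k budget, so the
# longest field is truncated to roughly 32k / 75k ~= 43% of its current length.
context_factor = MIXTRAL_LLAMA_CONTEXT_TOKENS_LIMIT / (total_context_len * TOKENS_PER_CHARACTER)
keep_frac_idx = int(len(context["body"]) * context_factor)
context["body"] = context["body"][:keep_frac_idx]
print(len(context["body"]))  # roughly 128,000 characters kept
```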
@@ -201,7 +205,11 @@ class PromptFactory:
         """
         output_fields_desc = ""
         output_schema: Schema = kwargs.get("output_schema")
-        if self.prompt_strategy.is_cot_qa_prompt():
+        if (
+            self.prompt_strategy.is_cot_qa_prompt()
+            or self.prompt_strategy.is_moa_proposer_prompt()
+            or self.prompt_strategy.is_moa_aggregator_prompt()
+        ):
             assert output_schema is not None, "Output schema must be provided for convert prompts."
 
             field_desc_map = output_schema.field_desc_map()
@@ -230,14 +238,16 @@ class PromptFactory:
 
         Args:
             kwargs: The keyword arguments provided by the user.
-
+
         Returns:
             str | None: The original output.
         """
         original_output = kwargs.get("original_output")
         if self.prompt_strategy.is_critic_prompt() or self.prompt_strategy.is_refine_prompt():
-            assert original_output is not None, "Original output must be provided for critique and refinement operations."
-
+            assert original_output is not None, (
+                "Original output must be provided for critique and refinement operations."
+            )
+
         return original_output
 
     def _get_critique_output(self, **kwargs) -> str | None:
@@ -246,7 +256,7 @@ class PromptFactory:
 
         Args:
             kwargs: The keyword arguments provided by the user.
-
+
         Returns:
             str | None: The critique output.
         """
@@ -259,10 +269,10 @@ class PromptFactory:
     def _get_model_responses(self, **kwargs) -> str | None:
         """
         Returns the model responses for the mixture-of-agents aggregation operation.
-
+
         Args:
             kwargs: The keyword arguments provided by the user.
-
+
         Returns:
             str | None: The model responses.
         """
@@ -314,9 +324,7 @@ class PromptFactory:
         critique_criteria = None
         if self.prompt_strategy.is_critic_prompt():
             critique_criteria = (
-                COT_QA_IMAGE_CRITIQUE_CRITERIA
-                if self.prompt_strategy.is_image_prompt()
-                else COT_QA_CRITIQUE_CRITERIA
+                COT_QA_IMAGE_CRITIQUE_CRITERIA if self.prompt_strategy.is_image_prompt() else COT_QA_CRITIQUE_CRITERIA
             )
 
         return critique_criteria
@@ -467,16 +475,18 @@ class PromptFactory:
 
         return prompt_strategy_to_example_answer.get(self.prompt_strategy)
 
-    def _get_all_format_kwargs(self, candidate: DataRecord, input_fields: list[str], output_fields: list[str], **kwargs) -> dict:
+    def _get_all_format_kwargs(
+        self, candidate: DataRecord, input_fields: list[str], output_fields: list[str], **kwargs
+    ) -> dict:
         """
         Returns a dictionary containing all the format kwargs for templating the prompts.
-
+
         Args:
             candidate (DataRecord): The input record.
             input_fields (list[str]): The input fields.
             output_fields (list[str]): The output fields.
             kwargs: The keyword arguments provided by the user.
-
+
         Returns:
             dict: The dictionary containing all the format kwargs.
         """
@@ -517,7 +527,7 @@ class PromptFactory:
         Args:
             candidate (DataRecord): The input record.
             input_fields (list[str]): The list of input fields.
-
+
         Returns:
             list[dict]: The image messages for the chat payload.
         """
@@ -529,15 +539,19 @@ class PromptFactory:
 
             # image filepath (or list of image filepaths)
             if isinstance(field_type, ImageFilepathField):
-                with open(field_value, 'rb') as f:
-                    base64_image_str = base64.b64encode(f.read()).decode('utf-8')
-                image_messages.append({"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"})
+                with open(field_value, "rb") as f:
+                    base64_image_str = base64.b64encode(f.read()).decode("utf-8")
+                image_messages.append(
+                    {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                )
 
             elif hasattr(field_type, "element_type") and issubclass(field_type.element_type, ImageFilepathField):
                 for image_filepath in field_value:
-                    with open(image_filepath, 'rb') as f:
-                        base64_image_str = base64.b64encode(f.read()).decode('utf-8')
-                    image_messages.append({"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"})
+                    with open(image_filepath, "rb") as f:
+                        base64_image_str = base64.b64encode(f.read()).decode("utf-8")
+                    image_messages.append(
+                        {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                    )
 
             # image url (or list of image urls)
             elif isinstance(field_type, ImageURLField):
@@ -550,12 +564,16 @@ class PromptFactory:
             # pre-encoded images (or list of pre-encoded images)
             elif isinstance(field_type, ImageBase64Field):
                 base64_image_str = field_value.decode("utf-8")
-                image_messages.append({"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"})
+                image_messages.append(
+                    {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                )
 
             elif hasattr(field_type, "element_type") and issubclass(field_type.element_type, ImageBase64Field):
                 for base64_image in field_value:
                     base64_image_str = base64_image.decode("utf-8")
-                    image_messages.append({"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"})
+                    image_messages.append(
+                        {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}
+                    )
 
         return image_messages
 
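Aside: every branch reformatted above follows the same pattern — obtain base64-encoded image bytes and wrap them in a single chat message whose content is a JPEG data URI. A self-contained sketch of that pattern (the helper name and sample path are illustrative, not part of palimpzest):

```python
import base64


def make_image_message(image_filepath: str) -> dict:
    """Build the {"role", "type", "content"} image message used in the hunks above."""
    with open(image_filepath, "rb") as f:
        base64_image_str = base64.b64encode(f.read()).decode("utf-8")
    return {"role": "user", "type": "image", "content": f"data:image/jpeg;base64,{base64_image_str}"}


# Usage (hypothetical path):
# message = make_image_message("samples/page-1.jpg")
```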
@@ -595,15 +613,15 @@ class PromptFactory:
 
         # get any image messages for the chat payload (will be an empty list if this is not an image prompt)
         image_messages = (
-            self._create_image_messages(candidate, input_fields)
-            if self.prompt_strategy.is_image_prompt()
-            else []
+            self._create_image_messages(candidate, input_fields) if self.prompt_strategy.is_image_prompt() else []
         )
 
         # get any original messages for critique and refinement operations
         original_messages = kwargs.get("original_messages")
         if self.prompt_strategy.is_critic_prompt() or self.prompt_strategy.is_refine_prompt():
-            assert original_messages is not None, "Original messages must be provided for critique and refinement operations."
+            assert original_messages is not None, (
+                "Original messages must be provided for critique and refinement operations."
+            )
 
         # construct the user messages based on the prompt strategy
         user_messages = []
@@ -661,17 +679,19 @@ class PromptFactory:
                 f"Input fields: {input_fields}\n"
             )
             assert fields_check, err_msg
-
+
         # build set of format kwargs
         format_kwargs = {
-            field_name: "<bytes>" if isinstance(candidate.get_field_type(field_name), BytesField) else candidate[field_name]
+            field_name: "<bytes>"
+            if isinstance(candidate.get_field_type(field_name), BytesField)
+            else candidate[field_name]
             for field_name in input_fields
         }
 
         # split prompt on <<image-placeholder>> if it exists
         if "<<image-placeholder>>" in user_prompt:
             raise NotImplementedError("Image prompts are not yet supported.")
-
+
         prompt_sections = user_prompt.split("<<image-placeholder>>")
         messages = [{"role": "user", "type": "text", "content": prompt_sections[0].format(**format_kwargs)}]
 
@@ -686,7 +706,7 @@ class PromptFactory:
     def create_messages(self, candidate: DataRecord, output_fields: list[str], **kwargs) -> list[dict]:
         """
        Creates the messages for the chat payload based on the prompt strategy.
-
+
         Each message will be a dictionary with the following format:
         {
             "role": "user" | "system",
palimpzest/query/optimizer/optimizer.py

@@ -219,6 +219,9 @@ class Optimizer:
             allow_conventional_query=self.allow_conventional_query,
             allow_code_synth=self.allow_code_synth,
             allow_token_reduction=self.allow_token_reduction,
+            allow_rag_reduction=self.allow_rag_reduction,
+            allow_mixtures=self.allow_mixtures,
+            allow_critic=self.allow_critic,
             optimization_strategy_type=self.optimization_strategy_type,
             use_final_op_quality=self.use_final_op_quality,
         )
palimpzest/query/processor/query_processor_factory.py

@@ -123,6 +123,9 @@ class QueryProcessorFactory:
             allow_conventional_query=config.allow_conventional_query,
             allow_code_synth=config.allow_code_synth,
             allow_token_reduction=config.allow_token_reduction,
+            allow_rag_reduction=config.allow_rag_reduction,
+            allow_mixtures=config.allow_mixtures,
+            allow_critic=config.allow_critic,
             optimization_strategy_type=optimizer_strategy,
             use_final_op_quality=config.use_final_op_quality
         )
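Note: the two hunks above forward three additional optimization toggles (allow_rag_reduction, allow_mixtures, allow_critic) from the processor configuration through the Optimizer, alongside the existing allow_code_synth and allow_token_reduction flags. A hedged sketch of how a caller might set them, assuming the config object exposes matching boolean fields (its exact class name, import path, and defaults are not shown in this diff):

```python
# Sketch only: the class name and import path below are assumptions; only the keyword
# names (allow_rag_reduction, allow_mixtures, allow_critic) come from the hunks above.
from palimpzest.query.processor.config import QueryProcessorConfig  # assumed location

config = QueryProcessorConfig(
    allow_code_synth=False,
    allow_token_reduction=False,
    allow_rag_reduction=True,  # now forwarded to the Optimizer as of 0.6.3
    allow_mixtures=True,       # presumably gates mixture-of-agents plans
    allow_critic=True,         # presumably gates critique/refine plans
)
```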
palimpzest-0.6.1.dist-info/METADATA → palimpzest-0.6.3.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: palimpzest
-Version: 0.6.1
+Version: 0.6.3
 Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
 Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
 Project-URL: homepage, https://palimpzest.org
palimpzest-0.6.1.dist-info/RECORD → palimpzest-0.6.3.dist-info/RECORD

@@ -20,7 +20,7 @@ palimpzest/prompts/critique_and_refine_convert_prompts.py,sha256=WoXExBxQ7twswd9
 palimpzest/prompts/filter_prompts.py,sha256=iQjn-39h3L0E5wng_UPgAXRHrP1ok329TXpOgZ6Wn1w,2372
 palimpzest/prompts/moa_aggregator_convert_prompts.py,sha256=BQRrtGdr53PTqvXzmFh8kfQ_w9KoKw-zTtmdo-8RFjo,2887
 palimpzest/prompts/moa_proposer_convert_prompts.py,sha256=d_hOh0-0m6HWBDAxUu7W3WyQtSTlUvqio3nzpnX2bxM,3642
-palimpzest/prompts/prompt_factory.py,sha256=VzZNH9kblFXYn4YKVKudJ21Y5Q-3tL6ZgFmNhBNTGjQ,31921
+palimpzest/prompts/prompt_factory.py,sha256=w6h4eQnaGVxv49A8hlAzij7yP8GaKl6tgSQbt3HXFDU,32350
 palimpzest/prompts/util_phrases.py,sha256=NWrcHfjJyiOY16Jyt7R50moVnlJDyvSBZ9kBqyX2WQo,751
 palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,7 +46,7 @@ palimpzest/query/operators/scan.py,sha256=z6wUVxuhr5VqPIeUxb3hxhkaljKpDc_exzGMe4
 palimpzest/query/operators/token_reduction_convert.py,sha256=yy9GYMPt-LQxPdwIgVyhCb9hi_8FRorGU8XqK_3jq9g,8513
 palimpzest/query/optimizer/__init__.py,sha256=pl1co0dCwDZkAQ-0oiwT81GjvB0Oc59WiwmmYF8k73s,3109
 palimpzest/query/optimizer/cost_model.py,sha256=zSK2Nsya96pR5Zh67cr_O5q0qtPa08--Tchn0cYvE58,44837
-palimpzest/query/optimizer/optimizer.py,sha256=lBfNYgbyyE_0bdZCGnz9oicyG3gFUdkRnzcyJ31_36o,20644
+palimpzest/query/optimizer/optimizer.py,sha256=EQ5yKb1oa0EdqiK2R8Ey7PPhC-YjXZquA-XWIWq3fso,20794
 palimpzest/query/optimizer/optimizer_strategy.py,sha256=-1xx_cviSJw6PH8XiQQK9qe4YPnAmxZEAhNVKdxRgH4,12894
 palimpzest/query/optimizer/plan.py,sha256=xlWB3sY5qDac3o6IHoWcuGK5Azv-4C2_zKKx4PzxEh4,5768
 palimpzest/query/optimizer/primitives.py,sha256=ikaX8YcDM3IrxKt98OX-mYujRYQtdMlDgsFKyjchMMA,4061
@@ -57,7 +57,7 @@ palimpzest/query/processor/config.py,sha256=kOhBxAZ3OeDDlQ2qMII1i2EorFpSSQbEFVFr
 palimpzest/query/processor/mab_sentinel_processor.py,sha256=xShPVW8ejhNR_wQ8ofPF2hL7iGj8tlFhsc2wiiIPb9Y,45600
 palimpzest/query/processor/nosentinel_processor.py,sha256=alPq1tnZvqxCSO5LYRCjlF4CB4v7NbzH_BHB-DSuehI,26478
 palimpzest/query/processor/query_processor.py,sha256=aR0OBmaZZt4_KSBjHy_KCT6pIBI8WTfT8TTcIkgPBt4,11109
-palimpzest/query/processor/query_processor_factory.py,sha256=663_V-AJK0VsBZNwgnqYu84g0rmtKf-U-xJWnps3XWs,8239
+palimpzest/query/processor/query_processor_factory.py,sha256=OjmSDVAwWcpwLbi7b97pBkV2sJNo4Jv3hkxD94VyOlI,8395
 palimpzest/query/processor/random_sampling_sentinel_processor.py,sha256=VFC0HFY3OLxAjpdmBt41K0rw8C_1ylECFjbRYsui3rU,30440
 palimpzest/query/processor/streaming_processor.py,sha256=4-XvgAjUTnO3Dgdxm9VSw4udREjNWTW526Rggy5Do7s,6501
 palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -80,8 +80,8 @@ palimpzest/utils/progress.py,sha256=GYmPUBdG7xmqbqj1UiSNP-pWZKmRMLX797MBgrOPugM,
 palimpzest/utils/sandbox.py,sha256=Ge96gmzqeOGlNkMCG9A95_PB8wRQbvTFua136of8FcA,6465
 palimpzest/utils/token_reduction_helpers.py,sha256=Ob95PcqCsbGLiBdQ-4YQsWGWRppb2hvQyt0gi1fzL-Y,3855
 palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
-palimpzest-0.6.1.dist-info/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
-palimpzest-0.6.1.dist-info/METADATA,sha256=VxPI4-vfq3Fm3l3PjxTpdHGbDclIQNHo1Ag1enfAyMU,7837
-palimpzest-0.6.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-palimpzest-0.6.1.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
-palimpzest-0.6.1.dist-info/RECORD,,
+palimpzest-0.6.3.dist-info/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
+palimpzest-0.6.3.dist-info/METADATA,sha256=FxQ3szdZPEMH-y0zkOrTPEAN2jLVPOcVS7A9am-Hy6o,7837
+palimpzest-0.6.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+palimpzest-0.6.3.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
+palimpzest-0.6.3.dist-info/RECORD,,