palimpzest 0.7.20__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/__init__.py +37 -6
- palimpzest/agents/__init__.py +0 -0
- palimpzest/agents/compute_agents.py +0 -0
- palimpzest/agents/search_agents.py +637 -0
- palimpzest/constants.py +259 -197
- palimpzest/core/data/context.py +393 -0
- palimpzest/core/data/context_manager.py +163 -0
- palimpzest/core/data/dataset.py +634 -0
- palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
- palimpzest/core/elements/groupbysig.py +16 -13
- palimpzest/core/elements/records.py +166 -75
- palimpzest/core/lib/schemas.py +152 -390
- palimpzest/core/{data/dataclasses.py → models.py} +306 -170
- palimpzest/policy.py +2 -27
- palimpzest/prompts/__init__.py +35 -5
- palimpzest/prompts/agent_prompts.py +357 -0
- palimpzest/prompts/context_search.py +9 -0
- palimpzest/prompts/convert_prompts.py +61 -5
- palimpzest/prompts/filter_prompts.py +50 -5
- palimpzest/prompts/join_prompts.py +163 -0
- palimpzest/prompts/moa_proposer_convert_prompts.py +5 -5
- palimpzest/prompts/prompt_factory.py +358 -46
- palimpzest/prompts/validator.py +239 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
- palimpzest/query/execution/execution_strategy.py +210 -317
- palimpzest/query/execution/execution_strategy_type.py +5 -7
- palimpzest/query/execution/mab_execution_strategy.py +249 -136
- palimpzest/query/execution/parallel_execution_strategy.py +153 -244
- palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
- palimpzest/query/generators/generators.py +157 -330
- palimpzest/query/operators/__init__.py +15 -5
- palimpzest/query/operators/aggregate.py +50 -33
- palimpzest/query/operators/compute.py +201 -0
- palimpzest/query/operators/convert.py +27 -21
- palimpzest/query/operators/critique_and_refine_convert.py +7 -5
- palimpzest/query/operators/distinct.py +62 -0
- palimpzest/query/operators/filter.py +22 -13
- palimpzest/query/operators/join.py +402 -0
- palimpzest/query/operators/limit.py +3 -3
- palimpzest/query/operators/logical.py +198 -80
- palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
- palimpzest/query/operators/physical.py +27 -21
- palimpzest/query/operators/project.py +3 -3
- palimpzest/query/operators/rag_convert.py +7 -7
- palimpzest/query/operators/retrieve.py +9 -9
- palimpzest/query/operators/scan.py +81 -42
- palimpzest/query/operators/search.py +524 -0
- palimpzest/query/operators/split_convert.py +10 -8
- palimpzest/query/optimizer/__init__.py +7 -9
- palimpzest/query/optimizer/cost_model.py +108 -441
- palimpzest/query/optimizer/optimizer.py +123 -181
- palimpzest/query/optimizer/optimizer_strategy.py +66 -61
- palimpzest/query/optimizer/plan.py +352 -67
- palimpzest/query/optimizer/primitives.py +43 -19
- palimpzest/query/optimizer/rules.py +484 -646
- palimpzest/query/optimizer/tasks.py +127 -58
- palimpzest/query/processor/config.py +41 -76
- palimpzest/query/processor/query_processor.py +73 -18
- palimpzest/query/processor/query_processor_factory.py +46 -38
- palimpzest/schemabuilder/schema_builder.py +15 -28
- palimpzest/utils/model_helpers.py +27 -77
- palimpzest/utils/progress.py +114 -102
- palimpzest/validator/__init__.py +0 -0
- palimpzest/validator/validator.py +306 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/METADATA +6 -1
- palimpzest-0.8.0.dist-info/RECORD +95 -0
- palimpzest/core/lib/fields.py +0 -141
- palimpzest/prompts/code_synthesis_prompts.py +0 -28
- palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
- palimpzest/query/generators/api_client_factory.py +0 -30
- palimpzest/query/operators/code_synthesis_convert.py +0 -488
- palimpzest/query/operators/map.py +0 -130
- palimpzest/query/processor/nosentinel_processor.py +0 -33
- palimpzest/query/processor/processing_strategy_type.py +0 -28
- palimpzest/query/processor/sentinel_processor.py +0 -88
- palimpzest/query/processor/streaming_processor.py +0 -149
- palimpzest/sets.py +0 -405
- palimpzest/utils/datareader_helpers.py +0 -61
- palimpzest/utils/demo_helpers.py +0 -75
- palimpzest/utils/field_helpers.py +0 -69
- palimpzest/utils/generation_helpers.py +0 -69
- palimpzest/utils/sandbox.py +0 -183
- palimpzest-0.7.20.dist-info/RECORD +0 -95
- /palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/WHEEL +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import litellm
|
|
5
|
+
from colorama import Fore, Style
|
|
6
|
+
|
|
7
|
+
from palimpzest.constants import MODEL_CARDS, Cardinality, Model, PromptStrategy
|
|
8
|
+
from palimpzest.core.elements.records import DataRecord
|
|
9
|
+
from palimpzest.core.models import GenerationStats
|
|
10
|
+
from palimpzest.prompts import (
|
|
11
|
+
FLAT_MAP_IMAGE_VALIDATOR_PROMPT,
|
|
12
|
+
FLAT_MAP_VALIDATOR_PROMPT,
|
|
13
|
+
MAP_IMAGE_VALIDATOR_PROMPT,
|
|
14
|
+
MAP_VALIDATOR_PROMPT,
|
|
15
|
+
RETRIEVE_VALIDATOR_PROMPT,
|
|
16
|
+
PromptFactory,
|
|
17
|
+
)
|
|
18
|
+
from palimpzest.query.generators.generators import get_json_from_answer
|
|
19
|
+
from palimpzest.query.operators.convert import LLMConvert
|
|
20
|
+
from palimpzest.query.operators.filter import LLMFilter
|
|
21
|
+
from palimpzest.query.operators.join import JoinOp
|
|
22
|
+
from palimpzest.query.operators.retrieve import RetrieveOp
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Validator:
    """
    The Validator is used during optimization to score the output of physical operator(s) and physical plan(s).

    Subclasses may override the public ``*_score_fn`` hooks to provide custom scoring logic.
    When a hook is not overridden (i.e. it raises NotImplementedError), the corresponding
    ``_default_*_score_fn`` is used instead: maps, flat-maps, and retrieves are scored by an
    LLM judge; filters and joins are scored by re-executing the operator with a strong
    validator model and comparing labels.

    TODO: support end-to-end labels; will likely require a different SentinelExecutionStrategy which
          executes the full input to produce an output, evaluates the output, and then updates
          intermediate operator(s) based on the evaluation.
    """

    def __init__(self, model: Model = Model.o4_MINI):
        # model whose MODEL_CARDS entry is used to price judge calls
        self.model = model
        # caches mapping hash(predicate + input record(s)) --> the validator's boolean label,
        # so repeated validations of the same input do not re-invoke the (expensive) LLM
        self.filter_cache: dict[int, bool] = {}
        self.join_cache: dict[int, bool] = {}

    def map_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """User-overridable hook to score a one-to-one map output."""
        raise NotImplementedError("Validator.map_score_fn not implemented.")

    def flat_map_score_fn(self, fields: list[str], input_record: dict, output: list[dict]) -> float | None:
        """User-overridable hook to score a one-to-many (flat) map output."""
        raise NotImplementedError("Validator.flat_map_score_fn not implemented.")

    def filter_score_fn(self, filter_str: str, input_record: dict, output: bool) -> float | None:
        """User-overridable hook to score a filter decision."""
        raise NotImplementedError("Validator.filter_score_fn not implemented.")

    def join_score_fn(self, condition: str, left_input_record: dict, right_input_record: dict, output: bool) -> float | None:
        """User-overridable hook to score a join decision."""
        raise NotImplementedError("Validator.join_score_fn not implemented.")

    def retrieve_score_fn(self, fields: list[str], input_record: dict, output: dict) -> float | None:
        """User-overridable hook to score a retrieve output."""
        # BUGFIX: previously raised NotImplementedError("Validator.map_score_fn not implemented.")
        # (copy-paste error), which misreported which hook was missing.
        raise NotImplementedError("Validator.retrieve_score_fn not implemented.")

    def _get_gen_stats_from_completion(self, completion, start_time: float) -> GenerationStats:
        """
        Extract generation stats (tokens, cost, latency) from the given completion response.
        """
        usage = completion.usage.model_dump()

        # get cost per input/output token for the model and parse number of input and output tokens
        usd_per_input_token = MODEL_CARDS[self.model.value]["usd_per_input_token"]
        usd_per_output_token = MODEL_CARDS[self.model.value]["usd_per_output_token"]
        input_tokens = usage["prompt_tokens"]
        output_tokens = usage["completion_tokens"]

        return GenerationStats(
            model_name=self.model.value,
            llm_call_duration_secs=time.time() - start_time,
            fn_call_duration_secs=0.0,
            total_input_tokens=input_tokens,
            total_output_tokens=output_tokens,
            total_input_cost=input_tokens * usd_per_input_token,
            total_output_cost=output_tokens * usd_per_output_token,
            cost_per_record=input_tokens * usd_per_input_token + output_tokens * usd_per_output_token,
            total_llm_calls=1,
        )

    def _get_gen_stats_from_record_op_stats(self, record_op_stats) -> GenerationStats:
        """
        Build a GenerationStats from a single record_op_stats entry produced by re-executing
        an operator; shared by the default filter and join score functions (previously this
        construction was duplicated verbatim in both).
        """
        return GenerationStats(
            model_name=self.model.value,
            total_input_tokens=record_op_stats.total_input_tokens,
            total_output_tokens=record_op_stats.total_output_tokens,
            total_input_cost=record_op_stats.total_input_cost,
            total_output_cost=record_op_stats.total_output_cost,
            cost_per_record=record_op_stats.cost_per_record,
            llm_call_duration_secs=record_op_stats.llm_call_duration_secs,
            fn_call_duration_secs=record_op_stats.fn_call_duration_secs,
            total_llm_calls=record_op_stats.total_llm_calls,
        )

    def _invoke_judge(
        self,
        validator_prompt: str,
        input_messages: list[dict],
        output_message: str,
        cardinality: Cardinality,
        verbose: bool = False,
    ) -> tuple[float | None, GenerationStats]:
        """
        Send the validator prompt + input messages + output message to the judge model and
        parse its JSON evaluation into a single score (the mean of the per-field judgments).

        Returns (score, gen_stats); score is None if the judge call or parsing fails, since
        validation is best-effort and must not abort optimization. Shared by the default
        map / flat-map / retrieve score functions (previously triplicated).
        """
        score, gen_stats = None, GenerationStats()
        try:
            start_time = time.time()
            val_messages = (
                [{"role": "system", "content": validator_prompt}]
                + input_messages
                + [{"role": "user", "content": output_message}]
            )
            # NOTE(review): the judge model is hard-coded to o4-mini even though self.model
            # is configurable and is used for pricing in _get_gen_stats_from_completion --
            # confirm whether this call should honor self.model instead
            completion = litellm.completion(model="openai/o4-mini", messages=val_messages)
            completion_text = completion.choices[0].message.content
            gen_stats = self._get_gen_stats_from_completion(completion, start_time)
            if verbose:
                input_str = "\n".join(
                    msg["content"] for msg in input_messages + [{"role": "user", "content": output_message}]
                )
                print(f"INPUT:\n{input_str}")
                print(Fore.GREEN + f"{completion_text}\n" + Style.RESET_ALL)

            # parse the evaluation and average the per-field judgments into one score
            if cardinality == Cardinality.ONE_TO_ONE:
                eval_dict: dict = get_json_from_answer(completion_text, Model.o4_MINI, cardinality)
                score = sum(eval_dict.values()) / len(eval_dict)
            else:
                eval_dicts: list[dict] = get_json_from_answer(completion_text, Model.o4_MINI, cardinality)
                all_qualities = [quality for eval_dict in eval_dicts for quality in eval_dict.values()]
                score = sum(all_qualities) / len(all_qualities)

        except Exception:
            # best-effort: a failed judge call simply yields score=None
            pass

        return score, gen_stats

    def _default_map_score_fn(self, op: LLMConvert, fields: list[str], input_record: DataRecord, output: dict) -> tuple[float | None, GenerationStats]:
        """
        Compute the quality of the generated map output for the given fields and input_record
        by asking an LLM judge to evaluate the output against the operator's input messages.
        """
        # create prompt factory matching the operator's modality
        prompt_strategy = PromptStrategy.COT_QA_IMAGE if op.is_image_conversion() else PromptStrategy.COT_QA
        factory = PromptFactory(prompt_strategy, Model.o4_MINI, Cardinality.ONE_TO_ONE)

        # get the input messages; strip out the system message(s)
        msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
        messages = factory.create_messages(input_record, fields, **msg_kwargs)
        input_messages = [msg for msg in messages if msg["role"] != "system"]
        output_str = json.dumps(output, indent=2)
        output_message = f"OUTPUT:\n--------\n{output_str}\n\nEVALUATION: "

        # invoke the judge
        validator_prompt = MAP_IMAGE_VALIDATOR_PROMPT if op.is_image_conversion() else MAP_VALIDATOR_PROMPT
        return self._invoke_judge(validator_prompt, input_messages, output_message, Cardinality.ONE_TO_ONE, verbose=True)

    def _default_flat_map_score_fn(self, op: LLMConvert, fields: list[str], input_record: dict, output: list[dict]) -> tuple[float | None, GenerationStats]:
        """
        Compute the quality of the generated one-to-many output records for the given fields
        and input_record by asking an LLM judge; the score averages the per-field judgments
        across all output records.
        """
        # create prompt factory matching the operator's modality
        prompt_strategy = PromptStrategy.COT_QA_IMAGE if op.is_image_conversion() else PromptStrategy.COT_QA
        factory = PromptFactory(prompt_strategy, Model.o4_MINI, Cardinality.ONE_TO_MANY)

        # get the input messages; strip out the system message(s)
        msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
        messages = factory.create_messages(input_record, fields, **msg_kwargs)
        input_messages = [msg for msg in messages if msg["role"] != "system"]
        output_str = json.dumps(output, indent=2)
        output_message = f"OUTPUTS:\n--------\n{output_str}\n\nEVALUATION: "

        # invoke the judge (no debug printing for flat maps, matching the original behavior)
        validator_prompt = FLAT_MAP_IMAGE_VALIDATOR_PROMPT if op.is_image_conversion() else FLAT_MAP_VALIDATOR_PROMPT
        return self._invoke_judge(validator_prompt, input_messages, output_message, Cardinality.ONE_TO_MANY, verbose=False)

    def _default_filter_score_fn(self, op: LLMFilter, filter_str: str, input_record: dict, output: bool) -> tuple[float | None, GenerationStats]:
        """
        Score the filter decision by re-executing the filter with a strong validator model
        and comparing its label to the given output. Labels are cached per (filter, input)
        so repeated validations of the same record are free.
        """
        score, gen_stats = None, GenerationStats()
        filter_input_hash = hash(f"{filter_str}{hash(input_record)}")
        label = self.filter_cache.get(filter_input_hash, None)
        if label is None:
            validator_op: LLMFilter = op.copy()
            validator_op.model = Model.o4_MINI
            try:
                target_record_set = validator_op(input_record)
                label = target_record_set[0].passed_operator
                self.filter_cache[filter_input_hash] = label
                score = label == output
                gen_stats = self._get_gen_stats_from_record_op_stats(target_record_set.record_op_stats[0])

            except Exception:
                # best-effort: leave score=None if the validator execution fails
                pass

        else:
            score = label == output

        return score, gen_stats

    def _default_join_score_fn(self, op: JoinOp, condition: str, left_input_record: DataRecord, right_input_record: DataRecord, output: bool) -> tuple[float | None, GenerationStats]:
        """
        Score the join decision by re-executing the join with a strong validator model
        and comparing its label to the given output. Labels are cached per
        (condition, left input, right input).
        """
        score, gen_stats = None, GenerationStats()
        join_input_hash = hash(f"{condition}{hash(left_input_record)}{hash(right_input_record)}")
        label = self.join_cache.get(join_input_hash, None)
        if label is None:
            validator_op: JoinOp = op.copy()
            validator_op.model = Model.o4_MINI
            try:
                target_record_set = validator_op([left_input_record], [right_input_record])
                label = target_record_set[0].passed_operator
                self.join_cache[join_input_hash] = label
                score = label == output
                gen_stats = self._get_gen_stats_from_record_op_stats(target_record_set.record_op_stats[0])

            except Exception:
                # best-effort: leave score=None if the validator execution fails
                pass

        else:
            score = label == output

        return score, gen_stats

    def _default_retrieve_score_fn(self, op: RetrieveOp, fields: list[str], input_record: DataRecord, output: dict) -> tuple[float | None, GenerationStats]:
        """
        Compute the quality of the retrieved output for the given fields and input_record
        by asking an LLM judge to evaluate the output against the operator's input messages.
        """
        # TODO: retrieve k=25; score each item based on relevance; compute F1
        # TODO: support retrieval over images
        # create prompt factory
        factory = PromptFactory(PromptStrategy.COT_QA, Model.o4_MINI, Cardinality.ONE_TO_ONE)

        # get the input messages; strip out the system message(s)
        msg_kwargs = {"output_schema": op.output_schema, "project_cols": op.get_input_fields()}
        messages = factory.create_messages(input_record, fields, **msg_kwargs)
        input_messages = [msg for msg in messages if msg["role"] != "system"]
        output_str = json.dumps(output, indent=2)
        output_message = f"OUTPUT:\n--------\n{output_str}\n\nEVALUATION: "

        # invoke the judge
        return self._invoke_judge(RETRIEVE_VALIDATOR_PROMPT, input_messages, output_message, Cardinality.ONE_TO_ONE, verbose=True)

    def _score_map(self, op: LLMConvert, fields: list[str], input_record: DataRecord, output: dict, full_hash: str) -> tuple[float | None, GenerationStats, str]:
        """Score a map output, preferring the user-provided hook over the default LLM judge."""
        try:
            out = self.map_score_fn(fields, input_record.to_dict(), output)
            # hooks may return just a score, or a (score, gen_stats) tuple
            score, gen_stats = out if isinstance(out, tuple) else (out, GenerationStats())
        except NotImplementedError:
            score, gen_stats = self._default_map_score_fn(op, fields, input_record, output)
        return score, gen_stats, full_hash

    def _score_flat_map(self, op: LLMConvert, fields: list[str], input_record: DataRecord, output: list[dict], full_hash: str) -> tuple[float | None, GenerationStats, str]:
        """Score a flat-map output, preferring the user-provided hook over the default LLM judge."""
        try:
            out = self.flat_map_score_fn(fields, input_record.to_dict(), output)
            score, gen_stats = out if isinstance(out, tuple) else (out, GenerationStats())
        except NotImplementedError:
            score, gen_stats = self._default_flat_map_score_fn(op, fields, input_record, output)
        return score, gen_stats, full_hash

    def _score_filter(self, op: LLMFilter, filter_str: str, input_record: DataRecord, output: bool, full_hash: str) -> tuple[float | None, GenerationStats, str]:
        """Score a filter decision, preferring the user-provided hook over the default re-execution."""
        try:
            out = self.filter_score_fn(filter_str, input_record.to_dict(), output)
            score, gen_stats = out if isinstance(out, tuple) else (out, GenerationStats())
        except NotImplementedError:
            score, gen_stats = self._default_filter_score_fn(op, filter_str, input_record, output)
        return score, gen_stats, full_hash

    def _score_join(self, op: JoinOp, condition: str, left_input_record: DataRecord, right_input_record: DataRecord, output: bool, full_hash: str) -> tuple[float | None, GenerationStats, str]:
        """Score a join decision, preferring the user-provided hook over the default re-execution."""
        try:
            out = self.join_score_fn(condition, left_input_record.to_dict(), right_input_record.to_dict(), output)
            score, gen_stats = out if isinstance(out, tuple) else (out, GenerationStats())
        except NotImplementedError:
            score, gen_stats = self._default_join_score_fn(op, condition, left_input_record, right_input_record, output)
        return score, gen_stats, full_hash

    def _score_retrieve(self, op: RetrieveOp, fields: list[str], input_record: DataRecord, output: dict, full_hash: str) -> tuple[float | None, GenerationStats, str]:
        """Score a retrieve output, preferring the user-provided hook over the default LLM judge."""
        try:
            out = self.retrieve_score_fn(fields, input_record.to_dict(), output)
            score, gen_stats = out if isinstance(out, tuple) else (out, GenerationStats())
        except NotImplementedError:
            score, gen_stats = self._default_retrieve_score_fn(op, fields, input_record, output)
        return score, gen_stats, full_hash
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: palimpzest
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Palimpzest is a system which enables anyone to process AI-powered analytical queries simply by defining them in a declarative language
|
|
5
5
|
Author-email: MIT DSG Semantic Management Lab <michjc@csail.mit.edu>
|
|
6
6
|
Project-URL: homepage, https://palimpzest.org
|
|
@@ -15,12 +15,14 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
15
15
|
Requires-Python: >=3.8
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
+
Requires-Dist: anthropic>=0.55.0
|
|
18
19
|
Requires-Dist: beautifulsoup4>=4.13.4
|
|
19
20
|
Requires-Dist: chromadb>=1.0.15
|
|
20
21
|
Requires-Dist: colorama>=0.4.6
|
|
21
22
|
Requires-Dist: datasets>=4.0.0
|
|
22
23
|
Requires-Dist: fastapi~=0.115.0
|
|
23
24
|
Requires-Dist: gradio>=5.26.0
|
|
25
|
+
Requires-Dist: litellm>=1.73.1
|
|
24
26
|
Requires-Dist: numpy==2.0.2
|
|
25
27
|
Requires-Dist: openai>=1.0
|
|
26
28
|
Requires-Dist: pandas>=2.1.1
|
|
@@ -37,6 +39,7 @@ Requires-Dist: requests>=2.25
|
|
|
37
39
|
Requires-Dist: ruff>=0.9.0
|
|
38
40
|
Requires-Dist: sentence-transformers==5.0.0
|
|
39
41
|
Requires-Dist: setuptools>=70.1.1
|
|
42
|
+
Requires-Dist: smolagents[toolkit]
|
|
40
43
|
Requires-Dist: tabulate>=0.9.0
|
|
41
44
|
Requires-Dist: together>=1.5.5
|
|
42
45
|
Requires-Dist: tqdm~=4.66.1
|
|
@@ -46,6 +49,8 @@ Requires-Dist: mkdocs>=1.6.1; extra == "docs"
|
|
|
46
49
|
Requires-Dist: mkdocs-material>=9.6.3; extra == "docs"
|
|
47
50
|
Requires-Dist: mkdocstrings-python>=1.15.0; extra == "docs"
|
|
48
51
|
Requires-Dist: mkdocs-material[imaging]; extra == "docs"
|
|
52
|
+
Provides-Extra: vllm
|
|
53
|
+
Requires-Dist: vllm>=0.10.1.1; extra == "vllm"
|
|
49
54
|
Dynamic: license-file
|
|
50
55
|
|
|
51
56
|

|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
palimpzest/__init__.py,sha256=1PzadDDOVMQJKNEYUH0_tw8tQKUYTT31M0vuzTr2Rqk,1694
|
|
2
|
+
palimpzest/constants.py,sha256=1xGydUfkuVtaeoQ_Ku6P5PDLAelQKAVouivdXkva-zE,21109
|
|
3
|
+
palimpzest/policy.py,sha256=lIvw_C_rmwCH4LZaeNkAuixl8zw9RAW_JcSWSHPjKyc,11628
|
|
4
|
+
palimpzest/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
palimpzest/agents/compute_agents.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
palimpzest/agents/search_agents.py,sha256=t2QMreB5Ph71aoNk5bBtV-0l8im79z-pMAR3JDAySDw,29418
|
|
7
|
+
palimpzest/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
palimpzest/core/models.py,sha256=fLO4T7x0njNeEbUpbhJm9cdnBva0y0Zw5WGBGdzdS_I,42442
|
|
9
|
+
palimpzest/core/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
palimpzest/core/data/context.py,sha256=x1xYyu9qW65dvtK_XayIfv_CgsCEPW6Qe0DTiSf9sjU,16207
|
|
11
|
+
palimpzest/core/data/context_manager.py,sha256=8hAKWD2jhFZgghTu7AYgjkvKDsJUPVxq8g4nG0HWvfo,6150
|
|
12
|
+
palimpzest/core/data/dataset.py,sha256=vqEEMxaG157jdyzUxM_tLt5Xq_49Yq-0dVGhS0ZUiHA,27904
|
|
13
|
+
palimpzest/core/data/index_dataset.py,sha256=adO67DgzHhA4lBME0-h4SjXfdz9UcNMSDGXTpUdKbgE,1929
|
|
14
|
+
palimpzest/core/data/iter_dataset.py,sha256=u7eZNWWT84rH_D8LNIuq0NAnm2roX81ifKTYp-hwY7g,20512
|
|
15
|
+
palimpzest/core/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
palimpzest/core/elements/filters.py,sha256=fU2x0eWDwfP52_5fUmqJXTuhs4H0vvHtPZLdA3IIw8I,1642
|
|
17
|
+
palimpzest/core/elements/groupbysig.py,sha256=Fcbt1GSAkxIILS5me0sWaNMLiptLJ6NIY6BIC0S-g3k,2318
|
|
18
|
+
palimpzest/core/elements/records.py,sha256=Su4d8GRNp5-Q1nSWANm-jehyw53tHUl20iGluyEk8NI,19053
|
|
19
|
+
palimpzest/core/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
palimpzest/core/lib/schemas.py,sha256=0qauaG3uW5tCJXNAo1i0G0UgbTaQLSLT6GoNDX8494Q,8376
|
|
21
|
+
palimpzest/prompts/__init__.py,sha256=sdZbC8RWi_IGjFuzKQMdRjS2Ih4zQnkyzFoJ6Q3Ce70,1764
|
|
22
|
+
palimpzest/prompts/agent_prompts.py,sha256=CUzBVLBiPSw8OShtKp4VTpQwtrNMtcMglo-IZHMvuDM,17459
|
|
23
|
+
palimpzest/prompts/context_search.py,sha256=s3pti4XNRiIyiWzjVNL_NqmqEc31jzSKMF2SlN0Aaf8,357
|
|
24
|
+
palimpzest/prompts/convert_prompts.py,sha256=FR_zUADuOWxMqZED4S0lyO9VNgKPNiVpSZv6ND7a0v4,6009
|
|
25
|
+
palimpzest/prompts/critique_and_refine_convert_prompts.py,sha256=WoXExBxQ7twswd9VCCST26c-2ehZtpD2iQoBi7sqDnQ,7814
|
|
26
|
+
palimpzest/prompts/filter_prompts.py,sha256=lYQFrpAKhOMUQDOVbRBHh7IjuUNMCmBnAqHwDuptQHI,4232
|
|
27
|
+
palimpzest/prompts/join_prompts.py,sha256=viQVvOpa2l9PYM34ua_jPNZnUOU_eCTMIoabBkF5cVc,5929
|
|
28
|
+
palimpzest/prompts/moa_aggregator_convert_prompts.py,sha256=BQRrtGdr53PTqvXzmFh8kfQ_w9KoKw-zTtmdo-8RFjo,2887
|
|
29
|
+
palimpzest/prompts/moa_proposer_convert_prompts.py,sha256=35pxtR2hnjLkv_10VEetRR9qUCR-zD85NZF3BaAANDk,3462
|
|
30
|
+
palimpzest/prompts/prompt_factory.py,sha256=FDBoVdJ_khT7t6T6WAiK6RgC7HqB3efmRkwMam3AIhM,51262
|
|
31
|
+
palimpzest/prompts/split_merge_prompts.py,sha256=0mTZeJhxtvlmv-ro0KwQpxlGgSTwyUhGRHJ-uHk2Zlw,3146
|
|
32
|
+
palimpzest/prompts/split_proposer_prompts.py,sha256=TBHLGaM_ycHjGHrp1JziJoJDw4S5_F4afKSAdt2McKk,2624
|
|
33
|
+
palimpzest/prompts/util_phrases.py,sha256=NWrcHfjJyiOY16Jyt7R50moVnlJDyvSBZ9kBqyX2WQo,751
|
|
34
|
+
palimpzest/prompts/validator.py,sha256=pJTZjlt_OiFM3IFOgsJ0jQdayra8iRVrpqENlXI9tQQ,10532
|
|
35
|
+
palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
palimpzest/query/execution/all_sample_execution_strategy.py,sha256=3n2hl8m-WFWIu-a8DiSVsGkz4ej3yB7mSdFR0jsiwAU,14366
|
|
38
|
+
palimpzest/query/execution/execution_strategy.py,sha256=KwBJbWOBOOPBiWRm3ypHcAQiWbCsvtW6UnVU4tHkYz8,18905
|
|
39
|
+
palimpzest/query/execution/execution_strategy_type.py,sha256=vRQBPCQN5_aoyD3TLIeW3VPo15mqF-5RBvEXkENz9FE,987
|
|
40
|
+
palimpzest/query/execution/mab_execution_strategy.py,sha256=LY1JlbYMsnJHCtYjaJ6iklojBqXc2B4KS62lobPFNz0,42341
|
|
41
|
+
palimpzest/query/execution/parallel_execution_strategy.py,sha256=Gn5hB5XddX2jCkxx6d7O-DmitK6fbuwBFnnyKhnGYEw,15706
|
|
42
|
+
palimpzest/query/execution/single_threaded_execution_strategy.py,sha256=1eo-Z9G3u92_PjoSX8HmO3D3phYgA8f0Actbgd1-oKY,16247
|
|
43
|
+
palimpzest/query/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
palimpzest/query/generators/generators.py,sha256=pi6gTCzQYs-z93IFNGKyoskIcdYCSnOwyaj-DvSlkb4,20877
|
|
45
|
+
palimpzest/query/operators/__init__.py,sha256=j-yh0P5tzXGa0JU_g8aNn54wCJDXPCMbmtOmazXXEts,3459
|
|
46
|
+
palimpzest/query/operators/aggregate.py,sha256=QvWr4C1arFSZWVqPSF5F5On6Ise5OF3VVWNGSq6Gfjk,11230
|
|
47
|
+
palimpzest/query/operators/compute.py,sha256=bxMKLRU_o7v603daKeR0FayDZ_V6NLI1fGzgu6E-sac,8473
|
|
48
|
+
palimpzest/query/operators/convert.py,sha256=teesuAeYl20ULwm6LIA277SZremdHedD2N2GYDUjb5E,17156
|
|
49
|
+
palimpzest/query/operators/critique_and_refine_convert.py,sha256=nJOQf7RLJR5Acg7fPssb0tTmtsCipG8hHu9PRquM9RE,5271
|
|
50
|
+
palimpzest/query/operators/distinct.py,sha256=MuF3NlC0QMTSGs0_fe2oly0I5Ow0hfOa7h8BFGhHiCs,2594
|
|
51
|
+
palimpzest/query/operators/filter.py,sha256=Wm1PaxURE1ZY5j7E1AitGdJfb_IKJoC_3qQW8aF0XC4,10703
|
|
52
|
+
palimpzest/query/operators/join.py,sha256=z1bzhdazTEq1BjoUSwV6j_DQ84TJ3uaSZJpCzSP61nc,17727
|
|
53
|
+
palimpzest/query/operators/limit.py,sha256=upJ775cGkxjFHRJm8GpSvtJN1cspg2FVYLN_MrIfUo4,2113
|
|
54
|
+
palimpzest/query/operators/logical.py,sha256=rh3XBUVO1JAEijw9AHjU35uf5ag01-KONdpCHJXRs3M,19883
|
|
55
|
+
palimpzest/query/operators/mixture_of_agents_convert.py,sha256=Y6O9-zL_6BPwl5Yix3SyYhI_68wiejOtJ3xuFcn_dbs,6731
|
|
56
|
+
palimpzest/query/operators/physical.py,sha256=buPZjtP4HKNVfOCNWdBtDnRS217dSsIG74gqZ1jmoyo,8320
|
|
57
|
+
palimpzest/query/operators/project.py,sha256=RX5SbHFRwHcMfiQRofIPQr-AHgIDYm68ifiFZAPu7Fo,2094
|
|
58
|
+
palimpzest/query/operators/rag_convert.py,sha256=1QQGrE22-Ec3-MNbnaU3k4TGHdpi2qZqZR9MHUniEM4,10691
|
|
59
|
+
palimpzest/query/operators/retrieve.py,sha256=v1FTFsSctqH4B37aWgBXYIxgOMJwRWQ2kwwXu1huwaQ,13106
|
|
60
|
+
palimpzest/query/operators/scan.py,sha256=Da_EZUrArzlAameHYCmtqo-xbPOFvbTYSktrUcUEUSc,7398
|
|
61
|
+
palimpzest/query/operators/search.py,sha256=xydO5Kni0RArpvLSn2ajzD4TcH442VjpP2x9NakjzaA,22842
|
|
62
|
+
palimpzest/query/operators/split_convert.py,sha256=SgtkwGWnIFlQTk96NsgckRx5q15KaGpsF3Si0FzHEGo,7765
|
|
63
|
+
palimpzest/query/optimizer/__init__.py,sha256=L2E1rOA-8O9oH6JL56wLI1qUVxXBLubJEG1IHMH-HU4,2384
|
|
64
|
+
palimpzest/query/optimizer/cost_model.py,sha256=OldPy-TJdfsQbYRoKlb3yWeKbi15jcldTIUS6BTi9T8,12678
|
|
65
|
+
palimpzest/query/optimizer/optimizer.py,sha256=mgM6c0d_voGNun2hMzqjfumJVieACtcHsNnBP4LyXAA,19626
|
|
66
|
+
palimpzest/query/optimizer/optimizer_strategy.py,sha256=9YlNGkqwgX0WaV6y8tKOOHVN8kC8GjDI3DttvGW5SYY,10206
|
|
67
|
+
palimpzest/query/optimizer/optimizer_strategy_type.py,sha256=V-MMHvJdnfZKoUX1xxxwh66q1RjN2FL35IsiT1C62c8,1084
|
|
68
|
+
palimpzest/query/optimizer/plan.py,sha256=VIhN7tWT7EoRE9BKYa1qvvOhX7dEaM-aiobByX0qjzg,22900
|
|
69
|
+
palimpzest/query/optimizer/primitives.py,sha256=jMMVq37y1tWiPU1lSSKQP9OP-mzkpSxSmUeDajRYYOQ,5445
|
|
70
|
+
palimpzest/query/optimizer/rules.py,sha256=9AsuVjhiZUc0snQPNhIqeyKpmqFsSv7e-v6BEbp9CDw,43315
|
|
71
|
+
palimpzest/query/optimizer/tasks.py,sha256=DJcKDNbVJox61rnTW0HgT1PtxGx2P_NiLvNroXie-Lg,29509
|
|
72
|
+
palimpzest/query/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
|
+
palimpzest/query/processor/config.py,sha256=b_EQOqOXoRP6AziOw1iLqb8tlSWP-D1_el3mmrnBDAk,2263
|
|
74
|
+
palimpzest/query/processor/query_processor.py,sha256=W01-2FocN1Jsv58gmEo5ALTIcpLt7D0dmI8kghSCdBk,6291
|
|
75
|
+
palimpzest/query/processor/query_processor_factory.py,sha256=H_2pkcN_aVbNDuMLsvZP2PXARLF9MwoHGAzEWkSNNYM,7866
|
|
76
|
+
palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
+
palimpzest/schemabuilder/schema_builder.py,sha256=QraGp66dcD-ej6Y2mER40o86G9JqlBkL7swkJzjUAIY,7968
|
|
78
|
+
palimpzest/tools/README.md,sha256=56_6LPG80uc0CLVhTBP6I1wgIffNv9cyTr0TmVZqmrM,483
|
|
79
|
+
palimpzest/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
+
palimpzest/tools/allenpdf.py,sha256=fXMOmSDdSSLXDKAPYYJ8k4egtWEBf_Me9Lq9tM3iyoA,1690
|
|
81
|
+
palimpzest/tools/pdfparser.py,sha256=0DOVUZLxYfqjxM8WNEfYcyiXb1qW9BWVIHEB_B_YhWA,9570
|
|
82
|
+
palimpzest/tools/skema_tools.py,sha256=HXUFpjMhbVxZwKKkATeK-FwtlTCawaCbeP-uHntI1Kg,669
|
|
83
|
+
palimpzest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
+
palimpzest/utils/env_helpers.py,sha256=n81KzoJ459pRxo7QmJA7duazwWsfoMGTHc71D2LatFk,334
|
|
85
|
+
palimpzest/utils/hash_helpers.py,sha256=3A8dA7SbXTwnnvZvPVNqqMLlVRhCKyKF_bjNNAu3Exk,334
|
|
86
|
+
palimpzest/utils/model_helpers.py,sha256=Vlu3KIvbc4Usg4iSI2KMFSc-qcdAubWN2CSjZod2czY,2233
|
|
87
|
+
palimpzest/utils/progress.py,sha256=7gucyZr82udMDZitrrkAOSKHZVljE3R2wv9nf5gA5TM,20807
|
|
88
|
+
palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
|
|
89
|
+
palimpzest/validator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
+
palimpzest/validator/validator.py,sha256=J2tGvJqfg6v5lOQDYYaqAa9d37uVHBrqkNs-a8d1Ic0,16365
|
|
91
|
+
palimpzest-0.8.0.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
|
|
92
|
+
palimpzest-0.8.0.dist-info/METADATA,sha256=MUkUorsKFMVGPmCeAZOBruvKP8shJ1kbF5kulxPnSHc,7286
|
|
93
|
+
palimpzest-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
94
|
+
palimpzest-0.8.0.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
|
|
95
|
+
palimpzest-0.8.0.dist-info/RECORD,,
|
palimpzest/core/lib/fields.py
DELETED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Field:
|
|
5
|
-
"""
|
|
6
|
-
A Field is defined by its description and its type. The Field class is subclassed to specify
|
|
7
|
-
that values of the subclass should belong to a specific type.
|
|
8
|
-
|
|
9
|
-
For example, if you wanted to define Fields relevant to indexing research papers, you could define a field
|
|
10
|
-
representing the title of a paper, the year it was published, and the journal it was published in:
|
|
11
|
-
|
|
12
|
-
```python
|
|
13
|
-
paper_title = Field(desc="The title of a scientific paper")
|
|
14
|
-
paper_year = Field(desc="The year the paper was published")
|
|
15
|
-
paper_journal = Field(desc="The name of the journal that published the paper")
|
|
16
|
-
```
|
|
17
|
-
"""
|
|
18
|
-
is_image_field = False
|
|
19
|
-
|
|
20
|
-
def __init__(self, desc: str = "") -> None:
|
|
21
|
-
self._desc = desc
|
|
22
|
-
self.type = None
|
|
23
|
-
|
|
24
|
-
def __str__(self) -> str:
|
|
25
|
-
return f"{self.__class__.__name__}(desc={self._desc})"
|
|
26
|
-
|
|
27
|
-
def __hash__(self) -> int:
|
|
28
|
-
return hash(self._desc + self.__class__.__name__)
|
|
29
|
-
|
|
30
|
-
def __eq__(self, other) -> bool:
|
|
31
|
-
return self._desc == other._desc and self.__class__ == other.__class__
|
|
32
|
-
|
|
33
|
-
@property
|
|
34
|
-
def desc(self) -> str:
|
|
35
|
-
return self._desc
|
|
36
|
-
|
|
37
|
-
def json_schema(self) -> dict:
|
|
38
|
-
return {"description": self._desc, "type": str(self.type)}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class BooleanField(Field):
|
|
42
|
-
"""A BooleanField is a Field that is True or False."""
|
|
43
|
-
|
|
44
|
-
def __init__(self, desc: str):
|
|
45
|
-
super().__init__(desc=desc)
|
|
46
|
-
self.type = bool
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class BytesField(Field):
|
|
50
|
-
"""A BytesField is a Field that is definitely an array of bytes."""
|
|
51
|
-
|
|
52
|
-
def __init__(self, desc: str):
|
|
53
|
-
super().__init__(desc=desc)
|
|
54
|
-
self.type = bytes
|
|
55
|
-
|
|
56
|
-
def json_schema(self) -> dict[str, str]:
|
|
57
|
-
return {
|
|
58
|
-
"description": self._desc,
|
|
59
|
-
"type": str(self.type),
|
|
60
|
-
"contentEncoding": "base64",
|
|
61
|
-
"contentMediaType": "application/octet-stream",
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class CallableField(Field):
|
|
66
|
-
"""A CallableField is a Field that stores a function."""
|
|
67
|
-
|
|
68
|
-
def __init__(self, desc: str):
|
|
69
|
-
super().__init__(desc=desc)
|
|
70
|
-
self.type = type(lambda x: x)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class FloatField(Field):
|
|
74
|
-
"""A FloatField is a Field that is definitely an integer or a float."""
|
|
75
|
-
|
|
76
|
-
def __init__(self, desc: str):
|
|
77
|
-
super().__init__(desc=desc)
|
|
78
|
-
self.type = float
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class IntField(Field):
|
|
82
|
-
"""An IntField is a Field that is definitely an integer or a float."""
|
|
83
|
-
|
|
84
|
-
def __init__(self, desc: str):
|
|
85
|
-
super().__init__(desc=desc)
|
|
86
|
-
self.type = int
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
class NumericField(Field):
|
|
90
|
-
"""A NumericField is a Field that is definitely a number."""
|
|
91
|
-
|
|
92
|
-
def __init__(self, desc: str):
|
|
93
|
-
super().__init__(desc=desc)
|
|
94
|
-
self.type = int | float
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
class StringField(Field):
|
|
98
|
-
"""A StringField is a Field that is definitely a string of text."""
|
|
99
|
-
|
|
100
|
-
def __init__(self, desc: str):
|
|
101
|
-
super().__init__(desc=desc)
|
|
102
|
-
self.type = str
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class ImageFilepathField(StringField):
|
|
106
|
-
"""An ImageFilepathField is a StringField that contains the filepath to an image."""
|
|
107
|
-
is_image_field = True
|
|
108
|
-
|
|
109
|
-
def __init__(self, desc: str):
|
|
110
|
-
super().__init__(desc=desc)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
class ImageURLField(StringField):
|
|
114
|
-
"""An ImageURLField is a StringField that contains the publicly accessible URL for an image."""
|
|
115
|
-
is_image_field = True
|
|
116
|
-
|
|
117
|
-
def __init__(self, desc: str):
|
|
118
|
-
super().__init__(desc=desc)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
class ImageBase64Field(BytesField):
|
|
122
|
-
"""An ImageBase64Field is a BytesField that contains a base64 encoded image."""
|
|
123
|
-
is_image_field = True
|
|
124
|
-
|
|
125
|
-
def __init__(self, desc: str):
|
|
126
|
-
super().__init__(desc=desc)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
### fields which are metaclasses that produce other field types ###
|
|
130
|
-
class ListField(Field):
|
|
131
|
-
"""A field representing a list of elements of specified types, with full list functionality."""
|
|
132
|
-
|
|
133
|
-
def __new__(cls, element_type: Field, desc: str | None = None):
|
|
134
|
-
attrs = {
|
|
135
|
-
"element_type": element_type,
|
|
136
|
-
"is_image_field": element_type.is_image_field,
|
|
137
|
-
"type": list,
|
|
138
|
-
"_desc": desc,
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
return type(f"List[{element_type.__name__}]", (Field,), attrs)
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
### CODE SYNTHESIS PROMPTS ###
|
|
2
|
-
EXAMPLE_PROMPT = """Example{idx}:
|
|
3
|
-
Example Input
|
|
4
|
-
-------------
|
|
5
|
-
{example_inputs}
|
|
6
|
-
|
|
7
|
-
Example Output
|
|
8
|
-
--------------
|
|
9
|
-
{example_output}
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
CODEGEN_PROMPT = """You are a helpful programming assistant and an expert {language} programmer. Implement the {language} function `{api}` that extracts `{output}` ({output_desc}) from given inputs:
|
|
13
|
-
{inputs_desc}
|
|
14
|
-
{examples_desc}
|
|
15
|
-
Notice that the evaluation will severely punish incorrect outputs. Thus, when the function is uncertain, it should return `None` to abstain instead of returning an incorrect guess.
|
|
16
|
-
{advice}
|
|
17
|
-
Return the implementation only."""
|
|
18
|
-
|
|
19
|
-
ADVICEGEN_PROMPT = """You are a helpful programming assistant and an expert {language} programmer. Your job is to provide programming ideas to help me write {language} programs.
|
|
20
|
-
For example, if I want to complete a task: "extract the salary number (in USD) from a given employee's document", you can provide me with {n} different ways to do it like:
|
|
21
|
-
Idea 1: Use regular expressions to extract the salary number: a number with a dollar sign in front of it. For example, $100,000.
|
|
22
|
-
Idea 2: Find the table entry with the salary number.
|
|
23
|
-
Idea 3: Use a pre-trained NLP model to extract the salary number.
|
|
24
|
-
#
|
|
25
|
-
Now, consider the following {language} programming task that extracts `{output}` ({output_desc}) from given inputs:
|
|
26
|
-
{examples_desc}
|
|
27
|
-
Please provide me with {n} different ideas to complete this task. Return the ideas only, following the format above.
|
|
28
|
-
"""
|