palimpzest-0.7.20-py3-none-any.whl → palimpzest-0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/__init__.py +37 -6
- palimpzest/agents/__init__.py +0 -0
- palimpzest/agents/compute_agents.py +0 -0
- palimpzest/agents/search_agents.py +637 -0
- palimpzest/constants.py +259 -197
- palimpzest/core/data/context.py +393 -0
- palimpzest/core/data/context_manager.py +163 -0
- palimpzest/core/data/dataset.py +634 -0
- palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
- palimpzest/core/elements/groupbysig.py +16 -13
- palimpzest/core/elements/records.py +166 -75
- palimpzest/core/lib/schemas.py +152 -390
- palimpzest/core/{data/dataclasses.py → models.py} +306 -170
- palimpzest/policy.py +2 -27
- palimpzest/prompts/__init__.py +35 -5
- palimpzest/prompts/agent_prompts.py +357 -0
- palimpzest/prompts/context_search.py +9 -0
- palimpzest/prompts/convert_prompts.py +61 -5
- palimpzest/prompts/filter_prompts.py +50 -5
- palimpzest/prompts/join_prompts.py +163 -0
- palimpzest/prompts/moa_proposer_convert_prompts.py +5 -5
- palimpzest/prompts/prompt_factory.py +358 -46
- palimpzest/prompts/validator.py +239 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
- palimpzest/query/execution/execution_strategy.py +210 -317
- palimpzest/query/execution/execution_strategy_type.py +5 -7
- palimpzest/query/execution/mab_execution_strategy.py +249 -136
- palimpzest/query/execution/parallel_execution_strategy.py +153 -244
- palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
- palimpzest/query/generators/generators.py +157 -330
- palimpzest/query/operators/__init__.py +15 -5
- palimpzest/query/operators/aggregate.py +50 -33
- palimpzest/query/operators/compute.py +201 -0
- palimpzest/query/operators/convert.py +27 -21
- palimpzest/query/operators/critique_and_refine_convert.py +7 -5
- palimpzest/query/operators/distinct.py +62 -0
- palimpzest/query/operators/filter.py +22 -13
- palimpzest/query/operators/join.py +402 -0
- palimpzest/query/operators/limit.py +3 -3
- palimpzest/query/operators/logical.py +198 -80
- palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
- palimpzest/query/operators/physical.py +27 -21
- palimpzest/query/operators/project.py +3 -3
- palimpzest/query/operators/rag_convert.py +7 -7
- palimpzest/query/operators/retrieve.py +9 -9
- palimpzest/query/operators/scan.py +81 -42
- palimpzest/query/operators/search.py +524 -0
- palimpzest/query/operators/split_convert.py +10 -8
- palimpzest/query/optimizer/__init__.py +7 -9
- palimpzest/query/optimizer/cost_model.py +108 -441
- palimpzest/query/optimizer/optimizer.py +123 -181
- palimpzest/query/optimizer/optimizer_strategy.py +66 -61
- palimpzest/query/optimizer/plan.py +352 -67
- palimpzest/query/optimizer/primitives.py +43 -19
- palimpzest/query/optimizer/rules.py +484 -646
- palimpzest/query/optimizer/tasks.py +127 -58
- palimpzest/query/processor/config.py +41 -76
- palimpzest/query/processor/query_processor.py +73 -18
- palimpzest/query/processor/query_processor_factory.py +46 -38
- palimpzest/schemabuilder/schema_builder.py +15 -28
- palimpzest/utils/model_helpers.py +27 -77
- palimpzest/utils/progress.py +114 -102
- palimpzest/validator/__init__.py +0 -0
- palimpzest/validator/validator.py +306 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/METADATA +6 -1
- palimpzest-0.8.0.dist-info/RECORD +95 -0
- palimpzest/core/lib/fields.py +0 -141
- palimpzest/prompts/code_synthesis_prompts.py +0 -28
- palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
- palimpzest/query/generators/api_client_factory.py +0 -30
- palimpzest/query/operators/code_synthesis_convert.py +0 -488
- palimpzest/query/operators/map.py +0 -130
- palimpzest/query/processor/nosentinel_processor.py +0 -33
- palimpzest/query/processor/processing_strategy_type.py +0 -28
- palimpzest/query/processor/sentinel_processor.py +0 -88
- palimpzest/query/processor/streaming_processor.py +0 -149
- palimpzest/sets.py +0 -405
- palimpzest/utils/datareader_helpers.py +0 -61
- palimpzest/utils/demo_helpers.py +0 -75
- palimpzest/utils/field_helpers.py +0 -69
- palimpzest/utils/generation_helpers.py +0 -69
- palimpzest/utils/sandbox.py +0 -183
- palimpzest-0.7.20.dist-info/RECORD +0 -95
- palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/WHEEL +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
### MAP ###
|
|
2
|
+
MAP_VALIDATOR_PROMPT = """You are an intelligent judge whose job is to evaluate how successfully an agent executed a given instruction.
|
|
3
|
+
You will be presented with the input(s) provided to the agent followed by the output produced by the agent.
|
|
4
|
+
|
|
5
|
+
Each output will be a dictionary. The keys will be **output fields** which were computed by the agent.
|
|
6
|
+
|
|
7
|
+
Your job will be to assign a score of 1.0 to every output field which was computed correctly, and a score of 0.0 to every output field which was computed incorrectly. If the output for a field is a list, you may give a score in between 0.0 and 1.0 representing the fraction of correct items in the list.
|
|
8
|
+
|
|
9
|
+
Here is an example evaluation:
|
|
10
|
+
|
|
11
|
+
INPUT MESSAGES:
|
|
12
|
+
---------------
|
|
13
|
+
You are a helpful assistant whose job is to generate a JSON object. You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
|
|
14
|
+
You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
|
|
15
|
+
|
|
16
|
+
INPUT FIELDS:
|
|
17
|
+
- text: a text passage describing a scientist
|
|
18
|
+
- birthday: the scientist's birthday
|
|
19
|
+
|
|
20
|
+
OUTPUT FIELDS:
|
|
21
|
+
- name: the name of the scientist
|
|
22
|
+
- birth_year: the year the scientist was born
|
|
23
|
+
|
|
24
|
+
CONTEXT:
|
|
25
|
+
{{
|
|
26
|
+
"text": "Augusta Ada King, Countess of Lovelace, also known as Ada Lovelace, was an English mathematician and writer chiefly known for her work on Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine. She was the first to recognise that the machine had applications beyond pure calculation.",
|
|
27
|
+
"birthday": "December 10, 1815"
|
|
28
|
+
}}
|
|
29
|
+
|
|
30
|
+
OUTPUT:
|
|
31
|
+
--------
|
|
32
|
+
{{
|
|
33
|
+
"name": "Charles Babbage",
|
|
34
|
+
"birth_year": 1815
|
|
35
|
+
}}
|
|
36
|
+
|
|
37
|
+
EVALUATION: {"name": 0.0, "birth_year": 1.0}
|
|
38
|
+
|
|
39
|
+
Remember, be sure to output your evaluation as a dictionary where each value contains a 0.0 or 1.0 score for each output field (or a score within [0.0, 1.0] for list output fields).
|
|
40
|
+
|
|
41
|
+
INPUT MESSAGES:
|
|
42
|
+
---------------
|
|
43
|
+
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
MAP_IMAGE_VALIDATOR_PROMPT = """You are an intelligent judge whose job is to evaluate how successfully an agent executed a given instruction.
|
|
47
|
+
You will be presented with the input(s) provided to the agent followed by the output produced by the agent.
|
|
48
|
+
|
|
49
|
+
Each output will be a dictionary. The keys will be **output fields** which were computed by the agent.
|
|
50
|
+
|
|
51
|
+
Your job will be to assign a score of 1.0 to every output field which was computed correctly, and a score of 0.0 to every output field which was computed incorrectly. If the output for a field is a list, you may give a score in between 0.0 and 1.0 representing the fraction of correct items in the list.
|
|
52
|
+
|
|
53
|
+
Here is an example evaluation:
|
|
54
|
+
|
|
55
|
+
INPUT MESSAGES:
|
|
56
|
+
---------------
|
|
57
|
+
You are a helpful assistant whose job is to analyze input image(s) and/or text in order to produce a JSON object. You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
|
|
58
|
+
You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
|
|
59
|
+
|
|
60
|
+
INPUT FIELDS:
|
|
61
|
+
- image: an image of a scene
|
|
62
|
+
- photographer: the photographer of the image
|
|
63
|
+
|
|
64
|
+
OUTPUT FIELDS:
|
|
65
|
+
- dog_in_image: true if a dog is in the image and false otherwise
|
|
66
|
+
- person_in_image: true if a person is in the image and false otherwise
|
|
67
|
+
|
|
68
|
+
CONTEXT:
|
|
69
|
+
{{
|
|
70
|
+
"image": <bytes>,
|
|
71
|
+
"photographer": "CameraEnthusiast1"
|
|
72
|
+
}}
|
|
73
|
+
<image content provided here; assume in this example the image shows a dog and a cat playing>
|
|
74
|
+
|
|
75
|
+
OUTPUT:
|
|
76
|
+
--------
|
|
77
|
+
{{
|
|
78
|
+
"dog_in_image": true,
|
|
79
|
+
"person_in_image": true
|
|
80
|
+
}}
|
|
81
|
+
|
|
82
|
+
EVALUATION: {"dog_in_image": 1.0, "person_in_image": 0.0}
|
|
83
|
+
|
|
84
|
+
Remember, be sure to output your evaluation as a dictionary where each value contains a 0.0 or 1.0 score for each output field (or a score within [0.0, 1.0] for list output fields).
|
|
85
|
+
|
|
86
|
+
INPUT MESSAGES:
|
|
87
|
+
---------------
|
|
88
|
+
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
### FLAT MAP ###
|
|
93
|
+
FLAT_MAP_VALIDATOR_PROMPT = """You are an intelligent judge whose job is to evaluate how successfully an agent executed a given instruction.
|
|
94
|
+
You will be presented with the input(s) provided to the agent followed by the output(s) produced by the agent.
|
|
95
|
+
|
|
96
|
+
Each output will be a list of dictionaries. The keys of each dictionary will be **output fields** which were computed by the agent.
|
|
97
|
+
|
|
98
|
+
Your job will be to assign a score of 1.0 to every output field which was computed correctly, and a score of 0.0 to every output field which was computed incorrectly. If the output for a field is a list, you may give a score in between 0.0 and 1.0 representing the fraction of correct items in the list.
|
|
99
|
+
|
|
100
|
+
Here is an example evaluation:
|
|
101
|
+
|
|
102
|
+
INPUT MESSAGES:
|
|
103
|
+
---------------
|
|
104
|
+
You are a helpful assistant whose job is to generate a JSON object. You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
|
|
105
|
+
You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
|
|
106
|
+
|
|
107
|
+
INPUT FIELDS:
|
|
108
|
+
- text: a text passage describing scientists
|
|
109
|
+
- birthdays: text containing birth dates
|
|
110
|
+
|
|
111
|
+
OUTPUT FIELDS:
|
|
112
|
+
- name: the name of the scientist
|
|
113
|
+
- birth_year: the year the scientist was born
|
|
114
|
+
|
|
115
|
+
CONTEXT:
|
|
116
|
+
{{
|
|
117
|
+
"text": "Augusta Ada King, Countess of Lovelace, also known as Ada Lovelace, was an English mathematician and writer chiefly known for her work on Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine. She was the first to recognise that the machine had applications beyond pure calculation.",
|
|
118
|
+
"birthdays": "...Lovelace was born on December 10, 1815, almost exactly 24 years after Babbage's birth on 26 December 1791..."
|
|
119
|
+
}}
|
|
120
|
+
|
|
121
|
+
OUTPUTS:
|
|
122
|
+
--------
|
|
123
|
+
[
|
|
124
|
+
{{
|
|
125
|
+
"name": "Ada Lovelace",
|
|
126
|
+
"birth_year": 1815
|
|
127
|
+
}},
|
|
128
|
+
{{
|
|
129
|
+
"name": "Charles Babbage",
|
|
130
|
+
"birth_year": 1790
|
|
131
|
+
}}
|
|
132
|
+
]
|
|
133
|
+
|
|
134
|
+
EVALUATION: [{"name": 1.0, "birth_year": 1.0}, {"name": 1.0, "birth_year": 0.0}]
|
|
135
|
+
|
|
136
|
+
Remember, be sure to output your evaluation as a list of dictionaries where each dictionary contains a 0.0 or 1.0 score for each output field (or a score within [0.0, 1.0] for list output fields).
|
|
137
|
+
|
|
138
|
+
INPUT MESSAGES:
|
|
139
|
+
---------------
|
|
140
|
+
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
FLAT_MAP_IMAGE_VALIDATOR_PROMPT = """You are an intelligent judge whose job is to evaluate how successfully an agent executed a given instruction.
|
|
144
|
+
You will be presented with the input(s) provided to the agent followed by the output(s) produced by the agent.
|
|
145
|
+
|
|
146
|
+
Each output will be a list of dictionaries. The keys of each dictionary will be **output fields** which were computed by the agent.
|
|
147
|
+
|
|
148
|
+
Your job will be to assign a score of 1.0 to every output field which was computed correctly, and a score of 0.0 to every output field which was computed incorrectly. If the output for a field is a list, you may give a score in between 0.0 and 1.0 representing the fraction of correct items in the list.
|
|
149
|
+
|
|
150
|
+
Here is an example evaluation:
|
|
151
|
+
|
|
152
|
+
INPUT MESSAGES:
|
|
153
|
+
---------------
|
|
154
|
+
You are a helpful assistant whose job is to analyze input image(s) and/or text in order to produce a JSON object. You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
|
|
155
|
+
You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
|
|
156
|
+
|
|
157
|
+
INPUT FIELDS:
|
|
158
|
+
- image: an image of a scene
|
|
159
|
+
- photographer: the photographer of the image
|
|
160
|
+
|
|
161
|
+
OUTPUT FIELDS:
|
|
162
|
+
- animal: the type of animal in the image
|
|
163
|
+
- animal_is_canine: true if the animal is a canine and false otherwise
|
|
164
|
+
|
|
165
|
+
CONTEXT:
|
|
166
|
+
{{
|
|
167
|
+
"image": <bytes>,
|
|
168
|
+
"photographer": "CameraEnthusiast1"
|
|
169
|
+
}}
|
|
170
|
+
<image content provided here; assume in this example the image shows a dog and a cat playing>
|
|
171
|
+
|
|
172
|
+
OUTPUT:
|
|
173
|
+
--------
|
|
174
|
+
[
|
|
175
|
+
{{
|
|
176
|
+
"animal": "dog",
|
|
177
|
+
"animal_is_canine": true
|
|
178
|
+
}},
|
|
179
|
+
{{
|
|
180
|
+
"animal": "cat",
|
|
181
|
+
"animal_is_canine": true
|
|
182
|
+
}}
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
EVALUATION: [{"animal": 1.0, "animal_is_canine": 1.0}, {"animal": 1.0, "animal_is_canine": 0.0}]
|
|
186
|
+
|
|
187
|
+
Remember, be sure to output your evaluation as a list of dictionaries where each dictionary contains a 0.0 or 1.0 score for each output field (or a score within [0.0, 1.0] for list output fields).
|
|
188
|
+
|
|
189
|
+
INPUT MESSAGES:
|
|
190
|
+
---------------
|
|
191
|
+
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
### RETRIEVE ###
|
|
196
|
+
RETRIEVE_VALIDATOR_PROMPT = """You are an intelligent judge whose job is to evaluate how successfully an agent executed a given instruction.
|
|
197
|
+
You will be presented with the input(s) provided to the agent followed by the output produced by the agent.
|
|
198
|
+
|
|
199
|
+
Each output will be a dictionary. The keys will be **output fields** which were computed by the agent.
|
|
200
|
+
|
|
201
|
+
Your job will be to assign a score of 1.0 to every output field which was computed correctly, and a score of 0.0 to every output field which was computed incorrectly. If the output for a field is a list, you may give a score in between 0.0 and 1.0 representing the fraction of correct items in the list.
|
|
202
|
+
|
|
203
|
+
Here is an example evaluation:
|
|
204
|
+
|
|
205
|
+
INPUT MESSAGES:
|
|
206
|
+
---------------
|
|
207
|
+
You are a helpful assistant whose job is to generate a JSON object. You will be presented with a context and a set of output fields to generate. Your task is to generate a JSON object which fills in the output fields with the correct values.
|
|
208
|
+
You will be provided with a description of each input field and each output field. All of the fields in the output JSON object can be derived using information from the context.
|
|
209
|
+
|
|
210
|
+
INPUT FIELDS:
|
|
211
|
+
- text: a text passage describing a scientist
|
|
212
|
+
|
|
213
|
+
OUTPUT FIELDS:
|
|
214
|
+
- related_scientists: list of scientists who perform similar work as the scientist described in the text
|
|
215
|
+
|
|
216
|
+
CONTEXT:
|
|
217
|
+
{{
|
|
218
|
+
"text": "Augusta Ada King, Countess of Lovelace, also known as Ada Lovelace, was an English mathematician and writer chiefly known for her work on Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine. She was the first to recognise that the machine had applications beyond pure calculation.",
|
|
219
|
+
}}
|
|
220
|
+
|
|
221
|
+
OUTPUT:
|
|
222
|
+
--------
|
|
223
|
+
{{
|
|
224
|
+
"related_scientists": [
|
|
225
|
+
"Charles Babbage",
|
|
226
|
+
"Alan Turing",
|
|
227
|
+
"Charles Darwin",
|
|
228
|
+
"John von Neumann",
|
|
229
|
+
]
|
|
230
|
+
}}
|
|
231
|
+
|
|
232
|
+
EVALUATION: {"related_scientists": 0.75}
|
|
233
|
+
|
|
234
|
+
Remember, be sure to output your evaluation as a dictionary where each value contains a 0.0 or 1.0 score for each output field (or a score within [0.0, 1.0] for list output fields).
|
|
235
|
+
|
|
236
|
+
INPUT MESSAGES:
|
|
237
|
+
---------------
|
|
238
|
+
|
|
239
|
+
"""
|
|
@@ -2,13 +2,18 @@ import logging
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from palimpzest.core.data.
|
|
5
|
+
from palimpzest.core.data.dataset import Dataset
|
|
6
6
|
from palimpzest.core.elements.records import DataRecord, DataRecordSet
|
|
7
|
+
from palimpzest.core.models import SentinelPlanStats
|
|
7
8
|
from palimpzest.query.execution.execution_strategy import SentinelExecutionStrategy
|
|
9
|
+
from palimpzest.query.operators.aggregate import AggregateOp
|
|
10
|
+
from palimpzest.query.operators.filter import FilterOp
|
|
11
|
+
from palimpzest.query.operators.join import JoinOp
|
|
8
12
|
from palimpzest.query.operators.physical import PhysicalOperator
|
|
9
|
-
from palimpzest.query.operators.scan import ScanPhysicalOp
|
|
13
|
+
from palimpzest.query.operators.scan import ContextScanOp, ScanPhysicalOp
|
|
10
14
|
from palimpzest.query.optimizer.plan import SentinelPlan
|
|
11
15
|
from palimpzest.utils.progress import create_progress_manager
|
|
16
|
+
from palimpzest.validator.validator import Validator
|
|
12
17
|
|
|
13
18
|
logger = logging.getLogger(__name__)
|
|
14
19
|
|
|
@@ -21,36 +26,79 @@ class OpSet:
|
|
|
21
26
|
2. has been sampled fewer than j times
|
|
22
27
|
"""
|
|
23
28
|
|
|
24
|
-
def __init__(self, op_set: list[PhysicalOperator], source_indices: list[int]):
|
|
29
|
+
def __init__(self, op_set: list[PhysicalOperator], source_unique_logical_op_ids: list[str], source_indices: list[int]):
|
|
25
30
|
# construct the set of operators
|
|
26
31
|
self.ops = op_set
|
|
27
32
|
|
|
28
33
|
# store the order in which we will sample the source records
|
|
29
34
|
self.source_indices = source_indices
|
|
30
35
|
|
|
36
|
+
# boolean flags indicating the type of operator in this OpSet
|
|
37
|
+
sample_op = op_set[0]
|
|
38
|
+
self.is_scan_op = isinstance(sample_op, (ScanPhysicalOp, ContextScanOp))
|
|
39
|
+
self.is_filter_op = isinstance(sample_op, FilterOp)
|
|
40
|
+
self.is_aggregate_op = isinstance(sample_op, AggregateOp)
|
|
41
|
+
self.is_llm_join = isinstance(sample_op, JoinOp)
|
|
42
|
+
|
|
31
43
|
# set the initial inputs for this logical operator
|
|
32
|
-
|
|
33
|
-
|
|
44
|
+
self.source_indices_to_inputs = {source_unique_logical_op_id: {} for source_unique_logical_op_id in source_unique_logical_op_ids}
|
|
45
|
+
if self.is_scan_op:
|
|
46
|
+
self.source_indices_to_inputs["source"] = {source_idx: [int(source_idx.split("-")[-1])] for source_idx in self.source_indices}
|
|
34
47
|
|
|
35
|
-
def
|
|
48
|
+
def get_op_inputs(self) -> list[PhysicalOperator, DataRecord | int | None]:
|
|
36
49
|
"""
|
|
37
|
-
Returns the list of frontier operators and their next input to process.
|
|
38
|
-
any indices in `source_indices_to_sample` which this operator does not sample on its own, then
|
|
39
|
-
we also have this frontier process that source_idx's input with its max quality operator.
|
|
50
|
+
Returns the list of frontier operators and their next input to process.
|
|
40
51
|
"""
|
|
41
|
-
#
|
|
42
|
-
|
|
52
|
+
# if this is an aggregate, run on every input
|
|
53
|
+
if self.is_aggregate_op:
|
|
54
|
+
op = self.ops[0]
|
|
55
|
+
all_inputs = []
|
|
56
|
+
for _, source_indices_to_inputs in self.source_indices_to_inputs.items():
|
|
57
|
+
for _, inputs in source_indices_to_inputs.items():
|
|
58
|
+
all_inputs.extend(inputs)
|
|
59
|
+
return [(op, tuple(), all_inputs)]
|
|
60
|
+
|
|
61
|
+
# if this is an un-optimized (non-scan, non-join) operator, flatten inputs and run on each one
|
|
62
|
+
elif not self.is_scan_op and not self.is_llm_join and len(self.ops) == 1:
|
|
63
|
+
op_inputs = []
|
|
64
|
+
op = self.ops[0]
|
|
65
|
+
for _, source_indices_to_inputs in self.source_indices_to_inputs.items():
|
|
66
|
+
for source_indices, inputs in source_indices_to_inputs.items():
|
|
67
|
+
for input in inputs:
|
|
68
|
+
op_inputs.append((op, source_indices, input))
|
|
69
|
+
return op_inputs
|
|
70
|
+
|
|
71
|
+
# get the list of (op, source_indices) pairs which this operator needs to execute
|
|
72
|
+
op_source_indices_pairs = []
|
|
43
73
|
for op in self.ops:
|
|
44
|
-
# construct list of inputs by looking up the input for the given
|
|
45
|
-
for
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
#
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
74
|
+
# construct list of inputs by looking up the input for the given source_indices
|
|
75
|
+
for source_indices in self.source_indices:
|
|
76
|
+
op_source_indices_pairs.append((op, source_indices))
|
|
77
|
+
|
|
78
|
+
# construct the op inputs
|
|
79
|
+
op_inputs = []
|
|
80
|
+
if self.is_llm_join:
|
|
81
|
+
left_source_unique_logical_op_id, right_source_unique_logical_op_id = list(self.source_indices_to_inputs)
|
|
82
|
+
left_source_indices_to_inputs = self.source_indices_to_inputs[left_source_unique_logical_op_id]
|
|
83
|
+
right_source_indices_to_inputs = self.source_indices_to_inputs[right_source_unique_logical_op_id]
|
|
84
|
+
for op, source_indices in op_source_indices_pairs:
|
|
85
|
+
left_source_indices = source_indices[0]
|
|
86
|
+
right_source_indices = source_indices[1]
|
|
87
|
+
left_inputs = left_source_indices_to_inputs.get(left_source_indices, [])
|
|
88
|
+
right_inputs = right_source_indices_to_inputs.get(right_source_indices, [])
|
|
89
|
+
if len(left_inputs) > 0 and len(right_inputs) > 0:
|
|
90
|
+
op_inputs.append((op, (left_source_indices, right_source_indices), (left_inputs, right_inputs)))
|
|
91
|
+
return op_inputs
|
|
92
|
+
|
|
93
|
+
# if operator is not a join
|
|
94
|
+
source_unique_logical_op_id = list(self.source_indices_to_inputs)[0]
|
|
95
|
+
op_inputs = [
|
|
96
|
+
(op, source_indices, input)
|
|
97
|
+
for op, source_indices in op_source_indices_pairs
|
|
98
|
+
for input in self.source_indices_to_inputs[source_unique_logical_op_id].get(source_indices, [])
|
|
99
|
+
]
|
|
100
|
+
|
|
101
|
+
return op_inputs
|
|
54
102
|
|
|
55
103
|
def pick_highest_quality_output(self, record_sets: list[DataRecordSet]) -> DataRecordSet:
|
|
56
104
|
# if there's only one operator in the set, we return its record_set
|
|
@@ -100,75 +148,64 @@ class OpSet:
|
|
|
100
148
|
for record in max_quality_record_set:
|
|
101
149
|
input.append(record if record.passed_operator else None)
|
|
102
150
|
|
|
103
|
-
self.
|
|
104
|
-
|
|
151
|
+
self.source_indices_to_inputs[source_idx] = input
|
|
105
152
|
|
|
106
153
|
class AllSamplingExecutionStrategy(SentinelExecutionStrategy):
|
|
107
154
|
|
|
108
|
-
def _get_source_indices(self):
|
|
109
|
-
"""Get the list of source indices which the sentinel plan should execute over."""
|
|
110
|
-
# create list of all source indices and shuffle it
|
|
111
|
-
total_num_samples = len(self.val_datasource)
|
|
112
|
-
source_indices = list(np.arange(total_num_samples))
|
|
113
|
-
|
|
114
|
-
return source_indices
|
|
115
|
-
|
|
116
155
|
def _execute_sentinel_plan(self,
|
|
117
156
|
plan: SentinelPlan,
|
|
118
157
|
op_sets: dict[str, OpSet],
|
|
119
|
-
|
|
158
|
+
validator: Validator,
|
|
120
159
|
plan_stats: SentinelPlanStats,
|
|
121
160
|
) -> SentinelPlanStats:
|
|
122
161
|
# execute operator sets in sequence
|
|
123
|
-
for
|
|
162
|
+
for topo_idx, (logical_op_id, _) in enumerate(plan):
|
|
163
|
+
# compute unique logical op id within plan
|
|
164
|
+
unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
|
|
165
|
+
|
|
124
166
|
# get frontier ops and their next input
|
|
125
|
-
|
|
167
|
+
op_inputs = op_sets[logical_op_id].get_op_inputs()
|
|
126
168
|
|
|
127
|
-
# break out of the loop if
|
|
128
|
-
if len(
|
|
169
|
+
# break out of the loop if op_inputs is empty, as this means all records have been filtered out
|
|
170
|
+
if len(op_inputs) == 0:
|
|
129
171
|
break
|
|
130
172
|
|
|
131
173
|
# run sampled operators on sampled inputs
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# FUTURE TODO: have this return the highest quality record set simply based on our posterior (or prior) belief on operator quality
|
|
135
|
-
# get the target record set for each source_idx
|
|
136
|
-
source_idx_to_target_record_set = self._get_target_record_sets(logical_op_id, source_idx_to_record_sets_and_ops, expected_outputs)
|
|
174
|
+
source_indices_to_record_set_tuples, _ = self._execute_op_set(unique_logical_op_id, op_inputs)
|
|
137
175
|
|
|
138
|
-
# TODO: make consistent across here and RandomSampling
|
|
139
|
-
# FUTURE TODO: move this outside of the loop (i.e. assume we only get quality label(s) after executing full program)
|
|
140
176
|
# score the quality of each generated output
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
177
|
+
source_indices_to_all_record_sets = {
|
|
178
|
+
source_indices: [(record_set, op) for record_set, op, _ in record_set_tuples]
|
|
179
|
+
for source_indices, record_set_tuples in source_indices_to_record_set_tuples.items()
|
|
180
|
+
}
|
|
181
|
+
source_indices_to_all_record_sets, val_gen_stats = self._score_quality(validator, source_indices_to_all_record_sets)
|
|
182
|
+
|
|
183
|
+
# remove records that were read from the execution cache before adding to record op stats
|
|
184
|
+
new_record_op_stats = []
|
|
185
|
+
for _, record_set_tuples in source_indices_to_record_set_tuples.items():
|
|
186
|
+
for record_set, _, is_new in record_set_tuples:
|
|
187
|
+
if is_new:
|
|
188
|
+
new_record_op_stats.extend(record_set.record_op_stats)
|
|
150
189
|
|
|
151
190
|
# update plan stats
|
|
152
|
-
plan_stats.add_record_op_stats(
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
# close the cache
|
|
164
|
-
self._close_cache(plan.logical_op_ids)
|
|
191
|
+
plan_stats.add_record_op_stats(unique_logical_op_id, new_record_op_stats)
|
|
192
|
+
plan_stats.add_validation_gen_stats(unique_logical_op_id, val_gen_stats)
|
|
193
|
+
|
|
194
|
+
# provide the best record sets as inputs to the next logical operator
|
|
195
|
+
next_unique_logical_op_id = plan.get_next_unique_logical_op_id(unique_logical_op_id)
|
|
196
|
+
if next_unique_logical_op_id is not None:
|
|
197
|
+
source_indices_to_all_record_sets = {
|
|
198
|
+
source_indices: [record_set for record_set, _ in record_set_tuples]
|
|
199
|
+
for source_indices, record_set_tuples in source_indices_to_all_record_sets.items()
|
|
200
|
+
}
|
|
201
|
+
op_sets[next_unique_logical_op_id].update_inputs(unique_logical_op_id, source_indices_to_all_record_sets)
|
|
165
202
|
|
|
166
203
|
# finalize plan stats
|
|
167
204
|
plan_stats.finish()
|
|
168
205
|
|
|
169
206
|
return plan_stats
|
|
170
207
|
|
|
171
|
-
def execute_sentinel_plan(self, plan: SentinelPlan, expected_outputs: dict[int, dict] | None):
|
|
208
|
+
def execute_sentinel_plan(self, plan: SentinelPlan, train_dataset: dict[str, Dataset], validator: Validator): # expected_outputs: dict[int, dict] | None):
|
|
172
209
|
"""
|
|
173
210
|
NOTE: this function currently requires us to set k and j properly in order to make
|
|
174
211
|
comparison in our research against the corresponding sample budget in MAB.
|
|
@@ -177,8 +214,6 @@ class AllSamplingExecutionStrategy(SentinelExecutionStrategy):
|
|
|
177
214
|
calls does not perfectly match the sample_budget. This may cause some minor discrepancies with
|
|
178
215
|
the progress manager as a result.
|
|
179
216
|
"""
|
|
180
|
-
# for now, assert that the first operator in the plan is a ScanPhysicalOp
|
|
181
|
-
assert all(isinstance(op, ScanPhysicalOp) for op in plan.operator_sets[0]), "First operator in physical plan must be a ScanPhysicalOp"
|
|
182
217
|
logger.info(f"Executing plan {plan.plan_id} with {self.max_workers} workers")
|
|
183
218
|
logger.info(f"Plan Details: {plan}")
|
|
184
219
|
|
|
@@ -186,25 +221,48 @@ class AllSamplingExecutionStrategy(SentinelExecutionStrategy):
|
|
|
186
221
|
plan_stats = SentinelPlanStats.from_plan(plan)
|
|
187
222
|
plan_stats.start()
|
|
188
223
|
|
|
189
|
-
# get
|
|
190
|
-
|
|
224
|
+
# get lists of source indices
|
|
225
|
+
dataset_id_to_source_indices = {}
|
|
226
|
+
for dataset_id, dataset in train_dataset.items():
|
|
227
|
+
total_num_samples = len(dataset)
|
|
228
|
+
source_indices = [f"{dataset_id}-{int(idx)}" for idx in np.arange(total_num_samples)]
|
|
229
|
+
dataset_id_to_source_indices[dataset_id] = source_indices
|
|
191
230
|
|
|
192
231
|
# initialize set of physical operators for each logical operator
|
|
193
|
-
op_sets = {
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
232
|
+
op_sets = {}
|
|
233
|
+
for topo_idx, (logical_op_id, op_set) in enumerate(plan):
|
|
234
|
+
unique_logical_op_id = f"{topo_idx}-{logical_op_id}"
|
|
235
|
+
source_unique_logical_op_ids = plan.get_source_unique_logical_op_ids(unique_logical_op_id)
|
|
236
|
+
sample_op = op_set[0]
|
|
237
|
+
if isinstance(sample_op, (ScanPhysicalOp, ContextScanOp)):
|
|
238
|
+
root_dataset_ids = plan.get_root_dataset_ids(unique_logical_op_id)
|
|
239
|
+
assert len(root_dataset_ids) == 1, f"Scan for {sample_op} has {len(root_dataset_ids)} > 1 root dataset ids"
|
|
240
|
+
root_dataset_id = root_dataset_ids[0]
|
|
241
|
+
source_indices = dataset_id_to_source_indices[root_dataset_id]
|
|
242
|
+
op_sets[unique_logical_op_id] = OpSet(op_set, source_unique_logical_op_ids, source_indices)
|
|
243
|
+
elif isinstance(sample_op, JoinOp):
|
|
244
|
+
assert len(source_unique_logical_op_ids) == 2, f"Join for {sample_op} has {len(source_unique_logical_op_ids)} != 2 source logical operators"
|
|
245
|
+
left_source_indices = op_sets[source_unique_logical_op_ids[0]].source_indices
|
|
246
|
+
right_source_indices = op_sets[source_unique_logical_op_ids[1]].source_indices
|
|
247
|
+
source_indices = []
|
|
248
|
+
for left_source_idx in left_source_indices:
|
|
249
|
+
for right_source_idx in right_source_indices:
|
|
250
|
+
source_indices.append((left_source_idx, right_source_idx))
|
|
251
|
+
op_sets[unique_logical_op_id] = OpSet(op_set, source_unique_logical_op_ids, source_indices)
|
|
252
|
+
else:
|
|
253
|
+
source_indices = op_sets[source_unique_logical_op_ids[0]].source_indices
|
|
254
|
+
op_sets[unique_logical_op_id] = OpSet(op_set, source_unique_logical_op_ids, source_indices)
|
|
197
255
|
|
|
198
256
|
# initialize and start the progress manager
|
|
199
257
|
self.progress_manager = create_progress_manager(plan, sample_budget=self.sample_budget, progress=self.progress)
|
|
200
258
|
self.progress_manager.start()
|
|
201
259
|
|
|
202
|
-
# NOTE: we must handle progress manager outside of
|
|
260
|
+
# NOTE: we must handle progress manager outside of _execute_sentinel_plan to ensure that it is shut down correctly;
|
|
203
261
|
# if we don't have the `finally:` branch, then program crashes can cause future program runs to fail because
|
|
204
262
|
# the progress manager cannot get a handle to the console
|
|
205
263
|
try:
|
|
206
264
|
# execute sentinel plan by sampling records and operators
|
|
207
|
-
plan_stats = self._execute_sentinel_plan(plan, op_sets,
|
|
265
|
+
plan_stats = self._execute_sentinel_plan(plan, op_sets, validator, plan_stats)
|
|
208
266
|
|
|
209
267
|
finally:
|
|
210
268
|
# finish progress tracking
|