palimpzest 0.7.20__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/__init__.py +37 -6
- palimpzest/agents/__init__.py +0 -0
- palimpzest/agents/compute_agents.py +0 -0
- palimpzest/agents/search_agents.py +637 -0
- palimpzest/constants.py +259 -197
- palimpzest/core/data/context.py +393 -0
- palimpzest/core/data/context_manager.py +163 -0
- palimpzest/core/data/dataset.py +634 -0
- palimpzest/core/data/{datareaders.py → iter_dataset.py} +202 -126
- palimpzest/core/elements/groupbysig.py +16 -13
- palimpzest/core/elements/records.py +166 -75
- palimpzest/core/lib/schemas.py +152 -390
- palimpzest/core/{data/dataclasses.py → models.py} +306 -170
- palimpzest/policy.py +2 -27
- palimpzest/prompts/__init__.py +35 -5
- palimpzest/prompts/agent_prompts.py +357 -0
- palimpzest/prompts/context_search.py +9 -0
- palimpzest/prompts/convert_prompts.py +61 -5
- palimpzest/prompts/filter_prompts.py +50 -5
- palimpzest/prompts/join_prompts.py +163 -0
- palimpzest/prompts/moa_proposer_convert_prompts.py +5 -5
- palimpzest/prompts/prompt_factory.py +358 -46
- palimpzest/prompts/validator.py +239 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +134 -76
- palimpzest/query/execution/execution_strategy.py +210 -317
- palimpzest/query/execution/execution_strategy_type.py +5 -7
- palimpzest/query/execution/mab_execution_strategy.py +249 -136
- palimpzest/query/execution/parallel_execution_strategy.py +153 -244
- palimpzest/query/execution/single_threaded_execution_strategy.py +107 -64
- palimpzest/query/generators/generators.py +157 -330
- palimpzest/query/operators/__init__.py +15 -5
- palimpzest/query/operators/aggregate.py +50 -33
- palimpzest/query/operators/compute.py +201 -0
- palimpzest/query/operators/convert.py +27 -21
- palimpzest/query/operators/critique_and_refine_convert.py +7 -5
- palimpzest/query/operators/distinct.py +62 -0
- palimpzest/query/operators/filter.py +22 -13
- palimpzest/query/operators/join.py +402 -0
- palimpzest/query/operators/limit.py +3 -3
- palimpzest/query/operators/logical.py +198 -80
- palimpzest/query/operators/mixture_of_agents_convert.py +10 -8
- palimpzest/query/operators/physical.py +27 -21
- palimpzest/query/operators/project.py +3 -3
- palimpzest/query/operators/rag_convert.py +7 -7
- palimpzest/query/operators/retrieve.py +9 -9
- palimpzest/query/operators/scan.py +81 -42
- palimpzest/query/operators/search.py +524 -0
- palimpzest/query/operators/split_convert.py +10 -8
- palimpzest/query/optimizer/__init__.py +7 -9
- palimpzest/query/optimizer/cost_model.py +108 -441
- palimpzest/query/optimizer/optimizer.py +123 -181
- palimpzest/query/optimizer/optimizer_strategy.py +66 -61
- palimpzest/query/optimizer/plan.py +352 -67
- palimpzest/query/optimizer/primitives.py +43 -19
- palimpzest/query/optimizer/rules.py +484 -646
- palimpzest/query/optimizer/tasks.py +127 -58
- palimpzest/query/processor/config.py +41 -76
- palimpzest/query/processor/query_processor.py +73 -18
- palimpzest/query/processor/query_processor_factory.py +46 -38
- palimpzest/schemabuilder/schema_builder.py +15 -28
- palimpzest/utils/model_helpers.py +27 -77
- palimpzest/utils/progress.py +114 -102
- palimpzest/validator/__init__.py +0 -0
- palimpzest/validator/validator.py +306 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/METADATA +6 -1
- palimpzest-0.8.0.dist-info/RECORD +95 -0
- palimpzest/core/lib/fields.py +0 -141
- palimpzest/prompts/code_synthesis_prompts.py +0 -28
- palimpzest/query/execution/random_sampling_execution_strategy.py +0 -240
- palimpzest/query/generators/api_client_factory.py +0 -30
- palimpzest/query/operators/code_synthesis_convert.py +0 -488
- palimpzest/query/operators/map.py +0 -130
- palimpzest/query/processor/nosentinel_processor.py +0 -33
- palimpzest/query/processor/processing_strategy_type.py +0 -28
- palimpzest/query/processor/sentinel_processor.py +0 -88
- palimpzest/query/processor/streaming_processor.py +0 -149
- palimpzest/sets.py +0 -405
- palimpzest/utils/datareader_helpers.py +0 -61
- palimpzest/utils/demo_helpers.py +0 -75
- palimpzest/utils/field_helpers.py +0 -69
- palimpzest/utils/generation_helpers.py +0 -69
- palimpzest/utils/sandbox.py +0 -183
- palimpzest-0.7.20.dist-info/RECORD +0 -95
- /palimpzest/core/{elements/index.py → data/index_dataset.py} +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/WHEEL +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.7.20.dist-info → palimpzest-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
|
|
6
|
-
from palimpzest import constants
|
|
7
|
-
from palimpzest.core.data.datareaders import (
|
|
8
|
-
DataReader,
|
|
9
|
-
FileReader,
|
|
10
|
-
HTMLFileDirectoryReader,
|
|
11
|
-
ImageFileDirectoryReader,
|
|
12
|
-
MemoryReader,
|
|
13
|
-
PDFFileDirectoryReader,
|
|
14
|
-
TextFileDirectoryReader,
|
|
15
|
-
XLSFileDirectoryReader,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def get_local_source(path: str | Path, **kwargs) -> DataReader:
    """Return a DataReader for a local file or directory.

    A regular file is wrapped in a ``FileReader``. For a directory, a
    specialised reader is chosen only when *every* entry ends with one of that
    reader's extensions; otherwise ``TextFileDirectoryReader`` is used.

    Args:
        path: Location of the file or directory to read.
        **kwargs: Optional settings used for PDF directories only:
            ``pdfprocessor`` (default ``constants.DEFAULT_PDF_PROCESSOR``) and
            ``file_cache_dir`` (default ``"/tmp"``).

    Returns:
        The reader selected for ``path``.

    Raises:
        Exception: If ``path`` is neither a file nor a directory.
    """
    if os.path.isfile(path):
        return FileReader(path)

    if not os.path.isdir(path):
        raise Exception(f"Path {path} is invalid. Does not point to a file or directory.")

    # List the directory once; every extension check reuses this snapshot
    # instead of hitting the filesystem again.
    filenames = os.listdir(path)

    def all_have(extensions) -> bool:
        suffixes = tuple(extensions)
        return all(name.endswith(suffixes) for name in filenames)

    # NOTE: all() is True for an empty listing, so an empty directory is
    # handed to ImageFileDirectoryReader (the first check), as before.
    if all_have(constants.IMAGE_EXTENSIONS):
        return ImageFileDirectoryReader(path)

    if all_have(constants.PDF_EXTENSIONS):
        pdfprocessor = kwargs.get("pdfprocessor", constants.DEFAULT_PDF_PROCESSOR)
        file_cache_dir = kwargs.get("file_cache_dir", "/tmp")
        return PDFFileDirectoryReader(
            path=path, pdfprocessor=pdfprocessor, file_cache_dir=file_cache_dir
        )

    if all_have(constants.XLS_EXTENSIONS):
        return XLSFileDirectoryReader(path)

    if all_have(constants.HTML_EXTENSIONS):
        return HTMLFileDirectoryReader(path)

    # Mixed or unrecognised content falls back to plain-text reading.
    return TextFileDirectoryReader(path)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def get_local_datareader(source: str | Path | list | pd.DataFrame, **kwargs) -> DataReader:
    """Build a ``DataReader`` from a path or an in-memory collection.

    Args:
        source: A filesystem path (``str`` or ``Path``), a ``list``, or a
            ``pd.DataFrame``.
        **kwargs: Forwarded to ``get_local_source`` when ``source`` is a path.

    Returns:
        The reader produced by ``get_local_source`` for paths, or a
        ``MemoryReader`` wrapping ``source`` for lists and DataFrames.

    Raises:
        Exception: If ``source`` is none of the supported types.
    """
    # paths are resolved against the local filesystem
    if isinstance(source, (str, Path)):
        return get_local_source(source, **kwargs)

    # in-memory data is wrapped directly
    if isinstance(source, (list, pd.DataFrame)):
        return MemoryReader(source)

    raise Exception(f"Invalid source type: {type(source)}, We only support str, Path, list[dict], and pd.DataFrame")
|
palimpzest/utils/demo_helpers.py
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
# for those of us who resist change and are still on Python 3.9
|
|
2
|
-
try:
|
|
3
|
-
from itertools import pairwise
|
|
4
|
-
except Exception:
|
|
5
|
-
from more_itertools import pairwise
|
|
6
|
-
|
|
7
|
-
import gradio as gr
|
|
8
|
-
import pandas as pd
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def flatten_nested_tuples(nested_tuples):
    """Flatten a cons-style nested tuple and drop its innermost leaf.

    The input has the form ``(head, tail)`` where ``tail`` is again such a
    pair or the empty tuple, e.g. ``(4, (3, (2, (1, ()))))``. Leaves are
    collected head-first, the order is reversed, and the first element of the
    reversed list is discarded, so the example yields ``[2, 3, 4]``.

    Args:
        nested_tuples: The nested tuple structure (heads may themselves be
            nested pairs). A non-tuple value is treated as a single leaf.

    Returns:
        A list of the leaves, innermost-first, without the innermost leaf.

    Raises:
        IndexError: If a non-empty tuple has fewer than two elements.
    """
    leaves = []

    # An explicit stack replaces recursion so that very deep chains do not hit
    # the interpreter's recursion limit. The tail is pushed first so the head
    # is popped (and therefore visited) before it.
    pending = [nested_tuples]
    while pending:
        item = pending.pop()
        if isinstance(item, tuple):
            if item:  # the empty tuple terminates a chain
                pending.append(item[1])
                pending.append(item[0])
        else:
            leaves.append(item)

    leaves.reverse()
    return leaves[1:]
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def create_plan_str(flatten_ops):
|
|
32
|
-
"""Helper function to return string w/physical plan."""
|
|
33
|
-
plan_str = ""
|
|
34
|
-
start = flatten_ops[0]
|
|
35
|
-
plan_str += f" 0. {type(start).__name__} -> {start.output_schema.__name__} \n"
|
|
36
|
-
|
|
37
|
-
for idx, (left, right) in enumerate(pairwise(flatten_ops)):
|
|
38
|
-
in_schema = left.output_schema
|
|
39
|
-
out_schema = right.output_schema
|
|
40
|
-
plan_str += f" {idx+1}. {in_schema.__name__} -> {type(right).__name__} -> {out_schema.__name__} "
|
|
41
|
-
if hasattr(right, "model"):
|
|
42
|
-
plan_str += f"\n Using {right.model}"
|
|
43
|
-
if hasattr(right, "filter"):
|
|
44
|
-
filter_str = (
|
|
45
|
-
right.filter.filter_condition
|
|
46
|
-
if right.filter.filter_condition is not None
|
|
47
|
-
else str(right.filter.filter_fn)
|
|
48
|
-
)
|
|
49
|
-
plan_str += f'\n Filter: "{filter_str}"'
|
|
50
|
-
plan_str += "\n"
|
|
51
|
-
plan_str += (
|
|
52
|
-
f" ({','.join(in_schema.field_names())[:15]}...) -> ({','.join(out_schema.field_names())[:15]}...)"
|
|
53
|
-
)
|
|
54
|
-
plan_str += "\n\n"
|
|
55
|
-
|
|
56
|
-
return plan_str
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def print_table(records, cols=None, plan_str=None):
    """Display execution results in a Gradio app.

    Args:
        records: Sized collection of objects exposing ``to_dict()``.
        cols: Columns to show; all DataFrame columns when ``None``.
        plan_str: Optional plan description shown in a text box.

    Returns:
        None. Prints a message and returns early when ``records`` is empty;
        otherwise launches the Gradio app.
    """
    if len(records) == 0:
        print("No records met search criteria")
        return

    # one row per record, one column per record field
    table = pd.DataFrame([rec.to_dict() for rec in records])
    shown_cols = cols if cols is not None else table.columns

    with gr.Blocks() as demo:
        gr.Dataframe(table[shown_cols])

        if plan_str is not None:
            gr.Textbox(value=plan_str, info="Physical Plan")

    demo.launch()
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import types
|
|
2
|
-
|
|
3
|
-
from palimpzest.core.lib.fields import (
|
|
4
|
-
BooleanField,
|
|
5
|
-
BytesField,
|
|
6
|
-
Field,
|
|
7
|
-
FloatField,
|
|
8
|
-
IntField,
|
|
9
|
-
ListField,
|
|
10
|
-
NumericField,
|
|
11
|
-
StringField,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def assert_valid_field_type(field_type: type | types.UnionType | types.GenericAlias | Field) -> str:
    """Classify ``field_type`` as a PZ field type or a plain Python type.

    Args:
        field_type: A ``Field`` subclass, a Python class, a ``X | Y`` union,
            or a parameterised generic such as ``list[int]``.

    Returns:
        ``"pz_type"`` if ``field_type`` is a subclass of ``Field``, otherwise
        ``"python_type"``.

    Raises:
        AssertionError: If ``field_type`` is none of the accepted kinds.
    """
    # issubclass() raises TypeError for anything that is not a class (unions,
    # generic aliases, instances); that simply means "not a PZ type".
    try:
        is_pz_type = issubclass(field_type, Field)
    except TypeError:
        is_pz_type = False

    if is_pz_type:
        return "pz_type"

    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O` and would let every input through as "pz_type".
    if not isinstance(field_type, (type, types.UnionType, types.GenericAlias)):
        raise AssertionError("type must be a Python type or palimpzest.core.lib.fields.Field")

    return "python_type"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def construct_field_type(field_type: type | types.UnionType | types.GenericAlias | Field, desc: str) -> Field:
    """Build the PZ field that corresponds to a type and a description.

    Args:
        field_type: type for the field (e.g. str, bool, list[int], StringField, etc.)
        desc: description passed to the field constructor

    Returns:
        The value produced by calling the matching field constructor with ``desc``

    Raises:
        ValueError: If ``field_type`` is a Python type without a PZ equivalent
    """
    # PZ types are instantiated as-is
    kind = assert_valid_field_type(field_type)
    if kind == "pz_type":
        return field_type(desc=desc)

    # Python types are translated through this lookup table
    python_to_pz = {
        str: StringField,
        bool: BooleanField,
        int: IntField,
        float: FloatField,
        int | float: NumericField,
        bytes: BytesField,
        list[str]: ListField(StringField),
        list[bool]: ListField(BooleanField),
        list[int]: ListField(IntField),
        list[float]: ListField(FloatField),
        list[int | float]: ListField(NumericField),
        list[bytes]: ListField(BytesField),
    }

    field_cls = python_to_pz.get(field_type)
    if field_cls is None:
        raise ValueError(f"Unsupported type: {field_type}. Supported types are: {list(python_to_pz.keys())}")

    return field_cls(desc=desc)
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Any
|
|
3
|
-
|
|
4
|
-
import regex as re # Use regex instead of re to used variable length lookbehind
|
|
5
|
-
|
|
6
|
-
from palimpzest.constants import Cardinality, Model
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def _trim_to_json_span(answer: str, opener: str, closer: str) -> str:
    """Cut ``answer`` down to the span from the first ``opener`` to the last ``closer``."""
    if not answer.strip().startswith(opener):
        # drop any prefix the model emitted before the JSON value
        start_index = answer.find(opener)
        if start_index != -1:
            answer = answer[start_index:]

    if not answer.strip().endswith(closer):
        # drop any suffix the model emitted after the JSON value
        end_index = answer.rfind(closer)
        if end_index != -1:
            answer = answer[: end_index + 1]

    return answer


def get_json_from_answer(answer: str, model: Model, cardinality: Cardinality) -> dict[str, Any]:
    """Extract and parse the JSON value embedded in an LLM response.

    The response is cleaned heuristically (model-specific trimming, removal of
    trailing context, ``//`` comments and elided ``...`` lines, newline
    flattening) before being handed to ``json.loads``.

    Args:
        answer: Raw text returned by the model.
        model: The model that produced ``answer``; only ``is_llama_model()``
            is consulted.
        cardinality: ``Cardinality.ONE_TO_ONE`` selects a JSON object
            (``{...}``); any other value selects a JSON array (``[...]``).

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: If the cleaned text is not valid JSON; callers
            are expected to handle this.
    """
    # model-specific trimming for LLAMA3 responses
    if model.is_llama_model():
        answer = answer.split("---")[0]
        answer = answer.replace("True", "true")
        answer = answer.replace("False", "false")

    # split off context / excess, which models sometimes output after answer
    answer = answer.split("Context:")[0]
    answer = answer.split("# this is the answer")[0]

    # trim the answer to only include the JSON dictionary or the JSON array
    if cardinality == Cardinality.ONE_TO_ONE:
        answer = _trim_to_json_span(answer, "{", "}")
    else:
        answer = _trim_to_json_span(answer, "[", "]")

    # Handle escaped values the JSON parser cannot take
    answer = answer.replace(r"\_", "_")
    answer = answer.replace("\\n", "\n")
    # Strip // comments up to end of line; the lookbehind keeps http(s):// URLs
    answer = re.sub(r"(?<!https?:)\/\/.*?$", "", answer, flags=re.MULTILINE)
    # Drop a trailing ",\n ... ..." elision line
    answer = re.sub(r",\n.*\.\.\.$", "", answer, flags=re.MULTILINE)
    # Sanitize newlines in the JSON response
    answer = answer.replace("\n", " ")

    # finally, parse and return the JSON object; errors are handled by the caller
    return json.loads(answer)
|
palimpzest/utils/sandbox.py
DELETED
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
import contextlib
|
|
2
|
-
import io
|
|
3
|
-
|
|
4
|
-
import IPython.terminal.embed as embed
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class Sandbox:
|
|
8
|
-
def __init__(self, codes: list | None = None, reset: bool = False):
|
|
9
|
-
if codes is None:
|
|
10
|
-
codes = list()
|
|
11
|
-
super().__init__()
|
|
12
|
-
self.shell = None
|
|
13
|
-
self.codes = codes
|
|
14
|
-
if reset:
|
|
15
|
-
self.reset()
|
|
16
|
-
|
|
17
|
-
def __enter__(self):
|
|
18
|
-
self.reset()
|
|
19
|
-
return self
|
|
20
|
-
|
|
21
|
-
def __exit__(self, exc_type, exc_value, traceback):
|
|
22
|
-
self.exit()
|
|
23
|
-
|
|
24
|
-
def reset(self):
|
|
25
|
-
self.exit()
|
|
26
|
-
self.shell = embed.InteractiveShellEmbed(quiet=True)
|
|
27
|
-
for code in self.codes:
|
|
28
|
-
response = self.shell.run_cell(code)
|
|
29
|
-
if not response.success:
|
|
30
|
-
return {"status": False, "response": None, "msg": response["msg"]}
|
|
31
|
-
return {"status": True, "response": self.shell.user_ns["_"], "msg": None}
|
|
32
|
-
|
|
33
|
-
def execute(self, code, reset=False):
|
|
34
|
-
try:
|
|
35
|
-
if reset:
|
|
36
|
-
response = self.reset()
|
|
37
|
-
if not response["status"]:
|
|
38
|
-
return {"status": False, "response": None, "msg": str(response.error_in_exec)}
|
|
39
|
-
with io.StringIO() as buf, contextlib.redirect_stdout(buf), contextlib.redirect_stderr(buf):
|
|
40
|
-
response = self.shell.run_cell(code)
|
|
41
|
-
if response.success:
|
|
42
|
-
return {"status": True, "response": self.shell.user_ns["_"], "msg": None}
|
|
43
|
-
else:
|
|
44
|
-
return {"status": False, "response": None, "msg": str(response.error_in_exec)}
|
|
45
|
-
except Exception as e:
|
|
46
|
-
return {"status": False, "response": None, "msg": str(type(e)) + " " + str(e)}
|
|
47
|
-
|
|
48
|
-
def add(self, code, index=-1):
|
|
49
|
-
self.codes.insert(index, code)
|
|
50
|
-
|
|
51
|
-
def pop(self, index=-1):
|
|
52
|
-
return self.codes.pop(index)
|
|
53
|
-
|
|
54
|
-
def get(self, key="_"):
|
|
55
|
-
return self.shell.user_ns[key] if self.shell else None
|
|
56
|
-
|
|
57
|
-
def export(self, path):
|
|
58
|
-
with open(path + ".py", "w") as f:
|
|
59
|
-
for code in self.codes:
|
|
60
|
-
f.write("# %%\n")
|
|
61
|
-
f.write(code)
|
|
62
|
-
f.write("\n\n")
|
|
63
|
-
f.write("# %%\n")
|
|
64
|
-
|
|
65
|
-
def exit(self):
|
|
66
|
-
if self.shell:
|
|
67
|
-
self.shell.run_cell("quit()")
|
|
68
|
-
self.shell = None
|
|
69
|
-
|
|
70
|
-
@classmethod
|
|
71
|
-
def get_globals(cls):
|
|
72
|
-
with cls(codes=[]) as env:
|
|
73
|
-
response = env.execute("globals()")
|
|
74
|
-
if response["status"]:
|
|
75
|
-
return response["response"]
|
|
76
|
-
else:
|
|
77
|
-
return {}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
class API:
    """Describes a single-output function and renders source snippets for it.

    The description lives in ``config``: ``name`` (function name), ``inputs``
    and ``outputs`` (lists of ``{"name", "desc"}`` dicts), plus the optional
    keys ``doc`` and ``env`` (setup cells for the ``Sandbox``).
    """

    def __init__(self, **kwargs):
        """Validate the configuration and create the execution sandbox.

        Raises:
            AssertionError: If ``name``, ``inputs`` or ``outputs`` is missing,
                or if there is not exactly one output.
        """
        super().__init__()
        self.config = kwargs
        # Raised explicitly (not via `assert`) so validation survives `python -O`.
        required = (
            ("name", "Name is required!"),
            ("inputs", "Inputs are required!"),
            ("outputs", "Outputs are required!"),
        )
        for key, message in required:
            if key not in self.config:
                raise AssertionError(message)
        if len(self.config["outputs"]) != 1:
            raise AssertionError("Currently only single output is supported!")

        if "env" not in self.config:
            self.env = Sandbox(codes=list())
        else:
            self.env = Sandbox(codes=self.config["env"])

    @classmethod
    def from_task_descriptor(cls, td, field_name, input_fields=None):
        """Build an API from ``td.input_schema`` and ``td.output_schema``."""
        return cls.from_input_output_schemas(td.input_schema, td.output_schema, field_name, input_fields)

    @classmethod
    def from_input_output_schemas(cls, input_schema, output_schema, field_name, input_fields=None):
        """Build an "extraction" API producing ``field_name`` from the input fields.

        Args:
            input_schema: Schema whose attributes carry a ``desc`` per field.
            output_schema: Schema holding the ``field_name`` attribute.
            field_name: Name of the single output field.
            input_fields: Input field names; defaults to
                ``input_schema.field_names()``.
        """
        if input_fields is None:
            input_fields = input_schema.field_names()
        inputs = [{"name": name, "desc": getattr(input_schema, name).desc} for name in input_fields]
        outputs = [{"name": field_name, "desc": getattr(output_schema, field_name).desc}]
        return cls(name="extraction", inputs=inputs, outputs=outputs)

    @property
    def name(self):
        return self.config["name"]

    @property
    def inputs(self):
        return [i["name"] for i in self.config["inputs"]]

    @property
    def input_descs(self):
        return [i["desc"] for i in self.config["inputs"]]

    @property
    def output(self):
        # Currently only single output is supported!
        return self.config["outputs"][0]["name"]

    @property
    def output_desc(self):
        # Currently only single output is supported!
        return self.config["outputs"][0]["desc"]

    @property
    def doc(self):
        return self.config.get("doc", "")

    @property
    def args(self):
        # attr1, attr2, attr3, ...
        return ", ".join(i["name"] for i in self.config["inputs"])

    @property
    def asgs(self):
        # attr1=attr1, attr2=attr2, attr3=attr3, ...
        return ", ".join(i["name"] + "=" + i["name"] for i in self.config["inputs"])

    def inps(self, inputs=None):
        # attr1=..., attr2=..., attr3=..., ... (only for names present in `inputs`)
        if inputs is None:
            inputs = dict()
        return ", ".join(
            i["name"] + "=" + repr(inputs[i["name"]]) for i in self.config["inputs"] if i["name"] in inputs
        )

    def _render_call(self, arguments, with_kwargs):
        """Render ``name(arguments[, **kwargs])`` without a dangling comma."""
        # An empty argument list must not yield "name(, **kwargs)".
        pieces = [arguments] if arguments else []
        if with_kwargs:
            pieces.append("**kwargs")
        return self.config["name"] + "(" + ", ".join(pieces) + ")"

    def kwargs_call(self):
        # name(**kwargs)
        return self.config["name"] + "(**kwargs)"

    def args_call(self, with_kwargs=False):
        # name(attr1, attr2, attr3, ..., **kwargs)
        return self._render_call(self.args, with_kwargs)

    def asgs_call(self, with_kwargs=False):
        # name(attr1=attr1, attr2=attr2, attr3=attr3, ..., **kwargs)
        return self._render_call(self.asgs, with_kwargs)

    def api_def(self, with_kwargs=False):
        # def name(attr1, attr2, attr3, ..., **kwargs):
        return "def " + self.args_call(with_kwargs=with_kwargs) + ":\n"

    def api_call(self, inputs, with_kwargs=False):
        # name(attr1=..., attr2=..., attr3=..., ..., **kwargs)
        return self._render_call(self.inps(inputs), with_kwargs)

    def api_execute(self, code, inputs):
        """Run ``code`` as a temporary setup cell, then call the API with ``inputs``.

        Returns:
            The result dict produced by the sandbox's ``execute``.
        """
        # Insert at the very end so the matching pop() below removes exactly
        # this cell, whatever the sandbox already contains.
        self.env.add(code, index=len(self.env.codes))
        try:
            return self.env.execute(self.api_call(inputs), reset=True)
        finally:
            # always unregister the temporary cell, even if rendering the call fails
            self.env.pop()
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
palimpzest/__init__.py,sha256=t-xBXOEaah4dEyHcnKJhDHpPRx7-P2QVwjBmdExVdNc,874
|
|
2
|
-
palimpzest/constants.py,sha256=xXkq_y_fdkjHeu9G0nEoOvpu4N4TtFQgTCDAj93Uf1E,17933
|
|
3
|
-
palimpzest/policy.py,sha256=2cMio_AUfZv6lksr_klfP747G4w1nsZJtfmt6zjeaMk,12656
|
|
4
|
-
palimpzest/sets.py,sha256=zWpNy466-Klg18wqAd_1t46JZzu44l_CbTbAkmbK-_w,15050
|
|
5
|
-
palimpzest/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
palimpzest/core/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
palimpzest/core/data/dataclasses.py,sha256=EJ2l7OGdaIzCKFMp7vk9K4POIE1cTQVN7_zovLo1zhY,34919
|
|
8
|
-
palimpzest/core/data/datareaders.py,sha256=SMX92MVy_fj1WYv_B-EksdbFdj5tOOFMGXqBfGPB9Cs,16725
|
|
9
|
-
palimpzest/core/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
palimpzest/core/elements/filters.py,sha256=fU2x0eWDwfP52_5fUmqJXTuhs4H0vvHtPZLdA3IIw8I,1642
|
|
11
|
-
palimpzest/core/elements/groupbysig.py,sha256=1qHuR2-fcW-E4rxPSieYGSXZYwvFaPwf1ld9VPWvWjw,2233
|
|
12
|
-
palimpzest/core/elements/index.py,sha256=adO67DgzHhA4lBME0-h4SjXfdz9UcNMSDGXTpUdKbgE,1929
|
|
13
|
-
palimpzest/core/elements/records.py,sha256=IqCZ7tOcq59viNmwg9lHmqx8tF6SfzqSihs06QMOwH8,15008
|
|
14
|
-
palimpzest/core/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
palimpzest/core/lib/fields.py,sha256=czzN7v4AgRzMSqgCWOelTEOJz3r-RCvwRTxQ3B0xG0U,4099
|
|
16
|
-
palimpzest/core/lib/schemas.py,sha256=_WMPR_YcAIeUNRne7gQLJHLqDNVz7LBxFXN3osu659Q,17105
|
|
17
|
-
palimpzest/prompts/__init__.py,sha256=klF8bYcNZWTQyuX8ZaqUXmD8Syq-MOHSRKemiwyM4N4,680
|
|
18
|
-
palimpzest/prompts/code_synthesis_prompts.py,sha256=8mlMTPAI5WsoG0LVohoBFL-dnOro-mP3VJgEAiwgxnU,1472
|
|
19
|
-
palimpzest/prompts/convert_prompts.py,sha256=mUt2TkSerAYuYyDg7LC4AQ195Zz-zoZjA0AN_yMH9MQ,3595
|
|
20
|
-
palimpzest/prompts/critique_and_refine_convert_prompts.py,sha256=WoXExBxQ7twswd9VCCST26c-2ehZtpD2iQoBi7sqDnQ,7814
|
|
21
|
-
palimpzest/prompts/filter_prompts.py,sha256=iQjn-39h3L0E5wng_UPgAXRHrP1ok329TXpOgZ6Wn1w,2372
|
|
22
|
-
palimpzest/prompts/moa_aggregator_convert_prompts.py,sha256=BQRrtGdr53PTqvXzmFh8kfQ_w9KoKw-zTtmdo-8RFjo,2887
|
|
23
|
-
palimpzest/prompts/moa_proposer_convert_prompts.py,sha256=vcme_K-QA7--slFo91gKTh7x7l66x0u9rBEXI8NsN_I,3460
|
|
24
|
-
palimpzest/prompts/prompt_factory.py,sha256=ra2swEg46Y5J446mcqeSjomk7j2DOE6jVMiBcZCoU6I,34042
|
|
25
|
-
palimpzest/prompts/split_merge_prompts.py,sha256=0mTZeJhxtvlmv-ro0KwQpxlGgSTwyUhGRHJ-uHk2Zlw,3146
|
|
26
|
-
palimpzest/prompts/split_proposer_prompts.py,sha256=TBHLGaM_ycHjGHrp1JziJoJDw4S5_F4afKSAdt2McKk,2624
|
|
27
|
-
palimpzest/prompts/util_phrases.py,sha256=NWrcHfjJyiOY16Jyt7R50moVnlJDyvSBZ9kBqyX2WQo,751
|
|
28
|
-
palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
-
palimpzest/query/execution/all_sample_execution_strategy.py,sha256=0-n_NxwC4eQ0d7qAfvNdV9grBFBIUgrpYGNefzl33No,10470
|
|
31
|
-
palimpzest/query/execution/execution_strategy.py,sha256=G5q-obhzO_odfXslR-YAQJd6vMs3TJsrfXhdmN-y-EA,21857
|
|
32
|
-
palimpzest/query/execution/execution_strategy_type.py,sha256=7zKpaB5j5cialRX15dx9nAUjtx8b8JjEHlSuPyeyd90,1076
|
|
33
|
-
palimpzest/query/execution/mab_execution_strategy.py,sha256=9c1F5d8RKjmDCS6AN8wrkbQha8kvSR24XtUW3v80R08,34756
|
|
34
|
-
palimpzest/query/execution/parallel_execution_strategy.py,sha256=H47MpktsOYFDcnm1lINNUfgjJhf2WxLA1iROyGFBmEc,17381
|
|
35
|
-
palimpzest/query/execution/random_sampling_execution_strategy.py,sha256=jWPd-sBsTbkwxXRlKgnR3FO6jEEqViaYUvBb0RJkP5g,11585
|
|
36
|
-
palimpzest/query/execution/single_threaded_execution_strategy.py,sha256=-31gg7ha0D97g2zipzF72f7nrJFcDuiRKmo10_q5ykE,12389
|
|
37
|
-
palimpzest/query/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
palimpzest/query/generators/api_client_factory.py,sha256=lH9p5wRehECc7m1dWOGAGivD2F5x6hGh5kQCMw2-tfs,1065
|
|
39
|
-
palimpzest/query/generators/generators.py,sha256=PjohQGJTFoY78eAUQCnA_jKKjm5UF3OYPaEDstrK0Zo,26130
|
|
40
|
-
palimpzest/query/operators/__init__.py,sha256=fq0YvXuqxLtQyI_LuyOe02KYqYCmD-Sh1M-FcVWnr_A,3163
|
|
41
|
-
palimpzest/query/operators/aggregate.py,sha256=G7jH2k0673t0TtzchCB0a9LLhSWwZb8X20FtuNw8xTQ,10288
|
|
42
|
-
palimpzest/query/operators/code_synthesis_convert.py,sha256=Oki3ZJi6lRD3NFKylulmreFniRVYjrbuKWFe_M90HXM,20992
|
|
43
|
-
palimpzest/query/operators/convert.py,sha256=wneGeUmQifsSOmBz6HZBdwvmaO-o7UbsJyrwY3_pkyE,16859
|
|
44
|
-
palimpzest/query/operators/critique_and_refine_convert.py,sha256=PCAJIJV5ljADkCmKfkl5Neaf6mu91zFVY_D9m_Prr1c,5179
|
|
45
|
-
palimpzest/query/operators/filter.py,sha256=nb3cb7OekqA60SsWxbNSVxZ_Bm-ddLLZ5ql2xSC0-dg,10325
|
|
46
|
-
palimpzest/query/operators/limit.py,sha256=m-rob4SxdmR-JeDGGgjgss7uQkjviMxsj1bnd5wyjWY,2113
|
|
47
|
-
palimpzest/query/operators/logical.py,sha256=5AqUeSyGgLV3uRXn0ZCYSYzrk1eeuSAgSPYTM9gHMdI,16163
|
|
48
|
-
palimpzest/query/operators/map.py,sha256=dLMQR2vgkCtr2SsBbgBxvgcqf9TO9B40V8ylPtxukY4,4950
|
|
49
|
-
palimpzest/query/operators/mixture_of_agents_convert.py,sha256=CU6X7KG1Ivx-8OZ6HSye6auoDUaz0EgVai68_AGxEVA,6675
|
|
50
|
-
palimpzest/query/operators/physical.py,sha256=HJu6eksVYILEc0Q_YKCtmBuOXNkryxXKf9LxwESOV-k,8052
|
|
51
|
-
palimpzest/query/operators/project.py,sha256=zm01I404wpUGjigK3ATn5ApY7yf1HVFd1ZYWfVlsZsI,2094
|
|
52
|
-
palimpzest/query/operators/rag_convert.py,sha256=kg5PfukNuUs9MWu9BMqsn0CC2L6WchLeg20BWWD7V0Q,10699
|
|
53
|
-
palimpzest/query/operators/retrieve.py,sha256=daSPOPX_vSBLTeux_GMH52t1giPxmtNH48ILQZx34to,13092
|
|
54
|
-
palimpzest/query/operators/scan.py,sha256=pOS_9fkBEjwsabLK9Fz08c9n7L74-4mhiWWiVbdFrII,5870
|
|
55
|
-
palimpzest/query/operators/split_convert.py,sha256=QSc4_WKeysl32Q8ovK25UgPwskxS9baJ-sl23fv3ykA,7719
|
|
56
|
-
palimpzest/query/optimizer/__init__.py,sha256=qJtR1nwGurkazInpdwJVQ1vVmBfsA_qXGXU71RkycSE,2465
|
|
57
|
-
palimpzest/query/optimizer/cost_model.py,sha256=mUPpcKUMxOtg4guY9d26ExN-bFjXVu_Dvvp4eRXyw6A,30190
|
|
58
|
-
palimpzest/query/optimizer/optimizer.py,sha256=hZtnX7iCpsiU8T_8i7a4c91ZYUAcA0KeAX5TZiZJ65s,21229
|
|
59
|
-
palimpzest/query/optimizer/optimizer_strategy.py,sha256=Aek-LsQQO9VuxI1ZLkmadxik87-tRjy4v1_edbAhLMs,9234
|
|
60
|
-
palimpzest/query/optimizer/optimizer_strategy_type.py,sha256=V-MMHvJdnfZKoUX1xxxwh66q1RjN2FL35IsiT1C62c8,1084
|
|
61
|
-
palimpzest/query/optimizer/plan.py,sha256=i6lOTSNMcVNf0SruMD3lZ_-Nq3_UWq5X6QFd_lRvYA4,5682
|
|
62
|
-
palimpzest/query/optimizer/primitives.py,sha256=r0zjavxxVCCFABXDwdQxRJmQek6WLo1777suoy7CWEc,4038
|
|
63
|
-
palimpzest/query/optimizer/rules.py,sha256=msBWBleUy1sVs1iR67jMf6fPVoRaHsL438pg3nK_hHs,47141
|
|
64
|
-
palimpzest/query/optimizer/tasks.py,sha256=pPDIV-efm_2S9WVYubu0QGuhh5-4c9SM3XtvfhpLDOY,24855
|
|
65
|
-
palimpzest/query/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
palimpzest/query/processor/config.py,sha256=dA3GB-nEAIQBllEx4rFy1W8iHYYEZOMOi4MgfZgODuQ,3851
|
|
67
|
-
palimpzest/query/processor/nosentinel_processor.py,sha256=E5XvDLqTuG5aIy-g5bMbDx0UTUBwjNlAuo9W5AvpsCo,1224
|
|
68
|
-
palimpzest/query/processor/processing_strategy_type.py,sha256=Blq-nKwjbj-BAzCv5QLdYUbj-RhsGZrDKt1xk4H4RPI,1419
|
|
69
|
-
palimpzest/query/processor/query_processor.py,sha256=13DcIeoISPbEgrTaXR-fCT2XfqYXjC-16midStaaVSg,3496
|
|
70
|
-
palimpzest/query/processor/query_processor_factory.py,sha256=2qiFs6dyskreJti7uf179Bs5aJgYXBpP_pgfaIzqpzg,7492
|
|
71
|
-
palimpzest/query/processor/sentinel_processor.py,sha256=QOgrMBWqZAdHO9WqjrSiRGYWNRRh84UJ-S8EsDM_nUo,3870
|
|
72
|
-
palimpzest/query/processor/streaming_processor.py,sha256=tR0KVW-lLLF1MEahaasuqwiTYVw7Y6P0qw2IBKbDZho,5996
|
|
73
|
-
palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
74
|
-
palimpzest/schemabuilder/schema_builder.py,sha256=kGEv-Adba-FNziRrlG0zwx317IuD7rmzNl2GecvnbDw,8528
|
|
75
|
-
palimpzest/tools/README.md,sha256=56_6LPG80uc0CLVhTBP6I1wgIffNv9cyTr0TmVZqmrM,483
|
|
76
|
-
palimpzest/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
-
palimpzest/tools/allenpdf.py,sha256=fXMOmSDdSSLXDKAPYYJ8k4egtWEBf_Me9Lq9tM3iyoA,1690
|
|
78
|
-
palimpzest/tools/pdfparser.py,sha256=0DOVUZLxYfqjxM8WNEfYcyiXb1qW9BWVIHEB_B_YhWA,9570
|
|
79
|
-
palimpzest/tools/skema_tools.py,sha256=HXUFpjMhbVxZwKKkATeK-FwtlTCawaCbeP-uHntI1Kg,669
|
|
80
|
-
palimpzest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
81
|
-
palimpzest/utils/datareader_helpers.py,sha256=-tkIf9iOF7mr-gyrrUQqnHWwRig4OGnowU0Wpx5HKG0,2121
|
|
82
|
-
palimpzest/utils/demo_helpers.py,sha256=80s2IxlkEOLtLfIEDw_u0cyuNzsjvh-79m12J1q5i_c,2403
|
|
83
|
-
palimpzest/utils/env_helpers.py,sha256=n81KzoJ459pRxo7QmJA7duazwWsfoMGTHc71D2LatFk,334
|
|
84
|
-
palimpzest/utils/field_helpers.py,sha256=Op18ThAnDlALiAkquUQbelHodZZYg378Ct1I8eIkKio,2291
|
|
85
|
-
palimpzest/utils/generation_helpers.py,sha256=KTeVQDXRwM18auR8YsLNR5AdPDjSgO0NoOBagfFdTMs,3213
|
|
86
|
-
palimpzest/utils/hash_helpers.py,sha256=3A8dA7SbXTwnnvZvPVNqqMLlVRhCKyKF_bjNNAu3Exk,334
|
|
87
|
-
palimpzest/utils/model_helpers.py,sha256=GEtWn0SIoTjEf94ZQZyP99zJ2sOEIYJYxhOCjaTcBO0,3433
|
|
88
|
-
palimpzest/utils/progress.py,sha256=RiV753fKV8dFCNan4_wvdN2S3gvTvyRPq4Ksz6nSV1U,18536
|
|
89
|
-
palimpzest/utils/sandbox.py,sha256=Ge96gmzqeOGlNkMCG9A95_PB8wRQbvTFua136of8FcA,6465
|
|
90
|
-
palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
|
|
91
|
-
palimpzest-0.7.20.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
|
|
92
|
-
palimpzest-0.7.20.dist-info/METADATA,sha256=AQxHqmyHh-hsES7i_ZI2qdwvm9Q-9IP-isb99LwLQoQ,7120
|
|
93
|
-
palimpzest-0.7.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
94
|
-
palimpzest-0.7.20.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
|
|
95
|
-
palimpzest-0.7.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|