pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of pixeltable has been flagged as possibly problematic.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +590 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +359 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +195 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +256 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +122 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +418 -182
- pixeltable/tests/conftest.py +146 -88
- pixeltable/tests/functions/test_fireworks.py +42 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/functions/test_huggingface.py +158 -0
- pixeltable/tests/functions/test_openai.py +152 -0
- pixeltable/tests/functions/test_together.py +111 -0
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +370 -0
- pixeltable/tests/test_dataframe.py +439 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +120 -0
- pixeltable/tests/test_exprs.py +592 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1195 -263
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +151 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +320 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +445 -124
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +167 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.4.dist-info/LICENSE +18 -0
- pixeltable-0.2.4.dist-info/METADATA +127 -0
- pixeltable-0.2.4.dist-info/RECORD +132 -0
- {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_functions.py +0 -11
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.0.dist-info/METADATA +0 -34
- pixeltable-0.1.0.dist-info/RECORD +0 -36
--- /dev/null
+++ b/pixeltable/functions/huggingface.py
@@ -0,0 +1,120 @@
+from typing import Any, Callable
+
+import PIL.Image
+import numpy as np
+
+import pixeltable as pxt
+import pixeltable.env as env
+import pixeltable.type_system as ts
+from pixeltable.func import Batch
+
+
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
+def sentence_transformer(sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False) -> Batch[np.ndarray]:
+    env.Env.get().require_package('sentence_transformers')
+    from sentence_transformers import SentenceTransformer
+
+    model = _lookup_model(model_id, SentenceTransformer)
+
+    array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
+    return [array[i] for i in range(array.shape[0])]
+
+
+@pxt.udf
+def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embeddings: bool = False) -> list:
+    env.Env.get().require_package('sentence_transformers')
+    from sentence_transformers import SentenceTransformer
+
+    model = _lookup_model(model_id, SentenceTransformer)
+
+    array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
+    return [array[i].tolist() for i in range(array.shape[0])]
+
+
+@pxt.udf(batch_size=32)
+def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
+    env.Env.get().require_package('sentence_transformers')
+    from sentence_transformers import CrossEncoder
+
+    model = _lookup_model(model_id, CrossEncoder)
+
+    array = model.predict([[s1, s2] for s1, s2 in zip(sentences1, sentences2)], convert_to_numpy=True)
+    return array.tolist()
+
+
+@pxt.udf
+def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> list:
+    env.Env.get().require_package('sentence_transformers')
+    from sentence_transformers import CrossEncoder
+
+    model = _lookup_model(model_id, CrossEncoder)
+
+    array = model.predict([[sentence1, s2] for s2 in sentences2], convert_to_numpy=True)
+    return array.tolist()
+
+
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
+def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
+    env.Env.get().require_package('transformers')
+    from transformers import CLIPModel, CLIPProcessor
+
+    model = _lookup_model(model_id, CLIPModel.from_pretrained)
+    processor = _lookup_processor(model_id, CLIPProcessor.from_pretrained)
+
+    inputs = processor(text=text, return_tensors='pt', padding=True, truncation=True)
+    embeddings = model.get_text_features(**inputs).detach().numpy()
+    return [embeddings[i] for i in range(embeddings.shape[0])]
+
+
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
+def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
+    env.Env.get().require_package('transformers')
+    from transformers import CLIPModel, CLIPProcessor
+
+    model = _lookup_model(model_id, CLIPModel.from_pretrained)
+    processor = _lookup_processor(model_id, CLIPProcessor.from_pretrained)
+
+    inputs = processor(images=image, return_tensors='pt', padding=True)
+    embeddings = model.get_image_features(**inputs).detach().numpy()
+    return [embeddings[i] for i in range(embeddings.shape[0])]
+
+
+@pxt.udf(batch_size=32)
+def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
+    env.Env.get().require_package('transformers')
+    from transformers import DetrImageProcessor, DetrForObjectDetection
+
+    model = _lookup_model(model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision='no_timm'))
+    processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision='no_timm'))
+
+    inputs = processor(images=image, return_tensors='pt')
+    outputs = model(**inputs)
+
+    results = processor.post_process_object_detection(outputs, threshold=threshold)
+    return [
+        {
+            'scores': [score.item() for score in result['scores']],
+            'labels': [label.item() for label in result['labels']],
+            'label_text': [model.config.id2label[label.item()] for label in result['labels']],
+            'boxes': [box.tolist() for box in result['boxes']]
+        }
+        for result in results
+    ]
+
+
+def _lookup_model(model_id: str, create: Callable) -> Any:
+    key = (model_id, create)  # For safety, include the `create` callable in the cache key
+    if key not in _model_cache:
+        _model_cache[key] = create(model_id)
+    return _model_cache[key]
+
+
+def _lookup_processor(model_id: str, create: Callable) -> Any:
+    key = (model_id, create)  # For safety, include the `create` callable in the cache key
+    if key not in _processor_cache:
+        _processor_cache[key] = create(model_id)
+    return _processor_cache[key]
+
+
+_model_cache = {}
+_processor_cache = {}
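Note: the new huggingface.py module keeps one loaded model per (model_id, create) pair instead of reloading on every call. A minimal, self-contained sketch of that caching idiom; FakeModel is a hypothetical stand-in for an expensive-to-load model:

# Sketch of the (model_id, create) caching idiom used above.
from typing import Any, Callable

_model_cache: dict = {}

class FakeModel:
    def __init__(self, model_id: str):
        self.model_id = model_id  # loading a real model would happen here

def _lookup_model(model_id: str, create: Callable) -> Any:
    # The callable is part of the key, so different wrappers of the same id don't collide.
    key = (model_id, create)
    if key not in _model_cache:
        _model_cache[key] = create(model_id)
    return _model_cache[key]

m1 = _lookup_model('my-model', FakeModel)
m2 = _lookup_model('my-model', FakeModel)
assert m1 is m2  # the second lookup hits the cache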
--- /dev/null
+++ b/pixeltable/functions/image.py
@@ -0,0 +1,16 @@
+import base64
+
+import PIL.Image
+
+from pixeltable.type_system import ImageType, StringType
+import pixeltable.func as func
+
+
+@func.udf
+def b64_encode(img: PIL.Image.Image, image_format: str = 'png') -> str:
+    # Encode this image as a b64-encoded png.
+    import io
+    bytes_arr = io.BytesIO()
+    img.save(bytes_arr, format=image_format)
+    b64_bytes = base64.b64encode(bytes_arr.getvalue())
+    return b64_bytes.decode('utf-8')
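Note: b64_encode saves the image into an in-memory buffer and base64-encodes the resulting bytes. A round-trip sketch of that logic using only PIL and the standard library, with no Pixeltable runtime involved:

# Encode a tiny image and decode it back.
import base64
import io

import PIL.Image

img = PIL.Image.new('RGB', (4, 4), color='red')
buf = io.BytesIO()
img.save(buf, format='png')
b64_str = base64.b64encode(buf.getvalue()).decode('utf-8')

decoded = PIL.Image.open(io.BytesIO(base64.b64decode(b64_str)))
assert decoded.size == (4, 4)  # the payload decodes back to the same image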
--- /dev/null
+++ b/pixeltable/functions/openai.py
@@ -0,0 +1,256 @@
+import base64
+import io
+import pathlib
+import uuid
+from typing import Optional, TypeVar, Union, Callable
+
+import PIL.Image
+import numpy as np
+import openai
+import tenacity
+from openai._types import NOT_GIVEN, NotGiven
+
+import pixeltable as pxt
+import pixeltable.type_system as ts
+from pixeltable import env
+from pixeltable.func import Batch
+
+
+def openai_client() -> openai.OpenAI:
+    return env.Env.get().get_client('openai', lambda api_key: openai.OpenAI(api_key=api_key))
+
+
+# Exponential backoff decorator using tenacity.
+# TODO(aaron-siegel): Right now this hardwires random exponential backoff with defaults suggested
+# by OpenAI. Should we investigate making this more customizable in the future?
+def _retry(fn: Callable) -> Callable:
+    return tenacity.retry(
+        retry=tenacity.retry_if_exception_type(openai.RateLimitError),
+        wait=tenacity.wait_random_exponential(min=1, max=60),
+        stop=tenacity.stop_after_attempt(6)
+    )(fn)
+
+
+#####################################
+# Audio Endpoints
+
+@pxt.udf(return_type=ts.AudioType())
+@_retry
+def speech(
+    input: str,
+    *,
+    model: str,
+    voice: str,
+    response_format: Optional[str] = None,
+    speed: Optional[float] = None
+) -> str:
+    content = openai_client().audio.speech.create(
+        input=input,
+        model=model,
+        voice=voice,
+        response_format=_opt(response_format),
+        speed=_opt(speed)
+    )
+    ext = response_format or 'mp3'
+    output_filename = str(env.Env.get().tmp_dir / f"{uuid.uuid4()}.{ext}")
+    content.stream_to_file(output_filename, chunk_size=1 << 20)
+    return output_filename
+
+
+@pxt.udf(
+    param_types=[ts.AudioType(), ts.StringType(), ts.StringType(nullable=True),
+                 ts.StringType(nullable=True), ts.FloatType(nullable=True)]
+)
+@_retry
+def transcriptions(
+    audio: str,
+    *,
+    model: str,
+    language: Optional[str] = None,
+    prompt: Optional[str] = None,
+    temperature: Optional[float] = None
+) -> dict:
+    file = pathlib.Path(audio)
+    transcription = openai_client().audio.transcriptions.create(
+        file=file,
+        model=model,
+        language=_opt(language),
+        prompt=_opt(prompt),
+        temperature=_opt(temperature)
+    )
+    return transcription.dict()
+
+
+@pxt.udf(
+    param_types=[ts.AudioType(), ts.StringType(), ts.StringType(nullable=True), ts.FloatType(nullable=True)]
+)
+@_retry
+def translations(
+    audio: str,
+    *,
+    model: str,
+    prompt: Optional[str] = None,
+    temperature: Optional[float] = None
+) -> dict:
+    file = pathlib.Path(audio)
+    translation = openai_client().audio.translations.create(
+        file=file,
+        model=model,
+        prompt=_opt(prompt),
+        temperature=_opt(temperature)
+    )
+    return translation.dict()
+
+
+#####################################
+# Chat Endpoints
+
+@pxt.udf
+@_retry
+def chat_completions(
+    messages: list,
+    *,
+    model: str,
+    frequency_penalty: Optional[float] = None,
+    logit_bias: Optional[dict[str, int]] = None,
+    logprobs: Optional[bool] = None,
+    top_logprobs: Optional[int] = None,
+    max_tokens: Optional[int] = None,
+    n: Optional[int] = None,
+    presence_penalty: Optional[float] = None,
+    response_format: Optional[dict] = None,
+    seed: Optional[int] = None,
+    stop: Optional[list[str]] = None,
+    temperature: Optional[float] = None,
+    top_p: Optional[float] = None,
+    tools: Optional[list[dict]] = None,
+    tool_choice: Optional[dict] = None,
+    user: Optional[str] = None
+) -> dict:
+    result = openai_client().chat.completions.create(
+        messages=messages,
+        model=model,
+        frequency_penalty=_opt(frequency_penalty),
+        logit_bias=_opt(logit_bias),
+        logprobs=_opt(logprobs),
+        top_logprobs=_opt(top_logprobs),
+        max_tokens=_opt(max_tokens),
+        n=_opt(n),
+        presence_penalty=_opt(presence_penalty),
+        response_format=_opt(response_format),
+        seed=_opt(seed),
+        stop=_opt(stop),
+        temperature=_opt(temperature),
+        top_p=_opt(top_p),
+        tools=_opt(tools),
+        tool_choice=_opt(tool_choice),
+        user=_opt(user)
+    )
+    return result.dict()
+
+
+@pxt.udf
+@_retry
+def vision(
+    prompt: str,
+    image: PIL.Image.Image,
+    *,
+    model: str = 'gpt-4-vision-preview'
+) -> str:
+    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
+    bytes_arr = io.BytesIO()
+    image.save(bytes_arr, format='png')
+    b64_bytes = base64.b64encode(bytes_arr.getvalue())
+    b64_encoded_image = b64_bytes.decode('utf-8')
+    messages = [
+        {'role': 'user',
+         'content': [
+             {'type': 'text', 'text': prompt},
+             {'type': 'image_url', 'image_url': {
+                 'url': f'data:image/png;base64,{b64_encoded_image}'
+             }}
+         ]}
+    ]
+    result = openai_client().chat.completions.create(
+        messages=messages,
+        model=model
+    )
+    return result.choices[0].message.content
+
+
+#####################################
+# Embeddings Endpoints
+
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
+@_retry
+def embeddings(
+    input: Batch[str],
+    *,
+    model: str,
+    user: Optional[str] = None
+) -> Batch[np.ndarray]:
+    result = openai_client().embeddings.create(
+        input=input,
+        model=model,
+        user=_opt(user),
+        encoding_format='float'
+    )
+    return [
+        np.array(data.embedding, dtype=np.float64)
+        for data in result.data
+    ]
+
+
+#####################################
+# Images Endpoints
+
+@pxt.udf
+@_retry
+def image_generations(
+    prompt: str,
+    *,
+    model: Optional[str] = None,
+    quality: Optional[str] = None,
+    size: Optional[str] = None,
+    style: Optional[str] = None,
+    user: Optional[str] = None
+) -> PIL.Image.Image:
+    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
+    result = openai_client().images.generate(
+        prompt=prompt,
+        model=_opt(model),
+        quality=_opt(quality),
+        size=_opt(size),
+        style=_opt(style),
+        user=_opt(user),
+        response_format="b64_json"
+    )
+    b64_str = result.data[0].b64_json
+    b64_bytes = base64.b64decode(b64_str)
+    img = PIL.Image.open(io.BytesIO(b64_bytes))
+    img.load()
+    return img
+
+
+#####################################
+# Moderations Endpoints
+
+@pxt.udf
+@_retry
+def moderations(
+    input: str,
+    *,
+    model: Optional[str] = None
+) -> dict:
+    result = openai_client().moderations.create(
+        input=input,
+        model=_opt(model)
+    )
+    return result.dict()
+
+
+_T = TypeVar('_T')
+
+
+def _opt(arg: _T) -> Union[_T, NotGiven]:
+    return arg if arg is not None else NOT_GIVEN
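Note: every optional UDF parameter above defaults to None and is translated via _opt() into the OpenAI SDK's NOT_GIVEN sentinel, so an unspecified argument falls back to the API's own default rather than sending an explicit null. A self-contained sketch of that sentinel pattern; fake_create is a hypothetical stand-in for an SDK call:

# Sketch of the _opt()/NOT_GIVEN pattern with a local sentinel class.
from typing import Optional, TypeVar, Union

class _NotGiven:
    def __repr__(self) -> str:
        return 'NOT_GIVEN'

NOT_GIVEN = _NotGiven()
_T = TypeVar('_T')

def _opt(arg: Optional[_T]) -> Union[_T, _NotGiven]:
    # None means "caller didn't specify": substitute the sentinel.
    return arg if arg is not None else NOT_GIVEN

def fake_create(*, temperature=NOT_GIVEN):
    # Stands in for an SDK call that distinguishes "omitted" from an explicit value.
    return 'api default' if isinstance(temperature, _NotGiven) else temperature

assert fake_create(temperature=_opt(None)) == 'api default'
assert fake_create(temperature=_opt(0.2)) == 0.2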
--- a/pixeltable/functions/pil/image.py
+++ b/pixeltable/functions/pil/image.py
@@ -1,9 +1,150 @@
-from
-from pixeltable.function import Function
+from typing import Dict, Any, Tuple, Optional
 
+import PIL.Image
 
-
-
-
-
-
+from pixeltable.type_system import FloatType, ImageType, IntType, ArrayType, ColumnType, StringType, JsonType, BoolType
+import pixeltable.func as func
+
+
+def _caller_return_type(bound_args: Optional[Dict[str, Any]]) -> ColumnType:
+    if bound_args is None:
+        return ImageType()
+    return bound_args['self'].col_type
+
+@func.udf(
+    py_fn=PIL.Image.alpha_composite, return_type=ImageType(), param_types=[ImageType(), ImageType()])
+def alpha_composite(im1: PIL.Image.Image, im2: PIL.Image.Image) -> PIL.Image.Image:
+    pass
+@func.udf(
+    py_fn=PIL.Image.blend, return_type=ImageType(), param_types=[ImageType(), ImageType(), FloatType()])
+def blend(im1: PIL.Image.Image, im2: PIL.Image.Image, alpha: float) -> PIL.Image.Image:
+    pass
+@func.udf(
+    py_fn=PIL.Image.composite, return_type=ImageType(), param_types=[ImageType(), ImageType(), ImageType()])
+def composite(image1: PIL.Image.Image, image2: PIL.Image.Image, mask: PIL.Image.Image) -> PIL.Image.Image:
+    pass
+
+
+# PIL.Image.Image methods
+
+# Image.convert()
+def _convert_return_type(bound_args: Dict[str, Any]) -> ColumnType:
+    if bound_args is None:
+        return ImageType()
+    assert 'self' in bound_args
+    assert 'mode' in bound_args
+    img_type = bound_args['self'].col_type
+    return ImageType(size=img_type.size, mode=bound_args['mode'])
+@func.udf(return_type=_convert_return_type, param_types=[ImageType(), StringType()])
+def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
+    return self.convert(mode)
+
+# Image.crop()
+def _crop_return_type(bound_args: Dict[str, Any]) -> ColumnType:
+    if bound_args is None:
+        return ImageType()
+    img_type = bound_args['self'].col_type
+    box = bound_args['box']
+    if isinstance(box, list) and all(isinstance(x, int) for x in box):
+        return ImageType(size=(box[2] - box[0], box[3] - box[1]), mode=img_type.mode)
+    return ImageType()  # we can't compute the size statically
+@func.udf(
+    py_fn=PIL.Image.Image.crop, return_type=_crop_return_type,
+    param_types=[ImageType(), ArrayType((4,), dtype=IntType())])
+def crop(self: PIL.Image.Image, box: Tuple[int, int, int, int]) -> PIL.Image.Image:
+    pass
+
+# Image.getchannel()
+def _getchannel_return_type(bound_args: Dict[str, Any]) -> ColumnType:
+    if bound_args is None:
+        return ImageType()
+    img_type = bound_args['self'].col_type
+    return ImageType(size=img_type.size, mode='L')
+@func.udf(
+    py_fn=PIL.Image.Image.getchannel, return_type=_getchannel_return_type, param_types=[ImageType(), IntType()])
+def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
+    pass
+
+# Image.resize()
+def resize_return_type(bound_args: Dict[str, Any]) -> ColumnType:
+    if bound_args is None:
+        return ImageType()
+    assert 'size' in bound_args
+    return ImageType(size=bound_args['size'])
+@func.udf(return_type=resize_return_type, param_types=[ImageType(), ArrayType((2, ), dtype=IntType())])
+def resize(self: PIL.Image.Image, size: Tuple[int, int]) -> PIL.Image.Image:
+    return self.resize(size)
+
+# Image.rotate()
+@func.udf(return_type=ImageType(), param_types=[ImageType(), IntType()])
+def rotate(self: PIL.Image.Image, angle: int) -> PIL.Image.Image:
+    return self.rotate(angle)
+
+# Image.transform()
+@func.udf(return_type=_caller_return_type, param_types=[ImageType(), ArrayType((2,), dtype=IntType()), IntType()])
+def transform(self: PIL.Image.Image, size: Tuple[int, int], method: int) -> PIL.Image.Image:
+    return self.transform(size, method)
+
+@func.udf(
+    py_fn=PIL.Image.Image.effect_spread, return_type=_caller_return_type, param_types=[ImageType(), FloatType()])
+def effect_spread(self: PIL.Image.Image, distance: float) -> PIL.Image.Image:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.entropy, return_type=FloatType(), param_types=[ImageType(), ImageType(), JsonType()])
+def entropy(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> float:
+    pass
+
+@func.udf(py_fn=PIL.Image.Image.getbands, return_type=JsonType(), param_types=[ImageType()])
+def getbands(self: PIL.Image.Image) -> Tuple[str]:
+    pass
+
+@func.udf(py_fn=PIL.Image.Image.getbbox, return_type=JsonType(), param_types=[ImageType()])
+def getbbox(self: PIL.Image.Image) -> Tuple[int, int, int, int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.getcolors, return_type=JsonType(), param_types=[ImageType(), IntType()])
+def getcolors(self: PIL.Image.Image, maxcolors: int) -> Tuple[Tuple[int, int, int], int]:
+    pass
+
+@func.udf(py_fn=PIL.Image.Image.getextrema, return_type=JsonType(), param_types=[ImageType()])
+def getextrema(self: PIL.Image.Image) -> Tuple[int, int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.getpalette, return_type=JsonType(), param_types=[ImageType(), StringType()])
+def getpalette(self: PIL.Image.Image, mode: str) -> Tuple[int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.getpixel, return_type=JsonType(), param_types=[ImageType(), ArrayType((2,), dtype=IntType())])
+def getpixel(self: PIL.Image.Image, xy: Tuple[int, int]) -> Tuple[int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.getprojection, return_type=JsonType(), param_types=[ImageType()])
+def getprojection(self: PIL.Image.Image) -> Tuple[int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.histogram, return_type=JsonType(), param_types=[ImageType(), ImageType(), JsonType()])
+def histogram(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> Tuple[int]:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.quantize, return_type=ImageType(),
+    param_types=[ImageType(), IntType(), IntType(nullable=True), IntType(), IntType(nullable=True), IntType()])
+def quantize(
+        self: PIL.Image.Image, colors: int, method: int, kmeans: int, palette: int, dither: int) -> PIL.Image.Image:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.reduce, return_type=ImageType(), param_types=[ImageType(), IntType(), JsonType()])
+def reduce(self: PIL.Image.Image, factor: int, filter: Tuple[int]) -> PIL.Image.Image:
+    pass
+
+@func.udf(
+    py_fn=PIL.Image.Image.transpose, return_type=_caller_return_type, param_types=[ImageType(), IntType()])
+def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
+    pass
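Note: several of these UDFs pass a callable as return_type, letting the result type be computed from the bound arguments (for example, a crop with a literal box has a statically known output size). A toy sketch of that idea, independent of Pixeltable's ColumnType machinery:

# Sketch of "return type as a function of bound arguments".
from typing import Any, Dict, Optional

def _crop_size(bound_args: Optional[Dict[str, Any]]) -> Optional[tuple]:
    # Mirrors _crop_return_type: with no bound args nothing is known;
    # with a literal integer box the output size is computable statically.
    if bound_args is None:
        return None
    box = bound_args['box']
    if isinstance(box, list) and all(isinstance(x, int) for x in box):
        return (box[2] - box[0], box[3] - box[1])
    return None

assert _crop_size({'box': [10, 20, 110, 70]}) == (100, 50)
assert _crop_size(None) is None  # size unknown before arguments are bound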
--- /dev/null
+++ b/pixeltable/functions/string.py
@@ -0,0 +1,13 @@
+from typing import Any
+
+from pixeltable.type_system import StringType
+import pixeltable.func as func
+
+
+@func.udf(return_type=StringType(), param_types=[StringType()])
+def str_format(format_str: str, *args: Any, **kwargs: Any) -> str:
+    """ Return a formatted version of format_str, using substitutions from args and kwargs:
+    - {<int>} will be replaced by the corresponding element in args
+    - {<key>} will be replaced by the corresponding value in kwargs
+    """
+    return format_str.format(*args, **kwargs)
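Note: str_format simply delegates to Python's str.format, so positional and keyword substitutions behave exactly as in plain Python:

# Equivalent plain-Python behavior of str_format.
assert '{0}, {name}!'.format('Hello', name='world') == 'Hello, world!'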
--- /dev/null
+++ b/pixeltable/functions/together.py
@@ -0,0 +1,122 @@
+import base64
+import io
+from typing import Optional
+
+import PIL.Image
+import numpy as np
+import together
+
+import pixeltable as pxt
+from pixeltable import env
+from pixeltable.func import Batch
+
+
+def together_client() -> together.Together:
+    return env.Env.get().get_client('together', lambda api_key: together.Together(api_key=api_key))
+
+
+@pxt.udf
+def completions(
+    prompt: str,
+    *,
+    model: str,
+    max_tokens: Optional[int] = None,
+    stop: Optional[list] = None,
+    temperature: Optional[float] = None,
+    top_p: Optional[float] = None,
+    top_k: Optional[int] = None,
+    repetition_penalty: Optional[float] = None,
+    logprobs: Optional[int] = None,
+    echo: Optional[bool] = None,
+    n: Optional[int] = None,
+    safety_model: Optional[str] = None
+) -> dict:
+    return together_client().completions.create(
+        prompt=prompt,
+        model=model,
+        max_tokens=max_tokens,
+        stop=stop,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        logprobs=logprobs,
+        echo=echo,
+        n=n,
+        safety_model=safety_model
+    ).dict()
+
+
+@pxt.udf
+def chat_completions(
+    messages: list[dict[str, str]],
+    *,
+    model: str,
+    max_tokens: Optional[int] = None,
+    stop: Optional[list[str]] = None,
+    temperature: Optional[float] = None,
+    top_p: Optional[float] = None,
+    top_k: Optional[int] = None,
+    repetition_penalty: Optional[float] = None,
+    logprobs: Optional[int] = None,
+    echo: Optional[bool] = None,
+    n: Optional[int] = None,
+    safety_model: Optional[str] = None,
+    response_format: Optional[dict] = None,
+    tools: Optional[dict] = None,
+    tool_choice: Optional[dict] = None
+) -> dict:
+    return together_client().chat.completions.create(
+        messages=messages,
+        model=model,
+        max_tokens=max_tokens,
+        stop=stop,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        logprobs=logprobs,
+        echo=echo,
+        n=n,
+        safety_model=safety_model,
+        response_format=response_format,
+        tools=tools,
+        tool_choice=tool_choice
+    ).dict()
+
+
+@pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
+def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
+    result = together_client().embeddings.create(input=input, model=model)
+    return [
+        np.array(data.embedding, dtype=np.float64)
+        for data in result.data
+    ]
+
+
+@pxt.udf
+def image_generations(
+    prompt: str,
+    *,
+    model: str,
+    steps: Optional[int] = None,
+    seed: Optional[int] = None,
+    height: Optional[int] = None,
+    width: Optional[int] = None,
+    negative_prompt: Optional[str] = None,
+) -> PIL.Image.Image:
+    # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
+    result = together_client().images.generate(
+        prompt=prompt,
+        model=model,
+        steps=steps,
+        seed=seed,
+        height=height,
+        width=width,
+        negative_prompt=negative_prompt
+    )
+    b64_str = result.data[0].b64_json
+    b64_bytes = base64.b64decode(b64_str)
+    img = PIL.Image.open(io.BytesIO(b64_bytes))
+    img.load()
+    return img
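Note: like its OpenAI counterpart, the batched embeddings UDF maps a batch of N input strings to N one-dimensional float arrays, one per input. A shape sketch; FakeEmbeddingData is a hypothetical stand-in for the SDK's response items:

# Shape check for the batch-in, list-of-arrays-out contract.
import numpy as np

class FakeEmbeddingData:
    def __init__(self, embedding):
        self.embedding = embedding

fake_response_data = [FakeEmbeddingData([0.1, 0.2]), FakeEmbeddingData([0.3, 0.4])]
vectors = [np.array(d.embedding, dtype=np.float64) for d in fake_response_data]
assert len(vectors) == 2 and vectors[0].shape == (2,)  # one 1-D array per input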