pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +21 -4
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -31
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -48
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -86
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1086 -258
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -133
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.1.dist-info/LICENSE +18 -0
- pixeltable-0.2.1.dist-info/METADATA +119 -0
- pixeltable-0.2.1.dist-info/RECORD +125 -0
- {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2.dist-info/LICENSE +0 -201
- pixeltable-0.1.2.dist-info/METADATA +0 -89
- pixeltable-0.1.2.dist-info/RECORD +0 -37
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from typing import Any, Callable
|
|
2
|
+
|
|
3
|
+
import PIL.Image
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
import pixeltable.env as env
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
from pixeltable.func import Batch
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
def sentence_transformer(sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False) -> Batch[np.ndarray]:
    """Embed a batch of sentences with a `SentenceTransformer` model.

    Args:
        sentences: the batch of input strings.
        model_id: identifier handed to the `SentenceTransformer` constructor
            (via `_lookup_model`, which caches the created model).
        normalize_embeddings: forwarded unchanged to `model.encode()`.

    Returns:
        One entry per row of the encoded array.
    """
    env.Env.get().require_package('sentence_transformers')
    # imported lazily, after the package requirement has been checked
    from sentence_transformers import SentenceTransformer

    encoder = _lookup_model(model_id, SentenceTransformer)
    encoded = encoder.encode(sentences, normalize_embeddings=normalize_embeddings)
    row_count = encoded.shape[0]
    return [encoded[row] for row in range(row_count)]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@pxt.udf
def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embeddings: bool = False) -> list:
    """Embed a list of sentences and return the embeddings as plain Python lists.

    Args:
        sentences: the input strings.
        model_id: identifier handed to the `SentenceTransformer` constructor
            (via `_lookup_model`, which caches the created model).
        normalize_embeddings: forwarded unchanged to `model.encode()`.

    Returns:
        A list with one `.tolist()`-converted row of the encoded array per index.
    """
    env.Env.get().require_package('sentence_transformers')
    # imported lazily, after the package requirement has been checked
    from sentence_transformers import SentenceTransformer

    encoder = _lookup_model(model_id, SentenceTransformer)
    encoded = encoder.encode(sentences, normalize_embeddings=normalize_embeddings)
    row_count = encoded.shape[0]
    return [encoded[row].tolist() for row in range(row_count)]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pxt.udf(batch_size=32)
def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
    """Score sentence pairs with a `CrossEncoder` model.

    The two batches are paired positionally with `zip`, so pairing stops at
    the shorter of the two.

    Args:
        sentences1: first element of each pair.
        sentences2: second element of each pair.
        model_id: identifier handed to the `CrossEncoder` constructor
            (via `_lookup_model`, which caches the created model).

    Returns:
        The prediction array converted with `.tolist()`.
    """
    env.Env.get().require_package('sentence_transformers')
    # imported lazily, after the package requirement has been checked
    from sentence_transformers import CrossEncoder

    scorer = _lookup_model(model_id, CrossEncoder)

    pairs = []
    for first, second in zip(sentences1, sentences2):
        pairs.append([first, second])
    scores = scorer.predict(pairs, convert_to_numpy=True)
    return scores.tolist()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pxt.udf
def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> list:
    """Score one sentence against each entry of a list with a `CrossEncoder` model.

    Args:
        sentence1: the sentence used as the first element of every pair.
        sentences2: the sentences used as the second element of each pair.
        model_id: identifier handed to the `CrossEncoder` constructor
            (via `_lookup_model`, which caches the created model).

    Returns:
        The prediction array converted with `.tolist()`.
    """
    env.Env.get().require_package('sentence_transformers')
    # imported lazily, after the package requirement has been checked
    from sentence_transformers import CrossEncoder

    scorer = _lookup_model(model_id, CrossEncoder)

    pairs = []
    for candidate in sentences2:
        pairs.append([sentence1, candidate])
    scores = scorer.predict(pairs, convert_to_numpy=True)
    return scores.tolist()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
    """Compute CLIP text features for a batch of strings.

    Args:
        text: the batch of input strings; tokenized with padding and truncation.
        model_id: identifier passed to `CLIPModel.from_pretrained` and
            `CLIPProcessor.from_pretrained` (both results are cached by the
            lookup helpers).

    Returns:
        One entry per row of the text-feature array.
    """
    env.Env.get().require_package('transformers')
    # imported lazily, after the package requirement has been checked
    from transformers import CLIPModel, CLIPProcessor

    clip = _lookup_model(model_id, CLIPModel.from_pretrained)
    preprocess = _lookup_processor(model_id, CLIPProcessor.from_pretrained)

    encoded = preprocess(text=text, return_tensors='pt', padding=True, truncation=True)
    features = clip.get_text_features(**encoded)
    # detach from the autograd graph before converting to numpy
    as_numpy = features.detach().numpy()
    return [as_numpy[row] for row in range(as_numpy.shape[0])]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
    """Compute CLIP image features for a batch of images.

    Args:
        image: the batch of input images.
        model_id: identifier passed to `CLIPModel.from_pretrained` and
            `CLIPProcessor.from_pretrained` (both results are cached by the
            lookup helpers).

    Returns:
        One entry per row of the image-feature array.
    """
    env.Env.get().require_package('transformers')
    # imported lazily, after the package requirement has been checked
    from transformers import CLIPModel, CLIPProcessor

    clip = _lookup_model(model_id, CLIPModel.from_pretrained)
    preprocess = _lookup_processor(model_id, CLIPProcessor.from_pretrained)

    encoded = preprocess(images=image, return_tensors='pt', padding=True)
    features = clip.get_image_features(**encoded)
    # detach from the autograd graph before converting to numpy
    as_numpy = features.detach().numpy()
    return [as_numpy[row] for row in range(as_numpy.shape[0])]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _create_detr_model(model_id: str) -> Any:
    """Load the 'no_timm' revision of a DETR object-detection model."""
    from transformers import DetrForObjectDetection
    return DetrForObjectDetection.from_pretrained(model_id, revision='no_timm')


def _create_detr_processor(model_id: str) -> Any:
    """Load the 'no_timm' revision of the DETR image processor."""
    from transformers import DetrImageProcessor
    return DetrImageProcessor.from_pretrained(model_id, revision='no_timm')


@pxt.udf(batch_size=32)
def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
    """Run DETR object detection on a batch of images.

    Args:
        image: the batch of input images.
        model_id: identifier of the pretrained DETR model; the 'no_timm'
            revision of both model and processor is loaded.
        threshold: forwarded to `post_process_object_detection`.

    Returns:
        One dict per post-processed result with the keys 'scores', 'labels',
        'label_text' (labels mapped through `model.config.id2label`) and
        'boxes', all converted to plain Python values.
    """
    env.Env.get().require_package('transformers')

    # The factories are module-level functions rather than inline lambdas:
    # the cache key contains the factory object itself, and a lambda written
    # here would be a brand-new object on every call, so the lookup would
    # never hit and the model would be reloaded (and retained) per batch.
    model = _lookup_model(model_id, _create_detr_model)
    processor = _lookup_processor(model_id, _create_detr_processor)

    inputs = processor(images=image, return_tensors='pt')
    outputs = model(**inputs)

    results = processor.post_process_object_detection(outputs, threshold=threshold)
    return [
        {
            'scores': [score.item() for score in result['scores']],
            'labels': [label.item() for label in result['labels']],
            'label_text': [model.config.id2label[label.item()] for label in result['labels']],
            'boxes': [box.tolist() for box in result['boxes']]
        }
        for result in results
    ]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _lookup_model(model_id: str, create: Callable) -> Any:
    """Return the cached model for `(model_id, create)`, creating it on first use.

    Args:
        model_id: identifier passed to `create` when the model is not cached yet.
        create: factory called as `create(model_id)`; it is part of the cache
            key, so the same id with a different factory is cached separately.

    Returns:
        The object stored in `_model_cache` for this key.
    """
    cache_key = (model_id, create)
    try:
        return _model_cache[cache_key]
    except KeyError:
        # first request for this key: build the model and remember it
        created = create(model_id)
        _model_cache[cache_key] = created
        return created
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _lookup_processor(model_id: str, create: Callable) -> Any:
    """Return the cached processor for `(model_id, create)`, creating it on first use.

    Args:
        model_id: identifier passed to `create` when the processor is not cached yet.
        create: factory called as `create(model_id)`; it is part of the cache
            key, so the same id with a different factory is cached separately.

    Returns:
        The object stored in `_processor_cache` for this key.
    """
    cache_key = (model_id, create)
    try:
        return _processor_cache[cache_key]
    except KeyError:
        # first request for this key: build the processor and remember it
        created = create(model_id)
        _processor_cache[cache_key] = created
        return created
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
_model_cache = {}
|
|
120
|
+
_processor_cache = {}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
|
|
3
|
+
import PIL.Image
|
|
4
|
+
|
|
5
|
+
from pixeltable.type_system import ImageType, StringType
|
|
6
|
+
import pixeltable.func as func
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@func.udf
def b64_encode(img: PIL.Image.Image, image_format: str = 'png') -> str:
    """Serialize an image and return the bytes as a base64 string.

    Args:
        img: the image to encode.
        image_format: format name passed to `img.save()`; defaults to 'png'.

    Returns:
        The base64 encoding of the saved image bytes, decoded as UTF-8 text.
    """
    import io

    buffer = io.BytesIO()
    img.save(buffer, format=image_format)
    raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode('utf-8')
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import io
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import PIL.Image
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
import pixeltable as pxt
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
|
+
from pixeltable import env
|
|
11
|
+
from pixeltable.func import Batch
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pxt.udf
def chat_completions(
        messages: list,
        model: str,
        frequency_penalty: Optional[float] = None,
        logit_bias: Optional[dict] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[dict] = None,
        seed: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None
) -> dict:
    """Call the OpenAI chat completions endpoint and return the response as a dict.

    `messages` and `model` are always sent. Every other argument is optional:
    a value of None is replaced by the client's `NOT_GIVEN` sentinel before
    the call.

    Returns:
        The result of calling `.dict()` on the response object.
    """
    from openai._types import NOT_GIVEN

    def _or_not_given(value):
        # None means "not specified by the caller"
        return NOT_GIVEN if value is None else value

    client = env.Env.get().openai_client
    response = client.chat.completions.create(
        messages=messages,
        model=model,
        frequency_penalty=_or_not_given(frequency_penalty),
        logit_bias=_or_not_given(logit_bias),
        max_tokens=_or_not_given(max_tokens),
        n=_or_not_given(n),
        presence_penalty=_or_not_given(presence_penalty),
        response_format=_or_not_given(response_format),
        seed=_or_not_given(seed),
        top_p=_or_not_given(top_p),
        temperature=_or_not_given(temperature)
    )
    return response.dict()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pxt.udf
def vision(
        prompt: str,
        image: PIL.Image.Image,
        model: str = 'gpt-4-vision-preview'
) -> str:
    """Send a text prompt together with an image to an OpenAI chat model.

    The image is saved as PNG, base64-encoded and embedded in the request as
    a `data:image/png;base64,...` URL inside a single user message.

    Args:
        prompt: text part of the user message.
        image: image part of the user message.
        model: model name passed to the chat completions call.

    Returns:
        The message content of the first choice in the response.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format='png')
    b64_encoded_image = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

    text_part = {'type': 'text', 'text': prompt}
    image_part = {
        'type': 'image_url',
        'image_url': {'url': f'data:image/png;base64,{b64_encoded_image}'},
    }
    messages = [{'role': 'user', 'content': [text_part, image_part]}]

    client = env.Env.get().openai_client
    response = client.chat.completions.create(messages=messages, model=model)
    return response.choices[0].message.content
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@pxt.udf
def moderations(input: str, model: Optional[str] = None) -> dict:
    """Call the OpenAI moderations endpoint and return the response as a dict.

    Args:
        input: the text to classify.
        model: optional moderation model name; when None the argument is
            replaced by the client's `NOT_GIVEN` sentinel, the same convention
            `chat_completions` uses for its optional arguments.

    Returns:
        The result of calling `.dict()` on the response object.
    """
    from openai._types import NOT_GIVEN

    # Use the shared environment via the class-level accessor (`Env.get()`),
    # as the other OpenAI functions do, rather than constructing a new Env.
    result = env.Env.get().openai_client.moderations.create(
        input=input,
        model=model if model is not None else NOT_GIVEN
    )
    return result.dict()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
    """Request OpenAI embeddings for a batch of strings.

    Args:
        input: the batch of input strings, sent in a single request.
        model: embedding model name.

    Returns:
        One float64 numpy array per entry of the response's `data` list.
    """
    # Use the shared environment via the class-level accessor (`Env.get()`),
    # as the other OpenAI functions do, rather than constructing a new Env.
    result = env.Env.get().openai_client.embeddings.create(
        input=input,
        model=model,
        encoding_format='float'
    )
    # local name chosen so it does not shadow this function
    vectors = [
        np.array(data.embedding, dtype=np.float64)
        for data in result.data
    ]
    return vectors
|
|
@@ -1,9 +1,150 @@
|
|
|
1
|
-
from
|
|
2
|
-
from pixeltable.function import Function
|
|
1
|
+
from typing import Dict, Any, Tuple, Optional
|
|
3
2
|
|
|
3
|
+
import PIL.Image
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
from pixeltable.type_system import FloatType, ImageType, IntType, ArrayType, ColumnType, StringType, JsonType, BoolType
|
|
6
|
+
import pixeltable.func as func
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _caller_return_type(bound_args: Optional[Dict[str, Any]]) -> ColumnType:
    """Return the column type of the bound 'self' argument.

    Falls back to a generic `ImageType()` when no bound arguments are given.
    """
    return ImageType() if bound_args is None else bound_args['self'].col_type
|
|
13
|
+
|
|
14
|
+
@func.udf(
    py_fn=PIL.Image.alpha_composite, return_type=ImageType(), param_types=[ImageType(), ImageType()])
def alpha_composite(im1: PIL.Image.Image, im2: PIL.Image.Image) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.alpha_composite`, which the decorator receives as `py_fn`."""
|
|
18
|
+
@func.udf(
    py_fn=PIL.Image.blend, return_type=ImageType(), param_types=[ImageType(), ImageType(), FloatType()])
def blend(im1: PIL.Image.Image, im2: PIL.Image.Image, alpha: float) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.blend`, which the decorator receives as `py_fn`."""
|
|
22
|
+
@func.udf(
    py_fn=PIL.Image.composite, return_type=ImageType(), param_types=[ImageType(), ImageType(), ImageType()])
def composite(image1: PIL.Image.Image, image2: PIL.Image.Image, mask: PIL.Image.Image) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.composite`, which the decorator receives as `py_fn`."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# PIL.Image.Image methods
|
|
29
|
+
|
|
30
|
+
# Image.convert()
|
|
31
|
+
def _convert_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Compute the return type of `convert()` from its bound arguments.

    Without bound arguments a generic `ImageType()` is returned. Otherwise the
    result keeps the size of the 'self' argument's column type and takes its
    mode from the bound 'mode' argument.
    """
    if bound_args is not None:
        assert 'self' in bound_args
        assert 'mode' in bound_args
        source_type = bound_args['self'].col_type
        return ImageType(size=source_type.size, mode=bound_args['mode'])
    return ImageType()
|
|
38
|
+
@func.udf(return_type=_convert_return_type, param_types=[ImageType(), StringType()])
def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
    """Return the result of `self.convert(mode)`."""
    converted = self.convert(mode)
    return converted
|
|
41
|
+
|
|
42
|
+
# Image.crop()
|
|
43
|
+
def _crop_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Compute the return type of `crop()` from its bound arguments.

    Without bound arguments a generic `ImageType()` is returned. When the
    bound 'box' is a list or tuple of exactly four ints, the result carries
    the cropped size (box[2] - box[0], box[3] - box[1]) and the mode of the
    'self' argument's column type. For any other 'box' value the size is not
    derived and a generic `ImageType()` is returned.
    """
    if bound_args is None:
        return ImageType()
    img_type = bound_args['self'].col_type
    box = bound_args['box']
    # Require exactly four ints before indexing: a shorter sequence would
    # otherwise raise IndexError. Tuples are accepted alongside lists because
    # `crop` itself is annotated with a Tuple box.
    is_literal_box = (
        isinstance(box, (list, tuple))
        and len(box) == 4
        and all(isinstance(x, int) for x in box)
    )
    if is_literal_box:
        left, upper, right, lower = box
        return ImageType(size=(right - left, lower - upper), mode=img_type.mode)
    return ImageType()  # we can't compute the size statically
|
|
51
|
+
@func.udf(
    py_fn=PIL.Image.Image.crop, return_type=_crop_return_type,
    param_types=[ImageType(), ArrayType((4,), dtype=IntType())])
def crop(self: PIL.Image.Image, box: Tuple[int, int, int, int]) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.crop`, which the decorator receives as `py_fn`."""
|
|
56
|
+
|
|
57
|
+
# Image.getchannel()
|
|
58
|
+
def _getchannel_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Compute the return type of `getchannel()` from its bound arguments.

    Without bound arguments a generic `ImageType()` is returned. Otherwise the
    result keeps the size of the 'self' argument's column type and uses mode 'L'.
    """
    if bound_args is not None:
        source_type = bound_args['self'].col_type
        return ImageType(size=source_type.size, mode='L')
    return ImageType()
|
|
63
|
+
@func.udf(
    py_fn=PIL.Image.Image.getchannel, return_type=_getchannel_return_type, param_types=[ImageType(), IntType()])
def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.getchannel`, which the decorator receives as `py_fn`."""
|
|
67
|
+
|
|
68
|
+
# Image.resize()
|
|
69
|
+
def resize_return_type(bound_args: Dict[str, Any]) -> ColumnType:
    """Compute the return type of `resize()` from its bound arguments.

    Without bound arguments a generic `ImageType()` is returned. Otherwise the
    result's size is the bound 'size' argument.
    """
    if bound_args is not None:
        assert 'size' in bound_args
        return ImageType(size=bound_args['size'])
    return ImageType()
|
|
74
|
+
@func.udf(return_type=resize_return_type, param_types=[ImageType(), ArrayType((2, ), dtype=IntType())])
def resize(self: PIL.Image.Image, size: Tuple[int, int]) -> PIL.Image.Image:
    """Return the result of `self.resize(size)`."""
    resized = self.resize(size)
    return resized
|
|
77
|
+
|
|
78
|
+
# Image.rotate()
|
|
79
|
+
@func.udf(return_type=ImageType(), param_types=[ImageType(), IntType()])
def rotate(self: PIL.Image.Image, angle: int) -> PIL.Image.Image:
    """Return the result of `self.rotate(angle)`."""
    rotated = self.rotate(angle)
    return rotated
|
|
82
|
+
|
|
83
|
+
# Image.transform()
|
|
84
|
+
@func.udf(return_type=_caller_return_type, param_types=[ImageType(), ArrayType((2,), dtype=IntType()), IntType()])
def transform(self: PIL.Image.Image, size: Tuple[int, int], method: int) -> PIL.Image.Image:
    """Return the result of `self.transform(size, method)`."""
    transformed = self.transform(size, method)
    return transformed
|
|
87
|
+
|
|
88
|
+
@func.udf(
    py_fn=PIL.Image.Image.effect_spread, return_type=_caller_return_type, param_types=[ImageType(), FloatType()])
def effect_spread(self: PIL.Image.Image, distance: float) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.effect_spread`, which the decorator receives as `py_fn`."""
|
|
92
|
+
|
|
93
|
+
@func.udf(
    py_fn=PIL.Image.Image.entropy, return_type=FloatType(), param_types=[ImageType(), ImageType(), JsonType()])
def entropy(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> float:
    """Declare the signature for `PIL.Image.Image.entropy`, which the decorator receives as `py_fn`."""
|
|
97
|
+
|
|
98
|
+
@func.udf(py_fn=PIL.Image.Image.getbands, return_type=JsonType(), param_types=[ImageType()])
def getbands(self: PIL.Image.Image) -> Tuple[str]:
    """Declare the signature for `PIL.Image.Image.getbands`, which the decorator receives as `py_fn`."""
|
|
101
|
+
|
|
102
|
+
@func.udf(py_fn=PIL.Image.Image.getbbox, return_type=JsonType(), param_types=[ImageType()])
def getbbox(self: PIL.Image.Image) -> Tuple[int, int, int, int]:
    """Declare the signature for `PIL.Image.Image.getbbox`, which the decorator receives as `py_fn`."""
|
|
105
|
+
|
|
106
|
+
@func.udf(
    py_fn=PIL.Image.Image.getcolors, return_type=JsonType(), param_types=[ImageType(), IntType()])
def getcolors(self: PIL.Image.Image, maxcolors: int) -> Tuple[Tuple[int, int, int], int]:
    """Declare the signature for `PIL.Image.Image.getcolors`, which the decorator receives as `py_fn`."""
|
|
110
|
+
|
|
111
|
+
@func.udf(py_fn=PIL.Image.Image.getextrema, return_type=JsonType(), param_types=[ImageType()])
def getextrema(self: PIL.Image.Image) -> Tuple[int, int]:
    """Declare the signature for `PIL.Image.Image.getextrema`, which the decorator receives as `py_fn`."""
|
|
114
|
+
|
|
115
|
+
@func.udf(
    py_fn=PIL.Image.Image.getpalette, return_type=JsonType(), param_types=[ImageType(), StringType()])
def getpalette(self: PIL.Image.Image, mode: str) -> Tuple[int]:
    """Declare the signature for `PIL.Image.Image.getpalette`, which the decorator receives as `py_fn`."""
|
|
119
|
+
|
|
120
|
+
@func.udf(
    py_fn=PIL.Image.Image.getpixel, return_type=JsonType(), param_types=[ImageType(), ArrayType((2,), dtype=IntType())])
def getpixel(self: PIL.Image.Image, xy: Tuple[int, int]) -> Tuple[int]:
    """Declare the signature for `PIL.Image.Image.getpixel`, which the decorator receives as `py_fn`."""
|
|
124
|
+
|
|
125
|
+
@func.udf(
    py_fn=PIL.Image.Image.getprojection, return_type=JsonType(), param_types=[ImageType()])
def getprojection(self: PIL.Image.Image) -> Tuple[int]:
    """Declare the signature for `PIL.Image.Image.getprojection`, which the decorator receives as `py_fn`."""
|
|
129
|
+
|
|
130
|
+
@func.udf(
    py_fn=PIL.Image.Image.histogram, return_type=JsonType(), param_types=[ImageType(), ImageType(), JsonType()])
def histogram(self: PIL.Image.Image, mask: PIL.Image.Image, histogram: Dict) -> Tuple[int]:
    """Declare the signature for `PIL.Image.Image.histogram`, which the decorator receives as `py_fn`."""
|
|
134
|
+
|
|
135
|
+
@func.udf(
    py_fn=PIL.Image.Image.quantize, return_type=ImageType(),
    param_types=[ImageType(), IntType(), IntType(nullable=True), IntType(), IntType(nullable=True), IntType()])
def quantize(
        self: PIL.Image.Image, colors: int, method: int, kmeans: int, palette: int, dither: int) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.quantize`, which the decorator receives as `py_fn`."""
|
|
141
|
+
|
|
142
|
+
@func.udf(
    py_fn=PIL.Image.Image.reduce, return_type=ImageType(), param_types=[ImageType(), IntType(), JsonType()])
def reduce(self: PIL.Image.Image, factor: int, filter: Tuple[int]) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.reduce`, which the decorator receives as `py_fn`."""
|
|
146
|
+
|
|
147
|
+
@func.udf(
    py_fn=PIL.Image.Image.transpose, return_type=_caller_return_type, param_types=[ImageType(), IntType()])
def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
    """Declare the signature for `PIL.Image.Image.transpose`, which the decorator receives as `py_fn`."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from pixeltable.type_system import StringType
|
|
4
|
+
import pixeltable.func as func
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@func.udf(return_type=StringType(), param_types=[StringType()])
def str_format(format_str: str, *args: Any, **kwargs: Any) -> str:
    """Format `format_str` with positional and keyword substitutions.

    - {<int>} is replaced by the element of args at that position
    - {<key>} is replaced by the value stored under that key in kwargs
    """
    formatted = format_str.format(*args, **kwargs)
    return formatted
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import pixeltable as pxt
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@pxt.udf
def completions(
        prompt: str,
        model: str,
        max_tokens: Optional[int] = None,
        repetition_penalty: Optional[float] = None,
        stop: Optional[list] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
        temperature: Optional[float] = None
) -> dict:
    """Request a completion from Together via `together.Complete.create`.

    `prompt` and `model` are passed positionally; every sampling option is
    forwarded by keyword exactly as received, including None.

    Returns:
        Whatever `together.Complete.create` returns.
    """
    import together

    sampling_options = {
        'max_tokens': max_tokens,
        'repetition_penalty': repetition_penalty,
        'stop': stop,
        'top_k': top_k,
        'top_p': top_p,
        'temperature': temperature,
    }
    return together.Complete.create(prompt, model, **sampling_options)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from typing import Tuple, List, Optional
|
|
2
|
+
import types
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
import pixeltable.func as func
|
|
6
|
+
import pixeltable.type_system as ts
|
|
7
|
+
import pixeltable.env as env
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_nos_modules() -> List[types.ModuleType]:
    """Create module pixeltable.functions.nos with one submodule per task and return the submodules.

    The model list and per-model info are fetched from the environment's NOS
    client. Models are sorted by task value so that each task's models are
    adjacent; a new submodule is started whenever the task value changes.
    Every module created here is also registered in `sys.modules`.
    """
    client = env.Env.get().nos_client
    infos = sorted(
        [client.GetModelInfo(model_name) for model_name in client.ListModels()],
        key=lambda entry: entry.task.value,
    )

    module_name = 'pixeltable.functions.nos'
    root_module = types.ModuleType(module_name)
    root_module.__package__ = 'pixeltable.functions'
    sys.modules[module_name] = root_module

    task_modules: List[types.ModuleType] = []
    current_module: Optional[types.ModuleType] = None
    last_task = ''
    for entry in infos:
        task_value = entry.task.value
        if task_value != last_task:
            # task changed: open the submodule for this task
            task_ns = entry.task.name.lower()
            submodule_name = f'{module_name}.{task_ns}'
            current_module = types.ModuleType(submodule_name)
            current_module.__package__ = module_name
            setattr(root_module, task_ns, current_module)
            task_modules.append(current_module)
            sys.modules[submodule_name] = current_module
            last_task = task_value

        # expose this model as a Function attribute of the task's submodule;
        # '/' and '-' are replaced so the name is a valid identifier
        attr_name = entry.name.replace("/", "_").replace("-", "_")
        nos_fn = func.NOSFunction(entry, f'{submodule_name}.{attr_name}')
        setattr(current_module, attr_name, nos_fn)

    return task_modules
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
import uuid
|
|
3
|
+
import av
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import pixeltable.env as env
|
|
7
|
+
import pixeltable.func as func
|
|
8
|
+
import pixeltable.type_system as ts
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_format_defaults = { # format -> (codec, ext)
|
|
12
|
+
'wav': ('pcm_s16le', 'wav'),
|
|
13
|
+
'mp3': ('libmp3lame', 'mp3'),
|
|
14
|
+
'flac': ('flac', 'flac'),
|
|
15
|
+
#'mp4': ('aac', 'm4a'),
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
# for mp4:
|
|
19
|
+
# - extract_audio() fails with "Application provided invalid, non monotonically increasing dts to muxer in stream 0: 1146 >= 290"
|
|
20
|
+
# - chatgpt suggests this can be fixed in the following manner
|
|
21
|
+
# for packet in container.demux(audio_stream):
|
|
22
|
+
# packet.pts = None # Reset the PTS and DTS to allow FFmpeg to set them automatically
|
|
23
|
+
# packet.dts = None
|
|
24
|
+
# for frame in packet.decode():
|
|
25
|
+
# frame.pts = None
|
|
26
|
+
# for packet in output_stream.encode(frame):
|
|
27
|
+
# output_container.mux(packet)
|
|
28
|
+
#
|
|
29
|
+
# # Flush remaining packets
|
|
30
|
+
# for packet in output_stream.encode():
|
|
31
|
+
# output_container.mux(packet)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
_extract_audio_param_types = [
|
|
35
|
+
ts.VideoType(nullable=False),
|
|
36
|
+
ts.IntType(nullable=False),
|
|
37
|
+
ts.StringType(nullable=False),
|
|
38
|
+
ts.StringType(nullable=False)
|
|
39
|
+
]
|
|
40
|
+
@func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types)
def extract_audio(
        video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
) -> Optional[str]:
    """Extract an audio stream from a video file, save it as a media file and return its path.

    Args:
        video_path: path of the video file to read.
        stream_idx: index into the container's audio streams.
        format: output container format; must be a key of `_format_defaults`.
        codec: codec for the output stream; when falsy, the default codec
            registered for `format` is used.

    Returns:
        The path of the written audio file, or None when the video has no
        audio stream at `stream_idx`.

    Raises:
        ValueError: if `format` is not a key of `_format_defaults`.
    """
    if format not in _format_defaults:
        raise ValueError(f'extract_audio(): unsupported audio format: {format}')
    default_codec, ext = _format_defaults[format]

    with av.open(video_path) as container:
        if len(container.streams.audio) <= stream_idx:
            return None
        audio_stream = container.streams.audio[stream_idx]
        # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
        output_filename = str(env.Env.get().tmp_dir / f"{uuid.uuid4()}.{ext}")

        with av.open(output_filename, "w", format=format) as output_container:
            output_stream = output_container.add_stream(codec or default_codec)
            for packet in container.demux(audio_stream):
                for frame in packet.decode():
                    output_container.mux(output_stream.encode(frame))
            # Flush the encoder: encoding None drains the packets it is still
            # buffering, which would otherwise be missing from the output.
            output_container.mux(output_stream.encode(None))

        return output_filename
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Dict, Any, Tuple, List
|
|
3
|
+
from abc import abstractmethod, ABC
|
|
4
|
+
|
|
5
|
+
from pixeltable.type_system import ColumnType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ComponentIterator(ABC):
    """Abstract base class for component iterators.

    Concrete subclasses describe their inputs and outputs through the two
    schema classmethods and implement the iteration protocol (`__next__`)
    together with `close()` and `set_pos()`.
    """

    @classmethod
    @abstractmethod
    def input_schema(cls) -> Dict[str, ColumnType]:
        """Return the Pixeltable types of the init() parameters.

        The dictionary keys must match the names of the init() parameters.
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def output_schema(cls, *args: Any, **kwargs: Any) -> Tuple[Dict[str, ColumnType], List[str]]:
        """Describe the dictionary returned by next() and name the unstored columns.

        Returns:
            a dictionary which is turned into a list of columns in the output table
            a list of unstored column names
        """
        raise NotImplementedError

    def __iter__(self) -> ComponentIterator:
        # the object is its own iterator
        return self

    @abstractmethod
    def __next__(self) -> Dict[str, Any]:
        """Return the next element as a dictionary, or raise StopIteration."""
        raise NotImplementedError

    @abstractmethod
    def close(self) -> None:
        """Close the iterator and release all resources."""
        raise NotImplementedError

    @abstractmethod
    def set_pos(self, pos: int) -> None:
        """Set the iterator position to pos."""
        raise NotImplementedError
|