pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
from typing import Dict, Any
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
import pixeltable as pxt
|
|
6
|
-
from pixeltable.tests.utils import skip_test_if_not_installed, get_sentences, get_image_files, \
|
|
7
|
-
SAMPLE_IMAGE_URL
|
|
8
|
-
from pixeltable.type_system import StringType, JsonType, ImageType, BoolType, FloatType, ArrayType
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestHuggingface:
|
|
12
|
-
|
|
13
|
-
def test_hf_function(self, test_client: pxt.Client) -> None:
|
|
14
|
-
skip_test_if_not_installed('sentence_transformers')
|
|
15
|
-
cl = test_client
|
|
16
|
-
t = cl.create_table('test_tbl', {'input': StringType(), 'bool_col': BoolType()})
|
|
17
|
-
from pixeltable.functions.huggingface import sentence_transformer
|
|
18
|
-
model_id = 'intfloat/e5-large-v2'
|
|
19
|
-
t.add_column(e5=sentence_transformer(t.input, model_id=model_id))
|
|
20
|
-
sents = get_sentences()
|
|
21
|
-
status = t.insert({'input': s, 'bool_col': True} for s in sents)
|
|
22
|
-
assert status.num_rows == len(sents)
|
|
23
|
-
assert status.num_excs == 0
|
|
24
|
-
|
|
25
|
-
# verify handling of constant params
|
|
26
|
-
with pytest.raises(ValueError) as exc_info:
|
|
27
|
-
t.add_column(e5_2=sentence_transformer(t.input, model_id=t.input))
|
|
28
|
-
assert ': parameter model_id must be a constant value' in str(exc_info.value)
|
|
29
|
-
with pytest.raises(ValueError) as exc_info:
|
|
30
|
-
t.add_column(e5_2=sentence_transformer(t.input, model_id=model_id, normalize_embeddings=t.bool_col))
|
|
31
|
-
assert ': parameter normalize_embeddings must be a constant value' in str(exc_info.value)
|
|
32
|
-
|
|
33
|
-
# make sure this doesn't cause an exception
|
|
34
|
-
# TODO: is there some way to capture the output?
|
|
35
|
-
t.describe()
|
|
36
|
-
|
|
37
|
-
def test_sentence_transformer(self, test_client: pxt.Client) -> None:
|
|
38
|
-
skip_test_if_not_installed('sentence_transformers')
|
|
39
|
-
cl = test_client
|
|
40
|
-
t = cl.create_table('test_tbl', {'input': StringType(), 'input_list': JsonType()})
|
|
41
|
-
sents = get_sentences(10)
|
|
42
|
-
status = t.insert({'input': s, 'input_list': sents} for s in sents)
|
|
43
|
-
assert status.num_rows == len(sents)
|
|
44
|
-
assert status.num_excs == 0
|
|
45
|
-
|
|
46
|
-
# run multiple models one at a time in order to exercise batching
|
|
47
|
-
from pixeltable.functions.huggingface import sentence_transformer, sentence_transformer_list
|
|
48
|
-
model_ids = ['sentence-transformers/all-mpnet-base-v2', 'BAAI/bge-reranker-base']
|
|
49
|
-
num_dims = [768, 768]
|
|
50
|
-
for idx, model_id in enumerate(model_ids):
|
|
51
|
-
col_name = f'embed{idx}'
|
|
52
|
-
t[col_name] = sentence_transformer(t.input, model_id=model_id, normalize_embeddings=True)
|
|
53
|
-
assert t.column_types()[col_name] == ArrayType((None,), dtype=FloatType(), nullable=False)
|
|
54
|
-
list_col_name = f'embed_list{idx}'
|
|
55
|
-
t[list_col_name] = sentence_transformer_list(t.input_list, model_id=model_id, normalize_embeddings=True)
|
|
56
|
-
assert t.column_types()[list_col_name] == JsonType()
|
|
57
|
-
|
|
58
|
-
def verify_row(row: Dict[str, Any]) -> None:
|
|
59
|
-
for idx, (_, d) in enumerate(zip(model_ids, num_dims)):
|
|
60
|
-
assert row[f'embed{idx}'].shape == (d,)
|
|
61
|
-
assert len(row[f'embed_list{idx}']) == len(sents)
|
|
62
|
-
assert all(len(v) == d for v in row[f'embed_list{idx}'])
|
|
63
|
-
|
|
64
|
-
verify_row(t.tail(1)[0])
|
|
65
|
-
|
|
66
|
-
# execution still works after reload
|
|
67
|
-
cl = pxt.Client(reload=True)
|
|
68
|
-
t = cl.get_table('test_tbl')
|
|
69
|
-
status = t.insert({'input': s, 'input_list': sents} for s in sents)
|
|
70
|
-
assert status.num_rows == len(sents)
|
|
71
|
-
assert status.num_excs == 0
|
|
72
|
-
verify_row(t.tail(1)[0])
|
|
73
|
-
|
|
74
|
-
def test_cross_encoder(self, test_client: pxt.Client) -> None:
|
|
75
|
-
skip_test_if_not_installed('sentence_transformers')
|
|
76
|
-
cl = test_client
|
|
77
|
-
t = cl.create_table('test_tbl', {'input': StringType(), 'input_list': JsonType()})
|
|
78
|
-
sents = get_sentences(10)
|
|
79
|
-
status = t.insert({'input': s, 'input_list': sents} for s in sents)
|
|
80
|
-
assert status.num_rows == len(sents)
|
|
81
|
-
assert status.num_excs == 0
|
|
82
|
-
|
|
83
|
-
# run multiple models one at a time in order to exercise batching
|
|
84
|
-
from pixeltable.functions.huggingface import cross_encoder, cross_encoder_list
|
|
85
|
-
model_ids = ['cross-encoder/ms-marco-MiniLM-L-6-v2', 'cross-encoder/ms-marco-TinyBERT-L-2-v2']
|
|
86
|
-
for idx, model_id in enumerate(model_ids):
|
|
87
|
-
col_name = f'embed{idx}'
|
|
88
|
-
t[col_name] = cross_encoder(t.input, t.input, model_id=model_id)
|
|
89
|
-
assert t.column_types()[col_name] == FloatType()
|
|
90
|
-
list_col_name = f'embed_list{idx}'
|
|
91
|
-
t[list_col_name] = cross_encoder_list(t.input, t.input_list, model_id=model_id)
|
|
92
|
-
assert t.column_types()[list_col_name] == JsonType()
|
|
93
|
-
|
|
94
|
-
def verify_row(row: Dict[str, Any]) -> None:
|
|
95
|
-
for i in range(len(model_ids)):
|
|
96
|
-
assert len(row[f'embed_list{idx}']) == len(sents)
|
|
97
|
-
assert all(isinstance(v, float) for v in row[f'embed_list{idx}'])
|
|
98
|
-
|
|
99
|
-
verify_row(t.tail(1)[0])
|
|
100
|
-
|
|
101
|
-
# execution still works after reload
|
|
102
|
-
cl = pxt.Client(reload=True)
|
|
103
|
-
t = cl.get_table('test_tbl')
|
|
104
|
-
status = t.insert({'input': s, 'input_list': sents} for s in sents)
|
|
105
|
-
assert status.num_rows == len(sents)
|
|
106
|
-
assert status.num_excs == 0
|
|
107
|
-
verify_row(t.tail(1)[0])
|
|
108
|
-
|
|
109
|
-
def test_clip(self, test_client: pxt.Client) -> None:
|
|
110
|
-
skip_test_if_not_installed('transformers')
|
|
111
|
-
cl = test_client
|
|
112
|
-
t = cl.create_table('test_tbl', {'text': StringType(), 'img': ImageType()})
|
|
113
|
-
num_rows = 10
|
|
114
|
-
sents = get_sentences(num_rows)
|
|
115
|
-
imgs = get_image_files()[:num_rows]
|
|
116
|
-
status = t.insert({'text': text, 'img': img} for text, img in zip(sents, imgs))
|
|
117
|
-
assert status.num_rows == len(sents)
|
|
118
|
-
assert status.num_excs == 0
|
|
119
|
-
|
|
120
|
-
# run multiple models one at a time in order to exercise batching
|
|
121
|
-
from pixeltable.functions.huggingface import clip_text, clip_image
|
|
122
|
-
model_ids = ['openai/clip-vit-base-patch32', 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K']
|
|
123
|
-
for idx, model_id in enumerate(model_ids):
|
|
124
|
-
col_name = f'embed_text{idx}'
|
|
125
|
-
t[col_name] = clip_text(t.text, model_id=model_id)
|
|
126
|
-
assert t.column_types()[col_name].is_array_type()
|
|
127
|
-
col_name = f'embed_img{idx}'
|
|
128
|
-
t[col_name] = clip_image(t.img, model_id=model_id)
|
|
129
|
-
assert t.column_types()[col_name].is_array_type()
|
|
130
|
-
|
|
131
|
-
def verify_row(row: Dict[str, Any]) -> None:
|
|
132
|
-
for idx, _ in enumerate(model_ids):
|
|
133
|
-
assert row[f'embed_text{idx}'].shape == (512,)
|
|
134
|
-
assert row[f'embed_img{idx}'].shape == (512,)
|
|
135
|
-
|
|
136
|
-
verify_row(t.tail(1)[0])
|
|
137
|
-
|
|
138
|
-
# execution still works after reload
|
|
139
|
-
cl = pxt.Client(reload=True)
|
|
140
|
-
t = cl.get_table('test_tbl')
|
|
141
|
-
status = t.insert({'text': text, 'img': img} for text, img in zip(sents, imgs))
|
|
142
|
-
assert status.num_rows == len(sents)
|
|
143
|
-
assert status.num_excs == 0
|
|
144
|
-
verify_row(t.tail(1)[0])
|
|
145
|
-
|
|
146
|
-
def test_detr_for_object_detection(self, test_client: pxt.Client) -> None:
|
|
147
|
-
skip_test_if_not_installed('transformers')
|
|
148
|
-
cl = test_client
|
|
149
|
-
t = cl.create_table('test_tbl', {'img': ImageType()})
|
|
150
|
-
from pixeltable.functions.huggingface import detr_for_object_detection
|
|
151
|
-
t['detect'] = detr_for_object_detection(t.img, model_id='facebook/detr-resnet-50', threshold=0.8)
|
|
152
|
-
status = t.insert(img=SAMPLE_IMAGE_URL)
|
|
153
|
-
assert status.num_rows == 1
|
|
154
|
-
assert status.num_excs == 0
|
|
155
|
-
result = t.select(t.detect).collect()[0]['detect']
|
|
156
|
-
assert 'orange' in result['label_text']
|
|
157
|
-
assert 'bowl' in result['label_text']
|
|
158
|
-
assert 'broccoli' in result['label_text']
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
import pixeltable.exceptions as excs
|
|
5
|
-
from pixeltable.tests.utils import SAMPLE_IMAGE_URL, skip_test_if_not_installed, validate_update_status
|
|
6
|
-
from pixeltable.type_system import StringType, ImageType
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@pytest.mark.remote_api
|
|
10
|
-
class TestOpenai:
|
|
11
|
-
|
|
12
|
-
def test_audio(self, test_client: pxt.Client) -> None:
|
|
13
|
-
skip_test_if_not_installed('openai')
|
|
14
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
15
|
-
cl = test_client
|
|
16
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
17
|
-
from pixeltable.functions.openai import speech, transcriptions, translations
|
|
18
|
-
t.add_column(speech=speech(t.input, model='tts-1', voice='onyx'))
|
|
19
|
-
t.add_column(speech_2=speech(t.input, model='tts-1', voice='onyx', response_format='flac', speed=1.05))
|
|
20
|
-
t.add_column(transcription=transcriptions(t.speech, model='whisper-1'))
|
|
21
|
-
t.add_column(transcription_2=transcriptions(
|
|
22
|
-
t.speech, model='whisper-1', language='en', prompt='Transcribe the contents of this recording.'
|
|
23
|
-
))
|
|
24
|
-
t.add_column(translation=translations(t.speech, model='whisper-1'))
|
|
25
|
-
t.add_column(translation_2=translations(
|
|
26
|
-
t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.05
|
|
27
|
-
))
|
|
28
|
-
validate_update_status(t.insert([
|
|
29
|
-
{'input': 'I am a banana.'},
|
|
30
|
-
{'input': 'Es fácil traducir del español al inglés.'}
|
|
31
|
-
]), expected_rows=2)
|
|
32
|
-
# The audio generation -> transcription loop on these examples should be simple and clear enough
|
|
33
|
-
# that the unit test can reliably expect the output closely enough to pass these checks.
|
|
34
|
-
results = t.collect()
|
|
35
|
-
assert results[0]['transcription']['text'] in ['I am a banana.', "I'm a banana."]
|
|
36
|
-
assert results[0]['transcription_2']['text'] in ['I am a banana.', "I'm a banana."]
|
|
37
|
-
assert 'easy to translate' in results[1]['translation']['text']
|
|
38
|
-
assert 'easy to translate' in results[1]['translation_2']['text']
|
|
39
|
-
|
|
40
|
-
def test_chat_completions(self, test_client: pxt.Client) -> None:
|
|
41
|
-
skip_test_if_not_installed('openai')
|
|
42
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
43
|
-
cl = test_client
|
|
44
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
45
|
-
from pixeltable.functions.openai import chat_completions
|
|
46
|
-
msgs = [
|
|
47
|
-
{"role": "system", "content": "You are a helpful assistant."},
|
|
48
|
-
{"role": "user", "content": t.input}
|
|
49
|
-
]
|
|
50
|
-
t.add_column(input_msgs=msgs)
|
|
51
|
-
t.add_column(chat_output=chat_completions(model='gpt-3.5-turbo', messages=t.input_msgs))
|
|
52
|
-
# with inlined messages
|
|
53
|
-
t.add_column(chat_output_2=chat_completions(model='gpt-3.5-turbo', messages=msgs))
|
|
54
|
-
# test a bunch of the parameters
|
|
55
|
-
t.add_column(chat_output_3=chat_completions(
|
|
56
|
-
model='gpt-3.5-turbo', messages=msgs, frequency_penalty=0.1, logprobs=True, top_logprobs=3,
|
|
57
|
-
max_tokens=500, n=3, presence_penalty=0.1, seed=4171780, stop=['\n'], temperature=0.7, top_p=0.8,
|
|
58
|
-
user='pixeltable'
|
|
59
|
-
))
|
|
60
|
-
# test with JSON output enforced
|
|
61
|
-
t.add_column(chat_output_4=chat_completions(
|
|
62
|
-
model='gpt-3.5-turbo', messages=msgs, response_format={'type': 'json_object'}
|
|
63
|
-
))
|
|
64
|
-
# TODO Also test the `tools` and `tool_choice` parameters.
|
|
65
|
-
validate_update_status(t.insert(input='Give me an example of a typical JSON structure.'), 1)
|
|
66
|
-
result = t.collect()
|
|
67
|
-
assert len(result['chat_output'][0]['choices'][0]['message']['content']) > 0
|
|
68
|
-
assert len(result['chat_output_2'][0]['choices'][0]['message']['content']) > 0
|
|
69
|
-
assert len(result['chat_output_3'][0]['choices'][0]['message']['content']) > 0
|
|
70
|
-
assert len(result['chat_output_4'][0]['choices'][0]['message']['content']) > 0
|
|
71
|
-
|
|
72
|
-
# When OpenAI gets a request with `response_format` equal to `json_object`, but the prompt does not
|
|
73
|
-
# contain the string "json", it refuses the request.
|
|
74
|
-
# TODO This should probably not be throwing an exception, but rather logging the error in
|
|
75
|
-
# `t.chat_output_4.errormsg` etc.
|
|
76
|
-
with pytest.raises(excs.ExprEvalError) as exc_info:
|
|
77
|
-
t.insert(input='Say something interesting.')
|
|
78
|
-
assert "\\'messages\\' must contain the word \\'json\\'" in str(exc_info.value)
|
|
79
|
-
|
|
80
|
-
def test_gpt_4_vision(self, test_client: pxt.Client) -> None:
|
|
81
|
-
skip_test_if_not_installed('openai')
|
|
82
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
83
|
-
cl = test_client
|
|
84
|
-
t = cl.create_table('test_tbl', {'prompt': StringType(), 'img': ImageType()})
|
|
85
|
-
from pixeltable.functions.openai import chat_completions, vision
|
|
86
|
-
from pixeltable.functions.string import str_format
|
|
87
|
-
t.add_column(response=vision(prompt="What's in this image?", image=t.img))
|
|
88
|
-
# Also get the response the low-level way, by calling chat_completions
|
|
89
|
-
msgs = [
|
|
90
|
-
{'role': 'user',
|
|
91
|
-
'content': [
|
|
92
|
-
{'type': 'text', 'text': t.prompt},
|
|
93
|
-
{'type': 'image_url', 'image_url': {
|
|
94
|
-
'url': str_format('data:image/png;base64,{0}', t.img.b64_encode())
|
|
95
|
-
}}
|
|
96
|
-
]}
|
|
97
|
-
]
|
|
98
|
-
t.add_column(response_2=chat_completions(model='gpt-4-vision-preview', messages=msgs, max_tokens=300).choices[0].message.content)
|
|
99
|
-
validate_update_status(t.insert(prompt="What's in this image?", img=SAMPLE_IMAGE_URL), 1)
|
|
100
|
-
result = t.collect()['response_2'][0]
|
|
101
|
-
assert len(result) > 0
|
|
102
|
-
|
|
103
|
-
def test_embeddings(self, test_client: pxt.Client) -> None:
|
|
104
|
-
skip_test_if_not_installed('openai')
|
|
105
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
106
|
-
cl = test_client
|
|
107
|
-
from pixeltable.functions.openai import embeddings
|
|
108
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
109
|
-
t.add_column(ada_embed=embeddings(model='text-embedding-ada-002', input=t.input))
|
|
110
|
-
t.add_column(text_3=embeddings(model='text-embedding-3-small', input=t.input, user='pixeltable'))
|
|
111
|
-
validate_update_status(t.insert(input='Say something interesting.'), 1)
|
|
112
|
-
_ = t.head()
|
|
113
|
-
|
|
114
|
-
def test_moderations(self, test_client: pxt.Client) -> None:
|
|
115
|
-
skip_test_if_not_installed('openai')
|
|
116
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
117
|
-
cl = test_client
|
|
118
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
119
|
-
from pixeltable.functions.openai import moderations
|
|
120
|
-
t.add_column(moderation=moderations(input=t.input))
|
|
121
|
-
t.add_column(moderation_2=moderations(input=t.input, model='text-moderation-stable'))
|
|
122
|
-
validate_update_status(t.insert(input='Say something interesting.'), 1)
|
|
123
|
-
_ = t.head()
|
|
124
|
-
|
|
125
|
-
def test_image_generations(self, test_client: pxt.Client) -> None:
|
|
126
|
-
skip_test_if_not_installed('openai')
|
|
127
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
128
|
-
cl = test_client
|
|
129
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
130
|
-
from pixeltable.functions.openai import image_generations
|
|
131
|
-
t.add_column(img=image_generations(t.input))
|
|
132
|
-
# Test dall-e-2 options
|
|
133
|
-
t.add_column(img_2=image_generations(
|
|
134
|
-
t.input, model='dall-e-2', size='512x512', user='pixeltable'
|
|
135
|
-
))
|
|
136
|
-
validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
|
|
137
|
-
assert t.collect()['img'][0].size == (1024, 1024)
|
|
138
|
-
assert t.collect()['img_2'][0].size == (512, 512)
|
|
139
|
-
|
|
140
|
-
@pytest.mark.skip('Test is expensive and slow')
|
|
141
|
-
def test_image_generations_dall_e_3(self, test_client: pxt.Client) -> None:
|
|
142
|
-
skip_test_if_not_installed('openai')
|
|
143
|
-
TestOpenai.skip_test_if_no_openai_client()
|
|
144
|
-
cl = test_client
|
|
145
|
-
t = cl.create_table('test_tbl', {'input': StringType()})
|
|
146
|
-
from pixeltable.functions.openai import image_generations
|
|
147
|
-
# Test dall-e-3 options
|
|
148
|
-
t.add_column(img_3=image_generations(
|
|
149
|
-
t.input, model='dall-e-3', quality='hd', size='1792x1024', style='natural', user='pixeltable'
|
|
150
|
-
))
|
|
151
|
-
validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
|
|
152
|
-
assert t.collect()['img_3'][0].size == (1792, 1024)
|
|
153
|
-
|
|
154
|
-
# This ensures that the test will be skipped, rather than returning an error, when no API key is
|
|
155
|
-
# available (for example, when a PR runs in CI).
|
|
156
|
-
@staticmethod
|
|
157
|
-
def skip_test_if_no_openai_client() -> None:
|
|
158
|
-
try:
|
|
159
|
-
import pixeltable.functions.openai
|
|
160
|
-
_ = pixeltable.functions.openai.openai_client()
|
|
161
|
-
except excs.Error as exc:
|
|
162
|
-
pytest.skip(str(exc))
|
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
import pixeltable.exceptions as excs
|
|
5
|
-
from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@pytest.mark.remote_api
|
|
9
|
-
class TestTogether:
|
|
10
|
-
|
|
11
|
-
def test_completions(self, test_client: pxt.Client) -> None:
|
|
12
|
-
skip_test_if_not_installed('together')
|
|
13
|
-
TestTogether.skip_test_if_no_together_client()
|
|
14
|
-
cl = test_client
|
|
15
|
-
t = cl.create_table('test_tbl', {'input': pxt.StringType()})
|
|
16
|
-
from pixeltable.functions.together import completions
|
|
17
|
-
t.add_column(output=completions(prompt=t.input, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
|
|
18
|
-
t.add_column(output_2=completions(
|
|
19
|
-
prompt=t.input,
|
|
20
|
-
model='mistralai/Mixtral-8x7B-v0.1',
|
|
21
|
-
max_tokens=300,
|
|
22
|
-
stop=['\n'],
|
|
23
|
-
temperature=0.7,
|
|
24
|
-
top_p=0.9,
|
|
25
|
-
top_k=40,
|
|
26
|
-
repetition_penalty=1.1,
|
|
27
|
-
logprobs=1,
|
|
28
|
-
echo=True,
|
|
29
|
-
n=3,
|
|
30
|
-
safety_model='Meta-Llama/Llama-Guard-7b'
|
|
31
|
-
))
|
|
32
|
-
validate_update_status(t.insert(input='I am going to the '), 1)
|
|
33
|
-
result = t.collect()
|
|
34
|
-
assert len(result['output'][0]['choices'][0]['text']) > 0
|
|
35
|
-
assert len(result['output_2'][0]['choices'][0]['text']) > 0
|
|
36
|
-
|
|
37
|
-
def test_chat_completions(self, test_client: pxt.Client) -> None:
|
|
38
|
-
skip_test_if_not_installed('together')
|
|
39
|
-
TestTogether.skip_test_if_no_together_client()
|
|
40
|
-
cl = test_client
|
|
41
|
-
t = cl.create_table('test_tbl', {'input': pxt.StringType()})
|
|
42
|
-
messages = [{'role': 'user', 'content': t.input}]
|
|
43
|
-
from pixeltable.functions.together import chat_completions
|
|
44
|
-
t.add_column(output=chat_completions(messages=messages, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
|
|
45
|
-
t.add_column(output_2=chat_completions(
|
|
46
|
-
messages=messages,
|
|
47
|
-
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
48
|
-
max_tokens=300,
|
|
49
|
-
stop=['\n'],
|
|
50
|
-
temperature=0.7,
|
|
51
|
-
top_p=0.9,
|
|
52
|
-
top_k=40,
|
|
53
|
-
repetition_penalty=1.1,
|
|
54
|
-
logprobs=1,
|
|
55
|
-
echo=True,
|
|
56
|
-
n=3,
|
|
57
|
-
safety_model='Meta-Llama/Llama-Guard-7b',
|
|
58
|
-
response_format={'type': 'json_object'}
|
|
59
|
-
))
|
|
60
|
-
validate_update_status(t.insert(input='Give me a typical example of a JSON structure.'), 1)
|
|
61
|
-
result = t.collect()
|
|
62
|
-
assert len(result['output'][0]['choices'][0]['message']) > 0
|
|
63
|
-
assert len(result['output_2'][0]['choices'][0]['message']) > 0
|
|
64
|
-
|
|
65
|
-
def test_embeddings(self, test_client: pxt.Client) -> None:
|
|
66
|
-
skip_test_if_not_installed('together')
|
|
67
|
-
TestTogether.skip_test_if_no_together_client()
|
|
68
|
-
cl = test_client
|
|
69
|
-
t = cl.create_table('test_tbl', {'input': pxt.StringType()})
|
|
70
|
-
from pixeltable.functions.together import embeddings
|
|
71
|
-
t.add_column(embed=embeddings(input=t.input, model='togethercomputer/m2-bert-80M-8k-retrieval'))
|
|
72
|
-
validate_update_status(t.insert(input='Together AI provides a variety of embeddings models.'), 1)
|
|
73
|
-
assert len(t.collect()['embed'][0]) > 0
|
|
74
|
-
|
|
75
|
-
def test_image_generations(self, test_client: pxt.Client) -> None:
|
|
76
|
-
skip_test_if_not_installed('together')
|
|
77
|
-
TestTogether.skip_test_if_no_together_client()
|
|
78
|
-
cl = test_client
|
|
79
|
-
t = cl.create_table(
|
|
80
|
-
'test_tbl',
|
|
81
|
-
{'input': pxt.StringType(), 'negative_prompt': pxt.StringType(nullable=True)}
|
|
82
|
-
)
|
|
83
|
-
from pixeltable.functions.together import image_generations
|
|
84
|
-
t.add_column(img=image_generations(t.input, model='runwayml/stable-diffusion-v1-5'))
|
|
85
|
-
t.add_column(img_2=image_generations(
|
|
86
|
-
t.input,
|
|
87
|
-
model='stabilityai/stable-diffusion-2-1',
|
|
88
|
-
steps=30,
|
|
89
|
-
seed=4178780,
|
|
90
|
-
height=768,
|
|
91
|
-
width=512,
|
|
92
|
-
negative_prompt=t.negative_prompt
|
|
93
|
-
))
|
|
94
|
-
validate_update_status(t.insert([
|
|
95
|
-
{'input': 'A friendly dinosaur playing tennis in a cornfield'},
|
|
96
|
-
{'input': 'A friendly dinosaur playing tennis in a cornfield',
|
|
97
|
-
'negative_prompt': 'tennis court'}
|
|
98
|
-
]), 2)
|
|
99
|
-
assert t.collect()['img'][0].size == (512, 512)
|
|
100
|
-
assert t.collect()['img_2'][0].size == (512, 768)
|
|
101
|
-
assert t.collect()['img'][1].size == (512, 512)
|
|
102
|
-
assert t.collect()['img_2'][1].size == (512, 768)
|
|
103
|
-
|
|
104
|
-
# This ensures that the test will be skipped, rather than returning an error, when no API key is
|
|
105
|
-
# available (for example, when a PR runs in CI).
|
|
106
|
-
@staticmethod
|
|
107
|
-
def skip_test_if_no_together_client() -> None:
|
|
108
|
-
try:
|
|
109
|
-
import pixeltable.functions.together
|
|
110
|
-
_ = pixeltable.functions.together.together_client()
|
|
111
|
-
except excs.Error as exc:
|
|
112
|
-
pytest.skip(str(exc))
|
pixeltable/tests/test_audio.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
|
-
import av
|
|
4
|
-
|
|
5
|
-
import pixeltable as pxt
|
|
6
|
-
import pixeltable.env as env
|
|
7
|
-
from pixeltable.tests.utils import get_video_files, get_audio_files
|
|
8
|
-
from pixeltable.type_system import VideoType, AudioType
|
|
9
|
-
from pixeltable.utils.media_store import MediaStore
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestAudio:
|
|
13
|
-
def check_audio_params(self, path: str, format: Optional[str] = None, codec: Optional[str] = None):
|
|
14
|
-
with av.open(path) as container:
|
|
15
|
-
audio_stream = container.streams.audio[0]
|
|
16
|
-
if format is not None:
|
|
17
|
-
assert format == container.format.name
|
|
18
|
-
if codec is not None:
|
|
19
|
-
assert codec == audio_stream.codec_context.codec.name
|
|
20
|
-
|
|
21
|
-
def test_basic(self, test_client: pxt.Client) -> None:
|
|
22
|
-
audio_filepaths = get_audio_files()
|
|
23
|
-
cl = test_client
|
|
24
|
-
audio_t = cl.create_table('audio', {'audio_file': AudioType()})
|
|
25
|
-
status = audio_t.insert({'audio_file': p} for p in audio_filepaths)
|
|
26
|
-
assert status.num_rows == len(audio_filepaths)
|
|
27
|
-
assert status.num_excs == 0
|
|
28
|
-
paths = audio_t.select(output=audio_t.audio_file.localpath).collect()['output']
|
|
29
|
-
assert set(paths) == set(audio_filepaths)
|
|
30
|
-
|
|
31
|
-
def test_extract(self, test_client: pxt.Client) -> None:
|
|
32
|
-
video_filepaths = get_video_files()
|
|
33
|
-
cl = test_client
|
|
34
|
-
video_t = cl.create_table('videos', {'video': VideoType()})
|
|
35
|
-
from pixeltable.functions.video import extract_audio
|
|
36
|
-
video_t.add_column(audio=extract_audio(video_t.video))
|
|
37
|
-
|
|
38
|
-
# one of the 3 videos doesn't have audio
|
|
39
|
-
status = video_t.insert({'video': p} for p in video_filepaths)
|
|
40
|
-
assert status.num_rows == len(video_filepaths)
|
|
41
|
-
assert status.num_excs == 0
|
|
42
|
-
assert MediaStore.count(video_t.get_id()) == len(video_filepaths) - 1
|
|
43
|
-
assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
|
|
44
|
-
assert env.Env.get().num_tmp_files() == 0
|
|
45
|
-
|
|
46
|
-
# make sure everything works with a fresh client
|
|
47
|
-
cl = pxt.Client()
|
|
48
|
-
video_t = cl.get_table('videos')
|
|
49
|
-
assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
|
|
50
|
-
|
|
51
|
-
# test generating different formats and codecs
|
|
52
|
-
paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s16le')).collect()['output']
|
|
53
|
-
# media files that are created as a part of a query end up in the tmp dir
|
|
54
|
-
assert env.Env.get().num_tmp_files() == video_t.where(video_t.audio != None).count()
|
|
55
|
-
for path in [p for p in paths if p is not None]:
|
|
56
|
-
self.check_audio_params(path, format='wav', codec='pcm_s16le')
|
|
57
|
-
# higher resolution
|
|
58
|
-
paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s32le')).collect()['output']
|
|
59
|
-
for path in [p for p in paths if p is not None]:
|
|
60
|
-
self.check_audio_params(path, format='wav', codec='pcm_s32le')
|
|
61
|
-
|
|
62
|
-
for format in ['mp3', 'flac']:
|
|
63
|
-
paths = video_t.select(output=extract_audio(video_t.video, format=format)).collect()['output']
|
|
64
|
-
for path in [p for p in paths if p is not None]:
|
|
65
|
-
self.check_audio_params(path, format=format)
|
pixeltable/tests/test_catalog.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
from pixeltable.catalog import is_valid_identifier, is_valid_path
|
|
2
|
-
|
|
3
|
-
class TestCatalog:
|
|
4
|
-
"""Tests for miscellanous catalog functions."""
|
|
5
|
-
def test_valid_identifier(self) -> None:
|
|
6
|
-
valid_ids = ['a', 'a1', 'a_1', 'a_']
|
|
7
|
-
invalid_ids = ['', '_', '__', '_a', '1a', 'a.b', '.a', 'a-b']
|
|
8
|
-
for valid_id in valid_ids:
|
|
9
|
-
assert is_valid_identifier(valid_id), valid_ids
|
|
10
|
-
|
|
11
|
-
for invalid_id in invalid_ids:
|
|
12
|
-
assert not is_valid_identifier(invalid_id), invalid_ids
|
|
13
|
-
|
|
14
|
-
def test_valid_path(self) -> None:
|
|
15
|
-
assert is_valid_path('', empty_is_valid=True)
|
|
16
|
-
assert not is_valid_path('', empty_is_valid=False)
|
|
17
|
-
|
|
18
|
-
valid_paths = ['a', 'a_.b_', 'a.b.c', 'a.b.c.d']
|
|
19
|
-
invalid_paths = ['.', '..', 'a.', '.a', 'a..b']
|
|
20
|
-
|
|
21
|
-
for valid_path in valid_paths:
|
|
22
|
-
assert is_valid_path(valid_path, empty_is_valid=False), valid_path
|
|
23
|
-
assert is_valid_path(valid_path, empty_is_valid=True), valid_path
|
|
24
|
-
|
|
25
|
-
for invalid_path in invalid_paths:
|
|
26
|
-
assert not is_valid_path(invalid_path, empty_is_valid=False), invalid_path
|
|
27
|
-
assert not is_valid_path(invalid_path, empty_is_valid=True), invalid_path
|
pixeltable/tests/test_client.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
import pixeltable.exceptions as excs
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class TestClient:
|
|
8
|
-
def test_list_functions(self, init_env) -> None:
|
|
9
|
-
cl = pxt.Client()
|
|
10
|
-
_ = cl.list_functions()
|
|
11
|
-
print(_)
|
|
12
|
-
|
|
13
|
-
def test_drop_table(self, test_tbl: pxt.Table) -> None:
|
|
14
|
-
cl = pxt.Client()
|
|
15
|
-
t = cl.get_table('test_tbl')
|
|
16
|
-
cl.drop_table('test_tbl')
|
|
17
|
-
with pytest.raises(excs.Error):
|
|
18
|
-
_ = cl.get_table('test_tbl')
|
|
19
|
-
with pytest.raises(excs.Error):
|
|
20
|
-
_ = t.show(1)
|
|
21
|
-
|