pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (110) hide show
  1. pixeltable/__init__.py +20 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +23 -7
  4. pixeltable/catalog/insertable_table.py +32 -19
  5. pixeltable/catalog/table.py +210 -20
  6. pixeltable/catalog/table_version.py +272 -111
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/dataframe.py +184 -110
  9. pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable/datatransfer/label_studio.py +526 -0
  11. pixeltable/datatransfer/remote.py +113 -0
  12. pixeltable/env.py +213 -79
  13. pixeltable/exec/__init__.py +2 -1
  14. pixeltable/exec/data_row_batch.py +6 -7
  15. pixeltable/exec/expr_eval_node.py +28 -28
  16. pixeltable/exec/sql_scan_node.py +7 -6
  17. pixeltable/exprs/__init__.py +4 -3
  18. pixeltable/exprs/column_ref.py +11 -2
  19. pixeltable/exprs/comparison.py +39 -1
  20. pixeltable/exprs/data_row.py +7 -0
  21. pixeltable/exprs/expr.py +26 -19
  22. pixeltable/exprs/function_call.py +17 -18
  23. pixeltable/exprs/globals.py +14 -2
  24. pixeltable/exprs/image_member_access.py +9 -28
  25. pixeltable/exprs/in_predicate.py +96 -0
  26. pixeltable/exprs/inline_array.py +13 -11
  27. pixeltable/exprs/inline_dict.py +15 -13
  28. pixeltable/exprs/row_builder.py +7 -1
  29. pixeltable/exprs/similarity_expr.py +67 -0
  30. pixeltable/ext/functions/whisperx.py +30 -0
  31. pixeltable/ext/functions/yolox.py +16 -0
  32. pixeltable/func/__init__.py +0 -2
  33. pixeltable/func/aggregate_function.py +5 -2
  34. pixeltable/func/callable_function.py +57 -13
  35. pixeltable/func/expr_template_function.py +14 -3
  36. pixeltable/func/function.py +35 -4
  37. pixeltable/func/signature.py +5 -15
  38. pixeltable/func/udf.py +8 -12
  39. pixeltable/functions/fireworks.py +9 -4
  40. pixeltable/functions/huggingface.py +48 -5
  41. pixeltable/functions/openai.py +49 -11
  42. pixeltable/functions/pil/image.py +61 -64
  43. pixeltable/functions/together.py +32 -6
  44. pixeltable/functions/util.py +0 -43
  45. pixeltable/functions/video.py +46 -8
  46. pixeltable/globals.py +443 -0
  47. pixeltable/index/__init__.py +1 -0
  48. pixeltable/index/base.py +9 -2
  49. pixeltable/index/btree.py +54 -0
  50. pixeltable/index/embedding_index.py +91 -15
  51. pixeltable/io/__init__.py +4 -0
  52. pixeltable/io/globals.py +59 -0
  53. pixeltable/{utils → io}/hf_datasets.py +48 -17
  54. pixeltable/io/pandas.py +148 -0
  55. pixeltable/{utils → io}/parquet.py +58 -33
  56. pixeltable/iterators/__init__.py +1 -1
  57. pixeltable/iterators/base.py +8 -4
  58. pixeltable/iterators/document.py +225 -93
  59. pixeltable/iterators/video.py +16 -9
  60. pixeltable/metadata/__init__.py +8 -4
  61. pixeltable/metadata/converters/convert_12.py +3 -0
  62. pixeltable/metadata/converters/convert_13.py +41 -0
  63. pixeltable/metadata/converters/convert_14.py +13 -0
  64. pixeltable/metadata/converters/convert_15.py +29 -0
  65. pixeltable/metadata/converters/util.py +63 -0
  66. pixeltable/metadata/schema.py +12 -6
  67. pixeltable/plan.py +11 -24
  68. pixeltable/store.py +16 -23
  69. pixeltable/tool/create_test_db_dump.py +49 -14
  70. pixeltable/type_system.py +27 -58
  71. pixeltable/utils/coco.py +94 -0
  72. pixeltable/utils/documents.py +42 -12
  73. pixeltable/utils/http_server.py +70 -0
  74. pixeltable-0.2.7.dist-info/METADATA +137 -0
  75. pixeltable-0.2.7.dist-info/RECORD +126 -0
  76. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
  77. pixeltable/client.py +0 -600
  78. pixeltable/exprs/image_similarity_predicate.py +0 -58
  79. pixeltable/func/batched_function.py +0 -53
  80. pixeltable/func/nos_function.py +0 -202
  81. pixeltable/tests/conftest.py +0 -171
  82. pixeltable/tests/ext/test_yolox.py +0 -21
  83. pixeltable/tests/functions/test_fireworks.py +0 -43
  84. pixeltable/tests/functions/test_functions.py +0 -60
  85. pixeltable/tests/functions/test_huggingface.py +0 -158
  86. pixeltable/tests/functions/test_openai.py +0 -162
  87. pixeltable/tests/functions/test_together.py +0 -112
  88. pixeltable/tests/test_audio.py +0 -65
  89. pixeltable/tests/test_catalog.py +0 -27
  90. pixeltable/tests/test_client.py +0 -21
  91. pixeltable/tests/test_component_view.py +0 -379
  92. pixeltable/tests/test_dataframe.py +0 -440
  93. pixeltable/tests/test_dirs.py +0 -107
  94. pixeltable/tests/test_document.py +0 -120
  95. pixeltable/tests/test_exprs.py +0 -802
  96. pixeltable/tests/test_function.py +0 -332
  97. pixeltable/tests/test_index.py +0 -138
  98. pixeltable/tests/test_migration.py +0 -44
  99. pixeltable/tests/test_nos.py +0 -54
  100. pixeltable/tests/test_snapshot.py +0 -231
  101. pixeltable/tests/test_table.py +0 -1343
  102. pixeltable/tests/test_transactional_directory.py +0 -42
  103. pixeltable/tests/test_types.py +0 -52
  104. pixeltable/tests/test_video.py +0 -159
  105. pixeltable/tests/test_view.py +0 -535
  106. pixeltable/tests/utils.py +0 -442
  107. pixeltable/utils/clip.py +0 -18
  108. pixeltable-0.2.5.dist-info/METADATA +0 -128
  109. pixeltable-0.2.5.dist-info/RECORD +0 -139
  110. {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
@@ -1,158 +0,0 @@
1
- from typing import Dict, Any
2
-
3
- import pytest
4
-
5
- import pixeltable as pxt
6
- from pixeltable.tests.utils import skip_test_if_not_installed, get_sentences, get_image_files, \
7
- SAMPLE_IMAGE_URL
8
- from pixeltable.type_system import StringType, JsonType, ImageType, BoolType, FloatType, ArrayType
9
-
10
-
11
- class TestHuggingface:
12
-
13
- def test_hf_function(self, test_client: pxt.Client) -> None:
14
- skip_test_if_not_installed('sentence_transformers')
15
- cl = test_client
16
- t = cl.create_table('test_tbl', {'input': StringType(), 'bool_col': BoolType()})
17
- from pixeltable.functions.huggingface import sentence_transformer
18
- model_id = 'intfloat/e5-large-v2'
19
- t.add_column(e5=sentence_transformer(t.input, model_id=model_id))
20
- sents = get_sentences()
21
- status = t.insert({'input': s, 'bool_col': True} for s in sents)
22
- assert status.num_rows == len(sents)
23
- assert status.num_excs == 0
24
-
25
- # verify handling of constant params
26
- with pytest.raises(ValueError) as exc_info:
27
- t.add_column(e5_2=sentence_transformer(t.input, model_id=t.input))
28
- assert ': parameter model_id must be a constant value' in str(exc_info.value)
29
- with pytest.raises(ValueError) as exc_info:
30
- t.add_column(e5_2=sentence_transformer(t.input, model_id=model_id, normalize_embeddings=t.bool_col))
31
- assert ': parameter normalize_embeddings must be a constant value' in str(exc_info.value)
32
-
33
- # make sure this doesn't cause an exception
34
- # TODO: is there some way to capture the output?
35
- t.describe()
36
-
37
- def test_sentence_transformer(self, test_client: pxt.Client) -> None:
38
- skip_test_if_not_installed('sentence_transformers')
39
- cl = test_client
40
- t = cl.create_table('test_tbl', {'input': StringType(), 'input_list': JsonType()})
41
- sents = get_sentences(10)
42
- status = t.insert({'input': s, 'input_list': sents} for s in sents)
43
- assert status.num_rows == len(sents)
44
- assert status.num_excs == 0
45
-
46
- # run multiple models one at a time in order to exercise batching
47
- from pixeltable.functions.huggingface import sentence_transformer, sentence_transformer_list
48
- model_ids = ['sentence-transformers/all-mpnet-base-v2', 'BAAI/bge-reranker-base']
49
- num_dims = [768, 768]
50
- for idx, model_id in enumerate(model_ids):
51
- col_name = f'embed{idx}'
52
- t[col_name] = sentence_transformer(t.input, model_id=model_id, normalize_embeddings=True)
53
- assert t.column_types()[col_name] == ArrayType((None,), dtype=FloatType(), nullable=False)
54
- list_col_name = f'embed_list{idx}'
55
- t[list_col_name] = sentence_transformer_list(t.input_list, model_id=model_id, normalize_embeddings=True)
56
- assert t.column_types()[list_col_name] == JsonType()
57
-
58
- def verify_row(row: Dict[str, Any]) -> None:
59
- for idx, (_, d) in enumerate(zip(model_ids, num_dims)):
60
- assert row[f'embed{idx}'].shape == (d,)
61
- assert len(row[f'embed_list{idx}']) == len(sents)
62
- assert all(len(v) == d for v in row[f'embed_list{idx}'])
63
-
64
- verify_row(t.tail(1)[0])
65
-
66
- # execution still works after reload
67
- cl = pxt.Client(reload=True)
68
- t = cl.get_table('test_tbl')
69
- status = t.insert({'input': s, 'input_list': sents} for s in sents)
70
- assert status.num_rows == len(sents)
71
- assert status.num_excs == 0
72
- verify_row(t.tail(1)[0])
73
-
74
- def test_cross_encoder(self, test_client: pxt.Client) -> None:
75
- skip_test_if_not_installed('sentence_transformers')
76
- cl = test_client
77
- t = cl.create_table('test_tbl', {'input': StringType(), 'input_list': JsonType()})
78
- sents = get_sentences(10)
79
- status = t.insert({'input': s, 'input_list': sents} for s in sents)
80
- assert status.num_rows == len(sents)
81
- assert status.num_excs == 0
82
-
83
- # run multiple models one at a time in order to exercise batching
84
- from pixeltable.functions.huggingface import cross_encoder, cross_encoder_list
85
- model_ids = ['cross-encoder/ms-marco-MiniLM-L-6-v2', 'cross-encoder/ms-marco-TinyBERT-L-2-v2']
86
- for idx, model_id in enumerate(model_ids):
87
- col_name = f'embed{idx}'
88
- t[col_name] = cross_encoder(t.input, t.input, model_id=model_id)
89
- assert t.column_types()[col_name] == FloatType()
90
- list_col_name = f'embed_list{idx}'
91
- t[list_col_name] = cross_encoder_list(t.input, t.input_list, model_id=model_id)
92
- assert t.column_types()[list_col_name] == JsonType()
93
-
94
- def verify_row(row: Dict[str, Any]) -> None:
95
- for i in range(len(model_ids)):
96
- assert len(row[f'embed_list{idx}']) == len(sents)
97
- assert all(isinstance(v, float) for v in row[f'embed_list{idx}'])
98
-
99
- verify_row(t.tail(1)[0])
100
-
101
- # execution still works after reload
102
- cl = pxt.Client(reload=True)
103
- t = cl.get_table('test_tbl')
104
- status = t.insert({'input': s, 'input_list': sents} for s in sents)
105
- assert status.num_rows == len(sents)
106
- assert status.num_excs == 0
107
- verify_row(t.tail(1)[0])
108
-
109
- def test_clip(self, test_client: pxt.Client) -> None:
110
- skip_test_if_not_installed('transformers')
111
- cl = test_client
112
- t = cl.create_table('test_tbl', {'text': StringType(), 'img': ImageType()})
113
- num_rows = 10
114
- sents = get_sentences(num_rows)
115
- imgs = get_image_files()[:num_rows]
116
- status = t.insert({'text': text, 'img': img} for text, img in zip(sents, imgs))
117
- assert status.num_rows == len(sents)
118
- assert status.num_excs == 0
119
-
120
- # run multiple models one at a time in order to exercise batching
121
- from pixeltable.functions.huggingface import clip_text, clip_image
122
- model_ids = ['openai/clip-vit-base-patch32', 'laion/CLIP-ViT-B-32-laion2B-s34B-b79K']
123
- for idx, model_id in enumerate(model_ids):
124
- col_name = f'embed_text{idx}'
125
- t[col_name] = clip_text(t.text, model_id=model_id)
126
- assert t.column_types()[col_name].is_array_type()
127
- col_name = f'embed_img{idx}'
128
- t[col_name] = clip_image(t.img, model_id=model_id)
129
- assert t.column_types()[col_name].is_array_type()
130
-
131
- def verify_row(row: Dict[str, Any]) -> None:
132
- for idx, _ in enumerate(model_ids):
133
- assert row[f'embed_text{idx}'].shape == (512,)
134
- assert row[f'embed_img{idx}'].shape == (512,)
135
-
136
- verify_row(t.tail(1)[0])
137
-
138
- # execution still works after reload
139
- cl = pxt.Client(reload=True)
140
- t = cl.get_table('test_tbl')
141
- status = t.insert({'text': text, 'img': img} for text, img in zip(sents, imgs))
142
- assert status.num_rows == len(sents)
143
- assert status.num_excs == 0
144
- verify_row(t.tail(1)[0])
145
-
146
- def test_detr_for_object_detection(self, test_client: pxt.Client) -> None:
147
- skip_test_if_not_installed('transformers')
148
- cl = test_client
149
- t = cl.create_table('test_tbl', {'img': ImageType()})
150
- from pixeltable.functions.huggingface import detr_for_object_detection
151
- t['detect'] = detr_for_object_detection(t.img, model_id='facebook/detr-resnet-50', threshold=0.8)
152
- status = t.insert(img=SAMPLE_IMAGE_URL)
153
- assert status.num_rows == 1
154
- assert status.num_excs == 0
155
- result = t.select(t.detect).collect()[0]['detect']
156
- assert 'orange' in result['label_text']
157
- assert 'bowl' in result['label_text']
158
- assert 'broccoli' in result['label_text']
@@ -1,162 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
- from pixeltable.tests.utils import SAMPLE_IMAGE_URL, skip_test_if_not_installed, validate_update_status
6
- from pixeltable.type_system import StringType, ImageType
7
-
8
-
9
- @pytest.mark.remote_api
10
- class TestOpenai:
11
-
12
- def test_audio(self, test_client: pxt.Client) -> None:
13
- skip_test_if_not_installed('openai')
14
- TestOpenai.skip_test_if_no_openai_client()
15
- cl = test_client
16
- t = cl.create_table('test_tbl', {'input': StringType()})
17
- from pixeltable.functions.openai import speech, transcriptions, translations
18
- t.add_column(speech=speech(t.input, model='tts-1', voice='onyx'))
19
- t.add_column(speech_2=speech(t.input, model='tts-1', voice='onyx', response_format='flac', speed=1.05))
20
- t.add_column(transcription=transcriptions(t.speech, model='whisper-1'))
21
- t.add_column(transcription_2=transcriptions(
22
- t.speech, model='whisper-1', language='en', prompt='Transcribe the contents of this recording.'
23
- ))
24
- t.add_column(translation=translations(t.speech, model='whisper-1'))
25
- t.add_column(translation_2=translations(
26
- t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.05
27
- ))
28
- validate_update_status(t.insert([
29
- {'input': 'I am a banana.'},
30
- {'input': 'Es fácil traducir del español al inglés.'}
31
- ]), expected_rows=2)
32
- # The audio generation -> transcription loop on these examples should be simple and clear enough
33
- # that the unit test can reliably expect the output closely enough to pass these checks.
34
- results = t.collect()
35
- assert results[0]['transcription']['text'] in ['I am a banana.', "I'm a banana."]
36
- assert results[0]['transcription_2']['text'] in ['I am a banana.', "I'm a banana."]
37
- assert 'easy to translate' in results[1]['translation']['text']
38
- assert 'easy to translate' in results[1]['translation_2']['text']
39
-
40
- def test_chat_completions(self, test_client: pxt.Client) -> None:
41
- skip_test_if_not_installed('openai')
42
- TestOpenai.skip_test_if_no_openai_client()
43
- cl = test_client
44
- t = cl.create_table('test_tbl', {'input': StringType()})
45
- from pixeltable.functions.openai import chat_completions
46
- msgs = [
47
- {"role": "system", "content": "You are a helpful assistant."},
48
- {"role": "user", "content": t.input}
49
- ]
50
- t.add_column(input_msgs=msgs)
51
- t.add_column(chat_output=chat_completions(model='gpt-3.5-turbo', messages=t.input_msgs))
52
- # with inlined messages
53
- t.add_column(chat_output_2=chat_completions(model='gpt-3.5-turbo', messages=msgs))
54
- # test a bunch of the parameters
55
- t.add_column(chat_output_3=chat_completions(
56
- model='gpt-3.5-turbo', messages=msgs, frequency_penalty=0.1, logprobs=True, top_logprobs=3,
57
- max_tokens=500, n=3, presence_penalty=0.1, seed=4171780, stop=['\n'], temperature=0.7, top_p=0.8,
58
- user='pixeltable'
59
- ))
60
- # test with JSON output enforced
61
- t.add_column(chat_output_4=chat_completions(
62
- model='gpt-3.5-turbo', messages=msgs, response_format={'type': 'json_object'}
63
- ))
64
- # TODO Also test the `tools` and `tool_choice` parameters.
65
- validate_update_status(t.insert(input='Give me an example of a typical JSON structure.'), 1)
66
- result = t.collect()
67
- assert len(result['chat_output'][0]['choices'][0]['message']['content']) > 0
68
- assert len(result['chat_output_2'][0]['choices'][0]['message']['content']) > 0
69
- assert len(result['chat_output_3'][0]['choices'][0]['message']['content']) > 0
70
- assert len(result['chat_output_4'][0]['choices'][0]['message']['content']) > 0
71
-
72
- # When OpenAI gets a request with `response_format` equal to `json_object`, but the prompt does not
73
- # contain the string "json", it refuses the request.
74
- # TODO This should probably not be throwing an exception, but rather logging the error in
75
- # `t.chat_output_4.errormsg` etc.
76
- with pytest.raises(excs.ExprEvalError) as exc_info:
77
- t.insert(input='Say something interesting.')
78
- assert "\\'messages\\' must contain the word \\'json\\'" in str(exc_info.value)
79
-
80
- def test_gpt_4_vision(self, test_client: pxt.Client) -> None:
81
- skip_test_if_not_installed('openai')
82
- TestOpenai.skip_test_if_no_openai_client()
83
- cl = test_client
84
- t = cl.create_table('test_tbl', {'prompt': StringType(), 'img': ImageType()})
85
- from pixeltable.functions.openai import chat_completions, vision
86
- from pixeltable.functions.string import str_format
87
- t.add_column(response=vision(prompt="What's in this image?", image=t.img))
88
- # Also get the response the low-level way, by calling chat_completions
89
- msgs = [
90
- {'role': 'user',
91
- 'content': [
92
- {'type': 'text', 'text': t.prompt},
93
- {'type': 'image_url', 'image_url': {
94
- 'url': str_format('data:image/png;base64,{0}', t.img.b64_encode())
95
- }}
96
- ]}
97
- ]
98
- t.add_column(response_2=chat_completions(model='gpt-4-vision-preview', messages=msgs, max_tokens=300).choices[0].message.content)
99
- validate_update_status(t.insert(prompt="What's in this image?", img=SAMPLE_IMAGE_URL), 1)
100
- result = t.collect()['response_2'][0]
101
- assert len(result) > 0
102
-
103
- def test_embeddings(self, test_client: pxt.Client) -> None:
104
- skip_test_if_not_installed('openai')
105
- TestOpenai.skip_test_if_no_openai_client()
106
- cl = test_client
107
- from pixeltable.functions.openai import embeddings
108
- t = cl.create_table('test_tbl', {'input': StringType()})
109
- t.add_column(ada_embed=embeddings(model='text-embedding-ada-002', input=t.input))
110
- t.add_column(text_3=embeddings(model='text-embedding-3-small', input=t.input, user='pixeltable'))
111
- validate_update_status(t.insert(input='Say something interesting.'), 1)
112
- _ = t.head()
113
-
114
- def test_moderations(self, test_client: pxt.Client) -> None:
115
- skip_test_if_not_installed('openai')
116
- TestOpenai.skip_test_if_no_openai_client()
117
- cl = test_client
118
- t = cl.create_table('test_tbl', {'input': StringType()})
119
- from pixeltable.functions.openai import moderations
120
- t.add_column(moderation=moderations(input=t.input))
121
- t.add_column(moderation_2=moderations(input=t.input, model='text-moderation-stable'))
122
- validate_update_status(t.insert(input='Say something interesting.'), 1)
123
- _ = t.head()
124
-
125
- def test_image_generations(self, test_client: pxt.Client) -> None:
126
- skip_test_if_not_installed('openai')
127
- TestOpenai.skip_test_if_no_openai_client()
128
- cl = test_client
129
- t = cl.create_table('test_tbl', {'input': StringType()})
130
- from pixeltable.functions.openai import image_generations
131
- t.add_column(img=image_generations(t.input))
132
- # Test dall-e-2 options
133
- t.add_column(img_2=image_generations(
134
- t.input, model='dall-e-2', size='512x512', user='pixeltable'
135
- ))
136
- validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
137
- assert t.collect()['img'][0].size == (1024, 1024)
138
- assert t.collect()['img_2'][0].size == (512, 512)
139
-
140
- @pytest.mark.skip('Test is expensive and slow')
141
- def test_image_generations_dall_e_3(self, test_client: pxt.Client) -> None:
142
- skip_test_if_not_installed('openai')
143
- TestOpenai.skip_test_if_no_openai_client()
144
- cl = test_client
145
- t = cl.create_table('test_tbl', {'input': StringType()})
146
- from pixeltable.functions.openai import image_generations
147
- # Test dall-e-3 options
148
- t.add_column(img_3=image_generations(
149
- t.input, model='dall-e-3', quality='hd', size='1792x1024', style='natural', user='pixeltable'
150
- ))
151
- validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
152
- assert t.collect()['img_3'][0].size == (1792, 1024)
153
-
154
- # This ensures that the test will be skipped, rather than returning an error, when no API key is
155
- # available (for example, when a PR runs in CI).
156
- @staticmethod
157
- def skip_test_if_no_openai_client() -> None:
158
- try:
159
- import pixeltable.functions.openai
160
- _ = pixeltable.functions.openai.openai_client()
161
- except excs.Error as exc:
162
- pytest.skip(str(exc))
@@ -1,112 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
- from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
6
-
7
-
8
- @pytest.mark.remote_api
9
- class TestTogether:
10
-
11
- def test_completions(self, test_client: pxt.Client) -> None:
12
- skip_test_if_not_installed('together')
13
- TestTogether.skip_test_if_no_together_client()
14
- cl = test_client
15
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
16
- from pixeltable.functions.together import completions
17
- t.add_column(output=completions(prompt=t.input, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
18
- t.add_column(output_2=completions(
19
- prompt=t.input,
20
- model='mistralai/Mixtral-8x7B-v0.1',
21
- max_tokens=300,
22
- stop=['\n'],
23
- temperature=0.7,
24
- top_p=0.9,
25
- top_k=40,
26
- repetition_penalty=1.1,
27
- logprobs=1,
28
- echo=True,
29
- n=3,
30
- safety_model='Meta-Llama/Llama-Guard-7b'
31
- ))
32
- validate_update_status(t.insert(input='I am going to the '), 1)
33
- result = t.collect()
34
- assert len(result['output'][0]['choices'][0]['text']) > 0
35
- assert len(result['output_2'][0]['choices'][0]['text']) > 0
36
-
37
- def test_chat_completions(self, test_client: pxt.Client) -> None:
38
- skip_test_if_not_installed('together')
39
- TestTogether.skip_test_if_no_together_client()
40
- cl = test_client
41
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
42
- messages = [{'role': 'user', 'content': t.input}]
43
- from pixeltable.functions.together import chat_completions
44
- t.add_column(output=chat_completions(messages=messages, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
45
- t.add_column(output_2=chat_completions(
46
- messages=messages,
47
- model='mistralai/Mixtral-8x7B-Instruct-v0.1',
48
- max_tokens=300,
49
- stop=['\n'],
50
- temperature=0.7,
51
- top_p=0.9,
52
- top_k=40,
53
- repetition_penalty=1.1,
54
- logprobs=1,
55
- echo=True,
56
- n=3,
57
- safety_model='Meta-Llama/Llama-Guard-7b',
58
- response_format={'type': 'json_object'}
59
- ))
60
- validate_update_status(t.insert(input='Give me a typical example of a JSON structure.'), 1)
61
- result = t.collect()
62
- assert len(result['output'][0]['choices'][0]['message']) > 0
63
- assert len(result['output_2'][0]['choices'][0]['message']) > 0
64
-
65
- def test_embeddings(self, test_client: pxt.Client) -> None:
66
- skip_test_if_not_installed('together')
67
- TestTogether.skip_test_if_no_together_client()
68
- cl = test_client
69
- t = cl.create_table('test_tbl', {'input': pxt.StringType()})
70
- from pixeltable.functions.together import embeddings
71
- t.add_column(embed=embeddings(input=t.input, model='togethercomputer/m2-bert-80M-8k-retrieval'))
72
- validate_update_status(t.insert(input='Together AI provides a variety of embeddings models.'), 1)
73
- assert len(t.collect()['embed'][0]) > 0
74
-
75
- def test_image_generations(self, test_client: pxt.Client) -> None:
76
- skip_test_if_not_installed('together')
77
- TestTogether.skip_test_if_no_together_client()
78
- cl = test_client
79
- t = cl.create_table(
80
- 'test_tbl',
81
- {'input': pxt.StringType(), 'negative_prompt': pxt.StringType(nullable=True)}
82
- )
83
- from pixeltable.functions.together import image_generations
84
- t.add_column(img=image_generations(t.input, model='runwayml/stable-diffusion-v1-5'))
85
- t.add_column(img_2=image_generations(
86
- t.input,
87
- model='stabilityai/stable-diffusion-2-1',
88
- steps=30,
89
- seed=4178780,
90
- height=768,
91
- width=512,
92
- negative_prompt=t.negative_prompt
93
- ))
94
- validate_update_status(t.insert([
95
- {'input': 'A friendly dinosaur playing tennis in a cornfield'},
96
- {'input': 'A friendly dinosaur playing tennis in a cornfield',
97
- 'negative_prompt': 'tennis court'}
98
- ]), 2)
99
- assert t.collect()['img'][0].size == (512, 512)
100
- assert t.collect()['img_2'][0].size == (512, 768)
101
- assert t.collect()['img'][1].size == (512, 512)
102
- assert t.collect()['img_2'][1].size == (512, 768)
103
-
104
- # This ensures that the test will be skipped, rather than returning an error, when no API key is
105
- # available (for example, when a PR runs in CI).
106
- @staticmethod
107
- def skip_test_if_no_together_client() -> None:
108
- try:
109
- import pixeltable.functions.together
110
- _ = pixeltable.functions.together.together_client()
111
- except excs.Error as exc:
112
- pytest.skip(str(exc))
@@ -1,65 +0,0 @@
1
- from typing import Optional
2
-
3
- import av
4
-
5
- import pixeltable as pxt
6
- import pixeltable.env as env
7
- from pixeltable.tests.utils import get_video_files, get_audio_files
8
- from pixeltable.type_system import VideoType, AudioType
9
- from pixeltable.utils.media_store import MediaStore
10
-
11
-
12
- class TestAudio:
13
- def check_audio_params(self, path: str, format: Optional[str] = None, codec: Optional[str] = None):
14
- with av.open(path) as container:
15
- audio_stream = container.streams.audio[0]
16
- if format is not None:
17
- assert format == container.format.name
18
- if codec is not None:
19
- assert codec == audio_stream.codec_context.codec.name
20
-
21
- def test_basic(self, test_client: pxt.Client) -> None:
22
- audio_filepaths = get_audio_files()
23
- cl = test_client
24
- audio_t = cl.create_table('audio', {'audio_file': AudioType()})
25
- status = audio_t.insert({'audio_file': p} for p in audio_filepaths)
26
- assert status.num_rows == len(audio_filepaths)
27
- assert status.num_excs == 0
28
- paths = audio_t.select(output=audio_t.audio_file.localpath).collect()['output']
29
- assert set(paths) == set(audio_filepaths)
30
-
31
- def test_extract(self, test_client: pxt.Client) -> None:
32
- video_filepaths = get_video_files()
33
- cl = test_client
34
- video_t = cl.create_table('videos', {'video': VideoType()})
35
- from pixeltable.functions.video import extract_audio
36
- video_t.add_column(audio=extract_audio(video_t.video))
37
-
38
- # one of the 3 videos doesn't have audio
39
- status = video_t.insert({'video': p} for p in video_filepaths)
40
- assert status.num_rows == len(video_filepaths)
41
- assert status.num_excs == 0
42
- assert MediaStore.count(video_t.get_id()) == len(video_filepaths) - 1
43
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
44
- assert env.Env.get().num_tmp_files() == 0
45
-
46
- # make sure everything works with a fresh client
47
- cl = pxt.Client()
48
- video_t = cl.get_table('videos')
49
- assert video_t.where(video_t.audio != None).count() == len(video_filepaths) - 1
50
-
51
- # test generating different formats and codecs
52
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s16le')).collect()['output']
53
- # media files that are created as a part of a query end up in the tmp dir
54
- assert env.Env.get().num_tmp_files() == video_t.where(video_t.audio != None).count()
55
- for path in [p for p in paths if p is not None]:
56
- self.check_audio_params(path, format='wav', codec='pcm_s16le')
57
- # higher resolution
58
- paths = video_t.select(output=extract_audio(video_t.video, format='wav', codec='pcm_s32le')).collect()['output']
59
- for path in [p for p in paths if p is not None]:
60
- self.check_audio_params(path, format='wav', codec='pcm_s32le')
61
-
62
- for format in ['mp3', 'flac']:
63
- paths = video_t.select(output=extract_audio(video_t.video, format=format)).collect()['output']
64
- for path in [p for p in paths if p is not None]:
65
- self.check_audio_params(path, format=format)
@@ -1,27 +0,0 @@
1
- from pixeltable.catalog import is_valid_identifier, is_valid_path
2
-
3
- class TestCatalog:
4
- """Tests for miscellanous catalog functions."""
5
- def test_valid_identifier(self) -> None:
6
- valid_ids = ['a', 'a1', 'a_1', 'a_']
7
- invalid_ids = ['', '_', '__', '_a', '1a', 'a.b', '.a', 'a-b']
8
- for valid_id in valid_ids:
9
- assert is_valid_identifier(valid_id), valid_ids
10
-
11
- for invalid_id in invalid_ids:
12
- assert not is_valid_identifier(invalid_id), invalid_ids
13
-
14
- def test_valid_path(self) -> None:
15
- assert is_valid_path('', empty_is_valid=True)
16
- assert not is_valid_path('', empty_is_valid=False)
17
-
18
- valid_paths = ['a', 'a_.b_', 'a.b.c', 'a.b.c.d']
19
- invalid_paths = ['.', '..', 'a.', '.a', 'a..b']
20
-
21
- for valid_path in valid_paths:
22
- assert is_valid_path(valid_path, empty_is_valid=False), valid_path
23
- assert is_valid_path(valid_path, empty_is_valid=True), valid_path
24
-
25
- for invalid_path in invalid_paths:
26
- assert not is_valid_path(invalid_path, empty_is_valid=False), invalid_path
27
- assert not is_valid_path(invalid_path, empty_is_valid=True), invalid_path
@@ -1,21 +0,0 @@
1
- import pytest
2
-
3
- import pixeltable as pxt
4
- import pixeltable.exceptions as excs
5
-
6
-
7
- class TestClient:
8
- def test_list_functions(self, init_env) -> None:
9
- cl = pxt.Client()
10
- _ = cl.list_functions()
11
- print(_)
12
-
13
- def test_drop_table(self, test_tbl: pxt.Table) -> None:
14
- cl = pxt.Client()
15
- t = cl.get_table('test_tbl')
16
- cl.drop_table('test_tbl')
17
- with pytest.raises(excs.Error):
18
- _ = cl.get_table('test_tbl')
19
- with pytest.raises(excs.Error):
20
- _ = t.show(1)
21
-