pixeltable 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
Potentially problematic release: this version of pixeltable has been flagged as potentially problematic.
- pixeltable/catalog/column.py +26 -49
- pixeltable/catalog/insertable_table.py +7 -4
- pixeltable/catalog/table.py +163 -57
- pixeltable/catalog/table_version.py +416 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/client.py +72 -6
- pixeltable/dataframe.py +65 -21
- pixeltable/env.py +52 -53
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +8 -40
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/aggregate_function.py +15 -15
- pixeltable/func/expr_template_function.py +9 -1
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +18 -12
- pixeltable/func/udf.py +7 -2
- pixeltable/functions/__init__.py +9 -9
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/fireworks.py +10 -37
- pixeltable/functions/huggingface.py +47 -19
- pixeltable/functions/openai.py +192 -24
- pixeltable/functions/together.py +104 -9
- pixeltable/functions/util.py +11 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +49 -0
- pixeltable/index/embedding_index.py +95 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -34
- pixeltable/store.py +38 -41
- pixeltable/tests/conftest.py +8 -14
- pixeltable/tests/ext/test_yolox.py +21 -0
- pixeltable/tests/functions/test_fireworks.py +43 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/{test_functions.py → functions/test_huggingface.py} +7 -143
- pixeltable/tests/functions/test_openai.py +162 -0
- pixeltable/tests/functions/test_together.py +112 -0
- pixeltable/tests/test_component_view.py +14 -5
- pixeltable/tests/test_dataframe.py +23 -22
- pixeltable/tests/test_exprs.py +99 -102
- pixeltable/tests/test_function.py +51 -43
- pixeltable/tests/test_index.py +138 -0
- pixeltable/tests/test_migration.py +2 -1
- pixeltable/tests/test_snapshot.py +24 -1
- pixeltable/tests/test_table.py +205 -26
- pixeltable/tests/test_types.py +30 -0
- pixeltable/tests/test_video.py +16 -16
- pixeltable/tests/test_view.py +5 -0
- pixeltable/tests/utils.py +171 -14
- pixeltable/tool/create_test_db_dump.py +16 -0
- pixeltable/type_system.py +77 -128
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/parquet.py +68 -27
- pixeltable/utils/pytorch.py +16 -97
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/METADATA +35 -28
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/RECORD +63 -50
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.3.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
@@ -0,0 +1,162 @@
+import pytest
+
+import pixeltable as pxt
+import pixeltable.exceptions as excs
+from pixeltable.tests.utils import SAMPLE_IMAGE_URL, skip_test_if_not_installed, validate_update_status
+from pixeltable.type_system import StringType, ImageType
+
+
+@pytest.mark.remote_api
+class TestOpenai:
+
+    def test_audio(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import speech, transcriptions, translations
+        t.add_column(speech=speech(t.input, model='tts-1', voice='onyx'))
+        t.add_column(speech_2=speech(t.input, model='tts-1', voice='onyx', response_format='flac', speed=1.05))
+        t.add_column(transcription=transcriptions(t.speech, model='whisper-1'))
+        t.add_column(transcription_2=transcriptions(
+            t.speech, model='whisper-1', language='en', prompt='Transcribe the contents of this recording.'
+        ))
+        t.add_column(translation=translations(t.speech, model='whisper-1'))
+        t.add_column(translation_2=translations(
+            t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.05
+        ))
+        validate_update_status(t.insert([
+            {'input': 'I am a banana.'},
+            {'input': 'Es fácil traducir del español al inglés.'}
+        ]), expected_rows=2)
+        # The audio generation -> transcription loop on these examples should be simple and clear enough
+        # that the unit test can reliably expect the output closely enough to pass these checks.
+        results = t.collect()
+        assert results[0]['transcription']['text'] in ['I am a banana.', "I'm a banana."]
+        assert results[0]['transcription_2']['text'] in ['I am a banana.', "I'm a banana."]
+        assert 'easy to translate' in results[1]['translation']['text']
+        assert 'easy to translate' in results[1]['translation_2']['text']
+
+    def test_chat_completions(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import chat_completions
+        msgs = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": t.input}
+        ]
+        t.add_column(input_msgs=msgs)
+        t.add_column(chat_output=chat_completions(model='gpt-3.5-turbo', messages=t.input_msgs))
+        # with inlined messages
+        t.add_column(chat_output_2=chat_completions(model='gpt-3.5-turbo', messages=msgs))
+        # test a bunch of the parameters
+        t.add_column(chat_output_3=chat_completions(
+            model='gpt-3.5-turbo', messages=msgs, frequency_penalty=0.1, logprobs=True, top_logprobs=3,
+            max_tokens=500, n=3, presence_penalty=0.1, seed=4171780, stop=['\n'], temperature=0.7, top_p=0.8,
+            user='pixeltable'
+        ))
+        # test with JSON output enforced
+        t.add_column(chat_output_4=chat_completions(
+            model='gpt-3.5-turbo', messages=msgs, response_format={'type': 'json_object'}
+        ))
+        # TODO Also test the `tools` and `tool_choice` parameters.
+        validate_update_status(t.insert(input='Give me an example of a typical JSON structure.'), 1)
+        result = t.collect()
+        assert len(result['chat_output'][0]['choices'][0]['message']['content']) > 0
+        assert len(result['chat_output_2'][0]['choices'][0]['message']['content']) > 0
+        assert len(result['chat_output_3'][0]['choices'][0]['message']['content']) > 0
+        assert len(result['chat_output_4'][0]['choices'][0]['message']['content']) > 0
+
+        # When OpenAI gets a request with `response_format` equal to `json_object`, but the prompt does not
+        # contain the string "json", it refuses the request.
+        # TODO This should probably not be throwing an exception, but rather logging the error in
+        # `t.chat_output_4.errormsg` etc.
+        with pytest.raises(excs.ExprEvalError) as exc_info:
+            t.insert(input='Say something interesting.')
+        assert "\\'messages\\' must contain the word \\'json\\'" in str(exc_info.value)
+
+    def test_gpt_4_vision(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'prompt': StringType(), 'img': ImageType()})
+        from pixeltable.functions.openai import chat_completions, vision
+        from pixeltable.functions.string import str_format
+        t.add_column(response=vision(prompt="What's in this image?", image=t.img))
+        # Also get the response the low-level way, by calling chat_completions
+        msgs = [
+            {'role': 'user',
+             'content': [
+                 {'type': 'text', 'text': t.prompt},
+                 {'type': 'image_url', 'image_url': {
+                     'url': str_format('data:image/png;base64,{0}', t.img.b64_encode())
+                 }}
+             ]}
+        ]
+        t.add_column(response_2=chat_completions(model='gpt-4-vision-preview', messages=msgs, max_tokens=300).choices[0].message.content)
+        validate_update_status(t.insert(prompt="What's in this image?", img=SAMPLE_IMAGE_URL), 1)
+        result = t.collect()['response_2'][0]
+        assert len(result) > 0
+
+    def test_embeddings(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        from pixeltable.functions.openai import embeddings
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        t.add_column(ada_embed=embeddings(model='text-embedding-ada-002', input=t.input))
+        t.add_column(text_3=embeddings(model='text-embedding-3-small', input=t.input, user='pixeltable'))
+        validate_update_status(t.insert(input='Say something interesting.'), 1)
+        _ = t.head()
+
+    def test_moderations(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import moderations
+        t.add_column(moderation=moderations(input=t.input))
+        t.add_column(moderation_2=moderations(input=t.input, model='text-moderation-stable'))
+        validate_update_status(t.insert(input='Say something interesting.'), 1)
+        _ = t.head()
+
+    def test_image_generations(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import image_generations
+        t.add_column(img=image_generations(t.input))
+        # Test dall-e-2 options
+        t.add_column(img_2=image_generations(
+            t.input, model='dall-e-2', size='512x512', user='pixeltable'
+        ))
+        validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
+        assert t.collect()['img'][0].size == (1024, 1024)
+        assert t.collect()['img_2'][0].size == (512, 512)
+
+    @pytest.mark.skip('Test is expensive and slow')
+    def test_image_generations_dall_e_3(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import image_generations
+        # Test dall-e-3 options
+        t.add_column(img_3=image_generations(
+            t.input, model='dall-e-3', quality='hd', size='1792x1024', style='natural', user='pixeltable'
+        ))
+        validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
+        assert t.collect()['img_3'][0].size == (1792, 1024)
+
+    # This ensures that the test will be skipped, rather than returning an error, when no API key is
+    # available (for example, when a PR runs in CI).
+    @staticmethod
+    def skip_test_if_no_openai_client() -> None:
+        try:
+            import pixeltable.functions.openai
+            _ = pixeltable.functions.openai.openai_client()
+        except excs.Error as exc:
+            pytest.skip(str(exc))
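
Taken together, the new OpenAI tests all exercise one pattern: an API call is registered as a computed column, and the call then runs once per inserted row. A minimal standalone sketch of that pattern, assuming the `openai` package is installed and an API key is configured (the table and column names below are illustrative, not taken from the diff):

import pixeltable as pxt
from pixeltable.functions.openai import chat_completions
from pixeltable.type_system import StringType

cl = pxt.Client()
t = cl.create_table('demo_tbl', {'input': StringType()})
# The messages list can embed column references; they are substituted per row.
msgs = [{'role': 'user', 'content': t.input}]
# Registered as a computed column: each insert triggers one API request.
t.add_column(output=chat_completions(model='gpt-3.5-turbo', messages=msgs))
t.insert(input='Say hello.')
print(t.collect()['output'][0]['choices'][0]['message']['content'])
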
@@ -0,0 +1,112 @@
+import pytest
+
+import pixeltable as pxt
+import pixeltable.exceptions as excs
+from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
+
+
+@pytest.mark.remote_api
+class TestTogether:
+
+    def test_completions(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('together')
+        TestTogether.skip_test_if_no_together_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': pxt.StringType()})
+        from pixeltable.functions.together import completions
+        t.add_column(output=completions(prompt=t.input, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
+        t.add_column(output_2=completions(
+            prompt=t.input,
+            model='mistralai/Mixtral-8x7B-v0.1',
+            max_tokens=300,
+            stop=['\n'],
+            temperature=0.7,
+            top_p=0.9,
+            top_k=40,
+            repetition_penalty=1.1,
+            logprobs=1,
+            echo=True,
+            n=3,
+            safety_model='Meta-Llama/Llama-Guard-7b'
+        ))
+        validate_update_status(t.insert(input='I am going to the '), 1)
+        result = t.collect()
+        assert len(result['output'][0]['choices'][0]['text']) > 0
+        assert len(result['output_2'][0]['choices'][0]['text']) > 0
+
+    def test_chat_completions(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('together')
+        TestTogether.skip_test_if_no_together_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': pxt.StringType()})
+        messages = [{'role': 'user', 'content': t.input}]
+        from pixeltable.functions.together import chat_completions
+        t.add_column(output=chat_completions(messages=messages, model='mistralai/Mixtral-8x7B-v0.1', stop=['\n']))
+        t.add_column(output_2=chat_completions(
+            messages=messages,
+            model='mistralai/Mixtral-8x7B-Instruct-v0.1',
+            max_tokens=300,
+            stop=['\n'],
+            temperature=0.7,
+            top_p=0.9,
+            top_k=40,
+            repetition_penalty=1.1,
+            logprobs=1,
+            echo=True,
+            n=3,
+            safety_model='Meta-Llama/Llama-Guard-7b',
+            response_format={'type': 'json_object'}
+        ))
+        validate_update_status(t.insert(input='Give me a typical example of a JSON structure.'), 1)
+        result = t.collect()
+        assert len(result['output'][0]['choices'][0]['message']) > 0
+        assert len(result['output_2'][0]['choices'][0]['message']) > 0
+
+    def test_embeddings(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('together')
+        TestTogether.skip_test_if_no_together_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': pxt.StringType()})
+        from pixeltable.functions.together import embeddings
+        t.add_column(embed=embeddings(input=t.input, model='togethercomputer/m2-bert-80M-8k-retrieval'))
+        validate_update_status(t.insert(input='Together AI provides a variety of embeddings models.'), 1)
+        assert len(t.collect()['embed'][0]) > 0
+
+    def test_image_generations(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('together')
+        TestTogether.skip_test_if_no_together_client()
+        cl = test_client
+        t = cl.create_table(
+            'test_tbl',
+            {'input': pxt.StringType(), 'negative_prompt': pxt.StringType(nullable=True)}
+        )
+        from pixeltable.functions.together import image_generations
+        t.add_column(img=image_generations(t.input, model='runwayml/stable-diffusion-v1-5'))
+        t.add_column(img_2=image_generations(
+            t.input,
+            model='stabilityai/stable-diffusion-2-1',
+            steps=30,
+            seed=4178780,
+            height=768,
+            width=512,
+            negative_prompt=t.negative_prompt
+        ))
+        validate_update_status(t.insert([
+            {'input': 'A friendly dinosaur playing tennis in a cornfield'},
+            {'input': 'A friendly dinosaur playing tennis in a cornfield',
+             'negative_prompt': 'tennis court'}
+        ]), 2)
+        assert t.collect()['img'][0].size == (512, 512)
+        assert t.collect()['img_2'][0].size == (512, 768)
+        assert t.collect()['img'][1].size == (512, 512)
+        assert t.collect()['img_2'][1].size == (512, 768)
+
+    # This ensures that the test will be skipped, rather than returning an error, when no API key is
+    # available (for example, when a PR runs in CI).
+    @staticmethod
+    def skip_test_if_no_together_client() -> None:
+        try:
+            import pixeltable.functions.together
+            _ = pixeltable.functions.together.together_client()
+        except excs.Error as exc:
+            pytest.skip(str(exc))
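
Both new test modules lean on validate_update_status from pixeltable/tests/utils.py, whose diff is not expanded on this page. A plausible shape for that helper, inferred only from its call sites above and from the direct `status.num_excs` assertion in the component-view hunk below; this is a hypothetical reconstruction, not the actual source:

from typing import Optional

def validate_update_status(status, expected_rows: Optional[int] = None) -> None:
    # No row should have raised while computed columns were evaluated.
    # (Attribute names here are assumptions based on the calling code.)
    assert status.num_excs == 0
    if expected_rows is not None:
        assert status.num_rows == expected_rows
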
@@ -9,7 +9,7 @@ import pixeltable as pxt
 from pixeltable import exceptions as excs
 from pixeltable.iterators import ComponentIterator
 from pixeltable.iterators.video import FrameIterator
-from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files
+from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files, validate_update_status
 from pixeltable.type_system import IntType, VideoType, JsonType
 
 class ConstantImgIterator(ComponentIterator):

@@ -157,10 +157,19 @@ class TestComponentView:
         assert status.num_excs == 0
         import urllib
         video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
-
-
-
-        assert
+        validate_update_status(
+            view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url),
+            expected_rows=view_t.where(view_t.video == video_url).count())
+        assert view_t.where(view_t.annotation != None).count() == view_t.where(view_t.video == video_url).count()
+
+        # batch update with _rowid works
+        validate_update_status(
+            view_t.batch_update(
+                [{'annotation': {'a': 1}, '_rowid': (1, 0)}, {'annotation': {'a': 1}, '_rowid': (1, 1)}]),
+            expected_rows=2)
+        with pytest.raises(AssertionError):
+            # malformed _rowid
+            view_t.batch_update([{'annotation': {'a': 1}, '_rowid': (1,)}])
 
         with pytest.raises(excs.Error) as excinfo:
             _ = cl.create_view(
@@ -16,6 +16,22 @@ from pixeltable.tests.utils import get_video_files, get_audio_files, skip_test_i
 
 
 class TestDataFrame:
+
+    @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
+    def yolo_to_coco(detections):
+        bboxes, labels = detections['bboxes'], detections['labels']
+        num_annotations = len(detections['bboxes'])
+        assert num_annotations == len(detections['labels'])
+        result = []
+        for i in range(num_annotations):
+            bbox = bboxes[i]
+            ann = {
+                'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
+                'category': labels[i],
+            }
+            result.append(ann)
+        return result
+
     def test_select_where(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
         res1 = t[t.c1, t.c2, t.c3].show(0)
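
The hunk above moves yolo_to_coco to class scope and registers it as a Pixeltable UDF with explicit JSON parameter and return types, so it can be applied directly to column expressions. A minimal sketch of the same registration pattern (the function and names below are hypothetical, mirroring the decorator usage above):

import pixeltable as pxt

@pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
def num_detections(detections):
    # Detections arrive as JSON of the form {'bboxes': [...], 'labels': [...]}.
    return {'count': len(detections['bboxes'])}

# Applied to a JSON column like any built-in function, e.g.:
#   view_t.select(num_detections(view_t.detections)).collect()
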
@@ -33,7 +49,7 @@ class TestDataFrame:
         assert res1 == res4
 
         _ = t.where(t.c2 < 10).select(t.c2, t.c2).show(0) # repeated name no error
-
+
         # duplicate select list
         with pytest.raises(excs.Error) as exc_info:
             _ = t.select(t.c1).select(t.c2).show(0)

@@ -156,7 +172,7 @@ class TestDataFrame:
         _ = df.__repr__()
         _ = df._repr_html_()
 
-    def test_count(self, test_tbl: catalog.Table,
+    def test_count(self, test_tbl: catalog.Table, small_img_tbl) -> None:
         skip_test_if_not_installed('nos')
         t = test_tbl
         cnt = t.count()

@@ -166,7 +182,7 @@
         assert cnt == 10
 
         # count() doesn't work with similarity search
-        t =
+        t = small_img_tbl
         probe = t.select(t.img).show(1)
         img = probe[0, 0]
         with pytest.raises(excs.Error):

@@ -220,7 +236,7 @@
         for tup in ds:
             for col in df.get_column_names():
                 assert col in tup
-
+
             arrval = tup['c_array']
             assert isinstance(arrval, np.ndarray)
             col_type = type_dict['c_array']

@@ -304,7 +320,7 @@
         def restrict_json_for_default_collate(obj):
             keys = ['id', 'label', 'iscrowd', 'bounding_box']
             return {k: obj[k] for k in keys}
-
+
         t = all_datatypes_tbl
         df = t.select(
             t.row_id,

@@ -370,7 +386,7 @@
         # check result cached
         ds1 = t.to_pytorch_dataset(image_format='pt')
         ds1_mtimes = _get_mtimes(ds1.path)
-
+
         ds2 = t.to_pytorch_dataset(image_format='pt')
         ds2_mtimes = _get_mtimes(ds2.path)
         assert ds2.path == ds1.path, 'result should be cached'

@@ -397,22 +413,7 @@
         view_t.add_column(detections=yolox_medium(view_t.frame))
         base_t.insert(video=get_video_files()[0])
 
-
-        def yolo_to_coco(detections):
-            bboxes, labels = detections['bboxes'], detections['labels']
-            num_annotations = len(detections['bboxes'])
-            assert num_annotations == len(detections['labels'])
-            result = []
-            for i in range(num_annotations):
-                bbox = bboxes[i]
-                ann = {
-                    'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
-                    'category': labels[i],
-                }
-                result.append(ann)
-            return result
-
-        query = view_t.select({'image': view_t.frame, 'annotations': yolo_to_coco(view_t.detections)})
+        query = view_t.select({'image': view_t.frame, 'annotations': self.yolo_to_coco(view_t.detections)})
         path = query.to_coco_dataset()
         # we get a valid COCO dataset
         coco_ds = COCO(path)
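
For reference, yolo_to_coco converts corner-format boxes [x1, y1, x2, y2] into COCO's [x, y, width, height]. A quick standalone check of that arithmetic, with made-up values:

bbox = [10.2, 20.7, 110.4, 220.9]  # illustrative [x1, y1, x2, y2] detection
coco = [
    round(bbox[0]),            # x: left edge
    round(bbox[1]),            # y: top edge
    round(bbox[2] - bbox[0]),  # width  = x2 - x1
    round(bbox[3] - bbox[1]),  # height = y2 - y1
]
assert coco == [10, 21, 100, 200]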