PyPI - pixeltable - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show

pixeltable/__init__.py +34 -6
pixeltable/catalog/__init__.py +13 -0
pixeltable/catalog/catalog.py +159 -0
pixeltable/catalog/column.py +200 -0
pixeltable/catalog/dir.py +32 -0
pixeltable/catalog/globals.py +33 -0
pixeltable/catalog/insertable_table.py +191 -0
pixeltable/catalog/named_function.py +36 -0
pixeltable/catalog/path.py +58 -0
pixeltable/catalog/path_dict.py +139 -0
pixeltable/catalog/schema_object.py +39 -0
pixeltable/catalog/table.py +581 -0
pixeltable/catalog/table_version.py +749 -0
pixeltable/catalog/table_version_path.py +133 -0
pixeltable/catalog/view.py +203 -0
pixeltable/client.py +590 -30
pixeltable/dataframe.py +540 -349
pixeltable/env.py +359 -45
pixeltable/exceptions.py +12 -21
pixeltable/exec/__init__.py +9 -0
pixeltable/exec/aggregation_node.py +78 -0
pixeltable/exec/cache_prefetch_node.py +116 -0
pixeltable/exec/component_iteration_node.py +79 -0
pixeltable/exec/data_row_batch.py +95 -0
pixeltable/exec/exec_context.py +22 -0
pixeltable/exec/exec_node.py +61 -0
pixeltable/exec/expr_eval_node.py +217 -0
pixeltable/exec/in_memory_data_node.py +69 -0
pixeltable/exec/media_validation_node.py +43 -0
pixeltable/exec/sql_scan_node.py +225 -0
pixeltable/exprs/__init__.py +24 -0
pixeltable/exprs/arithmetic_expr.py +102 -0
pixeltable/exprs/array_slice.py +71 -0
pixeltable/exprs/column_property_ref.py +77 -0
pixeltable/exprs/column_ref.py +105 -0
pixeltable/exprs/comparison.py +77 -0
pixeltable/exprs/compound_predicate.py +98 -0
pixeltable/exprs/data_row.py +195 -0
pixeltable/exprs/expr.py +586 -0
pixeltable/exprs/expr_set.py +39 -0
pixeltable/exprs/function_call.py +380 -0
pixeltable/exprs/globals.py +69 -0
pixeltable/exprs/image_member_access.py +115 -0
pixeltable/exprs/image_similarity_predicate.py +58 -0
pixeltable/exprs/inline_array.py +107 -0
pixeltable/exprs/inline_dict.py +101 -0
pixeltable/exprs/is_null.py +38 -0
pixeltable/exprs/json_mapper.py +121 -0
pixeltable/exprs/json_path.py +159 -0
pixeltable/exprs/literal.py +54 -0
pixeltable/exprs/object_ref.py +41 -0
pixeltable/exprs/predicate.py +44 -0
pixeltable/exprs/row_builder.py +355 -0
pixeltable/exprs/rowid_ref.py +94 -0
pixeltable/exprs/type_cast.py +53 -0
pixeltable/exprs/variable.py +45 -0
pixeltable/func/__init__.py +9 -0
pixeltable/func/aggregate_function.py +194 -0
pixeltable/func/batched_function.py +53 -0
pixeltable/func/callable_function.py +69 -0
pixeltable/func/expr_template_function.py +82 -0
pixeltable/func/function.py +110 -0
pixeltable/func/function_registry.py +227 -0
pixeltable/func/globals.py +36 -0
pixeltable/func/nos_function.py +202 -0
pixeltable/func/signature.py +166 -0
pixeltable/func/udf.py +163 -0
pixeltable/functions/__init__.py +52 -103
pixeltable/functions/eval.py +216 -0
pixeltable/functions/fireworks.py +34 -0
pixeltable/functions/huggingface.py +120 -0
pixeltable/functions/image.py +16 -0
pixeltable/functions/openai.py +256 -0
pixeltable/functions/pil/image.py +148 -7
pixeltable/functions/string.py +13 -0
pixeltable/functions/together.py +122 -0
pixeltable/functions/util.py +41 -0
pixeltable/functions/video.py +62 -0
pixeltable/iterators/__init__.py +3 -0
pixeltable/iterators/base.py +48 -0
pixeltable/iterators/document.py +311 -0
pixeltable/iterators/video.py +89 -0
pixeltable/metadata/__init__.py +54 -0
pixeltable/metadata/converters/convert_10.py +18 -0
pixeltable/metadata/schema.py +211 -0
pixeltable/plan.py +656 -0
pixeltable/store.py +418 -182
pixeltable/tests/conftest.py +146 -88
pixeltable/tests/functions/test_fireworks.py +42 -0
pixeltable/tests/functions/test_functions.py +60 -0
pixeltable/tests/functions/test_huggingface.py +158 -0
pixeltable/tests/functions/test_openai.py +152 -0
pixeltable/tests/functions/test_together.py +111 -0
pixeltable/tests/test_audio.py +65 -0
pixeltable/tests/test_catalog.py +27 -0
pixeltable/tests/test_client.py +14 -14
pixeltable/tests/test_component_view.py +370 -0
pixeltable/tests/test_dataframe.py +439 -0
pixeltable/tests/test_dirs.py +78 -62
pixeltable/tests/test_document.py +120 -0
pixeltable/tests/test_exprs.py +592 -135
pixeltable/tests/test_function.py +297 -67
pixeltable/tests/test_migration.py +43 -0
pixeltable/tests/test_nos.py +54 -0
pixeltable/tests/test_snapshot.py +208 -0
pixeltable/tests/test_table.py +1195 -263
pixeltable/tests/test_transactional_directory.py +42 -0
pixeltable/tests/test_types.py +5 -11
pixeltable/tests/test_video.py +151 -34
pixeltable/tests/test_view.py +530 -0
pixeltable/tests/utils.py +320 -45
pixeltable/tool/create_test_db_dump.py +149 -0
pixeltable/tool/create_test_video.py +81 -0
pixeltable/type_system.py +445 -124
pixeltable/utils/__init__.py +17 -46
pixeltable/utils/arrow.py +98 -0
pixeltable/utils/clip.py +12 -15
pixeltable/utils/coco.py +136 -0
pixeltable/utils/documents.py +39 -0
pixeltable/utils/filecache.py +195 -0
pixeltable/utils/help.py +11 -0
pixeltable/utils/hf_datasets.py +157 -0
pixeltable/utils/media_store.py +76 -0
pixeltable/utils/parquet.py +167 -0
pixeltable/utils/pytorch.py +91 -0
pixeltable/utils/s3.py +13 -0
pixeltable/utils/sql.py +17 -0
pixeltable/utils/transactional_directory.py +35 -0
pixeltable-0.2.4.dist-info/LICENSE +18 -0
pixeltable-0.2.4.dist-info/METADATA +127 -0
pixeltable-0.2.4.dist-info/RECORD +132 -0
{pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
pixeltable/catalog.py +0 -1421
pixeltable/exprs.py +0 -1745
pixeltable/function.py +0 -269
pixeltable/functions/clip.py +0 -10
pixeltable/functions/pil/__init__.py +0 -23
pixeltable/functions/tf.py +0 -21
pixeltable/index.py +0 -57
pixeltable/tests/test_dict.py +0 -24
pixeltable/tests/test_functions.py +0 -11
pixeltable/tests/test_tf.py +0 -69
pixeltable/tf.py +0 -33
pixeltable/utils/tf.py +0 -33
pixeltable/utils/video.py +0 -32
pixeltable-0.1.0.dist-info/METADATA +0 -34
pixeltable-0.1.0.dist-info/RECORD +0 -36

pixeltable/tests/test_dataframe.py ADDED Viewed

@@ -0,0 +1,439 @@
+import datetime
+import pickle
+from pathlib import Path
+from typing import Any, Dict
+import bs4
+import numpy as np
+import pytest
+import requests
+import pixeltable as pxt
+from pixeltable import catalog
+from pixeltable import exceptions as excs
+from pixeltable.iterators import FrameIterator
+from pixeltable.tests.utils import get_video_files, get_audio_files, skip_test_if_not_installed
+class TestDataFrame:
+    def test_select_where(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        res1 = t[t.c1, t.c2, t.c3].show(0)
+        res2 = t.select(t.c1, t.c2, t.c3).show(0)
+        assert res1 == res2
+        res1 = t[t.c2 < 10][t.c1, t.c2, t.c3].show(0)
+        res2 = t.where(t.c2 < 10).select(t.c1, t.c2, t.c3).show(0)
+        assert res1 == res2
+        res3 = t.where(t.c2 < 10).select(c1=t.c1, c2=t.c2, c3=t.c3).show(0)
+        assert res1 == res3
+        res4 = t.where(t.c2 < 10).select(t.c1, c2=t.c2, c3=t.c3).show(0)
+        assert res1 == res4
+        _ = t.where(t.c2 < 10).select(t.c2, t.c2).show(0) # repeated name no error
+        # duplicate select list
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c1).select(t.c2).show(0)
+        assert 'already specified' in str(exc_info.value)
+        # invalid expr in select list: Callable is not a valid literal
+        with pytest.raises(TypeError) as exc_info:
+            _ = t.select(datetime.datetime.now).show(0)
+        assert 'Not a valid literal' in str(exc_info.value)
+        # catch invalid name in select list from user input
+        # only check stuff that's not caught by python kwargs checker
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c1, **{'c2-1': t.c2}).show(0)
+        assert 'Invalid name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c1, **{'': t.c2}).show(0)
+        assert 'Invalid name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c1, **{'foo.bar': t.c2}).show(0)
+        assert 'Invalid name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c1, _c3=t.c2).show(0)
+        assert 'Invalid name' in str(exc_info.value)
+        # catch repeated name from user input
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c2, c2=t.c1).show(0)
+        assert 'Repeated column name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(t.c2+1, col_0=t.c2).show(0)
+        assert 'Repeated column name' in str(exc_info.value)
+    def test_result_set_iterator(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        res = t.select(t.c1, t.c2, t.c3).collect()
+        pd_df = res.to_pandas()
+        def check_row(row: Dict[str, Any], idx: int) -> None:
+            assert len(row) == 3
+            assert 'c1' in row
+            assert row['c1'] == pd_df['c1'][idx]
+            assert 'c2' in row
+            assert row['c2'] == pd_df['c2'][idx]
+            assert 'c3' in row
+            assert row['c3'] == pd_df['c3'][idx]
+        # row iteration
+        for idx, row in enumerate(res):
+            check_row(row, idx)
+        # row access
+        row = res[0]
+        check_row(row, 0)
+        # column access
+        col_values = res['c2']
+        assert col_values == pd_df['c2'].values.tolist()
+        # cell access
+        assert res[0, 'c2'] == pd_df['c2'][0]
+        assert res[0, 'c2'] == res[0, 1]
+        with pytest.raises(excs.Error) as exc_info:
+            _ = res['does_not_exist']
+        assert 'Invalid column name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = res[0, 'does_not_exist']
+        assert 'Invalid column name' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = res[0, 0, 0]
+        assert 'Bad index' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = res['c2', 0]
+        assert 'Bad index' in str(exc_info.value)
+    def test_order_by(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        res = t.select(t.c4, t.c2).order_by(t.c4).order_by(t.c2, asc=False).show(0)
+        # invalid expr in order_by()
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.order_by(datetime.datetime.now()).show(0)
+        assert 'Invalid expression' in str(exc_info.value)
+    def test_head_tail(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        res = t.head(10).to_pandas()
+        assert np.all(res.c2 == list(range(10)))
+        # Where is applied
+        res = t.where(t.c2 > 9).head(10).to_pandas()
+        assert np.all(res.c2 == list(range(10, 20)))
+        # order_by() is an error
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.order_by(t.c2).head(10)
+        assert 'cannot be used with order_by' in str(exc_info.value)
+        res = t.tail().to_pandas()
+        assert np.all(res.c2 == list(range(90, 100)))
+        res = t.where(t.c2 < 90).tail().to_pandas()
+        assert np.all(res.c2 == list(range(80, 90)))
+        # order_by() is an error
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.order_by(t.c2).tail(10)
+        assert 'cannot be used with order_by' in str(exc_info.value)
+    def test_describe(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        df = t.select(t.c1).where(t.c2 < 10).limit(10)
+        df.describe()
+        # TODO: how to you check the output of these?
+        _ = df.__repr__()
+        _ = df._repr_html_()
+    def test_count(self, test_tbl: catalog.Table, indexed_img_tbl: catalog.Table) -> None:
+        skip_test_if_not_installed('nos')
+        t = test_tbl
+        cnt = t.count()
+        assert cnt == 100
+        cnt = t.where(t.c2 < 10).count()
+        assert cnt == 10
+        # count() doesn't work with similarity search
+        t = indexed_img_tbl
+        probe = t.select(t.img).show(1)
+        img = probe[0, 0]
+        with pytest.raises(excs.Error):
+            _ = t.where(t.img.nearest(img)).count()
+        with pytest.raises(excs.Error):
+            _ = t.where(t.img.nearest('car')).count()
+        # for now, count() doesn't work with non-SQL Where clauses
+        with pytest.raises(excs.Error):
+            _ = t.where(t.img.width > 100).count()
+    def test_select_literal(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        res = t.select(1.0).where(t.c2 < 10).collect()
+        assert res[res.column_names()[0]] == [1.0] * 10
+    # TODO This test doesn't work on Windows due to reliance on the structure of file URLs
+    @pytest.mark.skip('Test is not portable')
+    def test_html_media_url(self, test_client: pxt.Client) -> None:
+        tab = test_client.create_table('test_html_repr', {'video': pxt.VideoType(), 'audio': pxt.AudioType()})
+        status = tab.insert(video=get_video_files()[0], audio=get_audio_files()[0])
+        assert status.num_rows == 1
+        assert status.num_excs == 0
+        res = tab.select(tab.video, tab.audio).collect()
+        doc = bs4.BeautifulSoup(res._repr_html_(), features='html.parser')
+        video_tags = doc.find_all('video')
+        assert len(video_tags) == 1
+        audio_tags = doc.find_all('audio')
+        assert len(audio_tags) == 1
+        # get the source elements and test their src attributes
+        for tag in video_tags + audio_tags:
+            sources = tag.find_all('source')
+            assert len(sources) == 1
+            for src in sources:
+                response = requests.get(src['src'])
+                assert response.status_code == 200
+    def test_to_pytorch_dataset(self, all_datatypes_tbl: catalog.Table):
+        """ tests all types are handled correctly in this conversion
+        """
+        skip_test_if_not_installed('torch')
+        import torch
+        t = all_datatypes_tbl
+        df = t.where(t.row_id < 1)
+        assert df.count() > 0
+        ds = df.to_pytorch_dataset()
+        type_dict = dict(zip(df.get_column_names(),df.get_column_types()))
+        for tup in ds:
+            for col in df.get_column_names():
+                assert col in tup
+            arrval = tup['c_array']
+            assert isinstance(arrval, np.ndarray)
+            col_type = type_dict['c_array']
+            assert arrval.dtype == col_type.numpy_dtype()
+            assert arrval.shape == col_type.shape
+            assert arrval.dtype == np.float32
+            assert arrval.flags["WRITEABLE"], 'required by pytorch collate function'
+            assert isinstance(tup['c_bool'], bool)
+            assert isinstance(tup['c_int'], int)
+            assert isinstance(tup['c_float'], float)
+            assert isinstance(tup['c_timestamp'], float)
+            assert torch.is_tensor(tup['c_image'])
+            assert isinstance(tup['c_video'], str)
+            assert isinstance(tup['c_json'], dict)
+    def test_to_pytorch_image_format(self, all_datatypes_tbl: catalog.Table) -> None:
+        """ tests the image_format parameter is honored
+        """
+        skip_test_if_not_installed('torch')
+        import torch
+        import torchvision.transforms as T
+        W, H = 220, 224 # make different from each other
+        t = all_datatypes_tbl
+        df = t.select(
+            t.row_id,
+            t.c_image,
+            c_image_xformed=t.c_image.resize([W, H]).convert('RGB')
+        ).where(t.row_id < 1)
+        pandas_df = df.show().to_pandas()
+        im_plain = pandas_df['c_image'].values[0]
+        im_xformed = pandas_df['c_image_xformed'].values[0]
+        assert pandas_df.shape[0] == 1
+        ds = df.to_pytorch_dataset(image_format='np')
+        ds_ptformat = df.to_pytorch_dataset(image_format='pt')
+        elt_count = 0
+        for elt, elt_pt in zip(ds, ds_ptformat):
+            arr_plain = elt['c_image']
+            assert isinstance(arr_plain, np.ndarray)
+            assert arr_plain.flags["WRITEABLE"], 'required by pytorch collate function'
+            # NB: compare numpy array bc PIL.Image object itself is not using same file.
+            assert (arr_plain == np.array(im_plain)).all(), 'numpy image should be the same as the original'
+            arr_xformed = elt['c_image_xformed']
+            assert isinstance(arr_xformed, np.ndarray)
+            assert arr_xformed.flags["WRITEABLE"], 'required by pytorch collate function'
+            assert arr_xformed.shape == (H, W, 3)
+            assert arr_xformed.dtype == np.uint8
+            # same as above, compare numpy array bc PIL.Image object itself is not using same file.
+            assert (arr_xformed == np.array(im_xformed)).all(),\
+                'numpy image array for xformed image should be the same as the original'
+            # now compare pytorch version
+            arr_pt = elt_pt['c_image']
+            assert torch.is_tensor(arr_pt)
+            arr_pt = elt_pt['c_image_xformed']
+            assert torch.is_tensor(arr_pt)
+            assert arr_pt.shape == (3, H, W)
+            assert arr_pt.dtype == torch.float32
+            assert (0.0 <= arr_pt).all()
+            assert (arr_pt <= 1.0).all()
+            assert torch.isclose(T.ToTensor()(arr_xformed), arr_pt).all(),\
+                'pytorch image should be consistent with numpy image'
+            elt_count += 1
+        assert elt_count == 1
+    @pytest.mark.skip('Flaky test (fails intermittently)')
+    def test_to_pytorch_dataloader(self, all_datatypes_tbl: catalog.Table) -> None:
+        """ Tests the dataset works well with pytorch dataloader:
+            1. compatibility with multiprocessing
+            2. compatibility of all types with default collate_fn
+        """
+        skip_test_if_not_installed('torch')
+        import torch.utils.data
+        @pxt.udf(param_types=[pxt.JsonType()], return_type=pxt.JsonType())
+        def restrict_json_for_default_collate(obj):
+            keys = ['id', 'label', 'iscrowd', 'bounding_box']
+            return {k: obj[k] for k in keys}
+        t = all_datatypes_tbl
+        df = t.select(
+            t.row_id,
+            t.c_int,
+            t.c_float,
+            t.c_bool,
+            t.c_timestamp,
+            t.c_array,
+            t.c_video,
+            # default collate_fn doesnt support null values, nor lists of different lengths
+            # but does allow some dictionaries if they are uniform
+            c_json = restrict_json_for_default_collate(t.c_json.detections[0]),
+            # images must be uniform shape for pytorch collate_fn to not fail
+            c_image=t.c_image.resize([220, 224]).convert('RGB')
+        )
+        df_size = df.count()
+        ds = df.to_pytorch_dataset(image_format='pt')
+        # test serialization:
+        #  - pickle.dumps() and pickle.loads() must work so that
+        #   we can use num_workers > 0
+        x = pickle.dumps(ds)
+        _ = pickle.loads(x)
+        # test we get all rows
+        def check_recover_all_rows(ds, size : int, **kwargs):
+            dl = torch.utils.data.DataLoader(ds, **kwargs)
+            loaded_ids = set()
+            for batch in dl:
+                for row_id in batch['row_id']:
+                    val = int(row_id) # np.int -> int or will fail set equality test below.
+                    assert val not in loaded_ids, val
+                    loaded_ids.add(val)
+            assert loaded_ids == set(range(size))
+        # check different number of workers
+        check_recover_all_rows(ds, size=df_size, batch_size=3, num_workers=0) # within this process
+        check_recover_all_rows(ds, size=df_size, batch_size=3, num_workers=2) # two separate processes
+        # check edge case where some workers get no rows
+        short_size = 1
+        df_short = df.where(t.row_id < short_size)
+        ds_short = df_short.to_pytorch_dataset(image_format='pt')
+        check_recover_all_rows(ds_short, size=short_size, batch_size=13, num_workers=short_size+1)
+    def test_pytorch_dataset_caching(self, all_datatypes_tbl: catalog.Table) -> None:
+        """ Tests that dataset caching works
+            1. using the same dataset twice in a row uses the cache
+            2. adding a row to the table invalidates the cached version
+            3. changing the select list invalidates the cached version
+        """
+        skip_test_if_not_installed('torch')
+        t = all_datatypes_tbl
+        t.drop_column('c_video') # null value video column triggers internal assertions in DataRow
+        # see https://github.com/pixeltable/pixeltable/issues/38
+        t.drop_column('c_array') # no support yet for null array values in the pytorch dataset
+        def _get_mtimes(dir: Path):
+            return {p.name: p.stat().st_mtime for p in dir.iterdir()}
+        #  check result cached
+        ds1 = t.to_pytorch_dataset(image_format='pt')
+        ds1_mtimes = _get_mtimes(ds1.path)
+        ds2 = t.to_pytorch_dataset(image_format='pt')
+        ds2_mtimes = _get_mtimes(ds2.path)
+        assert ds2.path == ds1.path, 'result should be cached'
+        assert ds2_mtimes == ds1_mtimes, 'no extra file system work should have occurred'
+        # check invalidation on insert
+        t_size = t.count()
+        t.insert(row_id=t_size)
+        ds3 = t.to_pytorch_dataset(image_format='pt')
+        assert ds3.path != ds1.path, 'different path should be used'
+        # check select list invalidation
+        ds4 = t.select(t.row_id).to_pytorch_dataset(image_format='pt')
+        assert ds4.path != ds3.path, 'different select list, hence different path should be used'
+    def test_to_coco(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('nos')
+        from pycocotools.coco import COCO
+        cl = test_client
+        base_t = cl.create_table('videos', {'video': pxt.VideoType()})
+        args = {'video': base_t.video, 'fps': 1}
+        view_t = cl.create_view('frames', base_t, iterator_class=FrameIterator, iterator_args=args)
+        from pixeltable.functions.nos.object_detection_2d import yolox_medium
+        view_t.add_column(detections=yolox_medium(view_t.frame))
+        base_t.insert(video=get_video_files()[0])
+        @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
+        def yolo_to_coco(detections):
+            bboxes, labels = detections['bboxes'], detections['labels']
+            num_annotations = len(detections['bboxes'])
+            assert num_annotations == len(detections['labels'])
+            result = []
+            for i in range(num_annotations):
+                bbox = bboxes[i]
+                ann = {
+                    'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
+                    'category': labels[i],
+                }
+                result.append(ann)
+            return result
+        query = view_t.select({'image': view_t.frame, 'annotations': yolo_to_coco(view_t.detections)})
+        path = query.to_coco_dataset()
+        # we get a valid COCO dataset
+        coco_ds = COCO(path)
+        assert len(coco_ds.imgs) == view_t.count()
+        # we call to_coco_dataset() again and get the cached dataset
+        new_path = query.to_coco_dataset()
+        assert path == new_path
+        # the cache is invalidated when we add more data
+        base_t.insert(video=get_video_files()[1])
+        new_path = query.to_coco_dataset()
+        assert path != new_path
+        coco_ds = COCO(new_path)
+        assert len(coco_ds.imgs) == view_t.count()
+        # incorrect select list
+        with pytest.raises(excs.Error) as exc_info:
+            _ = view_t.select({'image': view_t.frame, 'annotations': view_t.detections}).to_coco_dataset()
+        assert '"annotations" is not a list' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = view_t.select(view_t.detections).to_coco_dataset()
+        assert 'missing key "image"' in str(exc_info.value).lower()

pixeltable/tests/test_dirs.py CHANGED Viewed

@@ -1,91 +1,107 @@
 import pytest
-import pixeltable as pt
-from pixeltable import exceptions as exc
+import pixeltable as pxt
+from pixeltable import exceptions as excs
 from pixeltable.tests.utils import make_tbl
-from pixeltable import catalog
 class TestDirs:
-    def test_create(self, test_db: catalog.Db) -> None:
-        db = test_db
+    def test_create(self, test_client: pxt.Client) -> None:
+        cl = test_client
         dirs = ['dir1', 'dir1.sub1', 'dir1.sub1.subsub1']
         for name in dirs:
-            db.create_dir(name)
+            cl.create_dir(name)
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('1dir')
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('_dir1')
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('dir 1')
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('dir1..sub2')
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('dir1.sub2.')
-        with pytest.raises(exc.BadFormatError):
-            db.create_dir('dir1:sub2.')
+        # invalid names
+        with pytest.raises(excs.Error):
+            cl.create_dir('1dir')
+        with pytest.raises(excs.Error):
+            cl.create_dir('_dir1')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir 1')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1..sub2')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1.sub2.')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1:sub2.')
         # existing dirs
-        with pytest.raises(exc.DuplicateNameError):
-            db.create_dir('dir1')
-        with pytest.raises(exc.DuplicateNameError):
-            db.create_dir('dir1.sub1')
-        with pytest.raises(exc.DuplicateNameError):
-            db.create_dir('dir1.sub1.subsub1')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1')
+        cl.create_dir('dir1', ignore_errors=True)
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1.sub1')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1.sub1.subsub1')
         # existing table
-        make_tbl(db, 'dir1.t1')
-        with pytest.raises(exc.DuplicateNameError):
-            db.create_dir('dir1.t1')
+        make_tbl(cl, 'dir1.t1')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir1.t1')
-        with pytest.raises(exc.UnknownEntityError):
-            db.create_dir('dir2.sub2')
-        make_tbl(db, 't2')
-        with pytest.raises(exc.UnknownEntityError):
-            db.create_dir('t2.sub2')
+        with pytest.raises(excs.Error):
+            cl.create_dir('dir2.sub2')
+        make_tbl(cl, 't2')
+        with pytest.raises(excs.Error):
+            cl.create_dir('t2.sub2')
         # new client: force loading from store
-        cl2 = pt.Client()
-        db = cl2.get_db('test')
+        cl2 = pxt.Client(reload=True)
-        listing = db.list_dirs(recursive=True)
+        listing = cl2.list_dirs(recursive=True)
         assert listing == dirs
-        listing = db.list_dirs(recursive=False)
+        listing = cl2.list_dirs(recursive=False)
         assert listing == ['dir1']
-        listing = db.list_dirs('dir1', recursive=True)
+        listing = cl2.list_dirs('dir1', recursive=True)
         assert listing == ['dir1.sub1', 'dir1.sub1.subsub1']
-        listing = db.list_dirs('dir1', recursive=False)
+        listing = cl2.list_dirs('dir1', recursive=False)
         assert listing == ['dir1.sub1']
-        listing = db.list_dirs('dir1.sub1', recursive=True)
+        listing = cl2.list_dirs('dir1.sub1', recursive=True)
         assert listing == ['dir1.sub1.subsub1']
-        listing = db.list_dirs('dir1.sub1', recursive=False)
+        listing = cl2.list_dirs('dir1.sub1', recursive=False)
         assert listing == ['dir1.sub1.subsub1']
-    def test_rm(self, test_db: catalog.Db) -> None:
-        db = test_db
+    def test_rm(self, test_client: pxt.Client) -> None:
+        cl = test_client
         dirs = ['dir1', 'dir1.sub1', 'dir1.sub1.subsub1']
         for name in dirs:
-            db.create_dir(name)
-        make_tbl(db, 't1')
-        make_tbl(db, 'dir1.t1')
+            cl.create_dir(name)
+        make_tbl(cl, 't1')
+        make_tbl(cl, 'dir1.t1')
-        with pytest.raises(exc.BadFormatError):
-            db.rm_dir('1dir')
-        with pytest.raises(exc.BadFormatError):
-            db.rm_dir('dir1..sub1')
-        with pytest.raises(exc.UnknownEntityError):
-            db.rm_dir('dir2')
-        with pytest.raises(exc.UnknownEntityError):
-            db.rm_dir('t1')
+        # bad name
+        with pytest.raises(excs.Error):
+            cl.rm_dir('1dir')
+        # bad path
+        with pytest.raises(excs.Error):
+            cl.rm_dir('dir1..sub1')
+        # doesn't exist
+        with pytest.raises(excs.Error):
+            cl.rm_dir('dir2')
+        # not empty
+        with pytest.raises(excs.Error):
+            cl.rm_dir('dir1')
-        with pytest.raises(exc.DirectoryNotEmptyError):
-            db.rm_dir('dir1')
+        cl.rm_dir('dir1.sub1.subsub1')
+        assert cl.list_dirs('dir1.sub1') == []
-    def test_rename_tbl(self, test_db: catalog.Db) -> None:
-        db = test_db
-        db.create_dir('dir1')
-        make_tbl(db, 'dir1.t1')
-        assert db.list_tables('dir1') == ['dir1.t1']
-        db.rename_table('dir1.t1', 't2')
-        assert db.list_tables('dir1') == ['dir1.t2']
+        # check after reloading
+        cl = pxt.Client(reload=True)
+        assert cl.list_dirs('dir1.sub1') == []
+    def test_move(self, test_client: pxt.Client) -> None:
+        cl = test_client
+        cl.create_dir('dir1')
+        cl.create_dir('dir1.sub1')
+        make_tbl(cl, 'dir1.sub1.t1')
+        assert cl.list_tables('dir1') == ['dir1.sub1.t1']
+        cl.move('dir1.sub1.t1', 'dir1.sub1.t2')
+        assert cl.list_tables('dir1') == ['dir1.sub1.t2']
+        cl.create_dir('dir2')
+        cl.move('dir1', 'dir2.dir1')
+        assert cl.list_tables('dir2') == ['dir2.dir1.sub1.t2']
+        # new client: force loading from store
+        cl2 = pxt.Client(reload=True)
+        assert cl2.list_tables('dir2') == ['dir2.dir1.sub1.t2']

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

Potentially problematic release.

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl