PyPI - pixeltable - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

pixeltable 0.1.0 (py3-none-any.whl) → 0.2.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show

pixeltable/__init__.py +34 -6
pixeltable/catalog/__init__.py +13 -0
pixeltable/catalog/catalog.py +159 -0
pixeltable/catalog/column.py +200 -0
pixeltable/catalog/dir.py +32 -0
pixeltable/catalog/globals.py +33 -0
pixeltable/catalog/insertable_table.py +191 -0
pixeltable/catalog/named_function.py +36 -0
pixeltable/catalog/path.py +58 -0
pixeltable/catalog/path_dict.py +139 -0
pixeltable/catalog/schema_object.py +39 -0
pixeltable/catalog/table.py +581 -0
pixeltable/catalog/table_version.py +749 -0
pixeltable/catalog/table_version_path.py +133 -0
pixeltable/catalog/view.py +203 -0
pixeltable/client.py +590 -30
pixeltable/dataframe.py +540 -349
pixeltable/env.py +359 -45
pixeltable/exceptions.py +12 -21
pixeltable/exec/__init__.py +9 -0
pixeltable/exec/aggregation_node.py +78 -0
pixeltable/exec/cache_prefetch_node.py +116 -0
pixeltable/exec/component_iteration_node.py +79 -0
pixeltable/exec/data_row_batch.py +95 -0
pixeltable/exec/exec_context.py +22 -0
pixeltable/exec/exec_node.py +61 -0
pixeltable/exec/expr_eval_node.py +217 -0
pixeltable/exec/in_memory_data_node.py +69 -0
pixeltable/exec/media_validation_node.py +43 -0
pixeltable/exec/sql_scan_node.py +225 -0
pixeltable/exprs/__init__.py +24 -0
pixeltable/exprs/arithmetic_expr.py +102 -0
pixeltable/exprs/array_slice.py +71 -0
pixeltable/exprs/column_property_ref.py +77 -0
pixeltable/exprs/column_ref.py +105 -0
pixeltable/exprs/comparison.py +77 -0
pixeltable/exprs/compound_predicate.py +98 -0
pixeltable/exprs/data_row.py +195 -0
pixeltable/exprs/expr.py +586 -0
pixeltable/exprs/expr_set.py +39 -0
pixeltable/exprs/function_call.py +380 -0
pixeltable/exprs/globals.py +69 -0
pixeltable/exprs/image_member_access.py +115 -0
pixeltable/exprs/image_similarity_predicate.py +58 -0
pixeltable/exprs/inline_array.py +107 -0
pixeltable/exprs/inline_dict.py +101 -0
pixeltable/exprs/is_null.py +38 -0
pixeltable/exprs/json_mapper.py +121 -0
pixeltable/exprs/json_path.py +159 -0
pixeltable/exprs/literal.py +54 -0
pixeltable/exprs/object_ref.py +41 -0
pixeltable/exprs/predicate.py +44 -0
pixeltable/exprs/row_builder.py +355 -0
pixeltable/exprs/rowid_ref.py +94 -0
pixeltable/exprs/type_cast.py +53 -0
pixeltable/exprs/variable.py +45 -0
pixeltable/func/__init__.py +9 -0
pixeltable/func/aggregate_function.py +194 -0
pixeltable/func/batched_function.py +53 -0
pixeltable/func/callable_function.py +69 -0
pixeltable/func/expr_template_function.py +82 -0
pixeltable/func/function.py +110 -0
pixeltable/func/function_registry.py +227 -0
pixeltable/func/globals.py +36 -0
pixeltable/func/nos_function.py +202 -0
pixeltable/func/signature.py +166 -0
pixeltable/func/udf.py +163 -0
pixeltable/functions/__init__.py +52 -103
pixeltable/functions/eval.py +216 -0
pixeltable/functions/fireworks.py +34 -0
pixeltable/functions/huggingface.py +120 -0
pixeltable/functions/image.py +16 -0
pixeltable/functions/openai.py +256 -0
pixeltable/functions/pil/image.py +148 -7
pixeltable/functions/string.py +13 -0
pixeltable/functions/together.py +122 -0
pixeltable/functions/util.py +41 -0
pixeltable/functions/video.py +62 -0
pixeltable/iterators/__init__.py +3 -0
pixeltable/iterators/base.py +48 -0
pixeltable/iterators/document.py +311 -0
pixeltable/iterators/video.py +89 -0
pixeltable/metadata/__init__.py +54 -0
pixeltable/metadata/converters/convert_10.py +18 -0
pixeltable/metadata/schema.py +211 -0
pixeltable/plan.py +656 -0
pixeltable/store.py +418 -182
pixeltable/tests/conftest.py +146 -88
pixeltable/tests/functions/test_fireworks.py +42 -0
pixeltable/tests/functions/test_functions.py +60 -0
pixeltable/tests/functions/test_huggingface.py +158 -0
pixeltable/tests/functions/test_openai.py +152 -0
pixeltable/tests/functions/test_together.py +111 -0
pixeltable/tests/test_audio.py +65 -0
pixeltable/tests/test_catalog.py +27 -0
pixeltable/tests/test_client.py +14 -14
pixeltable/tests/test_component_view.py +370 -0
pixeltable/tests/test_dataframe.py +439 -0
pixeltable/tests/test_dirs.py +78 -62
pixeltable/tests/test_document.py +120 -0
pixeltable/tests/test_exprs.py +592 -135
pixeltable/tests/test_function.py +297 -67
pixeltable/tests/test_migration.py +43 -0
pixeltable/tests/test_nos.py +54 -0
pixeltable/tests/test_snapshot.py +208 -0
pixeltable/tests/test_table.py +1195 -263
pixeltable/tests/test_transactional_directory.py +42 -0
pixeltable/tests/test_types.py +5 -11
pixeltable/tests/test_video.py +151 -34
pixeltable/tests/test_view.py +530 -0
pixeltable/tests/utils.py +320 -45
pixeltable/tool/create_test_db_dump.py +149 -0
pixeltable/tool/create_test_video.py +81 -0
pixeltable/type_system.py +445 -124
pixeltable/utils/__init__.py +17 -46
pixeltable/utils/arrow.py +98 -0
pixeltable/utils/clip.py +12 -15
pixeltable/utils/coco.py +136 -0
pixeltable/utils/documents.py +39 -0
pixeltable/utils/filecache.py +195 -0
pixeltable/utils/help.py +11 -0
pixeltable/utils/hf_datasets.py +157 -0
pixeltable/utils/media_store.py +76 -0
pixeltable/utils/parquet.py +167 -0
pixeltable/utils/pytorch.py +91 -0
pixeltable/utils/s3.py +13 -0
pixeltable/utils/sql.py +17 -0
pixeltable/utils/transactional_directory.py +35 -0
pixeltable-0.2.4.dist-info/LICENSE +18 -0
pixeltable-0.2.4.dist-info/METADATA +127 -0
pixeltable-0.2.4.dist-info/RECORD +132 -0
{pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
pixeltable/catalog.py +0 -1421
pixeltable/exprs.py +0 -1745
pixeltable/function.py +0 -269
pixeltable/functions/clip.py +0 -10
pixeltable/functions/pil/__init__.py +0 -23
pixeltable/functions/tf.py +0 -21
pixeltable/index.py +0 -57
pixeltable/tests/test_dict.py +0 -24
pixeltable/tests/test_functions.py +0 -11
pixeltable/tests/test_tf.py +0 -69
pixeltable/tf.py +0 -33
pixeltable/utils/tf.py +0 -33
pixeltable/utils/video.py +0 -32
pixeltable-0.1.0.dist-info/METADATA +0 -34
pixeltable-0.1.0.dist-info/RECORD +0 -36

pixeltable/tests/test_exprs.py CHANGED Viewed

@@ -1,45 +1,51 @@
-import sqlalchemy as sql
+import json
+import urllib.parse
+import urllib.request
+from typing import List, Dict
 import pytest
+import sqlalchemy as sql
+import pixeltable as pxt
+import pixeltable.func as func
 from pixeltable import catalog
-from pixeltable.type_system import StringType, BoolType, IntType, ImageType, ArrayType, ColumnType, FloatType
-from pixeltable.function import Function
-from pixeltable.exprs import Expr, CompoundPredicate, FunctionCall, Literal, InlineDict, InlineArray, ColumnRef
+from pixeltable import exceptions as excs
+from pixeltable import exprs
+from pixeltable.exprs import Expr, ColumnRef
 from pixeltable.exprs import RELATIVE_PATH_ROOT as R
-from pixeltable.functions import udf_call, dict_map, cast, sum, count
+from pixeltable.functions import cast, sum, count
 from pixeltable.functions.pil.image import blend
-from pixeltable.functions.clip import encode_image
-from pixeltable import exceptions as exc
-from pixeltable.tests import utils
+from pixeltable.iterators import FrameIterator
+from pixeltable.tests.utils import get_image_files, skip_test_if_not_installed
+from pixeltable.type_system import StringType, BoolType, IntType, ArrayType, ColumnType, FloatType, \
+    VideoType
 class TestExprs:
-    # This breaks with exception 'cannot pickle _thread._local obj'
-    # sum = Function(
-    #     IntType(), [IntType()],
-    #     init_fn=lambda: TestExprs.SumAggregator(), update_fn=SumAggregator.update, value_fn=SumAggregator.value)
     def test_basic(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
-        assert isinstance(t['c1'] < 'a', Expr)
+        assert t['c1'].equals(t.c1)
+        assert t['c7']['*'].f5.equals(t.c7['*'].f5)
+        assert isinstance(t.c1 == None, Expr)
         assert isinstance(t.c1 < 'a', Expr)
-        assert isinstance(t['c1'] <= 'a', Expr)
         assert isinstance(t.c1 <= 'a', Expr)
-        assert isinstance(t['c1'] == 'a', Expr)
         assert isinstance(t.c1 == 'a', Expr)
-        assert isinstance(t['c1'] != 'a', Expr)
         assert isinstance(t.c1 != 'a', Expr)
-        assert isinstance(t['c1'] > 'a', Expr)
         assert isinstance(t.c1 > 'a', Expr)
-        assert isinstance(t['c1'] >= 'a', Expr)
         assert isinstance(t.c1 >= 'a', Expr)
         assert isinstance((t.c1 == 'a') & (t.c2 < 5), Expr)
         assert isinstance((t.c1 == 'a') | (t.c2 < 5), Expr)
         assert isinstance(~(t.c1 == 'a'), Expr)
+        with pytest.raises(AttributeError) as excinfo:
+            _ = t.does_not_exist
+        assert 'unknown' in str(excinfo.value).lower()
     def test_compound_predicates(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
         # compound predicates that can be fully evaluated in SQL
+        _ = t.where((t.c1 == 'test string') & (t.c6.f1 > 50)).collect()
+        _ = t.where((t.c1 == 'test string') & (t.c2 > 50)).collect()
         e = ((t.c1 == 'test string') & (t.c2 > 50)).sql_expr()
         assert len(e.clauses) == 2
@@ -52,46 +58,192 @@ class TestExprs:
         e = (~(t.c1 == 'test string')).sql_expr()
         assert isinstance(e, sql.sql.expression.BinaryExpression)
+        with pytest.raises(TypeError) as exc_info:
+            _ = t.where((t.c1 == 'test string') or (t.c6.f1 > 50)).collect()
+        assert 'cannot be used in conjunction with python boolean operators' in str(exc_info.value).lower()
         # compound predicates with Python functions
-        udf = Function(BoolType(), [StringType()], eval_fn=lambda a: True)
-        udf2 = Function(BoolType(), [IntType()], eval_fn=lambda a: True)
-        # & can be split
-        p = (t.c1 == 'test string') & udf(t.c1)
-        assert p.sql_expr() is None
-        sql_pred, other_pred = p.extract_sql_predicate()
-        assert isinstance(sql_pred, sql.sql.expression.BinaryExpression)
-        assert isinstance(other_pred, FunctionCall)
-        p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50)
-        assert p.sql_expr() is None
-        sql_pred, other_pred = p.extract_sql_predicate()
-        assert len(sql_pred.clauses) == 2
-        assert isinstance(other_pred, FunctionCall)
-        p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50) & udf2(t.c2)
-        assert p.sql_expr() is None
-        sql_pred, other_pred = p.extract_sql_predicate()
-        assert len(sql_pred.clauses) == 2
-        assert isinstance(other_pred, CompoundPredicate)
-        # | cannot be split
-        p = (t.c1 == 'test string') | udf(t.c1)
-        assert p.sql_expr() is None
-        sql_pred, other_pred = p.extract_sql_predicate()
-        assert sql_pred is None
-        assert isinstance(other_pred, CompoundPredicate)
-    def test_basic_filter(self, test_tbl: catalog.Table) -> None:
+        @pxt.udf(return_type=BoolType(), param_types=[StringType()])
+        def udf(_: str) -> bool:
+            return True
+        @pxt.udf(return_type=BoolType(), param_types=[IntType()])
+        def udf2(_: int) -> bool:
+            return True
+        # TODO: find a way to test this
+        # # & can be split
+        # p = (t.c1 == 'test string') & udf(t.c1)
+        # assert p.sql_expr() is None
+        # sql_pred, other_pred = p.extract_sql_predicate()
+        # assert isinstance(sql_pred, sql.sql.expression.BinaryExpression)
+        # assert isinstance(other_pred, FunctionCall)
+        #
+        # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50)
+        # assert p.sql_expr() is None
+        # sql_pred, other_pred = p.extract_sql_predicate()
+        # assert len(sql_pred.clauses) == 2
+        # assert isinstance(other_pred, FunctionCall)
+        #
+        # p = (t.c1 == 'test string') & udf(t.c1) & (t.c2 > 50) & udf2(t.c2)
+        # assert p.sql_expr() is None
+        # sql_pred, other_pred = p.extract_sql_predicate()
+        # assert len(sql_pred.clauses) == 2
+        # assert isinstance(other_pred, CompoundPredicate)
+        #
+        # # | cannot be split
+        # p = (t.c1 == 'test string') | udf(t.c1)
+        # assert p.sql_expr() is None
+        # sql_pred, other_pred = p.extract_sql_predicate()
+        # assert sql_pred is None
+        # assert isinstance(other_pred, CompoundPredicate)
+    def test_filters(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
         _ = t[t.c1 == 'test string'].show()
         print(_)
         _ = t[t.c2 > 50].show()
         print(_)
+        _ = t[t.c1n == None].show()
+        print(_)
+        _ = t[t.c1n != None].show(0)
+        print(_)
+    def test_exception_handling(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        # error in expr that's handled in SQL
+        with pytest.raises(excs.Error):
+            _ = t[(t.c2 + 1) / t.c2].show()
+        # error in expr that's handled in Python
+        with pytest.raises(excs.Error):
+            _ = t[(t.c6.f2 + 1) / (t.c2 - 10)].show()
+        # the same, but with an inline function
+        @pxt.udf(return_type=FloatType(), param_types=[IntType(), IntType()])
+        def f(a: int, b: int) -> float:
+            return a / b
+        with pytest.raises(excs.Error):
+            _ = t[f(t.c2 + 1, t.c2)].show()
+        # error in agg.init()
+        @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
+        class Aggregator(pxt.Aggregator):
+            def __init__(self):
+                self.sum = 1 / 0
+            def update(self, val):
+                pass
+            def value(self):
+                return 1
+        with pytest.raises(excs.Error):
+            _ = t[agg(t.c2)].show()
+        # error in agg.update()
+        @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
+        class Aggregator(pxt.Aggregator):
+            def __init__(self):
+                self.sum = 0
+            def update(self, val):
+                self.sum += 1 / val
+            def value(self):
+                return 1
+        with pytest.raises(excs.Error):
+            _ = t[agg(t.c2 - 10)].show()
+        # error in agg.value()
+        @pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
+        class Aggregator(pxt.Aggregator):
+            def __init__(self):
+                self.sum = 0
+            def update(self, val):
+                self.sum += val
+            def value(self):
+                return 1 / self.sum
+        with pytest.raises(excs.Error):
+            _ = t[t.c2 <= 2][agg(t.c2 - 1)].show()
+    def test_props(self, test_tbl: catalog.Table, img_tbl: catalog.Table) -> None:
+        t = test_tbl
+        # errortype/-msg for computed column
+        res = t.select(error=t.c8.errortype).collect()
+        assert res.to_pandas()['error'].isna().all()
+        res = t.select(error=t.c8.errormsg).collect()
+        assert res.to_pandas()['error'].isna().all()
+        img_t = img_tbl
+        # fileurl
+        res = img_t.select(img_t.img.fileurl).show(0).to_pandas()
+        stored_urls = set(res.iloc[:, 0])
+        assert len(stored_urls) == len(res)
+        all_urls = set(urllib.parse.urljoin('file:', urllib.request.pathname2url(path)) for path in get_image_files())
+        assert stored_urls <= all_urls
+        # localpath
+        res = img_t.select(img_t.img.localpath).show(0).to_pandas()
+        stored_paths = set(res.iloc[:, 0])
+        assert len(stored_paths) == len(res)
+        all_paths  = set(get_image_files())
+        assert stored_paths <= all_paths
+        # errortype/-msg for image column
+        res = img_t.select(error=img_t.img.errortype).collect().to_pandas()
+        assert res['error'].isna().all()
+        res = img_t.select(error=img_t.img.errormsg).collect().to_pandas()
+        assert res['error'].isna().all()
+        for c in [t.c1, t.c1n, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7]:
+            # errortype/errormsg only applies to stored computed and media columns
+            with pytest.raises(excs.Error) as excinfo:
+                _ = t.select(c.errortype).show()
+            assert 'only valid for' in str(excinfo.value)
+            with pytest.raises(excs.Error) as excinfo:
+                _ = t.select(c.errormsg).show()
+            assert 'only valid for' in str(excinfo.value)
+            # fileurl/localpath only applies to media columns
+            with pytest.raises(excs.Error) as excinfo:
+                _ = t.select(t.c1.fileurl).show()
+            assert 'only valid for' in str(excinfo.value)
+            with pytest.raises(excs.Error) as excinfo:
+                _ = t.select(t.c1.localpath).show()
+            assert 'only valid for' in str(excinfo.value)
+        # fileurl/localpath doesn't apply to unstored computed img columns
+        img_t.add_column(c9=img_t.img.rotate(30))
+        with pytest.raises(excs.Error) as excinfo:
+            _ = img_t.select(img_t.c9.localpath).show()
+        assert 'computed unstored' in str(excinfo.value)
+    def test_null_args(self, test_client: pxt.Client) -> None:
+        # create table with two int columns
+        schema = {'c1': FloatType(nullable=True), 'c2': FloatType(nullable=True)}
+        t = test_client.create_table('test', schema)
+        # computed column that doesn't allow nulls
+        t.add_column(c3=lambda c1, c2: c1 + c2, type=FloatType(nullable=False))
+        # function that does allow nulls
+        @pxt.udf(return_type=FloatType(nullable=True),
+                 param_types=[FloatType(nullable=False), FloatType(nullable=True)])
+        def f(a: int, b: int) -> int:
+            if b is None:
+                return a
+            return a + b
+        t.add_column(c4=f(t.c1, t.c2))
+        # data that tests all combinations of nulls
+        data = [{'c1': 1.0, 'c2': 1.0}, {'c1': 1.0, 'c2': None}, {'c1': None, 'c2': 1.0}, {'c1': None, 'c2': None}]
+        status = t.insert(data, fail_on_exception=False)
+        assert status.num_rows == len(data)
+        assert status.num_excs == len(data) - 1
+        result = t.select(t.c3, t.c4).collect()
+        assert result['c3'] == [2.0, None, None, None]
+        assert result['c4'] == [2.0, 1.0, None, None]
     def test_arithmetic_exprs(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
+        _ = t[t.c2, t.c6.f3, t.c2 + t.c6.f3, (t.c2 + t.c6.f3) / (t.c6.f3 + 1)].show()
+        _ = t[t.c2 + t.c2].show()
         for op1, op2 in [(t.c2, t.c2), (t.c3, t.c3)]:
             _ = t[op1 + op2].show()
             _ = t[op1 - op2].show()
@@ -103,13 +255,13 @@ class TestExprs:
             (t.c1, t.c2), (t.c1, 1), (t.c2, t.c1), (t.c2, 'a'),
             (t.c1, t.c3), (t.c1, 1.0), (t.c3, t.c1), (t.c3, 'a')
         ]:
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 + op2]
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 - op2]
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 * op2]
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 / op2]
         # TODO: test division; requires predicate
@@ -117,16 +269,18 @@ class TestExprs:
             _ = t[op1 + op2].show()
             _ = t[op1 - op2].show()
             _ = t[op1 * op2].show()
+            with pytest.raises(excs.Error):
+                _ = t[op1 / op2].show()
         for op1, op2 in [
             (t.c6.f1, t.c6.f2), (t.c6.f1, t.c6.f3), (t.c6.f1, 1), (t.c6.f1, 1.0),
             (t.c6.f2, t.c6.f1), (t.c6.f3, t.c6.f1), (t.c6.f2, 'a'), (t.c6.f3, 'a'),
         ]:
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 + op2].show()
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 - op2].show()
-            with pytest.raises(exc.OperationalError):
+            with pytest.raises(excs.Error):
                 _ = t[op1 * op2].show()
@@ -138,8 +292,8 @@ class TestExprs:
     def test_inline_array(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
-        result = t[[ [[t.c2, 1], [t.c2, 2]] ]].show()
-        t = result.col_types[0]
+        result = t.select([[t.c2, 1], [t.c2, 2]]).show()
+        t = result.column_types()[0]
         assert t.is_array_type()
         assert isinstance(t, ArrayType)
         assert t.shape == (2, 2)
@@ -167,41 +321,164 @@ class TestExprs:
         _ = t[t.c6.f1]
         _ = _.show()
         print(_)
+        # predicate on dict field
+        _ = t[t.c6.f2 < 2].show()
         #_ = t[t.c6.f2].show()
         #_ = t[t.c6.f5].show()
         _ = t[t.c6.f6.f8].show()
-        _ = t[cast(t.c6.f6.f8, ArrayType((4,), ColumnType.Type.FLOAT))].show()
+        _ = t[cast(t.c6.f6.f8, ArrayType((4,), FloatType()))].show()
         # top-level is array
         #_ = t[t.c7['*'].f1].show()
         #_ = t[t.c7['*'].f2].show()
         #_ = t[t.c7['*'].f5].show()
         _ = t[t.c7['*'].f6.f8].show()
-        _ = t[cast(t.c7['*'].f6.f8, ArrayType((2, 4), ColumnType.Type.FLOAT))].show()
+        _ = t[t.c7[0].f6.f8].show()
+        _ = t[t.c7[:2].f6.f8].show()
+        _ = t[t.c7[::-1].f6.f8].show()
+        _ = t[cast(t.c7['*'].f6.f8, ArrayType((2, 4), FloatType()))].show()
         print(_)
     def test_arrays(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
-        t.add_column(catalog.Column('array_col', computed_with=[[t.c2, 1], [1, t.c2]]))
+        t.add_column(array_col=[[t.c2, 1], [1, t.c2]])
         _ = t[t.array_col].show()
         print(_)
         _ = t[t.array_col[:, 0]].show()
         print(_)
+    def test_astype(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        # Convert int to float
+        status = t.add_column(c2_as_float=t.c2.astype(FloatType()))
+        assert status.num_excs == 0
+        data = t.select(t.c2, t.c2_as_float).collect()
+        for row in data:
+            assert isinstance(row['c2'], int)
+            assert isinstance(row['c2_as_float'], float)
+            assert row['c2'] == row['c2_as_float']
+        # Compound expression
+        status = t.add_column(compound_as_float=(t.c2 + 1).astype(FloatType()))
+        assert status.num_excs == 0
+        data = t.select(t.c2, t.compound_as_float).collect()
+        for row in data:
+            assert isinstance(row['compound_as_float'], float)
+            assert row['c2'] + 1 == row['compound_as_float']
+        # Type conversion error
+        status = t.add_column(c2_as_string=t.c2.astype(StringType()))
+        assert status.num_excs == t.count()
+    def test_apply(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        # For each column c1, ..., c5, we create a new column ci_as_str that converts it to
+        # a string, then check that each row is correctly converted
+        # (For c1 this is the no-op string-to-string conversion)
+        for col_id in range(1, 6):
+            col_name = f'c{col_id}'
+            str_col_name = f'c{col_id}_str'
+            status = t.add_column(**{str_col_name: t[col_name].apply(str)})
+            assert status.num_excs == 0
+            data = t.select(t[col_name], t[str_col_name]).collect()
+            for row in data:
+                assert row[str_col_name] == str(row[col_name])
+        # Test a compound expression with apply
+        status = t.add_column(c2_plus_1_str=(t.c2 + 1).apply(str))
+        assert status.num_excs == 0
+        data = t.select(t.c2, t.c2_plus_1_str).collect()
+        for row in data:
+            assert row['c2_plus_1_str'] == str(row['c2'] + 1)
+        # For columns c6, c7, try using json.dumps and json.loads to emit and parse JSON <-> str
+        for col_id in range(6, 8):
+            col_name = f'c{col_id}'
+            str_col_name = f'c{col_id}_str'
+            back_to_json_col_name = f'c{col_id}_back_to_json'
+            status = t.add_column(**{str_col_name: t[col_name].apply(json.dumps)})
+            assert status.num_excs == 0
+            status = t.add_column(**{back_to_json_col_name: t[str_col_name].apply(json.loads)})
+            assert status.num_excs == 0
+            data = t.select(t[col_name], t[str_col_name], t[back_to_json_col_name]).collect()
+            for row in data:
+                assert row[str_col_name] == json.dumps(row[col_name])
+                assert row[back_to_json_col_name] == row[col_name]
+        def f1(x):
+            return str(x)
+        # Now test that a function without a return type throws an exception ...
+        with pytest.raises(excs.Error) as exc_info:
+            t.c2.apply(f1)
+        assert 'Column type of `f1` cannot be inferred.' in str(exc_info.value)
+        # ... but works if the type is specified explicitly.
+        status = t.add_column(c2_str_f1=t.c2.apply(f1, col_type=StringType()))
+        assert status.num_excs == 0
+        # Test that the return type of a function can be successfully inferred.
+        def f2(x) -> str:
+            return str(x)
+        status = t.add_column(c2_str_f2=t.c2.apply(f2))
+        assert status.num_excs == 0
+        # Test various validation failures.
+        def f3(x, y) -> str:
+            return f'{x}{y}'
+        with pytest.raises(excs.Error) as exc_info:
+            t.c2.apply(f3)  # Too many required parameters
+        assert str(exc_info.value) == 'Function `f3` has multiple required parameters.'
+        def f4() -> str:
+            return "pixeltable"
+        with pytest.raises(excs.Error) as exc_info:
+            t.c2.apply(f4)  # No positional parameters
+        assert str(exc_info.value) == 'Function `f4` has no positional parameters.'
+        def f5(**kwargs) -> str:
+            return ""
+        with pytest.raises(excs.Error) as exc_info:
+            t.c2.apply(f5)  # No positional parameters
+        assert str(exc_info.value) == 'Function `f5` has no positional parameters.'
+        # Ensure these varargs signatures are acceptable
+        def f6(x, **kwargs) -> str:
+            return x
+        t.c2.apply(f6)
+        def f7(x, *args) -> str:
+            return x
+        t.c2.apply(f7)
+        def f8(*args) -> str:
+            return ''
+        t.c2.apply(f8)
     def test_select_list(self, img_tbl) -> None:
         t = img_tbl
         result = t[t.img].show(n=100)
         _ = result._repr_html_()
-        df = t[t.img, udf_call(lambda img: img.rotate(60), ImageType(), tbl=t)]
-        _ = df.show(n=100)._repr_html_()
         df = t[[t.img, t.img.rotate(60)]]
         _ = df.show(n=100)._repr_html_()
-        with pytest.raises(exc.OperationalError):
+        with pytest.raises(excs.Error):
             _ = t[t.img.rotate]
     def test_img_members(self, img_tbl) -> None:
         t = img_tbl
+        # make sure the limit is applied in Python, not in the SELECT
+        result = t[t.img.height > 200][t.img].show(n=3)
+        assert len(result) == 3
         result = t[t.img.crop((10, 10, 60, 60))].show(n=100)
         result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100))].show(n=100)
         result = t[t.img.crop((10, 10, 60, 60)).resize((100, 100)).convert('L')].show(n=100)
@@ -210,20 +487,21 @@ class TestExprs:
         _ = result._repr_html_()
     def test_img_functions(self, img_tbl) -> None:
+        skip_test_if_not_installed('nos')
         t = img_tbl
+        from pixeltable.functions.pil.image import resize
+        result = t[t.img.resize((224, 224))].show(0)
+        result = t[resize(t.img, (224, 224))].show(0)
         result = t[blend(t.img, t.img.rotate(90), 0.5)].show(100)
         print(result)
-        result = t[encode_image(t.img)].show(10)
+        from pixeltable.functions.nos.image_embedding import openai_clip
+        result = t[openai_clip(t.img.resize((224, 224)))].show(10)
         print(result)
         _ = result._repr_html_()
         _ = t.img.entropy() > 1
-        _ = _.extract_sql_predicate()
         _ = (t.img.entropy() > 1) & (t.split == 'train')
-        _ = _.extract_sql_predicate()
         _ = (t.img.entropy() > 1) & (t.split == 'train') & (t.split == 'val')
-        _ = _.extract_sql_predicate()
         _ = (t.split == 'train') & (t.img.entropy() > 1) & (t.split == 'val') & (t.img.entropy() < 0)
-        _ = _.extract_sql_predicate()
         _ = t[(t.split == 'train') & (t.category == 'n03445777')][t.img].show()
         print(_)
         result = t[t.img.width > 1].show()
@@ -235,32 +513,33 @@ class TestExprs:
         ][t.img, t.split].show()
         print(result)
-    def test_categoricals_map(self, img_tbl) -> None:
-        t = img_tbl
-        m = t[t.category].categorical_map()
-        _ = t[dict_map(t.category, m)].show()
-        print(_)
     def test_similarity(self, indexed_img_tbl: catalog.Table) -> None:
+        skip_test_if_not_installed('nos')
         t = indexed_img_tbl
         _ = t.show(30)
-        probe = t[t.img, t.category].show(1)
+        probe = t.select(t.img, t.category).show(1)
         img = probe[0, 0]
-        result = t[t.img.nearest(img)].show(10)
+        result = t.where(t.img.nearest(img)).show(10)
         assert len(result) == 10
         # nearest() with one SQL predicate and one Python predicate
         result = t[t.img.nearest(img) & (t.category == probe[0, 1]) & (t.img.width > 1)].show(10)
         # TODO: figure out how to verify results
-        #assert len(result) == 3
-        result = t[t.img.matches('musical instrument')].show(10)
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t[t.img.nearest(img)].order_by(t.category).show()
+        assert 'cannot be used in conjunction with' in str(exc_info.value)
+        result = t[t.img.nearest('musical instrument')].show(10)
         assert len(result) == 10
         # matches() with one SQL predicate and one Python predicate
         french_horn_category = 'n03394916'
         result = t[
-            t.img.matches('musical instrument') & (t.category == french_horn_category) & (t.img.width > 1)
+            t.img.nearest('musical instrument') & (t.category == french_horn_category) & (t.img.width > 1)
         ].show(10)
-        #assert len(result) == 6
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t[t.img.nearest(5)].show()
+        assert 'requires' in str(exc_info.value)
     # TODO: this doesn't work when combined with test_similarity(), for some reason the data table for img_tbl
     # doesn't get created; why?
@@ -269,48 +548,47 @@ class TestExprs:
         probe = t[t.img].show(1)
         img = probe[0, 0]
-        with pytest.raises(exc.OperationalError):
+        with pytest.raises(excs.Error):
             _ = t[t.img.nearest(img)].show(10)
-        with pytest.raises(exc.OperationalError):
-            _ = t[t.img.matches('musical instrument')].show(10)
-    def test_serialization(self, test_tbl: catalog.Table, img_tbl: catalog.Table) -> None:
-        t = test_tbl
-        # add array column
-        t.add_column(catalog.Column('c8', computed_with=[[1, 2, 3], [4, 5, 6]]))
-        img_t = img_tbl
-        test_exprs = [
-            t.c1,
-            t.c7['*'].f1,
-            Literal('test'),
-            InlineDict({
-                'a': t.c1, 'b': t.c6.f1, 'c': 17,
-                'd': InlineDict({'e': t.c2}),
-                'f': InlineArray((t.c3, t.c3))
-            }),
-            InlineArray([[t.c2, t.c2], [t.c2, t.c2]]),
-            t.c2 > 5,
-            ~(t.c2 > 5),
-            (t.c2 > 5) & (t.c1 == 'test'),
-            (t.c2 > 5) | (t.c1 == 'test'),
-            t.c7['*'].f5 >> [R[3], R[2], R[1], R[0]],
-            t.c8[0, 1:],
-            utils.sum_uda(t.c2).window(partition_by=t.c4, order_by=t.c3),
-        ]
-        for e in test_exprs:
+        with pytest.raises(excs.Error):
+            _ = t[t.img.nearest('musical instrument')].show(10)
+    def test_ids(
+            self, test_tbl: catalog.Table, test_tbl_exprs: List[exprs.Expr],
+            img_tbl: catalog.Table, img_tbl_exprs: List[exprs.Expr]
+    ) -> None:
+        d: Dict[int, exprs.Expr] = {}
+        for e in test_tbl_exprs:
+            assert e.id is not None
+            d[e.id] = e
+        for e in img_tbl_exprs:
+            assert e.id is not None
+            d[e.id] = e
+        assert len(d) == len(test_tbl_exprs) + len(img_tbl_exprs)
+    def test_serialization(
+            self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]
+    ) -> None:
+        """Test as_dict()/from_dict() (via serialize()/deserialize()) for all exprs."""
+        for e in test_tbl_exprs:
             e_serialized = e.serialize()
-            e_deserialized = Expr.deserialize(e_serialized, t)
+            e_deserialized = Expr.deserialize(e_serialized)
             assert e.equals(e_deserialized)
-        img_test_exprs = [
-            img_t.img.width,
-            img_t.img.rotate(90),
-        ]
-        for e in img_test_exprs:
+        for e in img_tbl_exprs:
             e_serialized = e.serialize()
-            e_deserialized = Expr.deserialize(e_serialized, img_t)
+            e_deserialized = Expr.deserialize(e_serialized)
             assert e.equals(e_deserialized)
+    def test_print(self, test_tbl_exprs: List[exprs.Expr], img_tbl_exprs: List[exprs.Expr]) -> None:
+        _ = func.FunctionRegistry.get().module_fns
+        for e in test_tbl_exprs:
+            _ = str(e)
+            print(_)
+        for e in img_tbl_exprs:
+            _ = str(e)
+            print(_)
     def test_subexprs(self, img_tbl: catalog.Table) -> None:
         t = img_tbl
         e = t.img
@@ -318,31 +596,210 @@ class TestExprs:
         assert len(subexprs) == 1
         e = t.img.rotate(90).resize((224, 224))
         subexprs = [s for s in e.subexprs()]
-        assert len(subexprs) == 3
-        subexprs = [s for s in e.subexprs() if isinstance(s, ColumnRef)]
+        assert len(subexprs) == 4
+        subexprs = [s for s in e.subexprs(expr_class=ColumnRef)]
         assert len(subexprs) == 1
         assert t.img.equals(subexprs[0])
-    def test_window_fns(self, test_db: catalog.Db, test_tbl: catalog.Table) -> None:
-        db = test_db
+    def test_window_fns(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
+        cl = test_client
         t = test_tbl
-        _ = t[utils.sum_uda(t.c2).window(partition_by=t.c4, order_by=t.c3)].show(100)
-        print(_)
+        _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3)).show(100)
+        # conflicting ordering requirements
+        with pytest.raises(excs.Error):
+            _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
+        with pytest.raises(excs.Error):
+            _ = t.select(sum(t.c2, group_by=t.c4, order_by=t.c3), sum(t.c2, group_by=t.c3, order_by=t.c4)).show(100)
         # backfill works
-        t.add_column(catalog.Column('c9', computed_with=utils.sum_uda(t.c2).window(partition_by=t.c4, order_by=t.c3)))
-        c2 = catalog.Column('c2', IntType(), nullable=False)
-        c3 = catalog.Column('c3', FloatType(), nullable=False)
-        c4 = catalog.Column('c4', BoolType(), nullable=False)
-        new_t = db.create_table('insert_test', [c2, c3, c4])
-        new_t.add_column(catalog.Column(
-            'c2_sum', computed_with=utils.sum_uda(new_t.c2).window(partition_by=new_t.c4, order_by=new_t.c3)))
-        data_df = t[t.c2, t.c4, t.c3].show(0).to_pandas()
-        new_t.insert_pandas(data_df)
+        t.add_column(c9=sum(t.c2, group_by=t.c4, order_by=t.c3))
+        _ = t.c9.col.has_window_fn_call()
+        # ordering conflict between frame extraction and window fn
+        base_t = cl.create_table('videos', {'video': VideoType(), 'c2': IntType(nullable=False)})
+        args = {'video': base_t.video, 'fps': 0}
+        v = cl.create_view('frame_view', base_t, iterator_class=FrameIterator, iterator_args=args)
+        # compatible ordering
+        _ = v.select(v.frame, sum(v.frame_idx, group_by=base_t, order_by=v.pos)).show(100)
+        with pytest.raises(excs.Error):
+            # incompatible ordering
+            _ = v.select(v.frame, sum(v.c2, order_by=base_t, group_by=v.pos)).show(100)
+        schema = {
+            'c2': IntType(nullable=False),
+            'c3': FloatType(nullable=False),
+            'c4': BoolType(nullable=False),
+        }
+        new_t = cl.create_table('insert_test', schema=schema)
+        new_t.add_column(c2_sum=sum(new_t.c2, group_by=new_t.c4, order_by=new_t.c3))
+        rows = list(t.select(t.c2, t.c4, t.c3).collect())
+        new_t.insert(rows)
         _ = new_t.show(0)
-        print(_)
     def test_aggregates(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
-        _ = t[t.c2 % 2, sum(t.c2), count(t.c2), sum(t.c2) + count(t.c2), sum(t.c2) + t.c2 % 2].group_by(t.c2 % 2).show()
-        print(_)
+        _ = t[t.c2 % 2, sum(t.c2), count(t.c2), sum(t.c2) + count(t.c2), sum(t.c2) + (t.c2 % 2)]\
+            .group_by(t.c2 % 2).show()
+        # check that aggregates don't show up in the wrong places
+        with pytest.raises(excs.Error):
+            # aggregate in where clause
+            _ = t[sum(t.c2) > 0][sum(t.c2)].group_by(t.c2 % 2).show()
+        with pytest.raises(excs.Error):
+            # aggregate in group_by clause
+            _ = t[sum(t.c2)].group_by(sum(t.c2)).show()
+        with pytest.raises(excs.Error):
+            # mixing aggregates and non-aggregates
+            _ = t[sum(t.c2) + t.c2].group_by(t.c2 % 2).show()
+        with pytest.raises(excs.Error):
+            # nested aggregates
+            _ = t[sum(count(t.c2))].group_by(t.c2 % 2).show()
+    def test_udas(self, test_tbl: catalog.Table) -> None:
+        t = test_tbl
+        @pxt.uda(
+            name='window_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
+            allows_window=True, requires_order_by=False)
+        class WindowAgg:
+            def __init__(self, val: int = 0):
+                self.val = val
+            def update(self, ignore: int) -> None:
+                pass
+            def value(self) -> int:
+                return self.val
+        @pxt.uda(
+            name='ordered_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
+            requires_order_by=True, allows_window=True)
+        class WindowAgg:
+            def __init__(self, val: int = 0):
+                self.val = val
+            def update(self, i: int) -> None:
+                pass
+            def value(self) -> int:
+                return self.val
+        @pxt.uda(
+            name='std_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
+            requires_order_by=False, allows_window=False)
+        class StdAgg:
+            def __init__(self, val: int = 0):
+                self.val = val
+            def update(self, i: int) -> None:
+                pass
+            def value(self) -> int:
+                return self.val
+        # init arg is passed along
+        assert t.select(out=window_agg(t.c2, order_by=t.c2)).collect()[0]['out'] == 0
+        assert t.select(out=window_agg(t.c2, val=1, order_by=t.c2)).collect()[0]['out'] == 1
+        with pytest.raises(excs.Error) as exc_info:
+            _ = t.select(window_agg(t.c2, val=t.c2, order_by=t.c2)).collect()
+        assert 'needs to be a constant' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            # ordering expression not a pixeltable expr
+            _ = t.select(ordered_agg(1, t.c2)).collect()
+        assert 'but instead is a' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # explicit order_by
+            _ = t.select(ordered_agg(t.c2, order_by=t.c2)).collect()
+        assert 'order_by invalid' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # order_by for non-window function
+            _ = t.select(std_agg(t.c2, order_by=t.c2)).collect()
+        assert 'does not allow windows' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # group_by for non-window function
+            _ = t.select(std_agg(t.c2, group_by=t.c4)).collect()
+        assert 'group_by invalid' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # missing init type
+            @pxt.uda(update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, ignore: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'init_types must be a list of' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            # missing update parameter
+            @pxt.uda(init_types=[IntType()], update_types=[], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'must have at least one parameter' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            # missing update type
+            @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, i1: int, i2: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'update_types must be a list of' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            # duplicate parameter names
+            @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, val: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'cannot have parameters with the same name: val' in str(exc_info.value)
+        with pytest.raises(excs.Error) as exc_info:
+            # invalid name
+            @pxt.uda(name='not an identifier', init_types=[IntType()], update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, i1: int, i2: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'invalid name' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # reserved parameter name
+            @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, order_by: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'order_by is reserved' in str(exc_info.value).lower()
+        with pytest.raises(excs.Error) as exc_info:
+            # reserved parameter name
+            @pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
+            class WindowAgg:
+                def __init__(self, val: int = 0):
+                    self.val = val
+                def update(self, group_by: int) -> None:
+                    pass
+                def value(self) -> int:
+                    return self.val
+        assert 'group_by is reserved' in str(exc_info.value).lower()

pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl

Potentially problematic release.

pixeltable 0.1.0 (py3-none-any.whl) → 0.2.4 (py3-none-any.whl)