PyPI - pixeltable - Versions diffs - 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl - Mend

pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (62)

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/dir.py +6 -0
pixeltable/catalog/globals.py +25 -0
pixeltable/catalog/named_function.py +4 -0
pixeltable/catalog/path_dict.py +37 -11
pixeltable/catalog/schema_object.py +6 -0
pixeltable/catalog/table.py +96 -19
pixeltable/catalog/table_version.py +22 -8
pixeltable/dataframe.py +201 -3
pixeltable/env.py +9 -3
pixeltable/exec/expr_eval_node.py +1 -1
pixeltable/exec/sql_node.py +2 -2
pixeltable/exprs/function_call.py +134 -29
pixeltable/exprs/inline_expr.py +22 -2
pixeltable/exprs/row_builder.py +1 -1
pixeltable/exprs/similarity_expr.py +9 -2
pixeltable/func/__init__.py +1 -0
pixeltable/func/aggregate_function.py +151 -68
pixeltable/func/callable_function.py +50 -16
pixeltable/func/expr_template_function.py +62 -24
pixeltable/func/function.py +191 -23
pixeltable/func/function_registry.py +2 -1
pixeltable/func/query_template_function.py +11 -6
pixeltable/func/signature.py +64 -7
pixeltable/func/tools.py +116 -0
pixeltable/func/udf.py +57 -35
pixeltable/functions/__init__.py +2 -2
pixeltable/functions/anthropic.py +36 -2
pixeltable/functions/globals.py +54 -34
pixeltable/functions/json.py +3 -8
pixeltable/functions/math.py +67 -0
pixeltable/functions/ollama.py +4 -4
pixeltable/functions/openai.py +31 -2
pixeltable/functions/timestamp.py +1 -1
pixeltable/functions/video.py +2 -8
pixeltable/functions/vision.py +1 -1
pixeltable/globals.py +347 -79
pixeltable/index/embedding_index.py +44 -24
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_16.py +2 -1
pixeltable/metadata/converters/convert_17.py +2 -1
pixeltable/metadata/converters/convert_23.py +35 -0
pixeltable/metadata/converters/convert_24.py +47 -0
pixeltable/metadata/converters/util.py +4 -2
pixeltable/metadata/notes.py +2 -0
pixeltable/metadata/schema.py +1 -0
pixeltable/type_system.py +192 -48
{pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/METADATA +4 -2
{pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/RECORD +54 -57
pixeltable-0.2.30.dist-info/entry_points.txt +3 -0
pixeltable/tool/create_test_db_dump.py +0 -311
pixeltable/tool/create_test_video.py +0 -81
pixeltable/tool/doc_plugins/griffe.py +0 -50
pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
pixeltable/tool/embed_udf.py +0 -9
pixeltable/tool/mypy_plugin.py +0 -55
pixeltable-0.2.28.dist-info/entry_points.txt +0 -3
{pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/LICENSE +0 -0
{pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/WHEEL +0 -0

pixeltable/index/embedding_index.py CHANGED Viewed

@@ -37,6 +37,14 @@ class EmbeddingIndex(IndexBase):
         Metric.L2: 'vector_l2_ops'
     }
+    metric: Metric
+    value_expr: exprs.FunctionCall
+    string_embed: Optional[func.Function]
+    image_embed: Optional[func.Function]
+    string_embed_signature_idx: int
+    image_embed_signature_idx: int
+    index_col_type: pgvector.sqlalchemy.Vector
     def __init__(
             self, c: catalog.Column, metric: str, string_embed: Optional[func.Function] = None,
             image_embed: Optional[func.Function] = None):
@@ -49,18 +57,22 @@ class EmbeddingIndex(IndexBase):
                 raise excs.Error(f"Text embedding function is required for column {c.name} (parameter 'string_embed')")
         if c.col_type.is_image_type() and image_embed is None:
             raise excs.Error(f"Image embedding function is required for column {c.name} (parameter 'image_embed')")
-        if string_embed is not None:
-            # verify signature
-            self._validate_embedding_fn(string_embed, 'string_embed', ts.ColumnType.Type.STRING)
-        if image_embed is not None:
-            # verify signature
-            self._validate_embedding_fn(image_embed, 'image_embed', ts.ColumnType.Type.IMAGE)
+        if string_embed is None:
+            self.string_embed = None
+        else:
+            # verify signature and convert to a monomorphic function
+            self.string_embed = self._validate_embedding_fn(string_embed, 'string_embed', ts.ColumnType.Type.STRING)
+        if image_embed is None:
+            self.image_embed = None
+        else:
+            # verify signature and convert to a monomorphic function
+            self.image_embed = self._validate_embedding_fn(image_embed, 'image_embed', ts.ColumnType.Type.IMAGE)
         self.metric = self.Metric[metric.upper()]
         self.value_expr = string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type() else image_embed(exprs.ColumnRef(c))
         assert isinstance(self.value_expr.col_type, ts.ArrayType)
-        self.string_embed = string_embed
-        self.image_embed = image_embed
         vector_size = self.value_expr.col_type.shape[0]
         assert vector_size is not None
         self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
@@ -91,10 +103,10 @@ class EmbeddingIndex(IndexBase):
         assert isinstance(item, (str, PIL.Image.Image))
         if isinstance(item, str):
             assert self.string_embed is not None
-            embedding = self.string_embed.exec(item)
+            embedding = self.string_embed.exec([item], {})
         if isinstance(item, PIL.Image.Image):
             assert self.image_embed is not None
-            embedding = self.image_embed.exec(item)
+            embedding = self.image_embed.exec([item], {})
         if self.metric == self.Metric.COSINE:
             return val_column.sa_col.cosine_distance(embedding) * -1 + 1
@@ -110,10 +122,10 @@ class EmbeddingIndex(IndexBase):
         embedding: Optional[np.ndarray] = None
         if isinstance(item, str):
             assert self.string_embed is not None
-            embedding = self.string_embed.exec(item)
+            embedding = self.string_embed.exec([item], {})
         if isinstance(item, PIL.Image.Image):
             assert self.image_embed is not None
-            embedding = self.image_embed.exec(item)
+            embedding = self.image_embed.exec([item], {})
         assert embedding is not None
         if self.metric == self.Metric.COSINE:
@@ -132,27 +144,33 @@ class EmbeddingIndex(IndexBase):
         return 'embedding'
     @classmethod
-    def _validate_embedding_fn(cls, embed_fn: func.Function, name: str, expected_type: ts.ColumnType.Type) -> None:
-        """Validate the signature"""
+    def _validate_embedding_fn(cls, embed_fn: func.Function, name: str, expected_type: ts.ColumnType.Type) -> func.Function:
+        """Validate that the Function has a matching signature, and return the corresponding monomorphic function."""
         assert isinstance(embed_fn, func.Function)
-        sig = embed_fn.signature
-        # The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
-        # has more than one parameter, as long as it has at most one *required* parameter.
-        if (len(sig.parameters) == 0
-            or len(sig.required_parameters) > 1
-            or sig.parameters_by_pos[0].col_type.type_enum != expected_type):
-            raise excs.Error(
-                f'{name} must take a single {expected_type.name.lower()} parameter, but has signature {sig}')
+        signature_idx: int = -1
+        for idx, sig in enumerate(embed_fn.signatures):
+            # The embedding function must be a 1-ary function of the correct type. But it's ok if the function signature
+            # has more than one parameter, as long as it has at most one *required* parameter.
+            if (len(sig.parameters) >= 1
+                and len(sig.required_parameters) <= 1
+                and sig.parameters_by_pos[0].col_type.type_enum == expected_type):
+                signature_idx = idx
+                break
+        if signature_idx == -1:
+            raise excs.Error(f'{name} must take a single {expected_type.name.lower()} parameter')
+        resolved_fn = embed_fn._resolved_fns[signature_idx]
         # validate return type
         param_name = sig.parameters_by_pos[0].name
         if expected_type == ts.ColumnType.Type.STRING:
-            return_type = embed_fn.call_return_type({param_name: 'dummy'})
+            return_type = resolved_fn.call_return_type([], {param_name: 'dummy'})
         else:
             assert expected_type == ts.ColumnType.Type.IMAGE
             img = PIL.Image.new('RGB', (512, 512))
-            return_type = embed_fn.call_return_type({param_name: img})
+            return_type = resolved_fn.call_return_type([], {param_name: img})
         assert return_type is not None
         if not isinstance(return_type, ts.ArrayType):
             raise excs.Error(f'{name} must return an array, but returns {return_type}')
@@ -161,6 +179,8 @@ class EmbeddingIndex(IndexBase):
             if len(shape) != 1 or shape[0] == None:
                 raise excs.Error(f'{name} must return a 1D array of a specific length, but returns {return_type}')
+        return resolved_fn
     def as_dict(self) -> dict:
         return {
             'metric': self.metric.name.lower(),

pixeltable/metadata/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
 from .schema import SystemInfo, SystemInfoMd
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 23
+VERSION = 25
 def create_system_info(engine: sql.engine.Engine) -> None:

pixeltable/metadata/converters/convert_16.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from uuid import UUID
 import sqlalchemy as sql
 from pixeltable.metadata import register_converter
@@ -12,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
     )
-def __update_table_md(table_md: dict) -> None:
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
     # External stores are not migratable; just drop them
     del table_md['remotes']
     table_md['external_stores'] = {}

pixeltable/metadata/converters/convert_17.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from uuid import UUID
 import sqlalchemy as sql
 from pixeltable.metadata import register_converter
@@ -12,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
     )
-def __update_table_md(table_md: dict) -> None:
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
     # key changes in IndexMd.init_args: img_embed -> image_embed, txt_embed -> string_embed
     if len(table_md['index_md']) == 0:
         return

pixeltable/metadata/converters/convert_23.py ADDED Viewed

@@ -0,0 +1,35 @@
+import logging
+from typing import Any, Optional
+from uuid import UUID
+import sqlalchemy as sql
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+from pixeltable.metadata.schema import Table
+_logger = logging.getLogger('pixeltable')
+@register_converter(version=23)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(
+        engine,
+        table_md_updater=__update_table_md
+    )
+def __update_table_md(table_md: dict, table_id: UUID) -> None:
+    """update the index metadata to add indexed_col_tbl_id column if it is missing
+    Args:
+        table_md (dict): copy of the original table metadata. this gets updated in place.
+        table_id (UUID): the table id
+    """
+    if len(table_md['index_md']) == 0:
+        return
+    for idx_md in table_md['index_md'].values():
+        if 'indexed_col_tbl_id' not in idx_md:
+            # index metadata is missing indexed_col_tbl_id
+            # assume that the indexed column is in the same table
+            # and update the index metadata.
+            _logger.info(f'Updating index metadata for table: {table_id} index: {idx_md["id"]}')
+            idx_md['indexed_col_tbl_id'] = str(table_id)

pixeltable/metadata/converters/convert_24.py ADDED Viewed

@@ -0,0 +1,47 @@
+import importlib
+from typing import Any, Optional
+import sqlalchemy as sql
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+@register_converter(version=24)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(engine, substitution_fn=__substitute_md)
+def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+    from pixeltable import func
+    from pixeltable.func.globals import resolve_symbol
+    if (isinstance(v, dict) and
+        '_classpath' in v and
+        v['_classpath'] in ['pixeltable.func.callable_function.CallableFunction',
+                            'pixeltable.func.aggregate_function.AggregateFunction',
+                            'pixeltable.func.expr_template_function.ExprTemplateFunction']):
+        if 'path' in v:
+            assert 'signature' not in v
+            f = resolve_symbol(v['path'])
+            assert isinstance(f, func.Function)
+            v['signature'] = f.signatures[0].as_dict()
+        return k, v
+    if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'FunctionCall':
+        # Correct an older serialization mechanism where Expr elements of FunctionCall args and
+        # kwargs were indicated with idx == -1 rather than None. This was fixed for InlineList
+        # and InlineDict back in convert_20, but not for FunctionCall.
+        assert 'args' in v and isinstance(v['args'], list)
+        assert 'kwargs' in v and isinstance(v['kwargs'], dict)
+        v['args'] = [
+            (None, arg) if idx == -1 else (idx, arg)
+            for idx, arg in v['args']
+        ]
+        v['kwargs'] = {
+            k: (None, arg) if idx == -1 else (idx, arg)
+            for k, (idx, arg) in v['kwargs'].items()
+        }
+        return k, v
+    return None

pixeltable/metadata/converters/util.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import copy
 import logging
 from typing import Any, Callable, Optional
+from uuid import UUID
 import sqlalchemy as sql
@@ -11,7 +12,7 @@ __logger = logging.getLogger('pixeltable')
 def convert_table_md(
     engine: sql.engine.Engine,
-    table_md_updater: Optional[Callable[[dict], None]] = None,
+    table_md_updater: Optional[Callable[[dict, UUID], None]] = None,
     column_md_updater: Optional[Callable[[dict], None]] = None,
     external_store_md_updater: Optional[Callable[[dict], None]] = None,
     substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None
@@ -22,6 +23,7 @@ def convert_table_md(
     Args:
         engine: The SQLAlchemy engine.
         table_md_updater: A function that updates schema.TableMd dicts in place.
+            It takes two arguments: the metadata dict (new values) and the table id.
         column_md_updater: A function that updates schema.ColumnMd dicts in place.
         external_store_md_updater: A function that updates the external store metadata in place.
         substitution_fn: A function that substitutes metadata values. If specified, all metadata will be traversed
@@ -37,7 +39,7 @@ def convert_table_md(
             assert isinstance(table_md, dict)
             updated_table_md = copy.deepcopy(table_md)
             if table_md_updater is not None:
-                table_md_updater(updated_table_md)
+                table_md_updater(updated_table_md, id)
             if column_md_updater is not None:
                 __update_column_md(updated_table_md, column_md_updater)
             if external_store_md_updater is not None:

pixeltable/metadata/notes.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # rather than as a comment, so that the existence of a description can be enforced by
 # the unit tests when new versions are added.
 VERSION_NOTES = {
+    25: 'Functions with multiple signatures',
+    24: 'Added TableMd/IndexMd.indexed_col_tbl_id',
     23: 'DataFrame.from_clause',
     22: 'TableMd/ColumnMd.media_validation',
     21: 'Separate InlineArray and InlineList',

pixeltable/metadata/schema.py CHANGED Viewed

@@ -112,6 +112,7 @@ class IndexMd:
     """
     id: int
     name: str
+    indexed_col_tbl_id: str  # UUID of the table (as string) that contains column being indexed
     indexed_col_id: int  # column being indexed
     index_val_col_id: int  # column holding the values to be indexed
     index_val_undo_col_id: int  # column holding index values for deleted rows

pixeltable/type_system.py CHANGED Viewed

@@ -5,7 +5,6 @@ import datetime
 import enum
 import io
 import json
-import types
 import typing
 import urllib.parse
 import urllib.request
@@ -14,7 +13,11 @@ from typing import Any, Iterable, Mapping, Optional, Sequence, Union
 import PIL.Image
 import av  # type: ignore
+import jsonschema
+import jsonschema.protocols
+import jsonschema.validators
 import numpy as np
+import pydantic
 import sqlalchemy as sql
 from typing import _GenericAlias  # type: ignore[attr-defined]
 from typing_extensions import _AnnotatedAlias
@@ -244,7 +247,7 @@ class ColumnType:
             if col_type is not None:
                 return col_type
             # this could still be json-serializable
-        if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray):
+        if isinstance(val, dict) or isinstance(val, list) or isinstance(val, np.ndarray) or isinstance(val, pydantic.BaseModel):
             try:
                 JsonType().validate_literal(val)
                 return JsonType(nullable=nullable)
@@ -337,7 +340,7 @@ class ColumnType:
                     return TimestampType(nullable=nullable_default)
                 if t is PIL.Image.Image:
                     return ImageType(nullable=nullable_default)
-                if issubclass(t, Sequence) or issubclass(t, Mapping):
+                if issubclass(t, Sequence) or issubclass(t, Mapping) or issubclass(t, pydantic.BaseModel):
                     return JsonType(nullable=nullable_default)
         return None
@@ -479,6 +482,20 @@ class ColumnType:
         """
         pass
+    def to_json_schema(self) -> dict[str, Any]:
+        if self.nullable:
+            return {
+                'anyOf': [
+                    self._to_json_schema(),
+                    {'type': 'null'},
+                ]
+            }
+        else:
+            return self._to_json_schema()
+    def _to_json_schema(self) -> dict[str, Any]:
+        raise excs.Error(f'Pixeltable type {self} is not a valid JSON type')
 class InvalidType(ColumnType):
     def __init__(self, nullable: bool = False):
@@ -501,6 +518,9 @@ class StringType(ColumnType):
     def to_sa_type(self) -> sql.types.TypeEngine:
         return sql.String()
+    def _to_json_schema(self) -> dict[str, Any]:
+        return {'type': 'string'}
     def print_value(self, val: Any) -> str:
         return f"'{val}'"
@@ -524,8 +544,13 @@ class IntType(ColumnType):
     def to_sa_type(self) -> sql.types.TypeEngine:
         return sql.BigInteger()
+    def _to_json_schema(self) -> dict[str, Any]:
+        return {'type': 'integer'}
     def _validate_literal(self, val: Any) -> None:
-        if not isinstance(val, int):
+        # bool is a subclass of int, so we need to check for it
+        # explicitly first
+        if isinstance(val, bool) or not isinstance(val, int):
             raise TypeError(f'Expected int, got {val.__class__.__name__}')
@@ -536,6 +561,9 @@ class FloatType(ColumnType):
     def to_sa_type(self) -> sql.types.TypeEngine:
         return sql.Float()
+    def _to_json_schema(self) -> dict[str, Any]:
+        return {'type': 'number'}
     def _validate_literal(self, val: Any) -> None:
         if not isinstance(val, float):
             raise TypeError(f'Expected float, got {val.__class__.__name__}')
@@ -553,6 +581,9 @@ class BoolType(ColumnType):
     def to_sa_type(self) -> sql.types.TypeEngine:
         return sql.Boolean()
+    def _to_json_schema(self) -> dict[str, Any]:
+        return {'type': 'boolean'}
     def _validate_literal(self, val: Any) -> None:
         if not isinstance(val, bool):
             raise TypeError(f'Expected bool, got {val.__class__.__name__}')
@@ -581,61 +612,44 @@ class TimestampType(ColumnType):
 class JsonType(ColumnType):
-    # TODO: type_spec also needs to be able to express lists
-    def __init__(self, type_spec: Optional[dict[str, ColumnType]] = None, nullable: bool = False):
+    json_schema: Optional[dict[str, Any]]
+    __validator: Optional[jsonschema.protocols.Validator]
+    def __init__(self, json_schema: Optional[dict[str, Any]] = None, nullable: bool = False):
         super().__init__(self.Type.JSON, nullable=nullable)
-        self.type_spec = type_spec
+        self.json_schema = json_schema
+        if json_schema is None:
+            self.__validator = None
+        else:
+            validator_cls = jsonschema.validators.validator_for(json_schema)
+            validator_cls.check_schema(json_schema)
+            self.__validator = validator_cls(json_schema)
     def copy(self, nullable: bool) -> ColumnType:
-        return JsonType(self.type_spec, nullable=nullable)
+        return JsonType(json_schema=self.json_schema, nullable=nullable)
     def matches(self, other: ColumnType) -> bool:
-        return isinstance(other, JsonType) and self.type_spec == other.type_spec
-    def supertype(self, other: ColumnType) -> Optional[JsonType]:
-        if not isinstance(other, JsonType):
-            return None
-        if self.type_spec is None:
-            # we don't have a type spec and can accept anything accepted by other
-            return JsonType(nullable=(self.nullable or other.nullable))
-        if other.type_spec is None:
-            # we have a type spec but other doesn't
-            return JsonType(nullable=(self.nullable or other.nullable))
-        # we both have type specs; the supertype's type spec is the union of the two
-        type_spec: dict[str, ColumnType] = {}
-        type_spec.update(self.type_spec)
-        for other_field_name, other_field_type in other.type_spec.items():
-            if other_field_name not in type_spec:
-                type_spec[other_field_name] = other_field_type
-            else:
-                # both type specs have this field
-                field_type = type_spec[other_field_name].supertype(other_field_type)
-                if field_type is None:
-                    # conflicting types
-                    return JsonType(nullable=(self.nullable or other.nullable))
-                type_spec[other_field_name] = field_type
-        return JsonType(type_spec, nullable=(self.nullable or other.nullable))
+        return isinstance(other, JsonType) and self.json_schema == other.json_schema
     def _as_dict(self) -> dict:
         result = super()._as_dict()
-        if self.type_spec is not None:
-            type_spec_dict = {field_name: field_type.serialize() for field_name, field_type in self.type_spec.items()}
-            result.update({'type_spec': type_spec_dict})
+        if self.json_schema is not None:
+            result.update({'json_schema': self.json_schema})
         return result
     @classmethod
     def _from_dict(cls, d: dict) -> ColumnType:
-        type_spec = None
-        if 'type_spec' in d:
-            type_spec = {
-                field_name: cls.deserialize(field_type_dict) for field_name, field_type_dict in d['type_spec'].items()
-            }
-        return cls(type_spec, nullable=d['nullable'])
+        return cls(json_schema=d.get('json_schema'), nullable=d['nullable'])
     def to_sa_type(self) -> sql.types.TypeEngine:
         return sql.dialects.postgresql.JSONB()
+    def _to_json_schema(self) -> dict[str, Any]:
+        if self.json_schema is None:
+            return {}
+        return self.json_schema
     def print_value(self, val: Any) -> str:
         val_type = self.infer_literal_type(val)
         if val_type is None:
@@ -645,27 +659,141 @@ class JsonType(ColumnType):
         return val_type.print_value(val)
     def _validate_literal(self, val: Any) -> None:
-        if not isinstance(val, dict) and not isinstance(val, list):
+        if not isinstance(val, (dict, list)):
             # TODO In the future we should accept scalars too, which would enable us to remove this top-level check
             raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
-        if not self.__is_valid_literal(val):
+        if not self.__is_valid_json(val):
             raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
+        if self.__validator is not None:
+            self.__validator.validate(val)
     @classmethod
-    def __is_valid_literal(cls, val: Any) -> bool:
+    def __is_valid_json(cls, val: Any) -> bool:
         if val is None or isinstance(val, (str, int, float, bool)):
             return True
         if isinstance(val, (list, tuple)):
-            return all(cls.__is_valid_literal(v) for v in val)
+            return all(cls.__is_valid_json(v) for v in val)
         if isinstance(val, dict):
-            return all(isinstance(k, str) and cls.__is_valid_literal(v) for k, v in val.items())
+            return all(isinstance(k, str) and cls.__is_valid_json(v) for k, v in val.items())
         return False
     def _create_literal(self, val: Any) -> Any:
         if isinstance(val, tuple):
             val = list(val)
+        if isinstance(val, pydantic.BaseModel):
+            return val.model_dump()
         return val
+    def supertype(self, other: ColumnType) -> Optional[JsonType]:
+        # Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
+        # are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
+        # merge their schemas.
+        basic_supertype = super().supertype(other)
+        if basic_supertype is not None:
+            assert isinstance(basic_supertype, JsonType)
+            return basic_supertype
+        if not isinstance(other, JsonType):
+            return None
+        if self.json_schema is None or other.json_schema is None:
+            return JsonType(nullable=(self.nullable or other.nullable))
+        superschema = self.__superschema(self.json_schema, other.json_schema)
+        return JsonType(
+            json_schema=(None if len(superschema) == 0 else superschema),
+            nullable=(self.nullable or other.nullable)
+        )
+    @classmethod
+    def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+        # Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
+        # things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
+        # for the most important use cases (unions for type inference, validation of inline exprs). A schema is
+        # considered to be conforming if either:
+        # (i) it is a scalar (string, integer, number, boolean) or dictionary (object) type; or
+        # (ii) it is an "anyOf" schema of one of the above types and the exact schema {'type': 'null'}.
+        # Conforming schemas are organized into a type hierarchy in an internally consistent way. Nonconforming
+        # schemas are allowed, but they are isolates in the type hierarchy: a nonconforming schema has no proper
+        # subtypes, and its only proper supertype is an unconstrained JsonType().
+        #
+        # There is some subtlety in the handling of nullable fields. Nullable fields are represented in JSON
+        # schemas as (for example) {'anyOf': [{'type': 'string'}, {'type': 'null'}]}. When finding the supertype
+        # of schemas that might be nullable, we first unpack the 'anyOf's, find the supertype of the underlyings,
+        # then reapply the 'anyOf' if appropriate. The top-level schema (i.e., JsonType.json_schema) is presumed
+        # to NOT be in this form (since nullability is indicated by the `nullable` field of the JsonType object),
+        # so this subtlety is applicable only to types that occur in subfields.
+        #
+        # There is currently no special handling of lists; distinct schemas with type 'array' will union to the
+        # generic {'type': 'array'} schema. This could be a TODO item if there is a need for it in the future.
+        if a == b:
+            return a
+        if 'properties' in a and 'properties' in b:
+            a_props = a['properties']
+            b_props = b['properties']
+            a_req = a.get('required', [])
+            b_req = b.get('required', [])
+            super_props = {}
+            super_req = []
+            for key, a_prop_schema in a_props.items():
+                if key in b_props:  # in both a and b
+                    prop_schema = cls.__superschema_with_nulls(a_prop_schema, b_props[key])
+                    super_props[key] = prop_schema
+                    if key in a_req and key in b_req:
+                        super_req.append(key)
+                else:  # in a but not b
+                    # Add it to the supertype schema as optional (regardless of its status in a)
+                    super_props[key] = a_prop_schema
+            for key, b_prop_schema in b_props.items():
+                if key not in a_props:  # in b but not a
+                    super_props[key] = b_prop_schema
+            schema = {'type': 'object', 'properties': super_props}
+            if len(super_req) > 0:
+                schema['required'] = super_req
+            return schema
+        a_type = a.get('type')
+        b_type = b.get('type')
+        if (a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type):
+            # a and b both have the same type designation, but are not identical. This can happen if
+            # (for example) they have validators or other attributes that differ. In this case, we
+            # generalize to {'type': t}, where t is their shared type, with no other qualifications.
+            return {'type': a_type}
+        return {}  # Unresolvable type conflict; the supertype is an unrestricted JsonType.
+    @classmethod
+    def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+        a, a_nullable = cls.__unpack_null_from_schema(a)
+        b, b_nullable = cls.__unpack_null_from_schema(b)
+        result = cls.__superschema(a, b)
+        if len(result) > 0 and (a_nullable or b_nullable):
+            # if len(result) == 0, then null is implicitly accepted; otherwise, we need to explicitly allow it
+            return {'anyOf': [result, {'type': 'null'}]}
+        return result
+    @classmethod
+    def __unpack_null_from_schema(cls, s: dict[str, Any]) -> tuple[dict[str, Any], bool]:
+        if 'anyOf' in s and len(s['anyOf']) == 2 and {'type': 'null'} in s['anyOf']:
+            try:
+                return next(s for s in s['anyOf'] if s != {'type': 'null'}), True
+            except StopIteration:
+                pass
+        return s, False
+    def _to_base_str(self) -> str:
+        if self.json_schema is None:
+            return 'Json'
+        elif 'title' in self.json_schema:
+            return f'Json[{self.json_schema["title"]}]'
+        else:
+            return f'Json[{self.json_schema}]'
 class ArrayType(ColumnType):
     def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
@@ -743,6 +871,12 @@ class ArrayType(ColumnType):
                 return False
         return val.dtype == self.numpy_dtype()
+    def _to_json_schema(self) -> dict[str, Any]:
+        return {
+            'type': 'array',
+            'items': self.pxt_dtype._to_json_schema(),
+        }
     def _validate_literal(self, val: Any) -> None:
         if not isinstance(val, np.ndarray):
             raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
@@ -1017,6 +1151,16 @@ class _PxtType:
 class Json(_PxtType):
+    def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
+        """
+        `item` (the type subscript) must be a `dict` representing a valid JSON Schema.
+        """
+        if not isinstance(item, dict):
+            raise TypeError('Json type parameter must be a dict')
+        # The JsonType initializer will validate the JSON Schema.
+        return typing.Annotated[Any, JsonType(json_schema=item, nullable=False)]
     @classmethod
     def as_col_type(cls, nullable: bool) -> ColumnType:
         return JsonType(nullable=nullable)

pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl