PyPI - pixeltable - Versions diffs - 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (56)

pixeltable/catalog/column.py +25 -48
pixeltable/catalog/insertable_table.py +7 -4
pixeltable/catalog/table.py +163 -57
pixeltable/catalog/table_version.py +416 -140
pixeltable/catalog/table_version_path.py +2 -2
pixeltable/client.py +0 -4
pixeltable/dataframe.py +65 -21
pixeltable/env.py +16 -1
pixeltable/exec/cache_prefetch_node.py +1 -1
pixeltable/exec/in_memory_data_node.py +11 -7
pixeltable/exprs/comparison.py +3 -3
pixeltable/exprs/data_row.py +5 -1
pixeltable/exprs/literal.py +16 -4
pixeltable/exprs/row_builder.py +8 -40
pixeltable/ext/__init__.py +5 -0
pixeltable/ext/functions/yolox.py +92 -0
pixeltable/func/aggregate_function.py +15 -15
pixeltable/func/expr_template_function.py +9 -1
pixeltable/func/globals.py +24 -14
pixeltable/func/signature.py +18 -12
pixeltable/func/udf.py +7 -2
pixeltable/functions/__init__.py +8 -8
pixeltable/functions/eval.py +7 -8
pixeltable/functions/huggingface.py +47 -19
pixeltable/functions/openai.py +2 -2
pixeltable/functions/util.py +11 -0
pixeltable/index/__init__.py +2 -0
pixeltable/index/base.py +49 -0
pixeltable/index/embedding_index.py +95 -0
pixeltable/metadata/schema.py +45 -22
pixeltable/plan.py +15 -34
pixeltable/store.py +38 -41
pixeltable/tests/conftest.py +5 -11
pixeltable/tests/ext/test_yolox.py +21 -0
pixeltable/tests/functions/test_fireworks.py +1 -0
pixeltable/tests/functions/test_huggingface.py +2 -2
pixeltable/tests/functions/test_openai.py +15 -5
pixeltable/tests/functions/test_together.py +1 -0
pixeltable/tests/test_component_view.py +14 -5
pixeltable/tests/test_dataframe.py +19 -18
pixeltable/tests/test_exprs.py +99 -102
pixeltable/tests/test_function.py +51 -43
pixeltable/tests/test_index.py +138 -0
pixeltable/tests/test_migration.py +2 -1
pixeltable/tests/test_snapshot.py +24 -1
pixeltable/tests/test_table.py +101 -25
pixeltable/tests/test_types.py +30 -0
pixeltable/tests/test_video.py +16 -16
pixeltable/tests/test_view.py +5 -0
pixeltable/tests/utils.py +43 -9
pixeltable/tool/create_test_db_dump.py +16 -0
pixeltable/type_system.py +37 -45
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
{pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0

pixeltable/func/globals.py CHANGED Viewed

@@ -1,29 +1,39 @@
-from typing import Optional
-from types import ModuleType
 import importlib
 import inspect
+from types import ModuleType
+from typing import Optional
+import pixeltable.exceptions as excs
-def resolve_symbol(symbol_path: str) -> object:
+def resolve_symbol(symbol_path: str) -> Optional[object]:
     path_elems = symbol_path.split('.')
     module: Optional[ModuleType] = None
-    if path_elems[0:2] == ['pixeltable', 'functions'] and len(path_elems) > 2:
-        # if this is a pixeltable.functions submodule, it cannot be resolved via pixeltable.functions;
-        # try to import the submodule directly
-        submodule_path = '.'.join(path_elems[0:3])
+    i = len(path_elems) - 1
+    while i > 0 and module is None:
         try:
-            module = importlib.import_module(submodule_path)
-            path_elems = path_elems[3:]
+            module = importlib.import_module('.'.join(path_elems[:i]))
         except ModuleNotFoundError:
-            pass
-    if module is None:
-        module = importlib.import_module(path_elems[0])
-        path_elems = path_elems[1:]
+            i -= 1
+    if i == 0:
+        return None  # Not resolvable
     obj = module
-    for el in path_elems:
+    for el in path_elems[i:]:
         obj = getattr(obj, el)
     return obj
+def validate_symbol_path(fn_path: str) -> None:
+    path_elems = fn_path.split('.')
+    fn_name = path_elems[-1]
+    if any(el == '<locals>' for el in path_elems):
+        raise excs.Error(
+            f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.')
+    if any(not el.isidentifier() for el in path_elems):
+        raise excs.Error(
+            f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.')
 def get_caller_module_path() -> str:
     """Return the module path of our caller's caller"""
     stack = inspect.stack()

pixeltable/func/signature.py CHANGED Viewed

@@ -114,20 +114,12 @@ class Signature:
         return (col_type, is_batched)
     @classmethod
-    def create(
-            cls, c: Callable,
-            param_types: Optional[List[ts.ColumnType]] = None,
-            return_type: Optional[Union[ts.ColumnType, Callable]] = None
-    ) -> Signature:
-        """Create a signature for the given Callable.
-        Infer the parameter and return types, if none are specified.
-        Raises an exception if the types cannot be inferred.
-        """
+    def create_parameters(
+            cls, c: Callable, param_types: Optional[List[ts.ColumnType]] = None) -> List[Parameter]:
         sig = inspect.signature(c)
         py_parameters = list(sig.parameters.values())
-        # check non-var parameters for name collisions and default value compatibility
         parameters: List[Parameter] = []
         for idx, param in enumerate(py_parameters):
             if param.name in cls.SPECIAL_PARAM_NAMES:
                 raise excs.Error(f"'{param.name}' is a reserved parameter name")
@@ -135,6 +127,7 @@ class Signature:
                 parameters.append(Parameter(param.name, None, param.kind, False))
                 continue
+            # check non-var parameters for name collisions and default value compatibility
             if param_types is not None:
                 if idx >= len(param_types):
                     raise excs.Error(f'Missing type for parameter {param.name}')
@@ -155,7 +148,20 @@ class Signature:
             parameters.append(Parameter(param.name, param_type, param.kind, is_batched))
-        return_is_batched = False
+        return parameters
+    @classmethod
+    def create(
+            cls, c: Callable,
+            param_types: Optional[List[ts.ColumnType]] = None,
+            return_type: Optional[Union[ts.ColumnType, Callable]] = None
+    ) -> Signature:
+        """Create a signature for the given Callable.
+        Infer the parameter and return types, if none are specified.
+        Raises an exception if the types cannot be inferred.
+        """
+        parameters = cls.create_parameters(c, param_types)
+        sig = inspect.signature(c)
         if return_type is None:
             return_type, return_is_batched = cls._infer_type(sig.return_annotation)
             if return_type is None:

pixeltable/func/udf.py CHANGED Viewed

@@ -11,6 +11,7 @@ from .callable_function import CallableFunction
 from .expr_template_function import ExprTemplateFunction
 from .function import Function
 from .function_registry import FunctionRegistry
+from .globals import validate_symbol_path
 from .signature import Signature
@@ -124,6 +125,8 @@ def make_function(
     # If this function is part of a module, register it
     if function_path is not None:
+        # do the validation at the very end, so it's easier to write tests for other failure scenarios
+        validate_symbol_path(function_path)
         FunctionRegistry.get().register_function(function_path, result)
     return result
@@ -142,17 +145,19 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
         else:
             function_path = None
-        sig = Signature.create(py_fn, param_types=param_types, return_type=None)
         # TODO: verify that the inferred return type matches that of the template
         # TODO: verify that the signature doesn't contain batched parameters
         # construct Parameters from the function signature
+        params = Signature.create_parameters(py_fn, param_types=param_types)
         import pixeltable.exprs as exprs
-        var_exprs = [exprs.Variable(param.name, param.col_type) for param in sig.parameters.values()]
+        var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
         # call the function with the parameter expressions to construct an Expr with parameters
         template = py_fn(*var_exprs)
         assert isinstance(template, exprs.Expr)
         py_sig = inspect.signature(py_fn)
+        if function_path is not None:
+            validate_symbol_path(function_path)
         return ExprTemplateFunction(template, py_signature=py_sig, self_path=function_path, name=py_fn.__name__)
     if len(args) == 1:

pixeltable/functions/__init__.py CHANGED Viewed

@@ -23,8 +23,8 @@ def cast(expr: exprs.Expr, target_type: ColumnType) -> exprs.Expr:
     return expr
 @func.uda(
-    update_types=[IntType()], value_type=IntType(), name='sum', allows_window=True, requires_order_by=False)
-class SumAggregator(func.Aggregator):
+    update_types=[IntType()], value_type=IntType(), allows_window=True, requires_order_by=False)
+class sum(func.Aggregator):
     def __init__(self):
         self.sum: Union[int, float] = 0
     def update(self, val: Union[int, float]) -> None:
@@ -35,8 +35,8 @@ class SumAggregator(func.Aggregator):
 @func.uda(
-    update_types=[IntType()], value_type=IntType(), name='count', allows_window = True, requires_order_by = False)
-class CountAggregator(func.Aggregator):
+    update_types=[IntType()], value_type=IntType(), allows_window = True, requires_order_by = False)
+class count(func.Aggregator):
     def __init__(self):
         self.count = 0
     def update(self, val: int) -> None:
@@ -47,8 +47,8 @@ class CountAggregator(func.Aggregator):
 @func.uda(
-    update_types=[IntType()], value_type=FloatType(), name='mean', allows_window=False, requires_order_by=False)
-class MeanAggregator(func.Aggregator):
+    update_types=[IntType()], value_type=FloatType(), allows_window=False, requires_order_by=False)
+class mean(func.Aggregator):
     def __init__(self):
         self.sum = 0
         self.count = 0
@@ -63,9 +63,9 @@ class MeanAggregator(func.Aggregator):
 @func.uda(
-    init_types=[IntType()], update_types=[ImageType()], value_type=VideoType(), name='make_video',
+    init_types=[IntType()], update_types=[ImageType()], value_type=VideoType(),
     requires_order_by=True, allows_window=False)
-class VideoAggregator(func.Aggregator):
+class make_video(func.Aggregator):
     def __init__(self, fps: int = 25):
         """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
         self.container: Optional[av.container.OutputContainer] = None

pixeltable/functions/eval.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from __future__ import annotations
 from typing import List, Tuple, Dict
 from collections import defaultdict
 import sys
@@ -157,16 +156,16 @@ def calculate_image_tpfp(
         ts.JsonType(nullable=False)
     ])
 def eval_detections(
-        pred_bboxes: List[List[int]], pred_classes: List[int], pred_scores: List[float],
-        gt_bboxes: List[List[int]], gt_classes: List[int]
+        pred_bboxes: List[List[int]], pred_labels: List[int], pred_scores: List[float],
+        gt_bboxes: List[List[int]], gt_labels: List[int]
 ) -> Dict:
-    class_idxs = list(set(pred_classes + gt_classes))
+    class_idxs = list(set(pred_labels + gt_labels))
     result: List[Dict] = []
     pred_bboxes_arr = np.asarray(pred_bboxes)
-    pred_classes_arr = np.asarray(pred_classes)
+    pred_classes_arr = np.asarray(pred_labels)
     pred_scores_arr = np.asarray(pred_scores)
     gt_bboxes_arr = np.asarray(gt_bboxes)
-    gt_classes_arr = np.asarray(gt_classes)
+    gt_classes_arr = np.asarray(gt_labels)
     for class_idx in class_idxs:
         pred_filter = pred_classes_arr == class_idx
         gt_filter = gt_classes_arr == class_idx
@@ -181,8 +180,8 @@ def eval_detections(
     return result
 @func.uda(
-    update_types=[ts.JsonType()], value_type=ts.JsonType(), name='mean_ap', allows_std_agg=True, allows_window=False)
-class MeanAPAggregator:
+    update_types=[ts.JsonType()], value_type=ts.JsonType(), allows_std_agg=True, allows_window=False)
+class mean_ap(func.Aggregator):
     def __init__(self):
         self.class_tpfp: Dict[int, List[Dict]] = defaultdict(list)

pixeltable/functions/huggingface.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Callable
+from typing import Callable, TypeVar, Optional
 import PIL.Image
 import numpy as np
@@ -7,10 +7,13 @@ import pixeltable as pxt
 import pixeltable.env as env
 import pixeltable.type_system as ts
 from pixeltable.func import Batch
+from pixeltable.functions.util import resolve_torch_device
 @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
-def sentence_transformer(sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False) -> Batch[np.ndarray]:
+def sentence_transformer(
+        sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False
+) -> Batch[np.ndarray]:
     env.Env.get().require_package('sentence_transformers')
     from sentence_transformers import SentenceTransformer
@@ -53,44 +56,60 @@ def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> li
     return array.tolist()
-@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((512,), dtype=ts.FloatType(), nullable=False))
 def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
     env.Env.get().require_package('transformers')
+    device = resolve_torch_device('auto')
+    import torch
     from transformers import CLIPModel, CLIPProcessor
-    model = _lookup_model(model_id, CLIPModel.from_pretrained)
+    model = _lookup_model(model_id, CLIPModel.from_pretrained, device=device)
+    assert model.config.projection_dim == 512
     processor = _lookup_processor(model_id, CLIPProcessor.from_pretrained)
-    inputs = processor(text=text, return_tensors='pt', padding=True, truncation=True)
-    embeddings = model.get_text_features(**inputs).detach().numpy()
+    with torch.no_grad():
+        inputs = processor(text=text, return_tensors='pt', padding=True, truncation=True)
+        embeddings = model.get_text_features(**inputs.to(device)).detach().to('cpu').numpy()
     return [embeddings[i] for i in range(embeddings.shape[0])]
-@pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
+@pxt.udf(batch_size=32, return_type=ts.ArrayType((512,), dtype=ts.FloatType(), nullable=False))
 def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
     env.Env.get().require_package('transformers')
+    device = resolve_torch_device('auto')
+    import torch
     from transformers import CLIPModel, CLIPProcessor
-    model = _lookup_model(model_id, CLIPModel.from_pretrained)
+    model = _lookup_model(model_id, CLIPModel.from_pretrained, device=device)
+    assert model.config.projection_dim == 512
     processor = _lookup_processor(model_id, CLIPProcessor.from_pretrained)
-    inputs = processor(images=image, return_tensors='pt', padding=True)
-    embeddings = model.get_image_features(**inputs).detach().numpy()
+    with torch.no_grad():
+        inputs = processor(images=image, return_tensors='pt', padding=True)
+        embeddings = model.get_image_features(**inputs.to(device)).detach().to('cpu').numpy()
     return [embeddings[i] for i in range(embeddings.shape[0])]
-@pxt.udf(batch_size=32)
+@pxt.udf(batch_size=4)
 def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
     env.Env.get().require_package('transformers')
+    device = resolve_torch_device('auto')
+    import torch
     from transformers import DetrImageProcessor, DetrForObjectDetection
-    model = _lookup_model(model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision='no_timm'))
+    model = _lookup_model(
+        model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision='no_timm'), device=device)
     processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision='no_timm'))
-    inputs = processor(images=image, return_tensors='pt')
-    outputs = model(**inputs)
+    with torch.no_grad():
+        inputs = processor(images=image, return_tensors='pt')
+        outputs = model(**inputs.to(device))
+        results = processor.post_process_object_detection(
+            outputs, threshold=threshold, target_sizes=[(img.height, img.width) for img in image]
+        )
-    results = processor.post_process_object_detection(outputs, threshold=threshold)
     return [
         {
             'scores': [score.item() for score in result['scores']],
@@ -102,14 +121,23 @@ def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, t
     ]
-def _lookup_model(model_id: str, create: Callable) -> Any:
-    key = (model_id, create)  # For safety, include the `create` callable in the cache key
+T = TypeVar('T')
+def _lookup_model(model_id: str, create: Callable[[str], T], device: Optional[str] = None) -> T:
+    from torch import nn
+    key = (model_id, create, device)  # For safety, include the `create` callable in the cache key
     if key not in _model_cache:
-        _model_cache[key] = create(model_id)
+        model = create(model_id)
+        if device is not None:
+            model.to(device)
+        if isinstance(model, nn.Module):
+            model.eval()
+        _model_cache[key] = model
     return _model_cache[key]
-def _lookup_processor(model_id: str, create: Callable) -> Any:
+def _lookup_processor(model_id: str, create: Callable[[str], T]) -> T:
     key = (model_id, create)  # For safety, include the `create` callable in the cache key
     if key not in _processor_cache:
         _processor_cache[key] = create(model_id)

pixeltable/functions/openai.py CHANGED Viewed

@@ -26,8 +26,8 @@ def openai_client() -> openai.OpenAI:
 def _retry(fn: Callable) -> Callable:
     return tenacity.retry(
         retry=tenacity.retry_if_exception_type(openai.RateLimitError),
-        wait=tenacity.wait_random_exponential(min=1, max=60),
-        stop=tenacity.stop_after_attempt(6)
+        wait=tenacity.wait_random_exponential(multiplier=3, max=180),
+        stop=tenacity.stop_after_attempt(20)
     )(fn)

pixeltable/functions/util.py CHANGED Viewed

@@ -39,3 +39,14 @@ def create_nos_modules() -> List[types.ModuleType]:
         setattr(sub_module, model_id, pt_func)
     return new_modules
+def resolve_torch_device(device: str) -> str:
+    import torch
+    if device == 'auto':
+        if torch.cuda.is_available():
+            return 'cuda'
+        if torch.backends.mps.is_available():
+            return 'mps'
+        return 'cpu'
+    return device

pixeltable/index/__init__.py ADDED Viewed

@@ -0,0 +1,2 @@
+from .base import IndexBase
+from .embedding_index import EmbeddingIndex

pixeltable/index/base.py ADDED Viewed

@@ -0,0 +1,49 @@
+from __future__ import annotations
+import abc
+from typing import Any
+import sqlalchemy as sql
+import pixeltable.catalog as catalog
+class IndexBase(abc.ABC):
+    """
+    Internal interface used by the catalog and runtime system to interact with indices:
+    - types and expressions needed to create and populate the index value column
+    - creating/dropping the index
+    - TODO: translating queries into sqlalchemy predicates
+    """
+    @abc.abstractmethod
+    def __init__(self, c: catalog.Column, **kwargs: Any):
+        pass
+    @abc.abstractmethod
+    def index_value_expr(self) -> 'pixeltable.exprs.Expr':
+        """Return expression that computes the value that goes into the index"""
+        pass
+    @abc.abstractmethod
+    def index_sa_type(self) -> sql.sqltypes.TypeEngine:
+        """Return the sqlalchemy type of the index value column"""
+        pass
+    @abc.abstractmethod
+    def create_index(self, index_name: str, index_value_col: catalog.Column, conn: sql.engine.Connection) -> None:
+        """Create the index on the index value column"""
+        pass
+    @classmethod
+    @abc.abstractmethod
+    def display_name(cls) -> str:
+        pass
+    @abc.abstractmethod
+    def as_dict(self) -> dict:
+        pass
+    @classmethod
+    @abc.abstractmethod
+    def from_dict(cls, c: catalog.Column, d: dict) -> IndexBase:
+        pass

pixeltable/index/embedding_index.py ADDED Viewed

@@ -0,0 +1,95 @@
+from __future__ import annotations
+from typing import Optional
+import pgvector.sqlalchemy
+import sqlalchemy as sql
+import pixeltable.catalog as catalog
+import pixeltable.exceptions as excs
+import pixeltable.func as func
+import pixeltable.type_system as ts
+from .base import IndexBase
+class EmbeddingIndex(IndexBase):
+    """
+    Internal interface used by the catalog and runtime system to interact with (embedding) indices:
+    - types and expressions needed to create and populate the index value column
+    - creating/dropping the index
+    - translating 'matches' queries into sqlalchemy predicates
+    """
+    def __init__(
+            self, c: catalog.Column, text_embed: Optional[func.Function] = None,
+            img_embed: Optional[func.Function] = None):
+        if not c.col_type.is_string_type() and not c.col_type.is_image_type():
+            raise excs.Error(f'Embedding index requires string or image column')
+        if c.col_type.is_string_type() and text_embed is None:
+                raise excs.Error(f'Text embedding function is required for column {c.name} (parameter `txt_embed`)')
+        if c.col_type.is_image_type() and img_embed is None:
+            raise excs.Error(f'Image embedding function is required for column {c.name} (parameter `img_embed`)')
+        if text_embed is not None:
+            # verify signature
+            self._validate_embedding_fn(text_embed, 'txt_embed', ts.ColumnType.Type.STRING)
+        if img_embed is not None:
+            # verify signature
+            self._validate_embedding_fn(img_embed, 'img_embed', ts.ColumnType.Type.IMAGE)
+        from pixeltable.exprs import ColumnRef
+        self.value_expr = text_embed(ColumnRef(c)) if c.col_type.is_string_type() else img_embed(ColumnRef(c))
+        assert self.value_expr.col_type.is_array_type()
+        self.txt_embed = text_embed
+        self.img_embed = img_embed
+        vector_size = self.value_expr.col_type.shape[0]
+        assert vector_size is not None
+        self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
+    def index_value_expr(self) -> 'pixeltable.exprs.Expr':
+        """Return expression that computes the value that goes into the index"""
+        return self.value_expr
+    def index_sa_type(self) -> sql.sqltypes.TypeEngine:
+        """Return the sqlalchemy type of the index value column"""
+        return self.index_col_type
+    def create_index(self, index_name: str, index_value_col: catalog.Column, conn: sql.engine.Connection) -> None:
+        """Create the index on the index value column"""
+        idx = sql.Index(
+            index_name, index_value_col.sa_col,
+            postgresql_using='hnsw',
+            postgresql_with={'m': 16, 'ef_construction': 64},
+            postgresql_ops={index_value_col.sa_col.name: 'vector_cosine_ops'}
+        )
+        idx.create(bind=conn)
+    @classmethod
+    def display_name(cls) -> str:
+        return 'embedding'
+    @classmethod
+    def _validate_embedding_fn(cls, embed_fn: func.Function, name: str, expected_type: ts.ColumnType.Type) -> None:
+        """Validate the signature"""
+        assert isinstance(embed_fn, func.Function)
+        sig = embed_fn.signature
+        if not sig.return_type.is_array_type():
+            raise excs.Error(f'{name} must return an array, but returns {sig.return_type}')
+        else:
+            shape = sig.return_type.shape
+            if len(shape) != 1 or shape[0] == None:
+                raise excs.Error(f'{name} must return a 1D array of a specific length, but returns {sig.return_type}')
+        if len(sig.parameters) != 1 or sig.parameters_by_pos[0].col_type.type_enum != expected_type:
+            raise excs.Error(
+                f'{name} must take a single {expected_type.name.lower()} parameter, but has signature {sig}')
+    def as_dict(self) -> dict:
+        return {
+            'txt_embed': None if self.txt_embed is None else self.txt_embed.as_dict(),
+            'img_embed': None if self.img_embed is None else self.img_embed.as_dict()
+        }
+    @classmethod
+    def from_dict(cls, c: catalog.Column, d: dict) -> EmbeddingIndex:
+        txt_embed = func.Function.from_dict(d['txt_embed']) if d['txt_embed'] is not None else None
+        img_embed = func.Function.from_dict(d['img_embed']) if d['img_embed'] is not None else None
+        return cls(c, text_embed=txt_embed, img_embed=img_embed)

pixeltable/metadata/schema.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, List, Dict, get_type_hints, Type, Any, TypeVar, Tuple, Union
+from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
 import platform
 import uuid
 import dataclasses
@@ -71,16 +71,43 @@ class Dir(Base):
 @dataclasses.dataclass
-class ColumnHistory:
+class ColumnMd:
     """
-    Records when a column was added/dropped, which is needed to GC unreachable storage columns
-    (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
-    from the stored table).
-    One record per column (across all schema versions).
+    Records the non-versioned metadata of a column.
+    - immutable attributes: type, primary key, etc.
+    - when a column was added/dropped, which is needed to GC unreachable storage columns
+      (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
+      from the stored table).
      """
-    col_id: int
+    id: int
     schema_version_add: int
     schema_version_drop: Optional[int]
+    col_type: dict
+    # if True, is part of the primary key
+    is_pk: bool
+    # if set, this is a computed column
+    value_expr: Optional[dict]
+    # if True, the column is present in the stored table
+    stored: Optional[bool]
+@dataclasses.dataclass
+class IndexMd:
+    """
+    Metadata needed to instantiate an EmbeddingIndex
+    """
+    id: int
+    name: str
+    indexed_col_id: int  # column being indexed
+    index_val_col_id: int  # column holding the values to be indexed
+    index_val_undo_col_id: int  # column holding index values for deleted rows
+    schema_version_add: int
+    schema_version_drop: Optional[int]
+    class_fqn: str
+    init_args: dict[str, Any]
 @dataclasses.dataclass
@@ -91,13 +118,13 @@ class ViewMd:
     base_versions: List[Tuple[str, Optional[int]]]
     # filter predicate applied to the base table; view-only
-    predicate: Optional[Dict[str, Any]]
+    predicate: Optional[dict[str, Any]]
     # ComponentIterator subclass; only for component views
     iterator_class_fqn: Optional[str]
     # args to pass to the iterator class constructor; only for component views
-    iterator_args: Optional[Dict[str, Any]]
+    iterator_args: Optional[dict[str, Any]]
 @dataclasses.dataclass
@@ -109,15 +136,15 @@ class TableMd:
     # each version has a corresponding schema version (current_version >= current_schema_version)
     current_schema_version: int
-    # used to assign Column.id
-    next_col_id: int
+    next_col_id: int  # used to assign Column.id
+    next_idx_id: int  # used to assign IndexMd.id
     # - used to assign the rowid column in the storage table
     # - every row is assigned a unique and immutable rowid on insertion
     next_row_id: int
-    column_history: Dict[int, ColumnHistory]  # col_id -> ColumnHistory
+    column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
+    index_md: dict[int, IndexMd]  # index_id -> IndexMd
     view_md: Optional[ViewMd]
@@ -155,24 +182,20 @@ class TableVersion(Base):
 @dataclasses.dataclass
 class SchemaColumn:
     """
-    Records the logical (user-visible) schema of a table.
-    Contains the full set of columns for each new schema version: one record per (column x schema version).
+    Records the versioned metadata of a column.
     """
     pos: int
     name: str
-    col_type: dict
-    is_pk: bool
-    value_expr: Optional[dict]
-    stored: Optional[bool]
-    # if True, creates vector index for this column
-    is_indexed: bool
 @dataclasses.dataclass
 class TableSchemaVersionMd:
+    """
+    Records all versioned table metadata.
+    """
     schema_version: int
     preceding_schema_version: Optional[int]
-    columns: Dict[int, SchemaColumn]  # col_id -> SchemaColumn
+    columns: dict[int, SchemaColumn]  # col_id -> SchemaColumn
     num_retained_versions: int
     comment: str

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl