pixeltable 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (88)
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/globals.py +3 -0
  5. pixeltable/catalog/insertable_table.py +9 -7
  6. pixeltable/catalog/table.py +220 -143
  7. pixeltable/catalog/table_version.py +36 -18
  8. pixeltable/catalog/table_version_path.py +0 -8
  9. pixeltable/catalog/view.py +3 -3
  10. pixeltable/dataframe.py +9 -24
  11. pixeltable/env.py +107 -36
  12. pixeltable/exceptions.py +7 -4
  13. pixeltable/exec/__init__.py +1 -1
  14. pixeltable/exec/aggregation_node.py +22 -15
  15. pixeltable/exec/component_iteration_node.py +62 -41
  16. pixeltable/exec/data_row_batch.py +7 -7
  17. pixeltable/exec/exec_node.py +35 -7
  18. pixeltable/exec/expr_eval_node.py +2 -1
  19. pixeltable/exec/in_memory_data_node.py +9 -9
  20. pixeltable/exec/sql_node.py +265 -136
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/data_row.py +30 -19
  23. pixeltable/exprs/expr.py +15 -14
  24. pixeltable/exprs/expr_dict.py +55 -0
  25. pixeltable/exprs/expr_set.py +21 -15
  26. pixeltable/exprs/function_call.py +21 -8
  27. pixeltable/exprs/json_path.py +3 -6
  28. pixeltable/exprs/rowid_ref.py +2 -2
  29. pixeltable/exprs/sql_element_cache.py +5 -1
  30. pixeltable/ext/functions/whisperx.py +7 -2
  31. pixeltable/func/callable_function.py +2 -2
  32. pixeltable/func/function_registry.py +6 -7
  33. pixeltable/func/query_template_function.py +11 -12
  34. pixeltable/func/signature.py +17 -15
  35. pixeltable/func/udf.py +0 -4
  36. pixeltable/functions/__init__.py +1 -1
  37. pixeltable/functions/audio.py +4 -6
  38. pixeltable/functions/globals.py +86 -42
  39. pixeltable/functions/huggingface.py +12 -14
  40. pixeltable/functions/image.py +59 -45
  41. pixeltable/functions/json.py +0 -1
  42. pixeltable/functions/mistralai.py +2 -2
  43. pixeltable/functions/openai.py +22 -25
  44. pixeltable/functions/string.py +50 -50
  45. pixeltable/functions/timestamp.py +20 -20
  46. pixeltable/functions/together.py +26 -12
  47. pixeltable/functions/video.py +11 -20
  48. pixeltable/functions/whisper.py +2 -20
  49. pixeltable/globals.py +57 -56
  50. pixeltable/index/base.py +2 -2
  51. pixeltable/index/btree.py +7 -7
  52. pixeltable/index/embedding_index.py +8 -10
  53. pixeltable/io/external_store.py +11 -5
  54. pixeltable/io/globals.py +3 -1
  55. pixeltable/io/hf_datasets.py +4 -4
  56. pixeltable/io/label_studio.py +6 -6
  57. pixeltable/io/parquet.py +14 -13
  58. pixeltable/iterators/document.py +10 -8
  59. pixeltable/iterators/video.py +10 -1
  60. pixeltable/metadata/__init__.py +3 -2
  61. pixeltable/metadata/converters/convert_14.py +4 -2
  62. pixeltable/metadata/converters/convert_15.py +1 -1
  63. pixeltable/metadata/converters/convert_19.py +1 -0
  64. pixeltable/metadata/converters/convert_20.py +1 -1
  65. pixeltable/metadata/converters/util.py +9 -8
  66. pixeltable/metadata/schema.py +32 -21
  67. pixeltable/plan.py +136 -154
  68. pixeltable/store.py +51 -36
  69. pixeltable/tool/create_test_db_dump.py +7 -7
  70. pixeltable/tool/doc_plugins/griffe.py +3 -34
  71. pixeltable/tool/mypy_plugin.py +32 -0
  72. pixeltable/type_system.py +243 -60
  73. pixeltable/utils/arrow.py +10 -9
  74. pixeltable/utils/coco.py +4 -4
  75. pixeltable/utils/documents.py +1 -1
  76. pixeltable/utils/filecache.py +131 -84
  77. pixeltable/utils/formatter.py +1 -1
  78. pixeltable/utils/http_server.py +2 -5
  79. pixeltable/utils/media_store.py +6 -6
  80. pixeltable/utils/pytorch.py +10 -11
  81. pixeltable/utils/sql.py +2 -1
  82. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/METADATA +16 -7
  83. pixeltable-0.2.21.dist-info/RECORD +148 -0
  84. pixeltable/utils/help.py +0 -11
  85. pixeltable-0.2.19.dist-info/RECORD +0 -147
  86. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/LICENSE +0 -0
  87. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/WHEEL +0 -0
  88. {pixeltable-0.2.19.dist-info → pixeltable-0.2.21.dist-info}/entry_points.txt +0 -0
pixeltable/index/embedding_index.py CHANGED
@@ -1,18 +1,17 @@
  from __future__ import annotations
 
- from typing import Optional, Any
  import enum
+ from typing import Any, Optional
 
- import PIL.Image
  import numpy as np
- import pgvector.sqlalchemy
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
  import PIL.Image
  import sqlalchemy as sql
 
- import pixeltable.catalog as catalog
  import pixeltable.exceptions as excs
- import pixeltable.func as func
  import pixeltable.type_system as ts
+ from pixeltable import catalog, exprs, func
+
  from .base import IndexBase
 
 
@@ -58,16 +57,15 @@ class EmbeddingIndex(IndexBase):
  self._validate_embedding_fn(image_embed, 'image_embed', ts.ColumnType.Type.IMAGE)
 
  self.metric = self.Metric[metric.upper()]
- from pixeltable.exprs import ColumnRef
- self.value_expr = string_embed(ColumnRef(c)) if c.col_type.is_string_type() else image_embed(ColumnRef(c))
- assert self.value_expr.col_type.is_array_type()
+ self.value_expr = string_embed(exprs.ColumnRef(c)) if c.col_type.is_string_type() else image_embed(exprs.ColumnRef(c))
+ assert isinstance(self.value_expr.col_type, ts.ArrayType)
  self.string_embed = string_embed
  self.image_embed = image_embed
  vector_size = self.value_expr.col_type.shape[0]
  assert vector_size is not None
  self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
 
- def index_value_expr(self) -> 'pixeltable.exprs.Expr':
+ def index_value_expr(self) -> exprs.Expr:
  """Return expression that computes the value that goes into the index"""
  return self.value_expr
 
@@ -151,7 +149,7 @@ class EmbeddingIndex(IndexBase):
  img = PIL.Image.new('RGB', (512, 512))
  return_type = embed_fn.call_return_type({param_name: img})
  assert return_type is not None
- if not return_type.is_array_type():
+ if not isinstance(return_type, ts.ArrayType):
  raise excs.Error(f'{name} must return an array, but returns {return_type}')
  else:
  shape = return_type.shape
pixeltable/io/external_store.py CHANGED
@@ -69,6 +69,9 @@ class Project(ExternalStore, abc.ABC):
  An `ExternalStore` that represents a labeling project. Extends `ExternalStore` with a few
  additional capabilities specific to such projects.
  """
+
+ stored_proxies: dict[Column, Column]
+
  def __init__(self, name: str, col_mapping: dict[Column, str], stored_proxies: Optional[dict[Column, Column]]):
  super().__init__(name)
  self._col_mapping = col_mapping
@@ -116,7 +119,7 @@ class Project(ExternalStore, abc.ABC):
  tbl_version.schema_version = tbl_version.version
  proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
  # Add the columns; this will also update table metadata.
- tbl_version._add_columns(proxy_cols, conn)
+ tbl_version._add_columns(proxy_cols, conn, print_stats=False, on_error='ignore')
  # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
  # invisible to the user.
  tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
@@ -126,7 +129,7 @@ class Project(ExternalStore, abc.ABC):
  # any *other* external store for this table.)
  deletions_needed: set[Column] = set(self.stored_proxies.values())
  for name, store in tbl_version.external_stores.items():
- if name != self.name:
+ if isinstance(store, Project) and name != self.name:
  deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
  if len(deletions_needed) > 0:
  _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
@@ -210,6 +213,8 @@ class Project(ExternalStore, abc.ABC):
  If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
  in which the Pixeltable column names are resolved to the corresponding `Column` objects.
  """
+ from pixeltable import exprs
+
  is_user_specified_col_mapping = col_mapping is not None
  if col_mapping is None:
  col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
@@ -235,8 +240,9 @@ class Project(ExternalStore, abc.ABC):
  f'Column name `{ext_col}` appears as a value in `col_mapping`, but the external store '
  f'configuration has no column `{ext_col}`.'
  )
- col = table[t_col].col
- resolved_col_mapping[col] = ext_col
+ col_ref = table[t_col]
+ assert isinstance(col_ref, exprs.ColumnRef)
+ resolved_col_mapping[col_ref.col] = ext_col
  # Validate column specs
  t_col_types = table._schema
  for t_col, ext_col in col_mapping.items():
@@ -329,7 +335,7 @@ class MockProject(Project):
  def get_import_columns(self) -> dict[str, ts.ColumnType]:
  return self.import_cols
 
- def sync(self, t: Table, export_data: bool, import_data: bool) -> NotImplemented:
+ def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
  raise NotImplementedError()
 
  def delete(self) -> None:
pixeltable/io/globals.py CHANGED
@@ -43,7 +43,7 @@ def create_label_studio_project(
  The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:
 
  * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
- * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
+ * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.toml`.
 
  __Requirements:__
 
@@ -187,6 +189,8 @@ def import_rows(
  # If `key` is not in `schema_overrides`, then we infer its type from the data.
  # The column type will always be nullable by default.
  col_type = pxt.ColumnType.infer_literal_type(value, nullable=True)
+ if col_type is None:
+ raise excs.Error(f'Could not infer type for column `{col_name}`; the value in row {n} has an unsupported type: {type(value)}')
  if col_name not in schema:
  schema[col_name] = col_type
  else:
pixeltable/io/hf_datasets.py CHANGED
@@ -11,7 +11,7 @@ import pixeltable.type_system as ts
  from pixeltable import exceptions as excs
 
  if typing.TYPE_CHECKING:
- import datasets
+ import datasets # type: ignore[import-untyped]
 
  _logger = logging.getLogger(__name__)
 
@@ -34,9 +34,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
  }
 
 
- def _to_pixeltable_type(
- feature_type: Union[datasets.ClassLabel, datasets.Value, datasets.Sequence],
- ) -> Optional[ts.ColumnType]:
+ def _to_pixeltable_type(feature_type: Any) -> Optional[ts.ColumnType]:
  """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
  import datasets
 
@@ -51,6 +49,8 @@
  dtype = _to_pixeltable_type(feature_type.feature)
  length = feature_type.length if feature_type.length != -1 else None
  return ts.ArrayType(shape=(length,), dtype=dtype)
+ elif isinstance(feature_type, datasets.Image):
+ return ts.ImageType(nullable=True)
  else:
  return None
 
pixeltable/io/label_studio.py CHANGED
@@ -4,17 +4,17 @@ import logging
  import os
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, Iterator, Optional, Literal
+ from typing import Any, Iterator, Literal, Optional, cast
  from xml.etree import ElementTree
 
+ import label_studio_sdk # type: ignore[import-untyped]
  import PIL.Image
- import label_studio_sdk
  from requests.exceptions import HTTPError
 
  import pixeltable as pxt
  import pixeltable.env as env
  import pixeltable.exceptions as excs
- from pixeltable import Table, Column
+ from pixeltable import Column, Table
  from pixeltable.exprs import ColumnRef, DataRow, Expr
  from pixeltable.io.external_store import Project, SyncStatus
  from pixeltable.utils import coco
@@ -211,7 +211,7 @@ class LabelStudioProject(Project):
  assert isinstance(row[media_col_idx], PIL.Image.Image)
  file = env.Env.get().create_tmp_path(extension='.png')
  row[media_col_idx].save(file, format='png')
- task_id: int = self.project.import_tasks(file)[0]
+ task_id = self.project.import_tasks(file)[0]
  os.remove(file)
 
  # Update the task with `rowid` metadata
@@ -256,7 +256,7 @@ class LabelStudioProject(Project):
  assert self.media_import_method == 'file'
  if not col.col_type.is_media_type():
  # Not a media column; query the data directly
- expr_refs[col_name] = t[col_name]
+ expr_refs[col_name] = cast(ColumnRef, t[col_name])
  elif col in self.stored_proxies:
  # Media column that has a stored proxy; use it. We have to give it a name,
  # since it's an anonymous column
@@ -267,7 +267,7 @@ class LabelStudioProject(Project):
  # and we can just use the localpath
  expr_refs[col_name] = t[col_name].localpath
 
- df = t.select(*[t[col] for col in t_rl_cols], **expr_refs)
+ df = t.select(*[t[col.name] for col in t_rl_cols], **expr_refs)
  # The following buffers will hold `DataRow` indices that correspond to each of the selected
  # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
  # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
pixeltable/io/parquet.py CHANGED
@@ -7,24 +7,23 @@ import random
  import typing
  from collections import deque
  from pathlib import Path
- from typing import Dict, Optional, Any
+ from typing import Any, Optional
 
- import PIL.Image
  import numpy as np
+ import PIL.Image
 
  import pixeltable.exceptions as exc
  import pixeltable.type_system as ts
  from pixeltable.utils.transactional_directory import transactional_directory
 
  if typing.TYPE_CHECKING:
- import pixeltable as pxt
  import pyarrow as pa
- from pyarrow import parquet
+ import pixeltable as pxt
 
  _logger = logging.getLogger(__name__)
 
 
- def _write_batch(value_batch: Dict[str, deque], schema: pa.Schema, output_path: Path) -> None:
+ def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path: Path) -> None:
  import pyarrow as pa
  from pyarrow import parquet
 
@@ -37,7 +36,7 @@ def _write_batch(value_batch: Dict[str, deque], schema: pa.Schema, output_path:
  pydict[field.name] = value_batch[field.name]
 
  tab = pa.Table.from_pydict(pydict, schema=schema)
- parquet.write_table(tab, output_path)
+ parquet.write_table(tab, str(output_path))
 
 
  def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int = 100_000_000) -> None:
@@ -67,7 +66,7 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
  json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w')) # keep type metadata
 
  batch_num = 0
- current_value_batch: Dict[str, deque] = {k: deque() for k in df.schema.keys()}
+ current_value_batch: dict[str, deque] = {k: deque() for k in df.schema.keys()}
  current_byte_estimate = 0
 
  for data_row in df._exec():
@@ -128,13 +127,14 @@ def save_parquet(df: pxt.DataFrame, dest_path: Path, partition_size_bytes: int =
  _write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
 
 
- def parquet_schema_to_pixeltable_schema(parquet_path: str) -> Dict[str, Optional[ts.ColumnType]]:
+ def parquet_schema_to_pixeltable_schema(parquet_path: str) -> dict[str, Optional[ts.ColumnType]]:
  """Generate a default pixeltable schema for the given parquet file. Returns None for unknown types."""
  from pyarrow import parquet
+
  from pixeltable.utils.arrow import to_pixeltable_schema
 
  input_path = Path(parquet_path).expanduser()
- parquet_dataset = parquet.ParquetDataset(input_path)
+ parquet_dataset = parquet.ParquetDataset(str(input_path))
  return to_pixeltable_schema(parquet_dataset.schema)
 
 
@@ -142,7 +142,7 @@ def import_parquet(
  table_path: str,
  *,
  parquet_path: str,
- schema_overrides: Optional[Dict[str, ts.ColumnType]] = None,
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
  **kwargs: Any,
  ) -> pxt.Table:
  """Creates a new base table from a Parquet file or set of files. Requires pyarrow to be installed.
@@ -159,12 +159,13 @@ def import_parquet(
  Returns:
  A handle to the newly created [`Table`][pixeltable.Table].
  """
- import pixeltable as pxt
  from pyarrow import parquet
+
+ import pixeltable as pxt
  from pixeltable.utils.arrow import iter_tuples
 
  input_path = Path(parquet_path).expanduser()
- parquet_dataset = parquet.ParquetDataset(input_path)
+ parquet_dataset = parquet.ParquetDataset(str(input_path))
 
  schema = parquet_schema_to_pixeltable_schema(parquet_path)
  if schema_overrides is None:
@@ -181,7 +182,7 @@ def import_parquet(
  try:
  tmp_name = f'{table_path}_tmp_{random.randint(0, 100000000)}'
  tab = pxt.create_table(tmp_name, schema, **kwargs)
- for fragment in parquet_dataset.fragments:
+ for fragment in parquet_dataset.fragments: # type: ignore[attr-defined]
  for batch in fragment.to_batches():
  dict_batch = list(iter_tuples(batch))
  tab.insert(dict_batch)
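
As a usage note for the `import_parquet` signature shown above, a minimal sketch (the table name and Parquet path below are hypothetical, and importing directly from `pixeltable.io.parquet` is an assumption based on the module layout in this diff):

```python
from pixeltable.io.parquet import import_parquet, parquet_schema_to_pixeltable_schema

pq_path = '~/data/events.parquet'  # hypothetical Parquet file

# Inspect the inferred schema first; Arrow types without a Pixeltable mapping come back as None.
print(parquet_schema_to_pixeltable_schema(pq_path))

# Create a new base table from the Parquet data ('events' is a hypothetical table path).
tbl = import_parquet('events', parquet_path=pq_path)
print(tbl)
```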
pixeltable/iterators/document.py CHANGED
@@ -1,7 +1,7 @@
  import dataclasses
  import enum
  import logging
- from typing import Any, Iterable, Iterator, Optional
+ from typing import Any, Iterable, Iterator, Optional, Union
 
  import ftfy
 
@@ -166,7 +166,7 @@ class DocumentSplitter(ComponentIterator):
  return {
  'document': DocumentType(nullable=False),
  'separators': StringType(nullable=False),
- 'metadata': StringType(nullable=True),
+ 'metadata': StringType(nullable=False),
  'limit': IntType(nullable=True),
  'overlap': IntType(nullable=True),
  'skip_tags': StringType(nullable=True),
@@ -176,7 +176,7 @@
 
  @classmethod
  def output_schema(cls, *args: Any, **kwargs: Any) -> tuple[dict[str, ColumnType], list[str]]:
- schema = {'text': StringType()}
+ schema: dict[str, ColumnType] = {'text': StringType()}
  md_fields = _parse_metadata(kwargs['metadata']) if 'metadata' in kwargs else []
 
  for md_field in md_fields:
@@ -214,7 +214,7 @@
  section = next(self._sections)
  if section.text is None:
  continue
- result = {'text': section.text}
+ result: dict[str, Any] = {'text': section.text}
  for md_field in self._metadata_fields:
  if md_field == ChunkMetadata.TITLE:
  result[md_field.name.lower()] = self._doc_title
@@ -234,7 +234,7 @@
  emit_on_paragraph = Separator.PARAGRAPH in self._separators or Separator.SENTENCE in self._separators
  emit_on_heading = Separator.HEADING in self._separators or emit_on_paragraph
  # current state
- accumulated_text = [] # currently accumulated text
+ accumulated_text: list[str] = [] # currently accumulated text
  # accumulate pieces then join before emit to avoid quadratic complexity of string concatenation
 
  headings: dict[str, str] = {} # current state of observed headings (level -> text)
@@ -260,9 +260,10 @@
  yield DocumentSection(text=full_text, metadata=md)
  accumulated_text = []
 
- def process_element(el: bs4.PageElement) -> Iterator[DocumentSection]:
+ def process_element(el: Union[bs4.element.Tag, bs4.NavigableString]) -> Iterator[DocumentSection]:
  # process the element and emit sections as necessary
  nonlocal accumulated_text, headings, sourceline, emit_on_heading, emit_on_paragraph
+
  if el.name in self._skip_tags:
  return
 
@@ -282,6 +283,7 @@
  yield from emit()
  update_metadata(el)
  for child in el.children:
+ assert isinstance(child, (bs4.element.Tag, bs4.NavigableString)), type(el)
  yield from process_element(child)
 
  yield from process_element(self._doc_handle.bs_doc)
@@ -293,7 +295,7 @@
  emit_on_paragraph = Separator.PARAGRAPH in self._separators or Separator.SENTENCE in self._separators
  emit_on_heading = Separator.HEADING in self._separators or emit_on_paragraph
  # current state
- accumulated_text = [] # currently accumulated text
+ accumulated_text: list[str] = [] # currently accumulated text
  # accumulate pieces then join before emit to avoid quadratic complexity of string concatenation
  headings: dict[str, str] = {} # current state of observed headings (level -> text)
 
@@ -347,7 +349,7 @@
 
  def _pdf_sections(self) -> Iterator[DocumentSection]:
  """Create DocumentSections reflecting the pdf-specific separators"""
- import fitz
+ import fitz # type: ignore[import-untyped]
  doc: fitz.Document = self._doc_handle.pdf_doc
  assert doc is not None
 
pixeltable/iterators/video.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  import math
  from pathlib import Path
- from typing import Any, Optional
+ from typing import Any, Optional, Sequence
 
  import cv2
  import PIL.Image
@@ -29,6 +29,15 @@ class FrameIterator(ComponentIterator):
  num_frames: Exact number of frames to extract. The frames will be spaced as evenly as possible. If
  `num_frames` is greater than the number of frames in the video, all frames will be extracted.
  """
+
+ video_path: Path
+ video_reader: cv2.VideoCapture
+ fps: Optional[float]
+ num_frames: Optional[int]
+ frames_to_extract: Sequence[int]
+ frames_set: set[int]
+ next_frame_idx: int
+
  def __init__(self, video: str, *, fps: Optional[float] = None, num_frames: Optional[int] = None):
  if fps is not None and num_frames is not None:
  raise Error('At most one of `fps` or `num_frames` may be specified')
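
The `num_frames` docstring above promises evenly spaced frames, capped at the video's frame count. A rough, self-contained sketch of that selection logic (not `FrameIterator`'s actual implementation, which reads the frame count via `cv2.VideoCapture`):

```python
def evenly_spaced_frame_indices(total_frames: int, num_frames: int) -> list[int]:
    """Pick `num_frames` indices spread as evenly as possible over `total_frames`.

    If `num_frames` >= `total_frames`, every frame index is returned, matching the
    documented behavior above.
    """
    if num_frames >= total_frames:
        return list(range(total_frames))
    # Spread the requested count across the full range; round to the nearest frame.
    spacing = total_frames / num_frames
    return [min(total_frames - 1, round(i * spacing)) for i in range(num_frames)]

# Example: 5 frames out of a 100-frame video -> [0, 20, 40, 60, 80]
print(evenly_spaced_frame_indices(100, 5))
```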
pixeltable/metadata/__init__.py CHANGED
@@ -2,7 +2,7 @@ import dataclasses
  import importlib
  import os
  import pkgutil
- from typing import Callable, Dict
+ from typing import Callable
 
  import sqlalchemy as sql
  import sqlalchemy.orm as orm
@@ -24,7 +24,7 @@ def create_system_info(engine: sql.engine.Engine) -> None:
 
  # conversion functions for upgrading the metadata schema from one version to the following
  # key: old schema version
- converter_cbs: Dict[int, Callable[[sql.engine.Engine], None]] = {}
+ converter_cbs: dict[int, Callable[[sql.engine.Engine], None]] = {}
 
  def register_converter(version: int) -> Callable[[Callable[[sql.engine.Engine], None]], None]:
  def decorator(fn: Callable[[sql.engine.Engine], None]) -> None:
@@ -41,6 +41,7 @@ def upgrade_md(engine: sql.engine.Engine) -> None:
  with orm.Session(engine) as session:
  system_info = session.query(SystemInfo).one().md
  md_version = system_info['schema_version']
+ assert isinstance(md_version, int)
  if md_version == VERSION:
  return
  while md_version < VERSION:
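
The `converter_cbs` registry and the `upgrade_md` loop above implement a chained-converter pattern: each converter is registered under the schema version it upgrades *from*, and the loop applies them one version at a time. A generic, self-contained sketch of the pattern (toy metadata dict and version numbers, not Pixeltable's actual converters):

```python
from typing import Callable

VERSION = 3  # hypothetical current schema version
converter_cbs: dict[int, Callable[[dict], None]] = {}

def register_converter(version: int) -> Callable[[Callable[[dict], None]], None]:
    # Decorator: each converter module registers the function under the version it upgrades from.
    def decorator(fn: Callable[[dict], None]) -> None:
        converter_cbs[version] = fn
    return decorator

@register_converter(version=1)
def _(md: dict) -> None:
    md['renamed_field'] = md.pop('old_field', None)

@register_converter(version=2)
def _(md: dict) -> None:
    md.setdefault('new_field', [])

def upgrade_md(md: dict) -> None:
    # Apply converters one version at a time until the metadata is current.
    version = md['schema_version']
    while version < VERSION:
        converter_cbs[version](md)
        version += 1
        md['schema_version'] = version

md = {'schema_version': 1, 'old_field': 42}
upgrade_md(md)
print(md)  # {'schema_version': 3, 'renamed_field': 42, 'new_field': []}
```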
pixeltable/metadata/converters/convert_14.py CHANGED
@@ -1,11 +1,13 @@
+ from typing import Any
+
  import sqlalchemy as sql
 
- from pixeltable.metadata.schema import Table
  from pixeltable.metadata import register_converter
+ from pixeltable.metadata.schema import Table
 
 
  @register_converter(version=14)
  def _(engine: sql.engine.Engine) -> None:
- default_remotes = {'remotes': []}
+ default_remotes: dict[str, Any] = {'remotes': []}
  with engine.begin() as conn:
  conn.execute(sql.update(Table).where(Table.md['remotes'] == None).values(md=Table.md.concat(default_remotes)))
pixeltable/metadata/converters/convert_15.py CHANGED
@@ -3,7 +3,7 @@ import inspect
  import logging
  from typing import Any
 
- import cloudpickle
+ import cloudpickle # type: ignore[import-untyped]
  import sqlalchemy as sql
 
  import pixeltable.func as func
pixeltable/metadata/converters/convert_19.py CHANGED
@@ -44,3 +44,4 @@ def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
  dt_utc = dt.astimezone(datetime.timezone.utc)
  v['val'] = dt_utc.isoformat()
  return k, v
+ return None
pixeltable/metadata/converters/convert_20.py CHANGED
@@ -35,7 +35,7 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
  # but it might actually be transformed into an InlineList when it is instantiated
  # (unfortunately, there is no way to disambiguate at this stage; see comments in
  # InlineArray._from_dict() for more details).
- updated_v = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
+ updated_v: dict[str, Any] = {'_classname': 'InlineList' if v.get('is_json') else 'InlineArray'}
  if len(updated_components) > 0:
  updated_v['components'] = updated_components
  return k, updated_v
pixeltable/metadata/converters/util.py CHANGED
@@ -68,24 +68,25 @@ def __substitute_md_rec(
  substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
  ) -> Any:
  if isinstance(md, dict):
- updated_md = {}
+ updated_dict: dict[str, Any] = {}
  for k, v in md.items():
+ assert isinstance(k, str)
  substitute = substitution_fn(k, v)
  if substitute is not None:
  updated_k, updated_v = substitute
- updated_md[updated_k] = __substitute_md_rec(updated_v, substitution_fn)
+ updated_dict[updated_k] = __substitute_md_rec(updated_v, substitution_fn)
  else:
- updated_md[k] = __substitute_md_rec(v, substitution_fn)
- return updated_md
+ updated_dict[k] = __substitute_md_rec(v, substitution_fn)
+ return updated_dict
  elif isinstance(md, list):
- updated_md = []
+ updated_list: list[Any] = []
  for v in md:
  substitute = substitution_fn(None, v)
  if substitute is not None:
  _, updated_v = substitute
- updated_md.append(__substitute_md_rec(updated_v, substitution_fn))
+ updated_list.append(__substitute_md_rec(updated_v, substitution_fn))
  else:
- updated_md.append(__substitute_md_rec(v, substitution_fn))
- return updated_md
+ updated_list.append(__substitute_md_rec(v, substitution_fn))
+ return updated_list
  else:
  return md
pixeltable/metadata/schema.py CHANGED
@@ -1,37 +1,48 @@
  import dataclasses
+ import typing
  import uuid
- from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
+ from typing import Any, Optional, TypeVar, Union, get_type_hints
 
  import sqlalchemy as sql
  import sqlalchemy.orm as orm
- from sqlalchemy import ForeignKey
- from sqlalchemy import Integer, BigInteger, LargeBinary
- from sqlalchemy.dialects.postgresql import UUID, JSONB
+ from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary
+ from sqlalchemy.dialects.postgresql import JSONB, UUID
  from sqlalchemy.orm import declarative_base
+ from sqlalchemy.orm.decl_api import DeclarativeMeta
 
- Base = declarative_base()
+ # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
+ # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
+ # outside of the module in a typesafe way.
+ Base: type = declarative_base()
+ assert isinstance(Base, DeclarativeMeta)
+ base_metadata = Base.metadata
 
  T = TypeVar('T')
 
- def md_from_dict(data_class_type: Type[T], data: Any) -> T:
+ def md_from_dict(data_class_type: type[T], data: Any) -> T:
  """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
  if dataclasses.is_dataclass(data_class_type):
  fieldtypes = {f: t for f, t in get_type_hints(data_class_type).items()}
- return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
- elif hasattr(data_class_type, '__origin__'):
- if data_class_type.__origin__ is Union and type(None) in data_class_type.__args__:
+ return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data}) # type: ignore[return-value]
+
+ origin = typing.get_origin(data_class_type)
+ if origin is not None:
+ type_args = typing.get_args(data_class_type)
+ if origin is Union and type(None) in type_args:
  # Handling Optional types
- non_none_args = [arg for arg in data_class_type.__args__ if arg is not type(None)]
- if len(non_none_args) == 1:
- return md_from_dict(non_none_args[0], data) if data is not None else None
- elif data_class_type.__origin__ is list:
- return [md_from_dict(data_class_type.__args__[0], elem) for elem in data]
- elif data_class_type.__origin__ is dict:
- key_type = data_class_type.__args__[0]
- val_type = data_class_type.__args__[1]
- return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()}
- elif data_class_type.__origin__ is tuple:
- return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(data_class_type.__args__, data))
+ non_none_args = [arg for arg in type_args if arg is not type(None)]
+ assert len(non_none_args) == 1
+ return md_from_dict(non_none_args[0], data) if data is not None else None
+ elif origin is list:
+ return [md_from_dict(type_args[0], elem) for elem in data] # type: ignore[return-value]
+ elif origin is dict:
+ key_type = type_args[0]
+ val_type = type_args[1]
+ return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()} # type: ignore[return-value]
+ elif origin is tuple:
+ return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(type_args, data)) # type: ignore[return-value]
+ else:
+ assert False
  else:
  return data
 
@@ -115,7 +126,7 @@ class ViewMd:
  is_snapshot: bool
 
  # (table id, version); for mutable views, all versions are None
- base_versions: List[Tuple[str, Optional[int]]]
+ base_versions: list[tuple[str, Optional[int]]]
 
  # filter predicate applied to the base table; view-only
  predicate: Optional[dict[str, Any]]
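
The reworked `md_from_dict` above switches from the private `__origin__`/`__args__` attributes to `typing.get_origin`/`typing.get_args`. A self-contained sketch of that dispatch pattern on a toy dataclass (unrelated to Pixeltable's actual metadata classes):

```python
import dataclasses
import typing
from typing import Optional, Union, get_type_hints

@dataclasses.dataclass
class Inner:
    name: str
    tags: list[str]

@dataclasses.dataclass
class Outer:
    inner: Inner
    version: Optional[int]

def from_dict(t, data):
    """Rebuild nested dataclasses from plain dicts, dispatching on typing metadata."""
    if dataclasses.is_dataclass(t):
        hints = get_type_hints(t)
        return t(**{f: from_dict(hints[f], data[f]) for f in data})
    origin = typing.get_origin(t)        # e.g. list for list[str], Union for Optional[int]
    if origin is None:
        return data                       # plain type: use the value as-is
    args = typing.get_args(t)
    if origin is Union and type(None) in args:
        return None if data is None else from_dict(args[0], data)
    if origin is list:
        return [from_dict(args[0], v) for v in data]
    return data

print(from_dict(Outer, {'inner': {'name': 'a', 'tags': ['x', 'y']}, 'version': None}))
# Outer(inner=Inner(name='a', tags=['x', 'y']), version=None)
```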