PyPI - pixeltable - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl - Mend

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (67)

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/column.py +5 -0
pixeltable/catalog/globals.py +8 -0
pixeltable/catalog/insertable_table.py +2 -2
pixeltable/catalog/table.py +27 -9
pixeltable/catalog/table_version.py +41 -68
pixeltable/catalog/view.py +3 -3
pixeltable/dataframe.py +7 -6
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/expr_eval_node.py +8 -1
pixeltable/exec/row_update_node.py +61 -0
pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
pixeltable/exprs/__init__.py +1 -2
pixeltable/exprs/comparison.py +5 -5
pixeltable/exprs/compound_predicate.py +12 -12
pixeltable/exprs/expr.py +67 -22
pixeltable/exprs/function_call.py +60 -29
pixeltable/exprs/globals.py +2 -0
pixeltable/exprs/in_predicate.py +3 -3
pixeltable/exprs/inline_array.py +18 -11
pixeltable/exprs/is_null.py +5 -5
pixeltable/exprs/method_ref.py +63 -0
pixeltable/ext/__init__.py +9 -0
pixeltable/ext/functions/__init__.py +8 -0
pixeltable/ext/functions/whisperx.py +45 -5
pixeltable/ext/functions/yolox.py +60 -14
pixeltable/func/aggregate_function.py +10 -4
pixeltable/func/callable_function.py +16 -4
pixeltable/func/expr_template_function.py +1 -1
pixeltable/func/function.py +12 -2
pixeltable/func/function_registry.py +26 -9
pixeltable/func/udf.py +32 -4
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/fireworks.py +33 -0
pixeltable/functions/globals.py +36 -1
pixeltable/functions/huggingface.py +155 -7
pixeltable/functions/image.py +242 -40
pixeltable/functions/openai.py +214 -0
pixeltable/functions/string.py +600 -8
pixeltable/functions/timestamp.py +210 -0
pixeltable/functions/together.py +106 -0
pixeltable/functions/video.py +28 -10
pixeltable/functions/whisper.py +32 -0
pixeltable/globals.py +3 -3
pixeltable/io/__init__.py +1 -1
pixeltable/io/globals.py +186 -5
pixeltable/io/label_studio.py +42 -2
pixeltable/io/pandas.py +70 -34
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +39 -0
pixeltable/metadata/notes.py +10 -0
pixeltable/plan.py +82 -7
pixeltable/tool/create_test_db_dump.py +4 -5
pixeltable/tool/doc_plugins/griffe.py +81 -0
pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
pixeltable/type_system.py +15 -14
pixeltable/utils/s3.py +1 -1
pixeltable-0.2.14.dist-info/METADATA +206 -0
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/RECORD +64 -56
pixeltable-0.2.14.dist-info/entry_points.txt +3 -0
pixeltable/exprs/image_member_access.py +0 -96
pixeltable/exprs/predicate.py +0 -44
pixeltable-0.2.12.dist-info/METADATA +0 -137
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0

pixeltable/io/globals.py CHANGED Viewed

@@ -1,5 +1,7 @@
-from typing import Any, Optional, Literal
+from typing import Any, Literal, Optional, Union
+import urllib.request
+import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import Table
 from pixeltable.io.external_store import SyncStatus
@@ -13,11 +15,14 @@ def create_label_studio_project(
         media_import_method: Literal['post', 'file', 'url'] = 'post',
         col_mapping: Optional[dict[str, str]] = None,
         sync_immediately: bool = True,
+        s3_configuration: Optional[dict[str, Any]] = None,
         **kwargs: Any
 ) -> SyncStatus:
-    # TODO(aaron-siegel): Add link in docstring to a Label Studio howto
     """
-    Creates a new Label Studio project and links it to the specified `Table`.
+    Create a new Label Studio project and link it to the specified `Table`.
+    - A tutorial notebook with fully worked examples can be found here:
+      [Using Label Studio for Annotations with Pixeltable](https://pixeltable.readme.io/docs/label-studio)
     The required parameter `label_config` specifies the Label Studio project configuration,
     in XML format, as described in the Label Studio documentation. The linked project will
@@ -41,6 +46,11 @@ def create_label_studio_project(
     * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
     * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
+    __Requirements:__
+    - `pip install label-studio-sdk`
+    - `pip install boto3` (if using S3 import storage)
     Args:
         t: The Table to link to.
         label_config: The Label Studio project configuration, in XML format.
@@ -52,6 +62,7 @@ def create_label_studio_project(
             will see inside Label Studio. Unlike `name`, it does not need to be an identifier and
             does not need to be unique. If not specified, the table name `t.name` will be used.
         media_import_method: The method to use when transferring media files to Label Studio:
             - `post`: Media will be sent to Label Studio via HTTP post. This should generally only be used for
                 prototyping; due to restrictions in Label Studio, it can only be used with projects that have
                 just one data field, and does not scale well.
@@ -63,9 +74,48 @@ def create_label_studio_project(
         col_mapping: An optional mapping of local column names to Label Studio fields.
         sync_immediately: If `True`, immediately perform an initial synchronization by
             exporting all rows of the `Table` as Label Studio tasks.
+        s3_configuration: If specified, S3 import storage will be configured for the new project. This can only
+            be used with `media_import_method='url'`, and if `media_import_method='url'` and any of the media data is
+            referenced by `s3://` URLs, then it must be specified in order for such media to display correctly
+            in the Label Studio interface.
+            The items in the `s3_configuration` dictionary correspond to kwarg
+            parameters of the Label Studio `connect_s3_import_storage` method, as described in the
+            [Label Studio connect_s3_import_storage docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.connect_s3_import_storage).
+            `bucket` must be specified; all other parameters are optional. If credentials are not specified explicitly,
+            Pixeltable will attempt to retrieve them from the environment (such as from `~/.aws/credentials`). If a title is not
+            specified, Pixeltable will use the default `'Pixeltable-S3-Import-Storage'`. All other parameters use their Label
+            Studio defaults.
         kwargs: Additional keyword arguments are passed to the `start_project` method in the Label
-            Studio SDK, as described here:
-            https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project
+            Studio SDK, as described in the
+            [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
+    Returns:
+        A `SyncStatus` representing the status of any synchronization operations that occurred.
+    Examples:
+        Create a Label Studio project whose tasks correspond to videos stored in the `video_col` column of the table `tbl`:
+        >>> config = \"\"\"
+            <View>
+                <Video name="video_obj" value="$video_col"/>
+                <Choices name="video-category" toName="video" showInLine="true">
+                    <Choice value="city"/>
+                    <Choice value="food"/>
+                    <Choice value="sports"/>
+                </Choices>
+            </View>\"\"\"
+            create_label_studio_project(tbl, config)
+        Create a Label Studio project with the same configuration, using `media_import_method='url'`,
+        whose media are stored in an S3 bucket:
+        >>> create_label_studio_project(
+                tbl,
+                config,
+                media_import_method='url',
+                s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
+            )
     """
     from pixeltable.io.label_studio import LabelStudioProject
@@ -76,6 +126,7 @@ def create_label_studio_project(
         title,
         media_import_method,
         col_mapping,
+        s3_configuration,
         **kwargs
     )
@@ -85,3 +136,133 @@ def create_label_studio_project(
         return t.sync()
     else:
         return SyncStatus.empty()
+def import_rows(
+    tbl_path: str,
+    rows: list[dict[str, Any]],
+    *,
+    schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    num_retained_versions: int = 10,
+    comment: str = ''
+    ) -> Table:
+    """
+    Creates a new `Table` from a list of dictionaries. The dictionaries must be of the form
+    `{column_name: value, ...}`. Pixeltable will attempt to infer the schema of the table from the
+    supplied data, using the most specific type that can represent all the values in a column.
+    If `schema_overrides` is specified, then for each entry `(column_name, type)` in `schema_overrides`,
+    Pixeltable will force the specified column to the specified type (and will not attempt any type inference
+    for that column).
+    All column types of the new `Table` will be nullable unless explicitly specified as non-nullable in
+    `schema_overrides`.
+    Args:
+        tbl_path: The qualified name of the table to create.
+        rows: The list of dictionaries to import.
+        schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
+            as described above.
+        primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
+        num_retained_versions: The number of retained versions of the table (see [`create_table()`][pixeltable.create_table]).
+        comment: A comment to attach to the table (see [`create_table()`][pixeltable.create_table]).
+    Returns:
+        The newly created `Table`.
+    """
+    if schema_overrides is None:
+        schema_overrides = {}
+    schema: dict[str, pxt.ColumnType] = {}
+    cols_with_nones: set[str] = set()
+    for n, row in enumerate(rows):
+        for col_name, value in row.items():
+            if col_name in schema_overrides:
+                # We do the insertion here; this will ensure that the column order matches the order
+                # in which the column names are encountered in the input data, even if `schema_overrides`
+                # is specified.
+                if col_name not in schema:
+                    schema[col_name] = schema_overrides[col_name]
+            elif value is not None:
+                # If `key` is not in `schema_overrides`, then we infer its type from the data.
+                # The column type will always be nullable by default.
+                col_type = pxt.ColumnType.infer_literal_type(value).copy(nullable=True)
+                if col_name not in schema:
+                    schema[col_name] = col_type
+                else:
+                    supertype = pxt.ColumnType.supertype(schema[col_name], col_type)
+                    if supertype is None:
+                        raise excs.Error(
+                            f'Could not infer type of column `{col_name}`; the value in row {n} does not match preceding type {schema[col_name]}: {value!r}\n'
+                            'Consider specifying the type explicitly in `schema_overrides`.'
+                        )
+                    schema[col_name] = supertype
+            else:
+                cols_with_nones.add(col_name)
+    extraneous_keys = schema_overrides.keys() - schema.keys()
+    if len(extraneous_keys) > 0:
+        raise excs.Error(f'The following columns specified in `schema_overrides` are not present in the data: {", ".join(extraneous_keys)}')
+    entirely_none_cols = cols_with_nones - schema.keys()
+    if len(entirely_none_cols) > 0:
+        # A column can only end up in `entirely_null_cols` if it was not in `schema_overrides` and
+        # was not encountered in any row with a non-None value.
+        raise excs.Error(
+            f'The following columns have no non-null values: {", ".join(entirely_none_cols)}\n'
+            'Consider specifying the type(s) explicitly in `schema_overrides`.'
+        )
+    t = pxt.create_table(tbl_path, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
+    t.insert(rows)
+    return t
+def import_json(
+    tbl_path: str,
+    filepath_or_url: str,
+    *,
+    schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    num_retained_versions: int = 10,
+    comment: str = '',
+    **kwargs: Any
+) -> Table:
+    """
+    Creates a new `Table` from a JSON file. This is a convenience method and is equivalent
+    to calling `import_data(table_path, json.loads(file_contents, **kwargs), ...)`, where `file_contents`
+    is the contents of the specified `filepath_or_url`.
+    Args:
+        tbl_path: The name of the table to create.
+        filepath_or_url: The path or URL of the JSON file.
+        schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
+            (see [`import_rows()`][pixeltable.io.import_rows]).
+        primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
+        num_retained_versions: The number of retained versions of the table (see [`create_table()`][pixeltable.create_table]).
+        comment: A comment to attach to the table (see [`create_table()`][pixeltable.create_table]).
+        kwargs: Additional keyword arguments to pass to `json.loads`.
+    Returns:
+        The newly created `Table`.
+    """
+    import json
+    import urllib.parse
+    import urllib.request
+    # TODO Consolidate this logic with other places where files/URLs are parsed
+    parsed = urllib.parse.urlparse(filepath_or_url)
+    if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
+        # local file path
+        if len(parsed.scheme) <= 1:
+            filepath = filepath_or_url
+        else:
+            filepath = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
+        with open(filepath) as fp:
+            contents = fp.read()
+    else:
+        # URL
+        contents = urllib.request.urlopen(filepath_or_url).read()
+    data = json.loads(contents, **kwargs)
+    return import_rows(tbl_path, data, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)

pixeltable/io/label_studio.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import copy
 import json
 import logging
 import os
@@ -18,6 +19,15 @@ from pixeltable.exprs import ColumnRef, DataRow, Expr
 from pixeltable.io.external_store import Project, SyncStatus
 from pixeltable.utils import coco
+# label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
+# the import two different ways to insure intercompatibility
+try:
+    # label_studio_sdk<1 compatibility
+    import label_studio_sdk.project as ls_project  # type: ignore
+except ImportError:
+    # label_studio_sdk>=1 compatibility
+    import label_studio_sdk._legacy.project as ls_project  # type: ignore
 _logger = logging.getLogger('pixeltable')
@@ -50,11 +60,11 @@ class LabelStudioProject(Project):
         """
         self.project_id = project_id
         self.media_import_method = media_import_method
-        self._project: Optional[label_studio_sdk.project.Project] = None
+        self._project: Optional[ls_project.Project] = None
         super().__init__(name, col_mapping, stored_proxies)
     @property
-    def project(self) -> label_studio_sdk.project.Project:
+    def project(self) -> ls_project.Project:
         """The `Project` object corresponding to this Label Studio project."""
         if self._project is None:
             try:
@@ -536,6 +546,7 @@ class LabelStudioProject(Project):
             title: Optional[str],
             media_import_method: Literal['post', 'file', 'url'],
             col_mapping: Optional[dict[str, str]],
+            s3_configuration: Optional[dict[str, Any]],
             **kwargs: Any
     ) -> 'LabelStudioProject':
         """
@@ -572,6 +583,31 @@ class LabelStudioProject(Project):
         if media_import_method == 'post' and len(config.data_keys) > 1:
             raise excs.Error('`media_import_method` cannot be `post` if there is more than one data key')
+        if s3_configuration is not None:
+            if media_import_method != 'url':
+                raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
+            s3_configuration = copy.copy(s3_configuration)
+            if not 'bucket' in s3_configuration:
+                raise excs.Error('`s3_configuration` must contain a `bucket` field')
+            if not 'title' in s3_configuration:
+                s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
+            if ('aws_access_key_id' not in s3_configuration and
+                'aws_secret_access_key' not in s3_configuration and
+                'aws_session_token' not in s3_configuration):
+                # Attempt to fill any missing credentials from the environment
+                try:
+                    import boto3
+                    s3_credentials = boto3.Session().get_credentials().get_frozen_credentials()
+                    _logger.info(f'Using AWS credentials from the environment for Label Studio project: {title}')
+                    s3_configuration['aws_access_key_id'] = s3_credentials.access_key
+                    s3_configuration['aws_secret_access_key'] = s3_credentials.secret_key
+                    s3_configuration['aws_session_token'] = s3_credentials.token
+                except Exception as exc:
+                    # This is not necessarily a problem, but we should log that it happened
+                    _logger.debug(f'Unable to retrieve AWS credentials from the environment: {exc}')
+                    pass
+        _logger.info(f'Creating Label Studio project: {title}')
         project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
         if media_import_method == 'file':
@@ -591,6 +627,10 @@ class LabelStudioProject(Project):
                         ) from exc
                 raise  # Handle any other exception type normally
+        if s3_configuration is not None:
+            _logger.info(f'Setting up S3 import storage for Label Studio project: {title}')
+            project.connect_s3_import_storage(**s3_configuration)
         project_id = project.get_params()['id']
         return LabelStudioProject(name, project_id, media_import_method, resolved_col_mapping)

pixeltable/io/pandas.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, Any, Iterable
+from typing import Any, Optional, Union
 import numpy as np
 import pandas as pd
@@ -9,10 +9,13 @@ import pixeltable.type_system as ts
 def import_pandas(
-    tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None
+    tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    num_retained_versions: int = 10,
+    comment: str = ''
 ) -> pxt.catalog.InsertableTable:
     """Creates a new `Table` from a Pandas `DataFrame`, with the specified name. The schema of the table
-    will be inferred from the `DataFrame`, unless `schema` is specified.
+    will be inferred from the `DataFrame`.
     The column names of the new `Table` will be identical to those in the `DataFrame`, as long as they are valid
     Pixeltable identifiers. If a column name is not a valid Pixeltable identifier, it will be normalized according to
@@ -29,15 +32,26 @@ def import_pandas(
             `schema_overrides` should be the column names of the `DataFrame` (whether or not they are valid
             Pixeltable identifiers).
     """
-    schema = _df_to_pxt_schema(df, schema_overrides)
-    tbl_rows = (dict(_df_row_to_pxt_row(row, schema)) for row in df.itertuples())
-    table = pxt.create_table(tbl_name, schema)
+    if schema_overrides is None:
+        schema_overrides = {}
+    if primary_key is None:
+        primary_key = []
+    elif isinstance(primary_key, str):
+        primary_key = [primary_key]
+    schema, pxt_pk = __df_to_pxt_schema(df, schema_overrides, primary_key)
+    tbl_rows = (dict(__df_row_to_pxt_row(row, schema)) for row in df.itertuples())
+    table = pxt.create_table(tbl_name, schema, primary_key=pxt_pk, num_retained_versions=num_retained_versions, comment=comment)
     table.insert(tbl_rows)
     return table
 def import_csv(
-    table_path: str, filepath_or_buffer, schema_overrides: Optional[dict[str, ts.ColumnType]] = None, **kwargs
+    tbl_name: str, filepath_or_buffer, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    num_retained_versions: int = 10,
+    comment: str = '',
+    **kwargs
 ) -> pxt.catalog.InsertableTable:
     """
     Creates a new `Table` from a csv file. This is a convenience method and is equivalent
@@ -45,11 +59,15 @@ def import_csv(
     See the Pandas documentation for `read_csv` for more details.
     """
     df = pd.read_csv(filepath_or_buffer, **kwargs)
-    return import_pandas(table_path, df, schema_overrides=schema_overrides)
+    return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
 def import_excel(
-    table_path: str, io, *args, schema_overrides: Optional[dict[str, ts.ColumnType]] = None, **kwargs
+    tbl_name: str, io, *args, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+    primary_key: Optional[Union[str, list[str]]] = None,
+    num_retained_versions: int = 10,
+    comment: str = '',
+    **kwargs
 ) -> pxt.catalog.InsertableTable:
     """
     Creates a new `Table` from an excel (.xlsx) file. This is a convenience method and is equivalent
@@ -57,25 +75,36 @@ def import_excel(
     See the Pandas documentation for `read_excel` for more details.
     """
     df = pd.read_excel(io, *args, **kwargs)
-    return import_pandas(table_path, df, schema_overrides=schema_overrides)
-def _df_to_pxt_schema(
-    df: pd.DataFrame, schema_overrides: Optional[dict[str, pxt.ColumnType]]
-) -> dict[str, pxt.ColumnType]:
-    if schema_overrides is not None:
-        for pd_name in schema_overrides:
-            if pd_name not in df.columns:
-                raise excs.Error(
-                    f'Column `{pd_name}` specified in `schema_overrides` does not exist in the given `DataFrame`.'
-                )
-    schema = {}
+    return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
+def __df_to_pxt_schema(
+    df: pd.DataFrame, schema_overrides: dict[str, pxt.ColumnType], primary_key: list[str]
+) -> tuple[dict[str, pxt.ColumnType], list[str]]:
+    """
+    Infers a Pixeltable schema from a Pandas DataFrame.
+    Returns:
+        A tuple containing a Pixeltable schema and a list of primary key column names.
+    """
+    for pd_name in schema_overrides:
+        if pd_name not in df.columns:
+            raise excs.Error(
+                f'Column `{pd_name}` specified in `schema_overrides` does not exist in the given `DataFrame`.'
+            )
+    for pd_name in primary_key:
+        if pd_name not in df.columns:
+            raise excs.Error(f'Primary key column `{pd_name}` does not exist in the given `DataFrame`.')
+    schema: dict[str, pxt.ColumnType] = {}
+    col_mapping: dict[str, str] = {}  # Maps Pandas column names to Pixeltable column names
     for pd_name, pd_dtype in zip(df.columns, df.dtypes):
-        if schema_overrides is not None and pd_name in schema_overrides:
+        if pd_name in schema_overrides:
             pxt_type = schema_overrides[pd_name]
         else:
-            pxt_type = _np_dtype_to_pxt_type(pd_dtype, df[pd_name])
-        pxt_name = _normalize_pxt_col_name(pd_name)
+            pxt_type = __np_dtype_to_pxt_type(pd_dtype, df[pd_name], pd_name not in primary_key)
+        pxt_name = __normalize_pxt_col_name(pd_name)
         # Ensure that column names are unique by appending a distinguishing suffix
         # to any collisions
         if pxt_name in schema:
@@ -84,10 +113,13 @@ def _df_to_pxt_schema(
                 n += 1
             pxt_name = f'{pxt_name}_{n}'
         schema[pxt_name] = pxt_type
-    return schema
+        col_mapping[pd_name] = pxt_name
+    pxt_pk = [col_mapping[pk] for pk in primary_key]
+    return schema, pxt_pk
-def _normalize_pxt_col_name(pd_name: str) -> str:
+def __normalize_pxt_col_name(pd_name: str) -> str:
     """
     Normalizes an arbitrary DataFrame column name into a valid Pixeltable identifier by:
     - replacing any non-ascii or non-alphanumeric characters with an underscore _
@@ -102,26 +134,30 @@ def _normalize_pxt_col_name(pd_name: str) -> str:
     return id
-def _np_dtype_to_pxt_type(np_dtype: np.dtype, data_col: pd.Series) -> pxt.ColumnType:
+def __np_dtype_to_pxt_type(np_dtype: np.dtype, data_col: pd.Series, nullable: bool) -> pxt.ColumnType:
     """
     Infers a Pixeltable type based on a Numpy dtype.
     """
     if np.issubdtype(np_dtype, np.integer):
-        return pxt.IntType()
+        return pxt.IntType(nullable=nullable)
     if np.issubdtype(np_dtype, np.floating):
-        return pxt.FloatType()
+        return pxt.FloatType(nullable=nullable)
     if np.issubdtype(np_dtype, np.bool_):
-        return pxt.BoolType()
+        return pxt.BoolType(nullable=nullable)
     if np_dtype == np.object_ or np.issubdtype(np_dtype, np.character):
         has_nan = any(isinstance(val, float) and np.isnan(val) for val in data_col)
-        return pxt.StringType(nullable=has_nan)
+        if has_nan and not nullable:
+            raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
+        return pxt.StringType(nullable=nullable)
     if np.issubdtype(np_dtype, np.datetime64):
         has_nat = any(pd.isnull(val) for val in data_col)
-        return pxt.TimestampType(nullable=has_nat)
+        if has_nat and not nullable:
+            raise excs.Error(f'Primary key column `{data_col.name}` cannot contain null values.')
+        return pxt.TimestampType(nullable=nullable)
     raise excs.Error(f'Unsupported dtype: {np_dtype}')
-def _df_row_to_pxt_row(row: tuple[Any, ...], schema: dict[str, pxt.ColumnType]) -> dict[str, Any]:
+def __df_row_to_pxt_row(row: tuple[Any, ...], schema: dict[str, pxt.ColumnType]) -> dict[str, Any]:
     rows = {}
     for val, (col_name, pxt_type) in zip(row[1:], schema.items()):
         if pxt_type.is_float_type():

pixeltable/metadata/__init__.py CHANGED Viewed

@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
 from .schema import SystemInfo, SystemInfoMd
 # current version of the metadata; this is incremented whenever the metadata schema changes
-VERSION = 18
+VERSION = 19
 def create_system_info(engine: sql.engine.Engine) -> None:

pixeltable/metadata/converters/convert_18.py ADDED Viewed

@@ -0,0 +1,39 @@
+from typing import Any, Optional
+import sqlalchemy as sql
+from pixeltable.metadata import register_converter
+from pixeltable.metadata.converters.util import convert_table_md
+@register_converter(version=18)
+def _(engine: sql.engine.Engine) -> None:
+    convert_table_md(
+        engine,
+        substitution_fn=__substitute_md
+    )
+def __substitute_md(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
+    # Migrate a few changed function names
+    if k == 'path' and v == 'pixeltable.functions.string.str_format':
+        return 'path', 'pixeltable.functions.string.format'
+    if k == 'path' and v.startswith('pixeltable.functions.pil.image'):
+        return 'path', v.replace('pixeltable.functions.pil.image', 'pixeltable.functions.image')
+    # Migrate deprecated `ImageMemberAccess` expressions to `FunctionCall`s
+    if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ImageMemberAccess':
+        member_name = v['member_name']
+        new_v = {
+            'fn': {
+                'path': f'pixeltable.functions.image.{member_name}',
+                '_classpath': 'pixeltable.func.callable_function.CallableFunction',
+            },
+            'args': [[0, None]],
+            'kwargs': {},
+            '_classname': 'FunctionCall',
+            'components': v['components'],
+            'group_by_stop_idx': 0,
+            'group_by_start_idx': 0,
+            'order_by_start_idx': 1,
+        }
+        return k, new_v
+    return None

pixeltable/metadata/notes.py ADDED Viewed

@@ -0,0 +1,10 @@
+# Descriptive notes for each new metadata version. These are stored in a Python dict
+# rather than as a comment, so that the existence of a description can be enforced by
+# the unit tests when new versions are added.
+VERSION_NOTES = {
+    19: 'UDF renames; ImageMemberAccess removal',
+    18: 'Restructured index metadata',
+    17: 'Renamed remotes to external_stores',
+    16: 'Query functions; deferred Expr deserialization',
+    15: 'Remotes in table metadata',
+}

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl