PyPI - pixeltable - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl - Mend

pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (79)

pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +8 -7
pixeltable/catalog/column.py +11 -8
pixeltable/catalog/insertable_table.py +1 -1
pixeltable/catalog/path_dict.py +8 -6
pixeltable/catalog/table.py +20 -13
pixeltable/catalog/table_version.py +91 -54
pixeltable/catalog/table_version_path.py +7 -9
pixeltable/catalog/view.py +2 -1
pixeltable/dataframe.py +1 -1
pixeltable/env.py +173 -83
pixeltable/exec/aggregation_node.py +2 -1
pixeltable/exec/component_iteration_node.py +1 -1
pixeltable/exec/sql_node.py +11 -8
pixeltable/exprs/__init__.py +1 -0
pixeltable/exprs/arithmetic_expr.py +4 -4
pixeltable/exprs/array_slice.py +2 -1
pixeltable/exprs/column_property_ref.py +9 -7
pixeltable/exprs/column_ref.py +2 -1
pixeltable/exprs/comparison.py +10 -7
pixeltable/exprs/compound_predicate.py +3 -2
pixeltable/exprs/data_row.py +19 -4
pixeltable/exprs/expr.py +46 -35
pixeltable/exprs/expr_set.py +32 -9
pixeltable/exprs/function_call.py +56 -32
pixeltable/exprs/in_predicate.py +3 -2
pixeltable/exprs/inline_array.py +2 -1
pixeltable/exprs/inline_dict.py +2 -1
pixeltable/exprs/is_null.py +3 -2
pixeltable/exprs/json_mapper.py +5 -4
pixeltable/exprs/json_path.py +7 -1
pixeltable/exprs/literal.py +34 -7
pixeltable/exprs/method_ref.py +3 -3
pixeltable/exprs/object_ref.py +6 -5
pixeltable/exprs/row_builder.py +25 -17
pixeltable/exprs/rowid_ref.py +2 -1
pixeltable/exprs/similarity_expr.py +2 -1
pixeltable/exprs/sql_element_cache.py +30 -0
pixeltable/exprs/type_cast.py +3 -3
pixeltable/exprs/variable.py +2 -1
pixeltable/ext/functions/whisperx.py +4 -4
pixeltable/ext/functions/yolox.py +6 -6
pixeltable/func/aggregate_function.py +1 -0
pixeltable/func/function.py +28 -4
pixeltable/functions/__init__.py +4 -2
pixeltable/functions/anthropic.py +15 -5
pixeltable/functions/fireworks.py +1 -1
pixeltable/functions/globals.py +6 -1
pixeltable/functions/huggingface.py +2 -2
pixeltable/functions/image.py +17 -2
pixeltable/functions/json.py +5 -5
pixeltable/functions/mistralai.py +188 -0
pixeltable/functions/openai.py +6 -10
pixeltable/functions/string.py +3 -2
pixeltable/functions/timestamp.py +95 -7
pixeltable/functions/together.py +4 -4
pixeltable/functions/video.py +2 -2
pixeltable/functions/vision.py +27 -17
pixeltable/functions/whisper.py +1 -1
pixeltable/io/hf_datasets.py +17 -15
pixeltable/io/pandas.py +0 -2
pixeltable/io/parquet.py +15 -14
pixeltable/iterators/document.py +16 -15
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_19.py +46 -0
pixeltable/metadata/notes.py +1 -0
pixeltable/metadata/schema.py +5 -4
pixeltable/plan.py +100 -78
pixeltable/store.py +5 -1
pixeltable/tool/create_test_db_dump.py +4 -3
pixeltable/type_system.py +12 -14
pixeltable/utils/documents.py +45 -42
pixeltable/utils/formatter.py +2 -2
{pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
pixeltable-0.2.18.dist-info/RECORD +147 -0
pixeltable-0.2.17.dist-info/RECORD +0 -144
{pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
{pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
{pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0

pixeltable/functions/json.py CHANGED Viewed

@@ -12,18 +12,18 @@ t.select(pxt.functions.json.make_list()).collect()
 from typing import Any
-import pixeltable.func as func
+import pixeltable as pxt
 import pixeltable.type_system as ts
 from pixeltable.utils.code import local_public_names
-@func.uda(
-    update_types=[ts.JsonType(nullable=True)],
-    value_type=ts.JsonType(),
+@pxt.uda(
+    update_types=[pxt.JsonType(nullable=True)],
+    value_type=pxt.JsonType(),
     requires_order_by=False,
     allows_window=False,
 )
-class make_list(func.Aggregator):
+class make_list(pxt.Aggregator):
     """
     Collects arguments into a list.
     """

pixeltable/functions/mistralai.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""
+Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+that wrap various endpoints from the Mistral AI API. In order to use them, you must
+first `pip install mistralai` and configure your Mistral AI credentials, as described in
+the [Working with Mistral AI](https://pixeltable.readme.io/docs/working-with-mistralai) tutorial.
+"""
+from typing import TYPE_CHECKING, Optional, TypeVar, Union
+import numpy as np
+import pixeltable as pxt
+from pixeltable.env import Env, register_client
+from pixeltable.func.signature import Batch
+from pixeltable.utils.code import local_public_names
+if TYPE_CHECKING:
+    import mistralai.types.basemodel
+@register_client('mistral')
+def _(api_key: str) -> 'mistralai.Mistral':
+    import mistralai
+    return mistralai.Mistral(api_key=api_key)
+def _mistralai_client() -> 'mistralai.Mistral':
+    return Env.get().get_client('mistral')
+@pxt.udf
+def chat_completions(
+    messages: list[dict[str, str]],
+    *,
+    model: str,
+    temperature: Optional[float] = 0.7,
+    top_p: Optional[float] = 1.0,
+    max_tokens: Optional[int] = None,
+    min_tokens: Optional[int] = None,
+    stop: Optional[list[str]] = None,
+    random_seed: Optional[int] = None,
+    response_format: Optional[dict] = None,
+    safe_prompt: Optional[bool] = False,
+) -> dict:
+    """
+    Chat Completion API.
+    Equivalent to the Mistral AI `chat/completions` API endpoint.
+    For additional details, see: <https://docs.mistral.ai/api/#tag/chat>
+    __Requirements:__
+    - `pip install mistralai`
+    Args:
+        messages: The prompt(s) to generate completions for.
+        model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
+    For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/chat>
+    Returns:
+        A dictionary containing the response and other metadata.
+    Examples:
+        Add a computed column that applies the model `mistral-latest-small`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+        >>> messages = [{'role': 'user', 'content': tbl.prompt}]
+        ... tbl['response'] = completions(messages, model='mistral-latest-small')
+    """
+    Env.get().require_package('mistralai')
+    return _mistralai_client().chat.complete(
+        messages=messages,  # type: ignore[arg-type]
+        model=model,
+        temperature=temperature,
+        top_p=top_p,
+        max_tokens=_opt(max_tokens),
+        min_tokens=_opt(min_tokens),
+        stop=stop,
+        random_seed=_opt(random_seed),
+        response_format=response_format,  # type: ignore[arg-type]
+        safe_prompt=safe_prompt,
+    ).dict()
+@pxt.udf
+def fim_completions(
+    prompt: str,
+    *,
+    model: str,
+    temperature: Optional[float] = 0.7,
+    top_p: Optional[float] = 1.0,
+    max_tokens: Optional[int] = None,
+    min_tokens: Optional[int] = None,
+    stop: Optional[list[str]] = None,
+    random_seed: Optional[int] = None,
+    suffix: Optional[str] = None,
+) -> dict:
+    """
+    Fill-in-the-middle Completion API.
+    Equivalent to the Mistral AI `fim/completions` API endpoint.
+    For additional details, see: <https://docs.mistral.ai/api/#tag/fim>
+    __Requirements:__
+    - `pip install mistralai`
+    Args:
+        prompt: The text/code to complete.
+        model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
+    For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/fim>
+    Returns:
+        A dictionary containing the response and other metadata.
+    Examples:
+        Add a computed column that applies the model `codestral-latest`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+        >>> tbl['response'] = completions(tbl.prompt, model='codestral-latest')
+    """
+    Env.get().require_package('mistralai')
+    return _mistralai_client().fim.complete(
+        prompt=prompt,
+        model=model,
+        temperature=temperature,
+        top_p=top_p,
+        max_tokens=_opt(max_tokens),
+        min_tokens=_opt(min_tokens),
+        stop=stop,
+        random_seed=_opt(random_seed),
+        suffix=_opt(suffix)
+    ).dict()
+_embedding_dimensions_cache: dict[str, int] = {
+    'mistral-embed': 1024
+}
+@pxt.udf(batch_size=16, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
+def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
+    """
+    Embeddings API.
+    Equivalent to the Mistral AI `embeddings` API endpoint.
+    For additional details, see: <https://docs.mistral.ai/api/#tag/embeddings>
+    __Requirements:__
+    - `pip install mistralai`
+    Args:
+        input: Text to embed.
+        model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
+    Returns:
+        An array representing the application of the given embedding to `input`.
+    """
+    Env.get().require_package('mistralai')
+    result = _mistralai_client().embeddings.create(
+        inputs=input,
+        model=model,
+    )
+    return [np.array(data.embedding, dtype=np.float64) for data in result.data]
+@embeddings.conditional_return_type
+def _(model: str) -> pxt.ArrayType:
+    dimensions = _embedding_dimensions_cache.get(model)  # `None` if unknown model
+    return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
+_T = TypeVar('_T')
+def _opt(arg: Optional[_T]) -> Union[_T, 'mistralai.types.basemodel.Unset']:
+    from mistralai.types import UNSET
+    return arg if arg is not None else UNSET
+__all__ = local_public_names(__name__)
+def __dir__():
+    return __all__

pixeltable/functions/openai.py CHANGED Viewed

@@ -9,10 +9,10 @@ import base64
 import io
 import pathlib
 import uuid
-from typing import Optional, TypeVar, Union, Callable, TYPE_CHECKING
+from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
-import PIL.Image
 import numpy as np
+import PIL.Image
 import tenacity
 import pixeltable as pxt
@@ -23,13 +23,11 @@ from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
     import openai
-    from openai._types import NotGiven
 @env.register_client('openai')
 def _(api_key: str) -> 'openai.OpenAI':
     import openai
     return openai.OpenAI(api_key=api_key)
@@ -42,10 +40,9 @@ def _openai_client() -> 'openai.OpenAI':
 # by OpenAI. Should we investigate making this more customizable in the future?
 def _retry(fn: Callable) -> Callable:
     import openai
     return tenacity.retry(
         retry=tenacity.retry_if_exception_type(openai.RateLimitError),
-        wait=tenacity.wait_random_exponential(multiplier=3, max=180),
+        wait=tenacity.wait_random_exponential(multiplier=1, max=60),
         stop=tenacity.stop_after_attempt(20),
     )(fn)
@@ -462,10 +459,9 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
 _T = TypeVar('_T')
-def _opt(arg: _T) -> Union[_T, 'NotGiven']:
-    from openai._types import NOT_GIVEN
-    return arg if arg is not None else NOT_GIVEN
+def _opt(arg: _T) -> Union[_T, 'openai.NotGiven']:
+    import openai
+    return arg if arg is not None else openai.NOT_GIVEN
 __all__ = local_public_names(__name__)

pixeltable/functions/string.py CHANGED Viewed

@@ -14,6 +14,7 @@ t.select(pxt_str.capitalize(t.str_col)).collect()
 from typing import Any, Optional
+import pixeltable.exceptions as excs
 import pixeltable.func as func
 from pixeltable.utils.code import local_public_names
@@ -352,7 +353,7 @@ def normalize(self: str, form: str) -> str:
         form: Unicode normal form (`‘NFC’`, `‘NFKC’`, `‘NFD’`, `‘NFKD’`)
     """
     import unicodedata
-    return unicodedata.normalize(form, self)
+    return unicodedata.normalize(form, self)  # type: ignore[arg-type]
 @func.udf(is_method=True)
 def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
@@ -579,7 +580,7 @@ def upper(self: str) -> str:
     return self.upper()
 @func.udf(is_method=True)
-def wrap(self: str, width: int, **kwargs: Any) -> dict:
+def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
     """
     Wraps the single paragraph in string so every line is at most `width` characters long.
     Returns a list of output lines, without final newlines.

pixeltable/functions/timestamp.py CHANGED Viewed

@@ -13,11 +13,14 @@ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
 from datetime import datetime
 from typing import Optional
+import sqlalchemy as sql
+from pixeltable.env import Env
 import pixeltable.func as func
 from pixeltable.utils.code import local_public_names
-@func.udf(is_method=True)
+@func.udf(is_property=True)
 def year(self: datetime) -> int:
     """
     Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
@@ -28,7 +31,12 @@ def year(self: datetime) -> int:
     return self.year
-@func.udf(is_method=True)
+@year.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('year', self)
+@func.udf(is_property=True)
 def month(self: datetime) -> int:
     """
     Between 1 and 12 inclusive.
@@ -38,7 +46,12 @@ def month(self: datetime) -> int:
     return self.month
-@func.udf(is_method=True)
+@month.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('month', self)
+@func.udf(is_property=True)
 def day(self: datetime) -> int:
     """
     Between 1 and the number of days in the given month of the given year.
@@ -48,7 +61,12 @@ def day(self: datetime) -> int:
     return self.day
-@func.udf(is_method=True)
+@day.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('day', self)
+@func.udf(is_property=True)
 def hour(self: datetime) -> int:
     """
     Between 0 and 23 inclusive.
@@ -58,7 +76,12 @@ def hour(self: datetime) -> int:
     return self.hour
-@func.udf(is_method=True)
+@hour.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('hour', self)
+@func.udf(is_property=True)
 def minute(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -68,7 +91,12 @@ def minute(self: datetime) -> int:
     return self.minute
-@func.udf(is_method=True)
+@minute.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('minute', self)
+@func.udf(is_property=True)
 def second(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -78,7 +106,12 @@ def second(self: datetime) -> int:
     return self.second
-@func.udf(is_method=True)
+@second.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('second', self)
+@func.udf(is_property=True)
 def microsecond(self: datetime) -> int:
     """
     Between 0 and 999999 inclusive.
@@ -88,6 +121,24 @@ def microsecond(self: datetime) -> int:
     return self.microsecond
+@microsecond.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
+@func.udf(is_method=True)
+def astimezone(self: datetime, tz: str) -> datetime:
+    """
+    Convert the datetime to the given time zone.
+    Args:
+        tz: The time zone to convert to. Must be a valid time zone name from the IANA Time Zone Database.
+    """
+    from zoneinfo import ZoneInfo
+    tzinfo = ZoneInfo(tz)
+    return self.astimezone(tzinfo)
 @func.udf(is_method=True)
 def weekday(self: datetime) -> int:
     """
@@ -97,6 +148,12 @@ def weekday(self: datetime) -> int:
     """
     return self.weekday()
+@weekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self) - 1
 @func.udf(is_method=True)
 def isoweekday(self: datetime) -> int:
     """
@@ -107,6 +164,11 @@ def isoweekday(self: datetime) -> int:
     return self.isoweekday()
+@isoweekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self)
 @func.udf(is_method=True)
 def isocalendar(self: datetime) -> dict:
     """
@@ -146,6 +208,32 @@ def strftime(self: datetime, format: str) -> str:
     return self.strftime(format)
+@func.udf(is_method=True)
+def make_timestamp(
+        year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
+) -> datetime:
+    """
+    Create a timestamp.
+    Equivalent to [`datetime()`](https://docs.python.org/3/library/datetime.html#datetime.datetime).
+    """
+    return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=Env.get().default_time_zone)
+@make_timestamp.to_sql
+def _(
+        year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement,
+        hour: sql.ColumnElement = sql.literal(0), minute: sql.ColumnElement = sql.literal(0),
+        second: sql.ColumnElement = sql.literal(0), microsecond: sql.ColumnElement = sql.literal(0)
+) -> sql.ColumnElement:
+    return sql.func.make_timestamptz(
+        sql.cast(year, sql.Integer),
+        sql.cast(month, sql.Integer),
+        sql.cast(day, sql.Integer),
+        sql.cast(hour, sql.Integer),
+        sql.cast(minute, sql.Integer),
+        sql.cast(second + microsecond / 1000000.0, sql.Double))
 # @func.udf
 # def date(self: datetime) -> datetime:
 #     """

pixeltable/functions/together.py CHANGED Viewed

@@ -6,19 +6,19 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
 """
 import base64
-from typing import Optional, TYPE_CHECKING
+import io
+from typing import TYPE_CHECKING, Optional
-import PIL.Image
 import numpy as np
+import PIL.Image
-import io
 import pixeltable as pxt
 from pixeltable import env
 from pixeltable.func import Batch
 from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
-    import together
+    import together  # type: ignore[import-untyped]
 @env.register_client('together')

pixeltable/functions/video.py CHANGED Viewed

@@ -16,9 +16,9 @@ import uuid
 from pathlib import Path
 from typing import Optional
-import PIL.Image
-import av
+import av  # type: ignore[import-untyped]
 import numpy as np
+import PIL.Image
 import pixeltable.env as env
 import pixeltable.func as func

pixeltable/functions/vision.py CHANGED Viewed

@@ -13,19 +13,16 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
 import colorsys
 import hashlib
-import random
 from collections import defaultdict
-from typing import Optional, Union, Any
+from typing import Any, Optional, Union
-import PIL.Image
-import PIL.Image
 import numpy as np
+import PIL.Image
 import pixeltable.func as func
 import pixeltable.type_system as ts
 from pixeltable.utils.code import local_public_names
 # TODO: figure out a better submodule structure
@@ -180,7 +177,7 @@ def eval_detections(
     pred_scores: list[float],
     gt_bboxes: list[list[int]],
     gt_labels: list[int],
-) -> dict:
+) -> list[dict]:
     """
     Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
     """
@@ -195,7 +192,7 @@ def eval_detections(
         pred_filter = pred_classes_arr == class_idx
         gt_filter = gt_classes_arr == class_idx
         class_pred_scores = pred_scores_arr[pred_filter]
-        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], [0.5])
+        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], 0.5)
         ordered_class_pred_scores = -np.sort(-class_pred_scores)
         result.append(
             {
@@ -330,31 +327,44 @@ def draw_bounding_boxes(
             label_colors = _create_label_colors(labels)
             box_colors = [label_colors[label] for label in labels]
-    from PIL import ImageDraw, ImageFont, ImageColor
+    from PIL import ImageColor, ImageDraw, ImageFont
     # set default font if not provided
-    if font is None:
-        txt_font = ImageFont.load_default()
-    else:
-        txt_font = ImageFont.truetype(font=font, size=font_size or 10)
+    txt_font: Union[ImageFont.ImageFont, ImageFont.FreeTypeFont] = (
+        ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
+    )
     img_to_draw = img.copy()
     draw = ImageDraw.Draw(img_to_draw, 'RGBA' if fill else 'RGB')
-    for i, (bbox, label) in enumerate(zip(boxes, labels)):
+    # Draw bounding boxes
+    for i, bbox in enumerate(boxes):
         # determine color for the current box and label
         color = box_colors[i % len(box_colors)]
         if fill:
             rgb_color = ImageColor.getrgb(color)
             fill_color = rgb_color + (100,)  # semi-transparent
-            draw.rectangle(bbox, outline=color, width=width, fill=fill_color)
+            draw.rectangle(bbox, outline=color, width=width, fill=fill_color)  # type: ignore[arg-type]
         else:
-            draw.rectangle(bbox, outline=color, width=width)
+            draw.rectangle(bbox, outline=color, width=width)  # type: ignore[arg-type]
+    # Now draw labels separately, so they are not obscured by the boxes
+    for i, (bbox, label) in enumerate(zip(boxes, labels)):
         if label is not None:
             label_str = str(label)
-            margin = width + 1
-            draw.text((bbox[0] + margin, bbox[1] + margin), label_str, fill=color, font=txt_font)
+            _, _, text_width, text_height = draw.textbbox((0, 0), label_str, font=txt_font)
+            if bbox[1] - text_height - 2 >= 0:
+                # draw text above the box
+                y = bbox[1] - text_height - 2
+            else:
+                y = bbox[3]
+            if bbox[0] + text_width + 2 < img.width:
+                x = bbox[0]
+            else:
+                x = img.width - text_width - 2
+            draw.rectangle((x, y, x + text_width + 1, y + text_height + 1), fill='black')
+            draw.text((x, y), label_str, fill='white', font=txt_font)
     return img_to_draw

pixeltable/functions/whisper.py CHANGED Viewed

@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Optional
 import pixeltable as pxt
 if TYPE_CHECKING:
-    from whisper import Whisper
+    from whisper import Whisper  # type: ignore[import-untyped]
 @pxt.udf(

pixeltable/io/hf_datasets.py CHANGED Viewed

@@ -6,7 +6,7 @@ import random
 import typing
 from typing import Union, Optional, Any
-import pixeltable
+import pixeltable as pxt
 import pixeltable.type_system as ts
 from pixeltable import exceptions as excs
@@ -81,24 +81,26 @@ def import_huggingface_dataset(
     dataset: Union[datasets.Dataset, datasets.DatasetDict],
     *,
     column_name_for_split: Optional[str] = None,
-    schema_override: Optional[dict[str, Any]] = None,
-    **kwargs,
-) -> 'pixeltable.InsertableTable':
-    """Create a new `Table` from a Huggingface dataset, or dataset dict with multiple splits.
-        Requires datasets library to be installed.
+    schema_overrides: Optional[dict[str, Any]] = None,
+    **kwargs: Any,
+) -> pxt.Table:
+    """Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
+        Requires `datasets` library to be installed.
     Args:
-        path_str: Path to the table.
-        dataset: Huggingface datasets.Dataset or datasets.DatasetDict to insert into the table.
+        table_path: Path to the table.
+        dataset: Huggingface [`datasets.Dataset`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset)
+            or [`datasets.DatasetDict`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.DatasetDict)
+            to insert into the table.
         column_name_for_split: column name to use for split information. If None, no split information will be stored.
-        schema_override: Optional dictionary mapping column names to column type to override the corresponding defaults from
-        `pixeltable.utils.hf_datasets.huggingface_schema_to_pixeltable_schema`. The column type should be a pixeltable ColumnType.
-        For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
+        schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
+            name `name` will be given type `type`, instead of being inferred from the `Dataset` or `DatasetDict`. The keys in
+            `schema_overrides` should be the column names of the `Dataset` or `DatasetDict` (whether or not they are valid
+            Pixeltable identifiers).
         kwargs: Additional arguments to pass to `create_table`.
     Returns:
-        The newly created table. The table will have loaded the data from the dataset.
+        A handle to the newly created [`Table`][pixeltable.Table].
     """
     import datasets
     import pixeltable as pxt
@@ -118,8 +120,8 @@ def import_huggingface_dataset(
         dataset_dict = dataset
     pixeltable_schema = huggingface_schema_to_pixeltable_schema(dataset)
-    if schema_override is not None:
-        pixeltable_schema.update(schema_override)
+    if schema_overrides is not None:
+        pixeltable_schema.update(schema_overrides)
     if column_name_for_split is not None:
         if column_name_for_split in pixeltable_schema:

pixeltable/io/pandas.py CHANGED Viewed

@@ -1,9 +1,7 @@
-import datetime
 from typing import Any, Optional, Union
 import numpy as np
 import pandas as pd
-import PIL.Image
 import pixeltable as pxt
 import pixeltable.exceptions as excs

pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl