PyPI - pixeltable - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (87) hide show

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/catalog.py +8 -7
pixeltable/catalog/column.py +11 -8
pixeltable/catalog/insertable_table.py +1 -1
pixeltable/catalog/path_dict.py +8 -6
pixeltable/catalog/table.py +20 -14
pixeltable/catalog/table_version.py +92 -55
pixeltable/catalog/table_version_path.py +7 -9
pixeltable/catalog/view.py +3 -2
pixeltable/dataframe.py +2 -2
pixeltable/env.py +205 -86
pixeltable/exceptions.py +5 -1
pixeltable/exec/aggregation_node.py +2 -1
pixeltable/exec/component_iteration_node.py +2 -2
pixeltable/exec/sql_node.py +11 -8
pixeltable/exprs/__init__.py +2 -2
pixeltable/exprs/arithmetic_expr.py +4 -4
pixeltable/exprs/array_slice.py +2 -1
pixeltable/exprs/column_property_ref.py +9 -7
pixeltable/exprs/column_ref.py +2 -1
pixeltable/exprs/comparison.py +10 -7
pixeltable/exprs/compound_predicate.py +3 -2
pixeltable/exprs/data_row.py +19 -4
pixeltable/exprs/expr.py +51 -41
pixeltable/exprs/expr_set.py +32 -9
pixeltable/exprs/function_call.py +62 -40
pixeltable/exprs/in_predicate.py +3 -2
pixeltable/exprs/inline_expr.py +200 -0
pixeltable/exprs/is_null.py +3 -2
pixeltable/exprs/json_mapper.py +5 -4
pixeltable/exprs/json_path.py +7 -1
pixeltable/exprs/literal.py +34 -7
pixeltable/exprs/method_ref.py +3 -3
pixeltable/exprs/object_ref.py +6 -5
pixeltable/exprs/row_builder.py +25 -17
pixeltable/exprs/rowid_ref.py +2 -1
pixeltable/exprs/similarity_expr.py +2 -1
pixeltable/exprs/sql_element_cache.py +30 -0
pixeltable/exprs/type_cast.py +3 -3
pixeltable/exprs/variable.py +2 -1
pixeltable/ext/functions/whisperx.py +6 -4
pixeltable/ext/functions/yolox.py +11 -9
pixeltable/func/aggregate_function.py +1 -0
pixeltable/func/function.py +28 -4
pixeltable/functions/__init__.py +4 -2
pixeltable/functions/anthropic.py +15 -5
pixeltable/functions/fireworks.py +1 -1
pixeltable/functions/globals.py +6 -1
pixeltable/functions/huggingface.py +91 -14
pixeltable/functions/image.py +20 -5
pixeltable/functions/json.py +5 -5
pixeltable/functions/mistralai.py +188 -0
pixeltable/functions/openai.py +6 -10
pixeltable/functions/string.py +3 -2
pixeltable/functions/timestamp.py +95 -7
pixeltable/functions/together.py +18 -11
pixeltable/functions/video.py +2 -2
pixeltable/functions/vision.py +69 -37
pixeltable/functions/whisper.py +4 -1
pixeltable/globals.py +5 -1
pixeltable/io/hf_datasets.py +17 -15
pixeltable/io/pandas.py +0 -2
pixeltable/io/parquet.py +15 -14
pixeltable/iterators/document.py +16 -15
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +1 -1
pixeltable/metadata/converters/convert_19.py +46 -0
pixeltable/metadata/converters/convert_20.py +56 -0
pixeltable/metadata/converters/util.py +29 -4
pixeltable/metadata/notes.py +2 -0
pixeltable/metadata/schema.py +5 -4
pixeltable/plan.py +100 -78
pixeltable/store.py +5 -1
pixeltable/tool/create_test_db_dump.py +18 -6
pixeltable/type_system.py +15 -15
pixeltable/utils/documents.py +45 -42
pixeltable/utils/formatter.py +2 -2
pixeltable-0.2.19.dist-info/LICENSE +201 -0
{pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
pixeltable-0.2.19.dist-info/RECORD +147 -0
pixeltable/exprs/inline_array.py +0 -116
pixeltable/exprs/inline_dict.py +0 -103
pixeltable-0.2.17.dist-info/LICENSE +0 -18
pixeltable-0.2.17.dist-info/RECORD +0 -144
{pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
{pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0

pixeltable/functions/openai.py CHANGED Viewed

@@ -9,10 +9,10 @@ import base64
 import io
 import pathlib
 import uuid
-from typing import Optional, TypeVar, Union, Callable, TYPE_CHECKING
+from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
-import PIL.Image
 import numpy as np
+import PIL.Image
 import tenacity
 import pixeltable as pxt
@@ -23,13 +23,11 @@ from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
     import openai
-    from openai._types import NotGiven
 @env.register_client('openai')
 def _(api_key: str) -> 'openai.OpenAI':
     import openai
     return openai.OpenAI(api_key=api_key)
@@ -42,10 +40,9 @@ def _openai_client() -> 'openai.OpenAI':
 # by OpenAI. Should we investigate making this more customizable in the future?
 def _retry(fn: Callable) -> Callable:
     import openai
     return tenacity.retry(
         retry=tenacity.retry_if_exception_type(openai.RateLimitError),
-        wait=tenacity.wait_random_exponential(multiplier=3, max=180),
+        wait=tenacity.wait_random_exponential(multiplier=1, max=60),
         stop=tenacity.stop_after_attempt(20),
     )(fn)
@@ -462,10 +459,9 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
 _T = TypeVar('_T')
-def _opt(arg: _T) -> Union[_T, 'NotGiven']:
-    from openai._types import NOT_GIVEN
-    return arg if arg is not None else NOT_GIVEN
+def _opt(arg: _T) -> Union[_T, 'openai.NotGiven']:
+    import openai
+    return arg if arg is not None else openai.NOT_GIVEN
 __all__ = local_public_names(__name__)

pixeltable/functions/string.py CHANGED Viewed

@@ -14,6 +14,7 @@ t.select(pxt_str.capitalize(t.str_col)).collect()
 from typing import Any, Optional
+import pixeltable.exceptions as excs
 import pixeltable.func as func
 from pixeltable.utils.code import local_public_names
@@ -352,7 +353,7 @@ def normalize(self: str, form: str) -> str:
         form: Unicode normal form (`‘NFC’`, `‘NFKC’`, `‘NFD’`, `‘NFKD’`)
     """
     import unicodedata
-    return unicodedata.normalize(form, self)
+    return unicodedata.normalize(form, self)  # type: ignore[arg-type]
 @func.udf(is_method=True)
 def pad(self: str, width: int, side: str = 'left', fillchar: str = ' ') -> str:
@@ -579,7 +580,7 @@ def upper(self: str) -> str:
     return self.upper()
 @func.udf(is_method=True)
-def wrap(self: str, width: int, **kwargs: Any) -> dict:
+def wrap(self: str, width: int, **kwargs: Any) -> list[str]:
     """
     Wraps the single paragraph in string so every line is at most `width` characters long.
     Returns a list of output lines, without final newlines.

pixeltable/functions/timestamp.py CHANGED Viewed

@@ -13,11 +13,14 @@ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
 from datetime import datetime
 from typing import Optional
+import sqlalchemy as sql
+from pixeltable.env import Env
 import pixeltable.func as func
 from pixeltable.utils.code import local_public_names
-@func.udf(is_method=True)
+@func.udf(is_property=True)
 def year(self: datetime) -> int:
     """
     Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
@@ -28,7 +31,12 @@ def year(self: datetime) -> int:
     return self.year
-@func.udf(is_method=True)
+@year.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('year', self)
+@func.udf(is_property=True)
 def month(self: datetime) -> int:
     """
     Between 1 and 12 inclusive.
@@ -38,7 +46,12 @@ def month(self: datetime) -> int:
     return self.month
-@func.udf(is_method=True)
+@month.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('month', self)
+@func.udf(is_property=True)
 def day(self: datetime) -> int:
     """
     Between 1 and the number of days in the given month of the given year.
@@ -48,7 +61,12 @@ def day(self: datetime) -> int:
     return self.day
-@func.udf(is_method=True)
+@day.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('day', self)
+@func.udf(is_property=True)
 def hour(self: datetime) -> int:
     """
     Between 0 and 23 inclusive.
@@ -58,7 +76,12 @@ def hour(self: datetime) -> int:
     return self.hour
-@func.udf(is_method=True)
+@hour.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('hour', self)
+@func.udf(is_property=True)
 def minute(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -68,7 +91,12 @@ def minute(self: datetime) -> int:
     return self.minute
-@func.udf(is_method=True)
+@minute.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('minute', self)
+@func.udf(is_property=True)
 def second(self: datetime) -> int:
     """
     Between 0 and 59 inclusive.
@@ -78,7 +106,12 @@ def second(self: datetime) -> int:
     return self.second
-@func.udf(is_method=True)
+@second.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('second', self)
+@func.udf(is_property=True)
 def microsecond(self: datetime) -> int:
     """
     Between 0 and 999999 inclusive.
@@ -88,6 +121,24 @@ def microsecond(self: datetime) -> int:
     return self.microsecond
+@microsecond.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('microseconds', self) - sql.extract('second', self) * 1000000
+@func.udf(is_method=True)
+def astimezone(self: datetime, tz: str) -> datetime:
+    """
+    Convert the datetime to the given time zone.
+    Args:
+        tz: The time zone to convert to. Must be a valid time zone name from the IANA Time Zone Database.
+    """
+    from zoneinfo import ZoneInfo
+    tzinfo = ZoneInfo(tz)
+    return self.astimezone(tzinfo)
 @func.udf(is_method=True)
 def weekday(self: datetime) -> int:
     """
@@ -97,6 +148,12 @@ def weekday(self: datetime) -> int:
     """
     return self.weekday()
+@weekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self) - 1
 @func.udf(is_method=True)
 def isoweekday(self: datetime) -> int:
     """
@@ -107,6 +164,11 @@ def isoweekday(self: datetime) -> int:
     return self.isoweekday()
+@isoweekday.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.extract('isodow', self)
 @func.udf(is_method=True)
 def isocalendar(self: datetime) -> dict:
     """
@@ -146,6 +208,32 @@ def strftime(self: datetime, format: str) -> str:
     return self.strftime(format)
+@func.udf(is_method=True)
+def make_timestamp(
+        year: int, month: int, day: int, hour: int = 0, minute: int = 0, second: int = 0, microsecond: int = 0
+) -> datetime:
+    """
+    Create a timestamp.
+    Equivalent to [`datetime()`](https://docs.python.org/3/library/datetime.html#datetime.datetime).
+    """
+    return datetime(year, month, day, hour, minute, second, microsecond, tzinfo=Env.get().default_time_zone)
+@make_timestamp.to_sql
+def _(
+        year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement,
+        hour: sql.ColumnElement = sql.literal(0), minute: sql.ColumnElement = sql.literal(0),
+        second: sql.ColumnElement = sql.literal(0), microsecond: sql.ColumnElement = sql.literal(0)
+) -> sql.ColumnElement:
+    return sql.func.make_timestamptz(
+        sql.cast(year, sql.Integer),
+        sql.cast(month, sql.Integer),
+        sql.cast(day, sql.Integer),
+        sql.cast(hour, sql.Integer),
+        sql.cast(minute, sql.Integer),
+        sql.cast(second + microsecond / 1000000.0, sql.Double))
 # @func.udf
 # def date(self: datetime) -> datetime:
 #     """

pixeltable/functions/together.py CHANGED Viewed

@@ -6,25 +6,25 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu
 """
 import base64
-from typing import Optional, TYPE_CHECKING
+import io
+from typing import TYPE_CHECKING, Callable, Optional
-import PIL.Image
 import numpy as np
+import PIL.Image
+import tenacity
-import io
 import pixeltable as pxt
 from pixeltable import env
 from pixeltable.func import Batch
 from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
-    import together
+    import together  # type: ignore[import-untyped]
 @env.register_client('together')
 def _(api_key: str) -> 'together.Together':
     import together
     return together.Together(api_key=api_key)
@@ -32,6 +32,15 @@ def _together_client() -> 'together.Together':
     return env.Env.get().get_client('together')
+def _retry(fn: Callable) -> Callable:
+    import together
+    return tenacity.retry(
+        retry=tenacity.retry_if_exception_type(together.error.RateLimitError),
+        wait=tenacity.wait_random_exponential(multiplier=1, max=60),
+        stop=tenacity.stop_after_attempt(20),
+    )(fn)
 @pxt.udf
 def completions(
     prompt: str,
@@ -74,8 +83,7 @@ def completions(
         >>> tbl['response'] = completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1')
     """
     return (
-        _together_client()
-        .completions.create(
+        _retry(_together_client().completions.create)(
             prompt=prompt,
             model=model,
             max_tokens=max_tokens,
@@ -139,8 +147,7 @@ def chat_completions(
         ... tbl['response'] = chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1')
     """
     return (
-        _together_client()
-        .chat.completions.create(
+        _retry(_together_client().chat.completions.create)(
             messages=messages,
             model=model,
             max_tokens=max_tokens,
@@ -198,7 +205,7 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
         >>> tbl['response'] = embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval')
     """
-    result = _together_client().embeddings.create(input=input, model=model)
+    result = _retry(_together_client().embeddings.create)(input=input, model=model)
     return [np.array(data.embedding, dtype=np.float64) for data in result.data]
@@ -248,7 +255,7 @@ def image_generations(
         >>> tbl['response'] = image_generations(tbl.prompt, model='runwayml/stable-diffusion-v1-5')
     """
     # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
-    result = _together_client().images.generate(
+    result = _retry(_together_client().images.generate)(
         prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
     )
     b64_str = result.data[0].b64_json

pixeltable/functions/video.py CHANGED Viewed

@@ -16,9 +16,9 @@ import uuid
 from pathlib import Path
 from typing import Optional
-import PIL.Image
-import av
+import av  # type: ignore[import-untyped]
 import numpy as np
+import PIL.Image
 import pixeltable.env as env
 import pixeltable.func as func

pixeltable/functions/vision.py CHANGED Viewed

@@ -13,22 +13,16 @@ t.select(pxtv.draw_bounding_boxes(t.img, boxes=t.boxes, label=t.labels)).collect
 import colorsys
 import hashlib
-import random
 from collections import defaultdict
-from typing import Optional, Union, Any
+from typing import Any, Optional, Union
-import PIL.Image
-import PIL.Image
 import numpy as np
+import PIL.Image
-import pixeltable.func as func
-import pixeltable.type_system as ts
+import pixeltable as pxt
 from pixeltable.utils.code import local_public_names
-# TODO: figure out a better submodule structure
 # the following function has been adapted from MMEval
 # (sources at https://github.com/open-mmlab/mmeval)
 # Copyright (c) OpenMMLab. All rights reserved.
@@ -164,25 +158,41 @@ def __calculate_image_tpfp(
     return tp, fp
-@func.udf(
-    return_type=ts.JsonType(nullable=False),
-    param_types=[
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-        ts.JsonType(nullable=False),
-    ],
-)
+@pxt.udf
 def eval_detections(
     pred_bboxes: list[list[int]],
     pred_labels: list[int],
     pred_scores: list[float],
     gt_bboxes: list[list[int]],
     gt_labels: list[int],
-) -> dict:
+    min_iou: float = 0.5,
+) -> list[dict]:
     """
     Evaluates the performance of a set of predicted bounding boxes against a set of ground truth bounding boxes.
+    Args:
+        pred_bboxes: List of predicted bounding boxes, each represented as [xmin, ymin, xmax, ymax].
+        pred_labels: List of predicted labels.
+        pred_scores: List of predicted scores.
+        gt_bboxes: List of ground truth bounding boxes, each represented as [xmin, ymin, xmax, ymax].
+        gt_labels: List of ground truth labels.
+        min_iou: Minimum intersection-over-union (IoU) threshold for a predicted bounding box to be
+            considered a true positive.
+    Returns:
+        A list of dictionaries, one per label class, with the following structure:
+        ```python
+        {
+            'min_iou': float,  # The value of `min_iou` used for the detections
+            'class': int,  # The label class
+            'tp': list[int],  # List of 1's and 0's indicating true positives for each
+                              # predicted bounding box of this class
+            'fp': list[int],  # List of 1's and 0's indicating false positives for each
+                              # predicted bounding box of this class; `fp[n] == 1 - tp[n]`
+            'scores': list[float],  # List of predicted scores for each bounding box of this class
+            'num_gts': int,  # Number of ground truth bounding boxes of this class
+        }
+        ```
     """
     class_idxs = list(set(pred_labels + gt_labels))
     result: list[dict] = []
@@ -195,11 +205,11 @@ def eval_detections(
         pred_filter = pred_classes_arr == class_idx
         gt_filter = gt_classes_arr == class_idx
         class_pred_scores = pred_scores_arr[pred_filter]
-        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], [0.5])
+        tp, fp = __calculate_image_tpfp(pred_bboxes_arr[pred_filter], class_pred_scores, gt_bboxes_arr[gt_filter], min_iou)
         ordered_class_pred_scores = -np.sort(-class_pred_scores)
         result.append(
             {
-                'min_iou': 0.5,
+                'min_iou': min_iou,
                 'class': class_idx,
                 'tp': tp.tolist(),
                 'fp': fp.tolist(),
@@ -210,11 +220,20 @@ def eval_detections(
     return result
-@func.uda(update_types=[ts.JsonType()], value_type=ts.JsonType(), allows_std_agg=True, allows_window=False)
-class mean_ap(func.Aggregator):
+@pxt.uda(update_types=[pxt.JsonType()], value_type=pxt.JsonType(), allows_std_agg=True, allows_window=False)
+class mean_ap(pxt.Aggregator):
     """
     Calculates the mean average precision (mAP) over
     [`eval_detections()`][pixeltable.functions.vision.eval_detections] results.
+    __Parameters:__
+    - `eval_dicts` (list[dict]): List of dictionaries as returned by
+        [`eval_detections()`][pixeltable.functions.vision.eval_detections].
+    __Returns:__
+    - A `dict[int, float]` mapping each label class to an average precision (AP) value for that class.
     """
     def __init__(self):
         self.class_tpfp: dict[int, list[dict]] = defaultdict(list)
@@ -249,7 +268,7 @@ class mean_ap(func.Aggregator):
         return result
-def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
+def __create_label_colors(labels: list[Any]) -> dict[Any, str]:
     """
     Create random colors for labels such that a particular label always gets the same color.
@@ -268,7 +287,7 @@ def _create_label_colors(labels: list[Any]) -> dict[Any, str]:
     return result
-@func.udf
+@pxt.udf
 def draw_bounding_boxes(
         img: PIL.Image.Image,
         boxes: list[list[int]],
@@ -327,34 +346,47 @@ def draw_bounding_boxes(
         if color is not None:
             box_colors = [color] * num_boxes
         else:
-            label_colors = _create_label_colors(labels)
+            label_colors = __create_label_colors(labels)
             box_colors = [label_colors[label] for label in labels]
-    from PIL import ImageDraw, ImageFont, ImageColor
+    from PIL import ImageColor, ImageDraw, ImageFont
     # set default font if not provided
-    if font is None:
-        txt_font = ImageFont.load_default()
-    else:
-        txt_font = ImageFont.truetype(font=font, size=font_size or 10)
+    txt_font: Union[ImageFont.ImageFont, ImageFont.FreeTypeFont] = (
+        ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size or 10)
+    )
     img_to_draw = img.copy()
     draw = ImageDraw.Draw(img_to_draw, 'RGBA' if fill else 'RGB')
-    for i, (bbox, label) in enumerate(zip(boxes, labels)):
+    # Draw bounding boxes
+    for i, bbox in enumerate(boxes):
         # determine color for the current box and label
         color = box_colors[i % len(box_colors)]
         if fill:
             rgb_color = ImageColor.getrgb(color)
             fill_color = rgb_color + (100,)  # semi-transparent
-            draw.rectangle(bbox, outline=color, width=width, fill=fill_color)
+            draw.rectangle(bbox, outline=color, width=width, fill=fill_color)  # type: ignore[arg-type]
         else:
-            draw.rectangle(bbox, outline=color, width=width)
+            draw.rectangle(bbox, outline=color, width=width)  # type: ignore[arg-type]
+    # Now draw labels separately, so they are not obscured by the boxes
+    for i, (bbox, label) in enumerate(zip(boxes, labels)):
         if label is not None:
             label_str = str(label)
-            margin = width + 1
-            draw.text((bbox[0] + margin, bbox[1] + margin), label_str, fill=color, font=txt_font)
+            _, _, text_width, text_height = draw.textbbox((0, 0), label_str, font=txt_font)
+            if bbox[1] - text_height - 2 >= 0:
+                # draw text above the box
+                y = bbox[1] - text_height - 2
+            else:
+                y = bbox[3]
+            if bbox[0] + text_width + 2 < img.width:
+                x = bbox[0]
+            else:
+                x = img.width - text_width - 2
+            draw.rectangle((x, y, x + text_width + 1, y + text_height + 1), fill='black')
+            draw.text((x, y), label_str, fill='white', font=txt_font)
     return img_to_draw

pixeltable/functions/whisper.py CHANGED Viewed

@@ -9,9 +9,10 @@ first `pip install openai-whisper`.
 from typing import TYPE_CHECKING, Optional
 import pixeltable as pxt
+from pixeltable.env import Env
 if TYPE_CHECKING:
-    from whisper import Whisper
+    from whisper import Whisper  # type: ignore[import-untyped]
 @pxt.udf(
@@ -71,6 +72,8 @@ def transcribe(
         >>> tbl['result'] = transcribe(tbl.audio, model='base.en')
     """
+    Env.get().require_package('whisper')
+    Env.get().require_package('torch')
     import torch
     if decode_options is None:

pixeltable/globals.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import dataclasses
 import logging
-from typing import Any, Optional, Union
+from typing import Any, Iterable, Optional, Union
 from uuid import UUID
 import pandas as pd
@@ -487,3 +487,7 @@ def configure_logging(
         remove: comma-separated list of module names
     """
     return Env.get().configure_logging(to_stdout=to_stdout, level=level, add=add, remove=remove)
+def array(elements: Iterable) -> exprs.Expr:
+    return exprs.InlineArray(elements)

pixeltable/io/hf_datasets.py CHANGED Viewed

@@ -6,7 +6,7 @@ import random
 import typing
 from typing import Union, Optional, Any
-import pixeltable
+import pixeltable as pxt
 import pixeltable.type_system as ts
 from pixeltable import exceptions as excs
@@ -81,24 +81,26 @@ def import_huggingface_dataset(
     dataset: Union[datasets.Dataset, datasets.DatasetDict],
     *,
     column_name_for_split: Optional[str] = None,
-    schema_override: Optional[dict[str, Any]] = None,
-    **kwargs,
-) -> 'pixeltable.InsertableTable':
-    """Create a new `Table` from a Huggingface dataset, or dataset dict with multiple splits.
-        Requires datasets library to be installed.
+    schema_overrides: Optional[dict[str, Any]] = None,
+    **kwargs: Any,
+) -> pxt.Table:
+    """Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
+        Requires `datasets` library to be installed.
     Args:
-        path_str: Path to the table.
-        dataset: Huggingface datasets.Dataset or datasets.DatasetDict to insert into the table.
+        table_path: Path to the table.
+        dataset: Huggingface [`datasets.Dataset`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset)
+            or [`datasets.DatasetDict`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.DatasetDict)
+            to insert into the table.
         column_name_for_split: column name to use for split information. If None, no split information will be stored.
-        schema_override: Optional dictionary mapping column names to column type to override the corresponding defaults from
-        `pixeltable.utils.hf_datasets.huggingface_schema_to_pixeltable_schema`. The column type should be a pixeltable ColumnType.
-        For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
+        schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
+            name `name` will be given type `type`, instead of being inferred from the `Dataset` or `DatasetDict`. The keys in
+            `schema_overrides` should be the column names of the `Dataset` or `DatasetDict` (whether or not they are valid
+            Pixeltable identifiers).
         kwargs: Additional arguments to pass to `create_table`.
     Returns:
-        The newly created table. The table will have loaded the data from the dataset.
+        A handle to the newly created [`Table`][pixeltable.Table].
     """
     import datasets
     import pixeltable as pxt
@@ -118,8 +120,8 @@ def import_huggingface_dataset(
         dataset_dict = dataset
     pixeltable_schema = huggingface_schema_to_pixeltable_schema(dataset)
-    if schema_override is not None:
-        pixeltable_schema.update(schema_override)
+    if schema_overrides is not None:
+        pixeltable_schema.update(schema_overrides)
     if column_name_for_split is not None:
         if column_name_for_split in pixeltable_schema:

pixeltable/io/pandas.py CHANGED Viewed

@@ -1,9 +1,7 @@
-import datetime
 from typing import Any, Optional, Union
 import numpy as np
 import pandas as pd
-import PIL.Image
 import pixeltable as pxt
 import pixeltable.exceptions as excs

pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl