pixeltable 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/table.py +5 -5
- pixeltable/catalog/table_version.py +12 -14
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +7 -6
- pixeltable/exec/expr_eval_node.py +8 -1
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/__init__.py +0 -1
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +12 -12
- pixeltable/exprs/expr.py +32 -0
- pixeltable/exprs/in_predicate.py +3 -3
- pixeltable/exprs/is_null.py +5 -5
- pixeltable/func/aggregate_function.py +10 -4
- pixeltable/func/callable_function.py +4 -0
- pixeltable/func/function_registry.py +2 -0
- pixeltable/functions/globals.py +36 -1
- pixeltable/functions/huggingface.py +62 -4
- pixeltable/functions/image.py +17 -0
- pixeltable/functions/string.py +622 -7
- pixeltable/functions/video.py +26 -8
- pixeltable/globals.py +3 -3
- pixeltable/io/globals.py +53 -4
- pixeltable/io/label_studio.py +42 -2
- pixeltable/io/pandas.py +18 -7
- pixeltable/plan.py +6 -6
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +77 -0
- pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- pixeltable/utils/s3.py +1 -1
- pixeltable-0.2.13.dist-info/METADATA +206 -0
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/RECORD +37 -34
- pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
- pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.12.dist-info/METADATA +0 -137
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/functions/video.py
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
from pixeltable.functions import video as pxt_video
|
|
8
|
+
|
|
9
|
+
t = pxt.get_table(...)
|
|
10
|
+
t.select(pxt_video.extract_audio(t.video_col)).collect()
|
|
11
|
+
```
|
|
12
|
+
"""
|
|
13
|
+
|
|
1
14
|
import tempfile
|
|
2
15
|
import uuid
|
|
3
16
|
from pathlib import Path
|
|
@@ -43,6 +56,9 @@ _format_defaults = { # format -> (codec, ext)
|
|
|
43
56
|
allows_window=False,
|
|
44
57
|
)
|
|
45
58
|
class make_video(func.Aggregator):
|
|
59
|
+
"""
|
|
60
|
+
Aggregator that creates a video from a sequence of images.
|
|
61
|
+
"""
|
|
46
62
|
def __init__(self, fps: int = 25):
|
|
47
63
|
"""follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
|
|
48
64
|
self.container: Optional[av.container.OutputContainer] = None
|
|
@@ -84,7 +100,14 @@ _extract_audio_param_types = [
|
|
|
84
100
|
def extract_audio(
|
|
85
101
|
video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
|
|
86
102
|
) -> Optional[str]:
|
|
87
|
-
"""
|
|
103
|
+
"""
|
|
104
|
+
Extract an audio stream from a video file, save it as a media file and return its path.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
stream_idx: Index of the audio stream to extract.
|
|
108
|
+
format: The target audio format (`'wav'`, `'mp3'`, `'flac'`).
|
|
109
|
+
codec: The codec to use for the audio stream. If not provided, a default codec will be used.
|
|
110
|
+
"""
|
|
88
111
|
if format not in _format_defaults:
|
|
89
112
|
raise ValueError(f'extract_audio(): unsupported audio format: {format}')
|
|
90
113
|
default_codec, ext = _format_defaults[format]
|
|
@@ -107,13 +130,8 @@ def extract_audio(
|
|
|
107
130
|
|
|
108
131
|
@func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)])
|
|
109
132
|
def get_metadata(video: str) -> dict:
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
Args:
|
|
113
|
-
video (str): Path to the video file.
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
A dictionary containing the associated metadata.
|
|
133
|
+
"""
|
|
134
|
+
Gets various metadata associated with a video file and returns it as a dictionary.
|
|
117
135
|
"""
|
|
118
136
|
with av.open(video) as container:
|
|
119
137
|
assert isinstance(container, av.container.InputContainer)
|
pixeltable/globals.py
CHANGED
|
@@ -7,10 +7,10 @@ import sqlalchemy as sql
|
|
|
7
7
|
from sqlalchemy.util.preloaded import orm
|
|
8
8
|
|
|
9
9
|
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.exprs as exprs
|
|
10
11
|
from pixeltable import catalog, func, DataFrame
|
|
11
12
|
from pixeltable.catalog import Catalog
|
|
12
13
|
from pixeltable.env import Env
|
|
13
|
-
from pixeltable.exprs import Predicate
|
|
14
14
|
from pixeltable.iterators import ComponentIterator
|
|
15
15
|
from pixeltable.metadata import schema
|
|
16
16
|
|
|
@@ -81,7 +81,7 @@ def create_view(
|
|
|
81
81
|
base: Union[catalog.Table, DataFrame],
|
|
82
82
|
*,
|
|
83
83
|
schema: Optional[dict[str, Any]] = None,
|
|
84
|
-
filter: Optional[
|
|
84
|
+
filter: Optional[exprs.Expr] = None,
|
|
85
85
|
is_snapshot: bool = False,
|
|
86
86
|
iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
|
|
87
87
|
num_retained_versions: int = 10,
|
|
@@ -94,7 +94,7 @@ def create_view(
|
|
|
94
94
|
path_str: Path to the view.
|
|
95
95
|
base: Table (i.e., table or view or snapshot) or DataFrame to base the view on.
|
|
96
96
|
schema: dictionary mapping column names to column types, value expressions, or to column specifications.
|
|
97
|
-
filter:
|
|
97
|
+
filter: predicate to filter rows of the base table.
|
|
98
98
|
is_snapshot: Whether the view is a snapshot.
|
|
99
99
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
100
100
|
the base table.
|
pixeltable/io/globals.py
CHANGED
|
@@ -13,11 +13,14 @@ def create_label_studio_project(
|
|
|
13
13
|
media_import_method: Literal['post', 'file', 'url'] = 'post',
|
|
14
14
|
col_mapping: Optional[dict[str, str]] = None,
|
|
15
15
|
sync_immediately: bool = True,
|
|
16
|
+
s3_configuration: Optional[dict[str, Any]] = None,
|
|
16
17
|
**kwargs: Any
|
|
17
18
|
) -> SyncStatus:
|
|
18
|
-
# TODO(aaron-siegel): Add link in docstring to a Label Studio howto
|
|
19
19
|
"""
|
|
20
|
-
|
|
20
|
+
Create a new Label Studio project and link it to the specified `Table`.
|
|
21
|
+
|
|
22
|
+
- A tutorial notebook with fully worked examples can be found here:
|
|
23
|
+
[Using Label Studio for Annotations with Pixeltable](https://pixeltable.readme.io/docs/label-studio)
|
|
21
24
|
|
|
22
25
|
The required parameter `label_config` specifies the Label Studio project configuration,
|
|
23
26
|
in XML format, as described in the Label Studio documentation. The linked project will
|
|
@@ -41,6 +44,11 @@ def create_label_studio_project(
|
|
|
41
44
|
* Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
|
|
42
45
|
* Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
|
|
43
46
|
|
|
47
|
+
__Requirements:__
|
|
48
|
+
|
|
49
|
+
- `pip install label-studio-sdk`
|
|
50
|
+
- `pip install boto3` (if using S3 import storage)
|
|
51
|
+
|
|
44
52
|
Args:
|
|
45
53
|
t: The Table to link to.
|
|
46
54
|
label_config: The Label Studio project configuration, in XML format.
|
|
@@ -52,6 +60,7 @@ def create_label_studio_project(
|
|
|
52
60
|
will see inside Label Studio. Unlike `name`, it does not need to be an identifier and
|
|
53
61
|
does not need to be unique. If not specified, the table name `t.name` will be used.
|
|
54
62
|
media_import_method: The method to use when transferring media files to Label Studio:
|
|
63
|
+
|
|
55
64
|
- `post`: Media will be sent to Label Studio via HTTP post. This should generally only be used for
|
|
56
65
|
prototyping; due to restrictions in Label Studio, it can only be used with projects that have
|
|
57
66
|
just one data field, and does not scale well.
|
|
@@ -63,9 +72,48 @@ def create_label_studio_project(
|
|
|
63
72
|
col_mapping: An optional mapping of local column names to Label Studio fields.
|
|
64
73
|
sync_immediately: If `True`, immediately perform an initial synchronization by
|
|
65
74
|
exporting all rows of the `Table` as Label Studio tasks.
|
|
75
|
+
s3_configuration: If specified, S3 import storage will be configured for the new project. This can only
|
|
76
|
+
be used with `media_import_method='url'`, and if `media_import_method='url'` and any of the media data is
|
|
77
|
+
referenced by `s3://` URLs, then it must be specified in order for such media to display correctly
|
|
78
|
+
in the Label Studio interface.
|
|
79
|
+
|
|
80
|
+
The items in the `s3_configuration` dictionary correspond to kwarg
|
|
81
|
+
parameters of the Label Studio `connect_s3_import_storage` method, as described in the
|
|
82
|
+
[Label Studio connect_s3_import_storage docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.connect_s3_import_storage).
|
|
83
|
+
`bucket` must be specified; all other parameters are optional. If credentials are not specified explicitly,
|
|
84
|
+
Pixeltable will attempt to retrieve them from the environment (such as from `~/.aws/credentials`). If a title is not
|
|
85
|
+
specified, Pixeltable will use the default `'Pixeltable-S3-Import-Storage'`. All other parameters use their Label
|
|
86
|
+
Studio defaults.
|
|
66
87
|
kwargs: Additional keyword arguments are passed to the `start_project` method in the Label
|
|
67
|
-
Studio SDK, as described
|
|
68
|
-
https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project
|
|
88
|
+
Studio SDK, as described in the
|
|
89
|
+
[Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
A `SyncStatus` representing the status of any synchronization operations that occurred.
|
|
93
|
+
|
|
94
|
+
Examples:
|
|
95
|
+
Create a Label Studio project whose tasks correspond to videos stored in the `video_col` column of the table `tbl`:
|
|
96
|
+
|
|
97
|
+
>>> config = \"\"\"
|
|
98
|
+
<View>
|
|
99
|
+
<Video name="video_obj" value="$video_col"/>
|
|
100
|
+
<Choices name="video-category" toName="video_obj" showInLine="true">
|
|
101
|
+
<Choice value="city"/>
|
|
102
|
+
<Choice value="food"/>
|
|
103
|
+
<Choice value="sports"/>
|
|
104
|
+
</Choices>
|
|
105
|
+
</View>\"\"\"
|
|
106
|
+
create_label_studio_project(tbl, config)
|
|
107
|
+
|
|
108
|
+
Create a Label Studio project with the same configuration, using `media_import_method='url'`,
|
|
109
|
+
whose media are stored in an S3 bucket:
|
|
110
|
+
|
|
111
|
+
>>> create_label_studio_project(
|
|
112
|
+
tbl,
|
|
113
|
+
config,
|
|
114
|
+
media_import_method='url',
|
|
115
|
+
s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
|
|
116
|
+
)
|
|
69
117
|
"""
|
|
70
118
|
from pixeltable.io.label_studio import LabelStudioProject
|
|
71
119
|
|
|
@@ -76,6 +124,7 @@ def create_label_studio_project(
|
|
|
76
124
|
title,
|
|
77
125
|
media_import_method,
|
|
78
126
|
col_mapping,
|
|
127
|
+
s3_configuration,
|
|
79
128
|
**kwargs
|
|
80
129
|
)
|
|
81
130
|
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
@@ -18,6 +19,15 @@ from pixeltable.exprs import ColumnRef, DataRow, Expr
|
|
|
18
19
|
from pixeltable.io.external_store import Project, SyncStatus
|
|
19
20
|
from pixeltable.utils import coco
|
|
20
21
|
|
|
22
|
+
# label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
|
|
23
|
+
# the import two different ways to ensure intercompatibility
|
|
24
|
+
try:
|
|
25
|
+
# label_studio_sdk<1 compatibility
|
|
26
|
+
import label_studio_sdk.project as ls_project # type: ignore
|
|
27
|
+
except ImportError:
|
|
28
|
+
# label_studio_sdk>=1 compatibility
|
|
29
|
+
import label_studio_sdk._legacy.project as ls_project # type: ignore
|
|
30
|
+
|
|
21
31
|
_logger = logging.getLogger('pixeltable')
|
|
22
32
|
|
|
23
33
|
|
|
@@ -50,11 +60,11 @@ class LabelStudioProject(Project):
|
|
|
50
60
|
"""
|
|
51
61
|
self.project_id = project_id
|
|
52
62
|
self.media_import_method = media_import_method
|
|
53
|
-
self._project: Optional[
|
|
63
|
+
self._project: Optional[ls_project.Project] = None
|
|
54
64
|
super().__init__(name, col_mapping, stored_proxies)
|
|
55
65
|
|
|
56
66
|
@property
|
|
57
|
-
def project(self) ->
|
|
67
|
+
def project(self) -> ls_project.Project:
|
|
58
68
|
"""The `Project` object corresponding to this Label Studio project."""
|
|
59
69
|
if self._project is None:
|
|
60
70
|
try:
|
|
@@ -536,6 +546,7 @@ class LabelStudioProject(Project):
|
|
|
536
546
|
title: Optional[str],
|
|
537
547
|
media_import_method: Literal['post', 'file', 'url'],
|
|
538
548
|
col_mapping: Optional[dict[str, str]],
|
|
549
|
+
s3_configuration: Optional[dict[str, Any]],
|
|
539
550
|
**kwargs: Any
|
|
540
551
|
) -> 'LabelStudioProject':
|
|
541
552
|
"""
|
|
@@ -572,6 +583,31 @@ class LabelStudioProject(Project):
|
|
|
572
583
|
if media_import_method == 'post' and len(config.data_keys) > 1:
|
|
573
584
|
raise excs.Error('`media_import_method` cannot be `post` if there is more than one data key')
|
|
574
585
|
|
|
586
|
+
if s3_configuration is not None:
|
|
587
|
+
if media_import_method != 'url':
|
|
588
|
+
raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
|
|
589
|
+
s3_configuration = copy.copy(s3_configuration)
|
|
590
|
+
if not 'bucket' in s3_configuration:
|
|
591
|
+
raise excs.Error('`s3_configuration` must contain a `bucket` field')
|
|
592
|
+
if not 'title' in s3_configuration:
|
|
593
|
+
s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
|
|
594
|
+
if ('aws_access_key_id' not in s3_configuration and
|
|
595
|
+
'aws_secret_access_key' not in s3_configuration and
|
|
596
|
+
'aws_session_token' not in s3_configuration):
|
|
597
|
+
# Attempt to fill any missing credentials from the environment
|
|
598
|
+
try:
|
|
599
|
+
import boto3
|
|
600
|
+
s3_credentials = boto3.Session().get_credentials().get_frozen_credentials()
|
|
601
|
+
_logger.info(f'Using AWS credentials from the environment for Label Studio project: {title}')
|
|
602
|
+
s3_configuration['aws_access_key_id'] = s3_credentials.access_key
|
|
603
|
+
s3_configuration['aws_secret_access_key'] = s3_credentials.secret_key
|
|
604
|
+
s3_configuration['aws_session_token'] = s3_credentials.token
|
|
605
|
+
except Exception as exc:
|
|
606
|
+
# This is not necessarily a problem, but we should log that it happened
|
|
607
|
+
_logger.debug(f'Unable to retrieve AWS credentials from the environment: {exc}')
|
|
608
|
+
pass
|
|
609
|
+
|
|
610
|
+
_logger.info(f'Creating Label Studio project: {title}')
|
|
575
611
|
project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
|
|
576
612
|
|
|
577
613
|
if media_import_method == 'file':
|
|
@@ -591,6 +627,10 @@ class LabelStudioProject(Project):
|
|
|
591
627
|
) from exc
|
|
592
628
|
raise # Handle any other exception type normally
|
|
593
629
|
|
|
630
|
+
if s3_configuration is not None:
|
|
631
|
+
_logger.info(f'Setting up S3 import storage for Label Studio project: {title}')
|
|
632
|
+
project.connect_s3_import_storage(**s3_configuration)
|
|
633
|
+
|
|
594
634
|
project_id = project.get_params()['id']
|
|
595
635
|
return LabelStudioProject(name, project_id, media_import_method, resolved_col_mapping)
|
|
596
636
|
|
pixeltable/io/pandas.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional, Any, Iterable
|
|
1
|
+
from typing import Optional, Any, Iterable, Union
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
@@ -9,7 +9,10 @@ import pixeltable.type_system as ts
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def import_pandas(
|
|
12
|
-
tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None
|
|
12
|
+
tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
|
|
13
|
+
primary_key: Optional[Union[str, list[str]]] = None,
|
|
14
|
+
num_retained_versions: int = 10,
|
|
15
|
+
comment: str = ''
|
|
13
16
|
) -> pxt.catalog.InsertableTable:
|
|
14
17
|
"""Creates a new `Table` from a Pandas `DataFrame`, with the specified name. The schema of the table
|
|
15
18
|
will be inferred from the `DataFrame`, unless `schema_overrides` is specified.
|
|
@@ -31,13 +34,17 @@ def import_pandas(
|
|
|
31
34
|
"""
|
|
32
35
|
schema = _df_to_pxt_schema(df, schema_overrides)
|
|
33
36
|
tbl_rows = (dict(_df_row_to_pxt_row(row, schema)) for row in df.itertuples())
|
|
34
|
-
table = pxt.create_table(tbl_name, schema)
|
|
37
|
+
table = pxt.create_table(tbl_name, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
|
|
35
38
|
table.insert(tbl_rows)
|
|
36
39
|
return table
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
def import_csv(
|
|
40
|
-
|
|
43
|
+
tbl_name: str, filepath_or_buffer, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
44
|
+
primary_key: Optional[Union[str, list[str]]] = None,
|
|
45
|
+
num_retained_versions: int = 10,
|
|
46
|
+
comment: str = '',
|
|
47
|
+
**kwargs
|
|
41
48
|
) -> pxt.catalog.InsertableTable:
|
|
42
49
|
"""
|
|
43
50
|
Creates a new `Table` from a csv file. This is a convenience method and is equivalent
|
|
@@ -45,11 +52,15 @@ def import_csv(
|
|
|
45
52
|
See the Pandas documentation for `read_csv` for more details.
|
|
46
53
|
"""
|
|
47
54
|
df = pd.read_csv(filepath_or_buffer, **kwargs)
|
|
48
|
-
return import_pandas(
|
|
55
|
+
return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
|
|
49
56
|
|
|
50
57
|
|
|
51
58
|
def import_excel(
|
|
52
|
-
|
|
59
|
+
tbl_name: str, io, *args, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
|
|
60
|
+
primary_key: Optional[Union[str, list[str]]] = None,
|
|
61
|
+
num_retained_versions: int = 10,
|
|
62
|
+
comment: str = '',
|
|
63
|
+
**kwargs
|
|
53
64
|
) -> pxt.catalog.InsertableTable:
|
|
54
65
|
"""
|
|
55
66
|
Creates a new `Table` from an excel (.xlsx) file. This is a convenience method and is equivalent
|
|
@@ -57,7 +68,7 @@ def import_excel(
|
|
|
57
68
|
See the Pandas documentation for `read_excel` for more details.
|
|
58
69
|
"""
|
|
59
70
|
df = pd.read_excel(io, *args, **kwargs)
|
|
60
|
-
return import_pandas(
|
|
71
|
+
return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
|
|
61
72
|
|
|
62
73
|
|
|
63
74
|
def _df_to_pxt_schema(
|
pixeltable/plan.py
CHANGED
|
@@ -40,7 +40,7 @@ class Analyzer:
|
|
|
40
40
|
|
|
41
41
|
def __init__(
|
|
42
42
|
self, tbl: catalog.TableVersionPath, select_list: List[exprs.Expr],
|
|
43
|
-
where_clause: Optional[exprs.
|
|
43
|
+
where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
|
|
44
44
|
order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
|
|
45
45
|
if group_by_clause is None:
|
|
46
46
|
group_by_clause = []
|
|
@@ -58,7 +58,7 @@ class Analyzer:
|
|
|
58
58
|
# Where clause of the Select stmt of the SQL scan
|
|
59
59
|
self.sql_where_clause: Optional[exprs.Expr] = None
|
|
60
60
|
# filter predicate applied to output rows of the SQL scan
|
|
61
|
-
self.filter: Optional[exprs.
|
|
61
|
+
self.filter: Optional[exprs.Expr] = None
|
|
62
62
|
# not executable
|
|
63
63
|
#self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
|
|
64
64
|
if where_clause is not None:
|
|
@@ -183,7 +183,7 @@ class Planner:
|
|
|
183
183
|
# TODO: create an exec.CountNode and change this to create_count_plan()
|
|
184
184
|
@classmethod
|
|
185
185
|
def create_count_stmt(
|
|
186
|
-
cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.
|
|
186
|
+
cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
|
|
187
187
|
) -> sql.Select:
|
|
188
188
|
stmt = sql.select(sql.func.count('*'))
|
|
189
189
|
refd_tbl_ids: Set[UUID] = set()
|
|
@@ -239,7 +239,7 @@ class Planner:
|
|
|
239
239
|
cls, tbl: catalog.TableVersionPath,
|
|
240
240
|
update_targets: dict[catalog.Column, exprs.Expr],
|
|
241
241
|
recompute_targets: List[catalog.Column],
|
|
242
|
-
where_clause: Optional[exprs.
|
|
242
|
+
where_clause: Optional[exprs.Expr], cascade: bool
|
|
243
243
|
) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
|
|
244
244
|
"""Creates a plan to materialize updated rows.
|
|
245
245
|
The plan:
|
|
@@ -505,7 +505,7 @@ class Planner:
|
|
|
505
505
|
@classmethod
|
|
506
506
|
def create_query_plan(
|
|
507
507
|
cls, tbl: catalog.TableVersionPath, select_list: Optional[List[exprs.Expr]] = None,
|
|
508
|
-
where_clause: Optional[exprs.
|
|
508
|
+
where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
|
|
509
509
|
order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
|
|
510
510
|
with_pk: bool = False, ignore_errors: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
|
|
511
511
|
) -> exec.ExecNode:
|
|
@@ -597,7 +597,7 @@ class Planner:
|
|
|
597
597
|
return plan
|
|
598
598
|
|
|
599
599
|
@classmethod
|
|
600
|
-
def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.
|
|
600
|
+
def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr) -> Analyzer:
|
|
601
601
|
return Analyzer(tbl, [], where_clause=where_clause)
|
|
602
602
|
|
|
603
603
|
@classmethod
|
|
@@ -208,7 +208,7 @@ class Dumper:
|
|
|
208
208
|
add_column('not', ~(t.c2 > 20))
|
|
209
209
|
|
|
210
210
|
# function_call
|
|
211
|
-
add_column('function_call', pxt.functions.string.
|
|
211
|
+
add_column('function_call', pxt.functions.string.format('{0} {key}', t.c1, key=t.c1)) # library function
|
|
212
212
|
add_column('test_udf', test_udf_stored(t.c2)) # stored udf
|
|
213
213
|
add_column('test_udf_batched', test_udf_stored_batched(t.c1, upper=False)) # batched stored udf
|
|
214
214
|
if include_expensive_functions:
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
from typing import Optional, Union
|
|
3
|
+
|
|
4
|
+
import griffe
|
|
5
|
+
import griffe.expressions
|
|
6
|
+
from griffe import Extension, Object, ObjectNode
|
|
7
|
+
|
|
8
|
+
import pixeltable as pxt
|
|
9
|
+
|
|
10
|
+
logger = griffe.get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
class PxtGriffeExtension(Extension):
|
|
13
|
+
"""Implementation of a Pixeltable custom griffe extension."""
|
|
14
|
+
|
|
15
|
+
def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
|
|
16
|
+
if obj.docstring is None:
|
|
17
|
+
# Skip over entities without a docstring
|
|
18
|
+
return
|
|
19
|
+
|
|
20
|
+
if isinstance(obj, griffe.Function):
|
|
21
|
+
# See if the (Python) function has a @pxt.udf decorator
|
|
22
|
+
if any(
|
|
23
|
+
isinstance(dec.value, griffe.expressions.Expr) and dec.value.canonical_path in ['pixeltable.func.udf', 'pixeltable.udf']
|
|
24
|
+
for dec in obj.decorators
|
|
25
|
+
):
|
|
26
|
+
# Update the template
|
|
27
|
+
self.__modify_pxt_udf(obj)
|
|
28
|
+
|
|
29
|
+
def __modify_pxt_udf(self, func: griffe.Function) -> None:
|
|
30
|
+
"""
|
|
31
|
+
Instructs the doc snippet for `func` to use the custom Pixeltable UDF jinja template, and
|
|
32
|
+
converts all type hints to Pixeltable column type references, in accordance with the @udf
|
|
33
|
+
decorator behavior.
|
|
34
|
+
"""
|
|
35
|
+
func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
|
|
36
|
+
# Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
|
|
37
|
+
udf = griffe.dynamic_import(func.path)
|
|
38
|
+
assert isinstance(udf, pxt.Function)
|
|
39
|
+
# Convert the return type to a Pixeltable type reference
|
|
40
|
+
func.returns = self.__column_type_to_display_str(udf.signature.get_return_type())
|
|
41
|
+
# Convert the parameter types to Pixeltable type references
|
|
42
|
+
for griffe_param in func.parameters:
|
|
43
|
+
assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
|
|
44
|
+
if griffe_param.name not in udf.signature.parameters:
|
|
45
|
+
logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
|
|
46
|
+
continue
|
|
47
|
+
pxt_param = udf.signature.parameters[griffe_param.name]
|
|
48
|
+
griffe_param.annotation = self.__column_type_to_display_str(pxt_param.col_type)
|
|
49
|
+
|
|
50
|
+
def __column_type_to_display_str(self, column_type: Optional[pxt.ColumnType]) -> str:
|
|
51
|
+
# TODO: When we enhance the Pixeltable type system, we may want to refactor some of this logic out.
|
|
52
|
+
# I'm putting it here for now though.
|
|
53
|
+
if column_type is None:
|
|
54
|
+
return 'None'
|
|
55
|
+
if column_type.is_string_type():
|
|
56
|
+
base = 'str'
|
|
57
|
+
elif column_type.is_int_type():
|
|
58
|
+
base = 'int'
|
|
59
|
+
elif column_type.is_float_type():
|
|
60
|
+
base = 'float'
|
|
61
|
+
elif column_type.is_bool_type():
|
|
62
|
+
base = 'bool'
|
|
63
|
+
elif column_type.is_array_type():
|
|
64
|
+
base = 'ArrayT'
|
|
65
|
+
elif column_type.is_json_type():
|
|
66
|
+
base = 'JsonT'
|
|
67
|
+
elif column_type.is_image_type():
|
|
68
|
+
base = 'ImageT'
|
|
69
|
+
elif column_type.is_video_type():
|
|
70
|
+
base = 'VideoT'
|
|
71
|
+
elif column_type.is_audio_type():
|
|
72
|
+
base = 'AudioT'
|
|
73
|
+
elif column_type.is_document_type():
|
|
74
|
+
base = 'DocumentT'
|
|
75
|
+
else:
|
|
76
|
+
assert False
|
|
77
|
+
return f'Optional[{base}]' if column_type.nullable else base
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
{#- Template for Pixeltable UDFs. Cargo-culted (with modification) from _base/function.html.jinja. -#}
|
|
2
|
+
|
|
3
|
+
{% block logs scoped %}
|
|
4
|
+
{#- Logging block.
|
|
5
|
+
|
|
6
|
+
This block can be used to log debug messages, deprecation messages, warnings, etc.
|
|
7
|
+
-#}
|
|
8
|
+
{{ log.debug("Rendering " + function.path) }}
|
|
9
|
+
{% endblock logs %}
|
|
10
|
+
|
|
11
|
+
{% import "language"|get_template as lang with context %}
|
|
12
|
+
{#- Language module providing the `t` translation method. -#}
|
|
13
|
+
|
|
14
|
+
<div class="doc doc-object doc-function">
|
|
15
|
+
{% with obj = function, html_id = function.path %}
|
|
16
|
+
|
|
17
|
+
{% if root %}
|
|
18
|
+
{% set show_full_path = config.show_root_full_path %}
|
|
19
|
+
{% set root_members = True %}
|
|
20
|
+
{% elif root_members %}
|
|
21
|
+
{% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %}
|
|
22
|
+
{% set root_members = False %}
|
|
23
|
+
{% else %}
|
|
24
|
+
{% set show_full_path = config.show_object_full_path %}
|
|
25
|
+
{% endif %}
|
|
26
|
+
|
|
27
|
+
{% set function_name = function.path if show_full_path else function.name %}
|
|
28
|
+
{#- Brief or full function name depending on configuration. -#}
|
|
29
|
+
{% set symbol_type = "udf" %}
|
|
30
|
+
{#- Symbol type: always "udf", since this template only renders Pixeltable UDFs. -#}
|
|
31
|
+
|
|
32
|
+
{% if not root or config.show_root_heading %}
|
|
33
|
+
{% filter heading(
|
|
34
|
+
heading_level,
|
|
35
|
+
role="function",
|
|
36
|
+
id=html_id,
|
|
37
|
+
class="doc doc-heading",
|
|
38
|
+
toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code> ')|safe if config.show_symbol_type_toc else '') + function.name,
|
|
39
|
+
) %}
|
|
40
|
+
|
|
41
|
+
{% block heading scoped %}
|
|
42
|
+
{#- Heading block.
|
|
43
|
+
|
|
44
|
+
This block renders the heading for the function.
|
|
45
|
+
-#}
|
|
46
|
+
{% if config.show_symbol_type_heading %}<code class="doc-symbol doc-symbol-heading doc-symbol-{{ symbol_type }}"></code>{% endif %}
|
|
47
|
+
{% if config.separate_signature %}
|
|
48
|
+
<span class="doc doc-object-name doc-function-name">{{ function_name }}</span>
|
|
49
|
+
{% else %}
|
|
50
|
+
{%+ filter highlight(language="python", inline=True) %}
|
|
51
|
+
{{ function_name }}{% include "signature"|get_template with context %}
|
|
52
|
+
{% endfilter %}
|
|
53
|
+
{% endif %}
|
|
54
|
+
{% endblock heading %}
|
|
55
|
+
|
|
56
|
+
{% block labels scoped %}
|
|
57
|
+
{#- Labels block.
|
|
58
|
+
|
|
59
|
+
This block renders the labels for the function.
|
|
60
|
+
-#}
|
|
61
|
+
{% with labels = function.labels %}
|
|
62
|
+
{% include "labels"|get_template with context %}
|
|
63
|
+
{% endwith %}
|
|
64
|
+
{% endblock labels %}
|
|
65
|
+
|
|
66
|
+
{% endfilter %}
|
|
67
|
+
|
|
68
|
+
{% block signature scoped %}
|
|
69
|
+
{#- Signature block.
|
|
70
|
+
|
|
71
|
+
This block renders the signature for the function.
|
|
72
|
+
-#}
|
|
73
|
+
{% if config.separate_signature %}
|
|
74
|
+
{% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
|
|
75
|
+
{{ function.name }}
|
|
76
|
+
{% endfilter %}
|
|
77
|
+
{% endif %}
|
|
78
|
+
{% endblock signature %}
|
|
79
|
+
|
|
80
|
+
{% else %}
|
|
81
|
+
|
|
82
|
+
{% if config.show_root_toc_entry %}
|
|
83
|
+
{% filter heading(
|
|
84
|
+
heading_level,
|
|
85
|
+
role="function",
|
|
86
|
+
id=html_id,
|
|
87
|
+
toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code> ')|safe if config.show_symbol_type_toc else '') + function.name,
|
|
88
|
+
hidden=True,
|
|
89
|
+
) %}
|
|
90
|
+
{% endfilter %}
|
|
91
|
+
{% endif %}
|
|
92
|
+
{% set heading_level = heading_level - 1 %}
|
|
93
|
+
{% endif %}
|
|
94
|
+
|
|
95
|
+
<div class="doc doc-contents {% if root %}first{% endif %}">
|
|
96
|
+
{% block contents scoped %}
|
|
97
|
+
{#- Contents block.
|
|
98
|
+
|
|
99
|
+
This block renders the contents of the function.
|
|
100
|
+
It contains other blocks that users can override.
|
|
101
|
+
Overriding the contents block allows rearranging the order of the blocks.
|
|
102
|
+
-#}
|
|
103
|
+
{% block docstring scoped %}
|
|
104
|
+
{#- Docstring block.
|
|
105
|
+
|
|
106
|
+
This block renders the docstring for the function.
|
|
107
|
+
-#}
|
|
108
|
+
{% with docstring_sections = function.docstring.parsed %}
|
|
109
|
+
{% include "docstring"|get_template with context %}
|
|
110
|
+
{% endwith %}
|
|
111
|
+
{% endblock docstring %}
|
|
112
|
+
|
|
113
|
+
{% block source scoped %}
|
|
114
|
+
{#- Source block.
|
|
115
|
+
|
|
116
|
+
This block renders the source code for the function.
|
|
117
|
+
-#}
|
|
118
|
+
{% if config.show_source and function.source %}
|
|
119
|
+
<details class="quote">
|
|
120
|
+
<summary>{{ lang.t("Source code in") }} <code>
|
|
121
|
+
{%- if function.relative_filepath.is_absolute() -%}
|
|
122
|
+
{{ function.relative_package_filepath }}
|
|
123
|
+
{%- else -%}
|
|
124
|
+
{{ function.relative_filepath }}
|
|
125
|
+
{%- endif -%}
|
|
126
|
+
</code></summary>
|
|
127
|
+
{{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }}
|
|
128
|
+
</details>
|
|
129
|
+
{% endif %}
|
|
130
|
+
{% endblock source %}
|
|
131
|
+
{% endblock contents %}
|
|
132
|
+
</div>
|
|
133
|
+
|
|
134
|
+
{% endwith %}
|
|
135
|
+
</div>
|
pixeltable/utils/s3.py
CHANGED