pixeltable 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (39):
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/insertable_table.py +2 -2
  3. pixeltable/catalog/table.py +5 -5
  4. pixeltable/catalog/table_version.py +12 -14
  5. pixeltable/catalog/view.py +2 -2
  6. pixeltable/dataframe.py +7 -6
  7. pixeltable/exec/expr_eval_node.py +8 -1
  8. pixeltable/exec/sql_scan_node.py +1 -1
  9. pixeltable/exprs/__init__.py +0 -1
  10. pixeltable/exprs/comparison.py +5 -5
  11. pixeltable/exprs/compound_predicate.py +12 -12
  12. pixeltable/exprs/expr.py +32 -0
  13. pixeltable/exprs/in_predicate.py +3 -3
  14. pixeltable/exprs/is_null.py +5 -5
  15. pixeltable/func/aggregate_function.py +10 -4
  16. pixeltable/func/callable_function.py +4 -0
  17. pixeltable/func/function_registry.py +2 -0
  18. pixeltable/functions/globals.py +36 -1
  19. pixeltable/functions/huggingface.py +62 -4
  20. pixeltable/functions/image.py +17 -0
  21. pixeltable/functions/string.py +622 -7
  22. pixeltable/functions/video.py +26 -8
  23. pixeltable/globals.py +3 -3
  24. pixeltable/io/globals.py +53 -4
  25. pixeltable/io/label_studio.py +42 -2
  26. pixeltable/io/pandas.py +18 -7
  27. pixeltable/plan.py +6 -6
  28. pixeltable/tool/create_test_db_dump.py +1 -1
  29. pixeltable/tool/doc_plugins/griffe.py +77 -0
  30. pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
  31. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
  32. pixeltable/utils/s3.py +1 -1
  33. pixeltable-0.2.13.dist-info/METADATA +206 -0
  34. {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/RECORD +37 -34
  35. pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
  36. pixeltable/exprs/predicate.py +0 -44
  37. pixeltable-0.2.12.dist-info/METADATA +0 -137
  38. {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
  39. {pixeltable-0.2.12.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/functions/video.py CHANGED
@@ -1,3 +1,16 @@
+ """
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
+
+ Example:
+ ```python
+ import pixeltable as pxt
+ from pixeltable.functions import video as pxt_video
+
+ t = pxt.get_table(...)
+ t.select(pxt_video.extract_audio(t.video_col)).collect()
+ ```
+ """
+
  import tempfile
  import uuid
  from pathlib import Path
@@ -43,6 +56,9 @@ _format_defaults = { # format -> (codec, ext)
      allows_window=False,
  )
  class make_video(func.Aggregator):
+     """
+     Aggregator that creates a video from a sequence of images.
+     """
      def __init__(self, fps: int = 25):
          """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
          self.container: Optional[av.container.OutputContainer] = None
@@ -84,7 +100,14 @@ _extract_audio_param_types = [
  def extract_audio(
      video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
  ) -> Optional[str]:
-     """Extract an audio stream from a video file, save it as a media file and return its path"""
+     """
+     Extract an audio stream from a video file, save it as a media file and return its path.
+
+     Args:
+         stream_idx: Index of the audio stream to extract.
+         format: The target audio format. (`'wav'`, `'mp3'`, `'flac'`).
+         codec: The codec to use for the audio stream. If not provided, a default codec will be used.
+     """
      if format not in _format_defaults:
          raise ValueError(f'extract_audio(): unsupported audio format: {format}')
      default_codec, ext = _format_defaults[format]
@@ -107,13 +130,8 @@ def extract_audio(

  @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)])
  def get_metadata(video: str) -> dict:
-     """Gets various metadata associated with a video file.
-
-     Args:
-         video (str): Path to the video file.
-
-     Returns:
-         A dictionary containing the associated metadata.
+     """
+     Gets various metadata associated with a video file and returns it as a dictionary.
      """
      with av.open(video) as container:
          assert isinstance(container, av.container.InputContainer)
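
The new module docstring and the expanded `extract_audio` signature suggest usage along the following lines. This is a minimal sketch, not an excerpt from the package: the table name `videos` and the column `video_col` are hypothetical placeholders.

```python
import pixeltable as pxt
from pixeltable.functions import video as pxt_video

tbl = pxt.get_table('videos')  # hypothetical table with a video column `video_col`

# Extract the first audio stream of each video as an mp3 file and inspect the video metadata.
result = tbl.select(
    pxt_video.extract_audio(tbl.video_col, format='mp3'),
    pxt_video.get_metadata(tbl.video_col),
).collect()
```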
pixeltable/globals.py CHANGED
@@ -7,10 +7,10 @@ import sqlalchemy as sql
  from sqlalchemy.util.preloaded import orm

  import pixeltable.exceptions as excs
+ import pixeltable.exprs as exprs
  from pixeltable import catalog, func, DataFrame
  from pixeltable.catalog import Catalog
  from pixeltable.env import Env
- from pixeltable.exprs import Predicate
  from pixeltable.iterators import ComponentIterator
  from pixeltable.metadata import schema

@@ -81,7 +81,7 @@ def create_view(
      base: Union[catalog.Table, DataFrame],
      *,
      schema: Optional[dict[str, Any]] = None,
-     filter: Optional[Predicate] = None,
+     filter: Optional[exprs.Expr] = None,
      is_snapshot: bool = False,
      iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
      num_retained_versions: int = 10,
@@ -94,7 +94,7 @@ def create_view(
          path_str: Path to the view.
          base: Table (i.e., table or view or snapshot) or DataFrame to base the view on.
          schema: dictionary mapping column names to column types, value expressions, or to column specifications.
-         filter: Predicate to filter rows of the base table.
+         filter: predicate to filter rows of the base table.
          is_snapshot: Whether the view is a snapshot.
          iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
              the base table.
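
Because `filter` now accepts a general `exprs.Expr` rather than the removed `Predicate` class, a filtered view would be created roughly as follows. This is a minimal sketch assuming a base table with an integer column `c2` (both names are hypothetical):

```python
import pixeltable as pxt

tbl = pxt.get_table('base_tbl')  # hypothetical base table with an int column `c2`

# The filter argument is an ordinary Pixeltable expression (exprs.Expr),
# built here from a column reference and a comparison.
view = pxt.create_view('filtered_view', tbl, filter=tbl.c2 > 20)
```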
pixeltable/io/globals.py CHANGED
@@ -13,11 +13,14 @@ def create_label_studio_project(
      media_import_method: Literal['post', 'file', 'url'] = 'post',
      col_mapping: Optional[dict[str, str]] = None,
      sync_immediately: bool = True,
+     s3_configuration: Optional[dict[str, Any]] = None,
      **kwargs: Any
  ) -> SyncStatus:
-     # TODO(aaron-siegel): Add link in docstring to a Label Studio howto
      """
-     Creates a new Label Studio project and links it to the specified `Table`.
+     Create a new Label Studio project and link it to the specified `Table`.
+
+     - A tutorial notebook with fully worked examples can be found here:
+       [Using Label Studio for Annotations with Pixeltable](https://pixeltable.readme.io/docs/label-studio)

      The required parameter `label_config` specifies the Label Studio project configuration,
      in XML format, as described in the Label Studio documentation. The linked project will
@@ -41,6 +44,11 @@ def create_label_studio_project(
      * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
      * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.

+     __Requirements:__
+
+     - `pip install label-studio-sdk`
+     - `pip install boto3` (if using S3 import storage)
+
      Args:
          t: The Table to link to.
          label_config: The Label Studio project configuration, in XML format.
@@ -52,6 +60,7 @@ def create_label_studio_project(
              will see inside Label Studio. Unlike `name`, it does not need to be an identifier and
              does not need to be unique. If not specified, the table name `t.name` will be used.
          media_import_method: The method to use when transferring media files to Label Studio:
+
              - `post`: Media will be sent to Label Studio via HTTP post. This should generally only be used for
                prototyping; due to restrictions in Label Studio, it can only be used with projects that have
                just one data field, and does not scale well.
@@ -63,9 +72,48 @@ def create_label_studio_project(
          col_mapping: An optional mapping of local column names to Label Studio fields.
          sync_immediately: If `True`, immediately perform an initial synchronization by
              exporting all rows of the `Table` as Label Studio tasks.
+         s3_configuration: If specified, S3 import storage will be configured for the new project. This can only
+             be used with `media_import_method='url'`, and if `media_import_method='url'` and any of the media data is
+             referenced by `s3://` URLs, then it must be specified in order for such media to display correctly
+             in the Label Studio interface.
+
+             The items in the `s3_configuration` dictionary correspond to kwarg
+             parameters of the Label Studio `connect_s3_import_storage` method, as described in the
+             [Label Studio connect_s3_import_storage docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.connect_s3_import_storage).
+             `bucket` must be specified; all other parameters are optional. If credentials are not specified explicitly,
+             Pixeltable will attempt to retrieve them from the environment (such as from `~/.aws/credentials`). If a title is not
+             specified, Pixeltable will use the default `'Pixeltable-S3-Import-Storage'`. All other parameters use their Label
+             Studio defaults.
          kwargs: Additional keyword arguments are passed to the `start_project` method in the Label
-             Studio SDK, as described here:
-             https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project
+             Studio SDK, as described in the
+             [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
+
+     Returns:
+         A `SyncStatus` representing the status of any synchronization operations that occurred.
+
+     Examples:
+         Create a Label Studio project whose tasks correspond to videos stored in the `video_col` column of the table `tbl`:
+
+         >>> config = \"\"\"
+             <View>
+               <Video name="video_obj" value="$video_col"/>
+               <Choices name="video-category" toName="video" showInLine="true">
+                 <Choice value="city"/>
+                 <Choice value="food"/>
+                 <Choice value="sports"/>
+               </Choices>
+             </View>\"\"\"
+             create_label_studio_project(tbl, config)
+
+         Create a Label Studio project with the same configuration, using `media_import_method='url'`,
+         whose media are stored in an S3 bucket:
+
+         >>> create_label_studio_project(
+                 tbl,
+                 config,
+                 media_import_method='url',
+                 s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
+             )
      """
      from pixeltable.io.label_studio import LabelStudioProject

@@ -76,6 +124,7 @@ def create_label_studio_project(
          title,
          media_import_method,
          col_mapping,
+         s3_configuration,
          **kwargs
      )

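Putting the connection notes and the new `s3_configuration` parameter together, a minimal end-to-end sketch might look like the following. Table name, bucket, API key, and the `pxt.io.create_label_studio_project` access path are assumptions for illustration, not values taken from the package:

```python
import os
import pixeltable as pxt

# Label Studio connection settings can be supplied via environment variables,
# as described in the docstring above.
os.environ['LABEL_STUDIO_URL'] = 'http://localhost:8080'
os.environ['LABEL_STUDIO_API_KEY'] = '<api-key>'  # placeholder

tbl = pxt.get_table('videos')  # hypothetical table with a `video_col` column
config = '<View><Video name="video_obj" value="$video_col"/></View>'

# s3_configuration is only valid together with media_import_method='url'.
pxt.io.create_label_studio_project(
    tbl,
    config,
    media_import_method='url',
    s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'},
)
```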
pixeltable/io/label_studio.py CHANGED
@@ -1,3 +1,4 @@
+ import copy
  import json
  import logging
  import os
@@ -18,6 +19,15 @@ from pixeltable.exprs import ColumnRef, DataRow, Expr
  from pixeltable.io.external_store import Project, SyncStatus
  from pixeltable.utils import coco

+ # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
+ # the import two different ways to insure intercompatibility
+ try:
+     # label_studio_sdk<1 compatibility
+     import label_studio_sdk.project as ls_project  # type: ignore
+ except ImportError:
+     # label_studio_sdk>=1 compatibility
+     import label_studio_sdk._legacy.project as ls_project  # type: ignore
+
  _logger = logging.getLogger('pixeltable')


@@ -50,11 +60,11 @@ class LabelStudioProject(Project):
          """
          self.project_id = project_id
          self.media_import_method = media_import_method
-         self._project: Optional[label_studio_sdk.project.Project] = None
+         self._project: Optional[ls_project.Project] = None
          super().__init__(name, col_mapping, stored_proxies)

      @property
-     def project(self) -> label_studio_sdk.project.Project:
+     def project(self) -> ls_project.Project:
          """The `Project` object corresponding to this Label Studio project."""
          if self._project is None:
              try:
@@ -536,6 +546,7 @@ class LabelStudioProject(Project):
          title: Optional[str],
          media_import_method: Literal['post', 'file', 'url'],
          col_mapping: Optional[dict[str, str]],
+         s3_configuration: Optional[dict[str, Any]],
          **kwargs: Any
      ) -> 'LabelStudioProject':
          """
@@ -572,6 +583,31 @@ class LabelStudioProject(Project):
          if media_import_method == 'post' and len(config.data_keys) > 1:
              raise excs.Error('`media_import_method` cannot be `post` if there is more than one data key')

+         if s3_configuration is not None:
+             if media_import_method != 'url':
+                 raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
+             s3_configuration = copy.copy(s3_configuration)
+             if not 'bucket' in s3_configuration:
+                 raise excs.Error('`s3_configuration` must contain a `bucket` field')
+             if not 'title' in s3_configuration:
+                 s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
+             if ('aws_access_key_id' not in s3_configuration and
+                     'aws_secret_access_key' not in s3_configuration and
+                     'aws_session_token' not in s3_configuration):
+                 # Attempt to fill any missing credentials from the environment
+                 try:
+                     import boto3
+                     s3_credentials = boto3.Session().get_credentials().get_frozen_credentials()
+                     _logger.info(f'Using AWS credentials from the environment for Label Studio project: {title}')
+                     s3_configuration['aws_access_key_id'] = s3_credentials.access_key
+                     s3_configuration['aws_secret_access_key'] = s3_credentials.secret_key
+                     s3_configuration['aws_session_token'] = s3_credentials.token
+                 except Exception as exc:
+                     # This is not necessarily a problem, but we should log that it happened
+                     _logger.debug(f'Unable to retrieve AWS credentials from the environment: {exc}')
+                     pass
+
+         _logger.info(f'Creating Label Studio project: {title}')
          project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)

          if media_import_method == 'file':
@@ -591,6 +627,10 @@ class LabelStudioProject(Project):
                  ) from exc
              raise # Handle any other exception type normally

+         if s3_configuration is not None:
+             _logger.info(f'Setting up S3 import storage for Label Studio project: {title}')
+             project.connect_s3_import_storage(**s3_configuration)
+
          project_id = project.get_params()['id']
          return LabelStudioProject(name, project_id, media_import_method, resolved_col_mapping)

pixeltable/io/pandas.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Optional, Any, Iterable
+ from typing import Optional, Any, Iterable, Union

  import numpy as np
  import pandas as pd
@@ -9,7 +9,10 @@ import pixeltable.type_system as ts


  def import_pandas(
-     tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None
+     tbl_name: str, df: pd.DataFrame, *, schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
+     primary_key: Optional[Union[str, list[str]]] = None,
+     num_retained_versions: int = 10,
+     comment: str = ''
  ) -> pxt.catalog.InsertableTable:
      """Creates a new `Table` from a Pandas `DataFrame`, with the specified name. The schema of the table
      will be inferred from the `DataFrame`, unless `schema` is specified.
@@ -31,13 +34,17 @@ def import_pandas(
      """
      schema = _df_to_pxt_schema(df, schema_overrides)
      tbl_rows = (dict(_df_row_to_pxt_row(row, schema)) for row in df.itertuples())
-     table = pxt.create_table(tbl_name, schema)
+     table = pxt.create_table(tbl_name, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
      table.insert(tbl_rows)
      return table


  def import_csv(
-     table_path: str, filepath_or_buffer, schema_overrides: Optional[dict[str, ts.ColumnType]] = None, **kwargs
+     tbl_name: str, filepath_or_buffer, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+     primary_key: Optional[Union[str, list[str]]] = None,
+     num_retained_versions: int = 10,
+     comment: str = '',
+     **kwargs
  ) -> pxt.catalog.InsertableTable:
      """
      Creates a new `Table` from a csv file. This is a convenience method and is equivalent
@@ -45,11 +52,15 @@ def import_csv(
      See the Pandas documentation for `read_csv` for more details.
      """
      df = pd.read_csv(filepath_or_buffer, **kwargs)
-     return import_pandas(table_path, df, schema_overrides=schema_overrides)
+     return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)


  def import_excel(
-     table_path: str, io, *args, schema_overrides: Optional[dict[str, ts.ColumnType]] = None, **kwargs
+     tbl_name: str, io, *args, schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+     primary_key: Optional[Union[str, list[str]]] = None,
+     num_retained_versions: int = 10,
+     comment: str = '',
+     **kwargs
  ) -> pxt.catalog.InsertableTable:
      """
      Creates a new `Table` from an excel (.xlsx) file. This is a convenience method and is equivalent
@@ -57,7 +68,7 @@ def import_excel(
      See the Pandas documentation for `read_excel` for more details.
      """
      df = pd.read_excel(io, *args, **kwargs)
-     return import_pandas(table_path, df, schema_overrides=schema_overrides)
+     return import_pandas(tbl_name, df, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)


  def _df_to_pxt_schema(
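
A minimal sketch of the expanded import helpers, using a hypothetical CSV file and column names; the new `primary_key`, `num_retained_versions`, and `comment` arguments are simply forwarded to `pxt.create_table`:

```python
from pixeltable.io.pandas import import_csv

# 'orders.csv' and the 'order_id' column are placeholders for illustration.
tbl = import_csv(
    'orders',          # tbl_name (renamed from table_path in 0.2.13)
    'orders.csv',      # filepath_or_buffer, passed through to pandas.read_csv
    primary_key='order_id',
    num_retained_versions=5,
    comment='Imported from orders.csv',
)
```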
pixeltable/plan.py CHANGED
@@ -40,7 +40,7 @@ class Analyzer:

      def __init__(
          self, tbl: catalog.TableVersionPath, select_list: List[exprs.Expr],
-         where_clause: Optional[exprs.Predicate] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
+         where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
          order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
          if group_by_clause is None:
              group_by_clause = []
@@ -58,7 +58,7 @@ class Analyzer:
          # Where clause of the Select stmt of the SQL scan
          self.sql_where_clause: Optional[exprs.Expr] = None
          # filter predicate applied to output rows of the SQL scan
-         self.filter: Optional[exprs.Predicate] = None
+         self.filter: Optional[exprs.Expr] = None
          # not executable
          #self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
          if where_clause is not None:
@@ -183,7 +183,7 @@ class Planner:
      # TODO: create an exec.CountNode and change this to create_count_plan()
      @classmethod
      def create_count_stmt(
-         cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Predicate] = None
+         cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
      ) -> sql.Select:
          stmt = sql.select(sql.func.count('*'))
          refd_tbl_ids: Set[UUID] = set()
@@ -239,7 +239,7 @@ class Planner:
          cls, tbl: catalog.TableVersionPath,
          update_targets: dict[catalog.Column, exprs.Expr],
          recompute_targets: List[catalog.Column],
-         where_clause: Optional[exprs.Predicate], cascade: bool
+         where_clause: Optional[exprs.Expr], cascade: bool
      ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
          """Creates a plan to materialize updated rows.
          The plan:
@@ -505,7 +505,7 @@ class Planner:
      @classmethod
      def create_query_plan(
          cls, tbl: catalog.TableVersionPath, select_list: Optional[List[exprs.Expr]] = None,
-         where_clause: Optional[exprs.Predicate] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
+         where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
          order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
          with_pk: bool = False, ignore_errors: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
      ) -> exec.ExecNode:
@@ -597,7 +597,7 @@ class Planner:
          return plan

      @classmethod
-     def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Predicate) -> Analyzer:
+     def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr) -> Analyzer:
          return Analyzer(tbl, [], where_clause=where_clause)

      @classmethod
pixeltable/tool/create_test_db_dump.py CHANGED
@@ -208,7 +208,7 @@ class Dumper:
          add_column('not', ~(t.c2 > 20))

          # function_call
-         add_column('function_call', pxt.functions.string.str_format('{0} {key}', t.c1, key=t.c1)) # library function
+         add_column('function_call', pxt.functions.string.format('{0} {key}', t.c1, key=t.c1)) # library function
          add_column('test_udf', test_udf_stored(t.c2)) # stored udf
          add_column('test_udf_batched', test_udf_stored_batched(t.c1, upper=False)) # batched stored udf
          if include_expensive_functions:
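
The one-line change above reflects the rename of the library string UDF from `str_format` to `format` in 0.2.13. A minimal sketch of calling it directly, assuming a hypothetical table `t` with a string column `c1`:

```python
import pixeltable as pxt
from pixeltable.functions import string as pxt_str

t = pxt.get_table('test_tbl')  # hypothetical table with a string column `c1`

# Positional and keyword placeholders work as in str.format().
t.select(pxt_str.format('{0} {key}', t.c1, key=t.c1)).collect()
```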
pixeltable/tool/doc_plugins/griffe.py ADDED
@@ -0,0 +1,77 @@
+ import ast
+ from typing import Optional, Union
+
+ import griffe
+ import griffe.expressions
+ from griffe import Extension, Object, ObjectNode
+
+ import pixeltable as pxt
+
+ logger = griffe.get_logger(__name__)
+
+ class PxtGriffeExtension(Extension):
+     """Implementation of a Pixeltable custom griffe extension."""
+
+     def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
+         if obj.docstring is None:
+             # Skip over entities without a docstring
+             return
+
+         if isinstance(obj, griffe.Function):
+             # See if the (Python) function has a @pxt.udf decorator
+             if any(
+                 isinstance(dec.value, griffe.expressions.Expr) and dec.value.canonical_path in ['pixeltable.func.udf', 'pixeltable.udf']
+                 for dec in obj.decorators
+             ):
+                 # Update the template
+                 self.__modify_pxt_udf(obj)
+
+     def __modify_pxt_udf(self, func: griffe.Function) -> None:
+         """
+         Instructs the doc snippet for `func` to use the custom Pixeltable UDF jinja template, and
+         converts all type hints to Pixeltable column type references, in accordance with the @udf
+         decorator behavior.
+         """
+         func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
+         # Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
+         udf = griffe.dynamic_import(func.path)
+         assert isinstance(udf, pxt.Function)
+         # Convert the return type to a Pixeltable type reference
+         func.returns = self.__column_type_to_display_str(udf.signature.get_return_type())
+         # Convert the parameter types to Pixeltable type references
+         for griffe_param in func.parameters:
+             assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
+             if griffe_param.name not in udf.signature.parameters:
+                 logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
+                 continue
+             pxt_param = udf.signature.parameters[griffe_param.name]
+             griffe_param.annotation = self.__column_type_to_display_str(pxt_param.col_type)
+
+     def __column_type_to_display_str(self, column_type: Optional[pxt.ColumnType]) -> str:
+         # TODO: When we enhance the Pixeltable type system, we may want to refactor some of this logic out.
+         # I'm putting it here for now though.
+         if column_type is None:
+             return 'None'
+         if column_type.is_string_type():
+             base = 'str'
+         elif column_type.is_int_type():
+             base = 'int'
+         elif column_type.is_float_type():
+             base = 'float'
+         elif column_type.is_bool_type():
+             base = 'bool'
+         elif column_type.is_array_type():
+             base = 'ArrayT'
+         elif column_type.is_json_type():
+             base = 'JsonT'
+         elif column_type.is_image_type():
+             base = 'ImageT'
+         elif column_type.is_video_type():
+             base = 'VideoT'
+         elif column_type.is_audio_type():
+             base = 'AudioT'
+         elif column_type.is_document_type():
+             base = 'DocumentT'
+         else:
+             assert False
+         return f'Optional[{base}]' if column_type.nullable else base
pixeltable/tool/doc_plugins/mkdocstrings.py ADDED
@@ -0,0 +1,6 @@
+ from pathlib import Path
+
+
+ def get_templates_path() -> Path:
+     """Implementation of the 'mkdocstrings.python.templates' plugin for custom jinja templates."""
+     return Path(__file__).parent / "templates"
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja ADDED
@@ -0,0 +1,135 @@
+ {#- Template for Pixeltable UDFs. Cargo-culted (with modification) from _base/function.html.jinja. -#}
+
+ {% block logs scoped %}
+ {#- Logging block.
+
+ This block can be used to log debug messages, deprecation messages, warnings, etc.
+ -#}
+ {{ log.debug("Rendering " + function.path) }}
+ {% endblock logs %}
+
+ {% import "language"|get_template as lang with context %}
+ {#- Language module providing the `t` translation method. -#}
+
+ <div class="doc doc-object doc-function">
+ {% with obj = function, html_id = function.path %}
+
+ {% if root %}
+ {% set show_full_path = config.show_root_full_path %}
+ {% set root_members = True %}
+ {% elif root_members %}
+ {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %}
+ {% set root_members = False %}
+ {% else %}
+ {% set show_full_path = config.show_object_full_path %}
+ {% endif %}
+
+ {% set function_name = function.path if show_full_path else function.name %}
+ {#- Brief or full function name depending on configuration. -#}
+ {% set symbol_type = "udf" %}
+ {#- Symbol type: method when parent is a class, function otherwise. -#}
+
+ {% if not root or config.show_root_heading %}
+ {% filter heading(
+     heading_level,
+     role="function",
+     id=html_id,
+     class="doc doc-heading",
+     toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
+ ) %}
+
+ {% block heading scoped %}
+ {#- Heading block.
+
+ This block renders the heading for the function.
+ -#}
+ {% if config.show_symbol_type_heading %}<code class="doc-symbol doc-symbol-heading doc-symbol-{{ symbol_type }}"></code>{% endif %}
+ {% if config.separate_signature %}
+ <span class="doc doc-object-name doc-function-name">{{ function_name }}</span>
+ {% else %}
+ {%+ filter highlight(language="python", inline=True) %}
+ {{ function_name }}{% include "signature"|get_template with context %}
+ {% endfilter %}
+ {% endif %}
+ {% endblock heading %}
+
+ {% block labels scoped %}
+ {#- Labels block.
+
+ This block renders the labels for the function.
+ -#}
+ {% with labels = function.labels %}
+ {% include "labels"|get_template with context %}
+ {% endwith %}
+ {% endblock labels %}
+
+ {% endfilter %}
+
+ {% block signature scoped %}
+ {#- Signature block.
+
+ This block renders the signature for the function.
+ -#}
+ {% if config.separate_signature %}
+ {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
+ {{ function.name }}
+ {% endfilter %}
+ {% endif %}
+ {% endblock signature %}
+
+ {% else %}
+
+ {% if config.show_root_toc_entry %}
+ {% filter heading(
+     heading_level,
+     role="function",
+     id=html_id,
+     toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
+     hidden=True,
+ ) %}
+ {% endfilter %}
+ {% endif %}
+ {% set heading_level = heading_level - 1 %}
+ {% endif %}
+
+ <div class="doc doc-contents {% if root %}first{% endif %}">
+ {% block contents scoped %}
+ {#- Contents block.
+
+ This block renders the contents of the function.
+ It contains other blocks that users can override.
+ Overriding the contents block allows to rearrange the order of the blocks.
+ -#}
+ {% block docstring scoped %}
+ {#- Docstring block.
+
+ This block renders the docstring for the function.
+ -#}
+ {% with docstring_sections = function.docstring.parsed %}
+ {% include "docstring"|get_template with context %}
+ {% endwith %}
+ {% endblock docstring %}
+
+ {% block source scoped %}
+ {#- Source block.
+
+ This block renders the source code for the function.
+ -#}
+ {% if config.show_source and function.source %}
+ <details class="quote">
+ <summary>{{ lang.t("Source code in") }} <code>
+ {%- if function.relative_filepath.is_absolute() -%}
+ {{ function.relative_package_filepath }}
+ {%- else -%}
+ {{ function.relative_filepath }}
+ {%- endif -%}
+ </code></summary>
+ {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }}
+ </details>
+ {% endif %}
+ {% endblock source %}
+ {% endblock contents %}
+ </div>
+
+ {% endwith %}
+ </div>
pixeltable/utils/s3.py CHANGED
@@ -10,4 +10,4 @@ def get_client() -> Any:
      except AttributeError:
          # No credentials available, use unsigned mode
          config = botocore.config.Config(signature_version=botocore.UNSIGNED)
-     return boto3.client('s3', config=config)
+     return boto3.client('s3', config=config)