pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (67) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +5 -0
  4. pixeltable/catalog/globals.py +8 -0
  5. pixeltable/catalog/insertable_table.py +2 -2
  6. pixeltable/catalog/table.py +27 -9
  7. pixeltable/catalog/table_version.py +41 -68
  8. pixeltable/catalog/view.py +3 -3
  9. pixeltable/dataframe.py +7 -6
  10. pixeltable/exec/__init__.py +2 -1
  11. pixeltable/exec/expr_eval_node.py +8 -1
  12. pixeltable/exec/row_update_node.py +61 -0
  13. pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
  14. pixeltable/exprs/__init__.py +1 -2
  15. pixeltable/exprs/comparison.py +5 -5
  16. pixeltable/exprs/compound_predicate.py +12 -12
  17. pixeltable/exprs/expr.py +67 -22
  18. pixeltable/exprs/function_call.py +60 -29
  19. pixeltable/exprs/globals.py +2 -0
  20. pixeltable/exprs/in_predicate.py +3 -3
  21. pixeltable/exprs/inline_array.py +18 -11
  22. pixeltable/exprs/is_null.py +5 -5
  23. pixeltable/exprs/method_ref.py +63 -0
  24. pixeltable/ext/__init__.py +9 -0
  25. pixeltable/ext/functions/__init__.py +8 -0
  26. pixeltable/ext/functions/whisperx.py +45 -5
  27. pixeltable/ext/functions/yolox.py +60 -14
  28. pixeltable/func/aggregate_function.py +10 -4
  29. pixeltable/func/callable_function.py +16 -4
  30. pixeltable/func/expr_template_function.py +1 -1
  31. pixeltable/func/function.py +12 -2
  32. pixeltable/func/function_registry.py +26 -9
  33. pixeltable/func/udf.py +32 -4
  34. pixeltable/functions/__init__.py +1 -1
  35. pixeltable/functions/fireworks.py +33 -0
  36. pixeltable/functions/globals.py +36 -1
  37. pixeltable/functions/huggingface.py +155 -7
  38. pixeltable/functions/image.py +242 -40
  39. pixeltable/functions/openai.py +214 -0
  40. pixeltable/functions/string.py +600 -8
  41. pixeltable/functions/timestamp.py +210 -0
  42. pixeltable/functions/together.py +106 -0
  43. pixeltable/functions/video.py +28 -10
  44. pixeltable/functions/whisper.py +32 -0
  45. pixeltable/globals.py +3 -3
  46. pixeltable/io/__init__.py +1 -1
  47. pixeltable/io/globals.py +186 -5
  48. pixeltable/io/label_studio.py +42 -2
  49. pixeltable/io/pandas.py +70 -34
  50. pixeltable/metadata/__init__.py +1 -1
  51. pixeltable/metadata/converters/convert_18.py +39 -0
  52. pixeltable/metadata/notes.py +10 -0
  53. pixeltable/plan.py +82 -7
  54. pixeltable/tool/create_test_db_dump.py +4 -5
  55. pixeltable/tool/doc_plugins/griffe.py +81 -0
  56. pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
  57. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
  58. pixeltable/type_system.py +15 -14
  59. pixeltable/utils/s3.py +1 -1
  60. pixeltable-0.2.14.dist-info/METADATA +206 -0
  61. {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/RECORD +64 -56
  62. pixeltable-0.2.14.dist-info/entry_points.txt +3 -0
  63. pixeltable/exprs/image_member_access.py +0 -96
  64. pixeltable/exprs/predicate.py +0 -44
  65. pixeltable-0.2.12.dist-info/METADATA +0 -137
  66. {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
  67. {pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
pixeltable/func/udf.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  from typing import List, Callable, Optional, overload, Any
4
4
 
5
- import pixeltable as pxt
6
5
  import pixeltable.exceptions as excs
7
6
  import pixeltable.type_system as ts
8
7
  from .callable_function import CallableFunction
@@ -26,6 +25,8 @@ def udf(
26
25
  param_types: Optional[List[ts.ColumnType]] = None,
27
26
  batch_size: Optional[int] = None,
28
27
  substitute_fn: Optional[Callable] = None,
28
+ is_method: bool = False,
29
+ is_property: bool = False,
29
30
  _force_stored: bool = False
30
31
  ) -> Callable[[Callable], Function]: ...
31
32
 
@@ -56,6 +57,8 @@ def udf(*args, **kwargs):
56
57
  param_types = kwargs.pop('param_types', None)
57
58
  batch_size = kwargs.pop('batch_size', None)
58
59
  substitute_fn = kwargs.pop('substitute_fn', None)
60
+ is_method = kwargs.pop('is_method', None)
61
+ is_property = kwargs.pop('is_property', None)
59
62
  force_stored = kwargs.pop('_force_stored', False)
60
63
  if len(kwargs) > 0:
61
64
  raise excs.Error(f'Invalid @udf decorator kwargs: {", ".join(kwargs.keys())}')
@@ -64,8 +67,15 @@ def udf(*args, **kwargs):
64
67
 
65
68
  def decorator(decorated_fn: Callable):
66
69
  return make_function(
67
- decorated_fn, return_type, param_types, batch_size,
68
- substitute_fn=substitute_fn, force_stored=force_stored)
70
+ decorated_fn,
71
+ return_type,
72
+ param_types,
73
+ batch_size,
74
+ substitute_fn=substitute_fn,
75
+ is_method=is_method,
76
+ is_property=is_property,
77
+ force_stored=force_stored
78
+ )
69
79
 
70
80
  return decorator
71
81
 
@@ -76,6 +86,8 @@ def make_function(
76
86
  param_types: Optional[List[ts.ColumnType]] = None,
77
87
  batch_size: Optional[int] = None,
78
88
  substitute_fn: Optional[Callable] = None,
89
+ is_method: bool = False,
90
+ is_property: bool = False,
79
91
  function_name: Optional[str] = None,
80
92
  force_stored: bool = False
81
93
  ) -> Function:
@@ -112,6 +124,15 @@ def make_function(
112
124
  if batch_size is None and len(sig.batched_parameters) > 0:
113
125
  raise excs.Error(f'{errmsg_name}(): batched parameters in udf, but no `batch_size` given')
114
126
 
127
+ if is_method and is_property:
128
+ raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
129
+ if is_property and len(sig.parameters) != 1:
130
+ raise excs.Error(
131
+ f"`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}"
132
+ )
133
+ if (is_method or is_property) and function_path is None:
134
+ raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
135
+
115
136
  if substitute_fn is None:
116
137
  py_fn = decorated_fn
117
138
  else:
@@ -120,7 +141,14 @@ def make_function(
120
141
  py_fn = substitute_fn
121
142
 
122
143
  result = CallableFunction(
123
- signature=sig, py_fn=py_fn, self_path=function_path, self_name=function_name, batch_size=batch_size)
144
+ signature=sig,
145
+ py_fn=py_fn,
146
+ self_path=function_path,
147
+ self_name=function_name,
148
+ batch_size=batch_size,
149
+ is_method=is_method,
150
+ is_property=is_property
151
+ )
124
152
 
125
153
  # If this function is part of a module, register it
126
154
  if function_path is not None:
@@ -1,4 +1,4 @@
1
- from . import fireworks, huggingface, image, openai, string, together, video
1
+ from . import fireworks, huggingface, image, openai, string, together, video, timestamp
2
2
  from .globals import *
3
3
  from pixeltable.utils.code import local_public_names
4
4
 
@@ -1,3 +1,10 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wrap various endpoints from the Fireworks AI API. In order to use them, you must
4
+ first `pip install fireworks-ai` and configure your Fireworks AI credentials, as described in
5
+ the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
6
+ """
7
+
1
8
  from typing import Optional, TYPE_CHECKING
2
9
 
3
10
  import pixeltable as pxt
@@ -29,6 +36,32 @@ def chat_completions(
29
36
  top_p: Optional[float] = None,
30
37
  temperature: Optional[float] = None,
31
38
  ) -> dict:
39
+ """
40
+ Creates a model response for the given chat conversation.
41
+
42
+ Equivalent to the Fireworks AI `chat/completions` API endpoint.
43
+ For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
44
+
45
+ __Requirements:__
46
+
47
+ - `pip install fireworks-ai`
48
+
49
+ Args:
50
+ messages: A list of messages comprising the conversation so far.
51
+ model: The name of the model to use.
52
+
53
+ For details on the other parameters, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
54
+
55
+ Returns:
56
+ A dictionary containing the response and other metadata.
57
+
58
+ Examples:
59
+ Add a computed column that applies the model `accounts/fireworks/models/mixtral-8x22b-instruct`
60
+ to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
61
+
62
+ >>> messages = [{'role': 'user', 'content': tbl.prompt}]
63
+ ... tbl['response'] = chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
64
+ """
32
65
  kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
33
66
  kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
34
67
  return _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none).dict()
@@ -1,4 +1,4 @@
1
- from typing import Union
1
+ from typing import Optional, Union
2
2
 
3
3
  import pixeltable.func as func
4
4
  import pixeltable.type_system as ts
@@ -14,6 +14,7 @@ def cast(expr: exprs.Expr, target_type: ts.ColumnType) -> exprs.Expr:
14
14
 
15
15
  @func.uda(update_types=[ts.IntType()], value_type=ts.IntType(), allows_window=True, requires_order_by=False)
16
16
  class sum(func.Aggregator):
17
+ """Sums the selected integers or floats."""
17
18
  def __init__(self):
18
19
  self.sum: Union[int, float] = 0
19
20
 
@@ -38,6 +39,40 @@ class count(func.Aggregator):
38
39
  return self.count
39
40
 
40
41
 
42
+ @func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
43
+ class max(func.Aggregator):
44
+ def __init__(self):
45
+ self.val = None
46
+
47
+ def update(self, val: Optional[float]) -> None:
48
+ if val is not None:
49
+ if self.val is None:
50
+ self.val = val
51
+ else:
52
+ import builtins
53
+ self.val = builtins.max(self.val, val)
54
+
55
+ def value(self) -> Optional[float]:
56
+ return self.val
57
+
58
+
59
+ @func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
60
+ class min(func.Aggregator):
61
+ def __init__(self):
62
+ self.val = None
63
+
64
+ def update(self, val: Optional[float]) -> None:
65
+ if val is not None:
66
+ if self.val is None:
67
+ self.val = val
68
+ else:
69
+ import builtins
70
+ self.val = builtins.min(self.val, val)
71
+
72
+ def value(self) -> Optional[float]:
73
+ return self.val
74
+
75
+
41
76
  @func.uda(update_types=[ts.IntType()], value_type=ts.FloatType(), allows_window=False, requires_order_by=False)
42
77
  class mean(func.Aggregator):
43
78
  def __init__(self):
@@ -1,3 +1,12 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wrap various models from the Hugging Face `transformers` package.
4
+
5
+ These UDFs will cause Pixeltable to invoke the relevant models locally. In order to use them, you must
6
+ first `pip install transformers` (or in some cases, `sentence-transformers`, as noted in the specific
7
+ UDFs).
8
+ """
9
+
1
10
  from typing import Callable, TypeVar, Optional, Any
2
11
 
3
12
  import PIL.Image
@@ -13,15 +22,39 @@ from pixeltable.utils.code import local_public_names
13
22
 
14
23
  @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
15
24
  def sentence_transformer(
16
- sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False
25
+ sentence: Batch[str], *, model_id: str, normalize_embeddings: bool = False
17
26
  ) -> Batch[np.ndarray]:
18
- """Runs the specified sentence transformer model."""
27
+ """
28
+ Computes sentence embeddings. `model_id` should be a pretrained Sentence Transformers model, as described
29
+ in the [Sentence Transformers Pretrained Models](https://sbert.net/docs/sentence_transformer/pretrained_models.html)
30
+ documentation.
31
+
32
+ __Requirements:__
33
+
34
+ - `pip install sentence-transformers`
35
+
36
+ Args:
37
+ sentence: The sentence to embed.
38
+ model_id: The pretrained model to use for the encoding.
39
+ normalize_embeddings: If `True`, normalizes embeddings to length 1; see the
40
+ [Sentence Transformers API Docs](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html)
41
+ for more details
42
+
43
+ Returns:
44
+ An array containing the output of the embedding model.
45
+
46
+ Examples:
47
+ Add a computed column that applies the model `all-mpnet-base-v2` to an existing Pixeltable column `tbl.sentence`
48
+ of the table `tbl`:
49
+
50
+ >>> tbl['result'] = sentence_transformer(tbl.sentence, model_id='all-mpnet-base-v2')
51
+ """
19
52
  env.Env.get().require_package('sentence_transformers')
20
53
  from sentence_transformers import SentenceTransformer
21
54
 
22
55
  model = _lookup_model(model_id, SentenceTransformer)
23
56
 
24
- array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
57
+ array = model.encode(sentence, normalize_embeddings=normalize_embeddings)
25
58
  return [array[i] for i in range(array.shape[0])]
26
59
 
27
60
 
@@ -49,7 +82,32 @@ def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embed
49
82
 
50
83
  @pxt.udf(batch_size=32)
51
84
  def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
52
- """Runs the specified cross-encoder model."""
85
+ """
86
+ Performs predictions on the given sentence pair.
87
+ `model_id` should be a pretrained Cross-Encoder model, as described in the
88
+ [Cross-Encoder Pretrained Models](https://www.sbert.net/docs/cross_encoder/pretrained_models.html)
89
+ documentation.
90
+
91
+ __Requirements:__
92
+
93
+ - `pip install sentence-transformers`
94
+
95
+ Parameters:
96
+ sentences1: The first sentence to be paired.
97
+ sentences2: The second sentence to be paired.
98
+ model_id: The identifier of the cross-encoder model to use.
99
+
100
+ Returns:
101
+ The similarity score between the inputs.
102
+
103
+ Examples:
104
+ Add a computed column that applies the model `ms-marco-MiniLM-L-4-v2` to the sentences in
105
+ columns `tbl.sentence1` and `tbl.sentence2`:
106
+
107
+ >>> tbl['result'] = cross_encoder(
108
+ ... tbl.sentence1, tbl.sentence2, model_id='ms-marco-MiniLM-L-4-v2'
109
+ ... )
110
+ """
53
111
  env.Env.get().require_package('sentence_transformers')
54
112
  from sentence_transformers import CrossEncoder
55
113
 
@@ -72,7 +130,27 @@ def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> li
72
130
 
73
131
  @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
74
132
  def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
75
- """Runs the specified CLIP model on text."""
133
+ """
134
+ Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
135
+ [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
136
+
137
+ __Requirements:__
138
+
139
+ - `pip install transformers`
140
+
141
+ Args:
142
+ text: The string to embed.
143
+ model_id: The pretrained model to use for the embedding.
144
+
145
+ Returns:
146
+ An array containing the output of the embedding model.
147
+
148
+ Examples:
149
+ Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
150
+ Pixeltable column `tbl.text` of the table `tbl`:
151
+
152
+ >>> tbl['result'] = clip_text(tbl.text, model_id='openai/clip-vit-base-patch32')
153
+ """
76
154
  env.Env.get().require_package('transformers')
77
155
  device = resolve_torch_device('auto')
78
156
  import torch
@@ -90,7 +168,27 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
90
168
 
91
169
  @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
92
170
  def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
93
- """Runs the specified CLIP model on images."""
171
+ """
172
+ Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
173
+ [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
174
+
175
+ __Requirements:__
176
+
177
+ - `pip install transformers`
178
+
179
+ Args:
180
+ image: The image to embed.
181
+ model_id: The pretrained model to use for the embedding.
182
+
183
+ Returns:
184
+ An array containing the output of the embedding model.
185
+
186
+ Examples:
187
+ Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
188
+ Pixeltable column `tbl.image` of the table `tbl`:
189
+
190
+ >>> tbl['result'] = clip_image(tbl.image, model_id='openai/clip-vit-base-patch32')
191
+ """
94
192
  env.Env.get().require_package('transformers')
95
193
  device = resolve_torch_device('auto')
96
194
  import torch
@@ -120,7 +218,41 @@ def _(model_id: str) -> ts.ArrayType:
120
218
 
121
219
  @pxt.udf(batch_size=4)
122
220
  def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
123
- """Runs the specified DETR model."""
221
+ """
222
+ Computes DETR object detections for the specified image. `model_id` should be a reference to a pretrained
223
+ [DETR Model](https://huggingface.co/docs/transformers/model_doc/detr).
224
+
225
+ __Requirements:__
226
+
227
+ - `pip install transformers`
228
+
229
+ Args:
230
+ image: The image in which to detect objects.
231
+ model_id: The pretrained model to use for object detection.
232
+
233
+ Returns:
234
+ A dictionary containing the output of the object detection model, in the following format:
235
+
236
+ ```python
237
+ {
238
+ 'scores': [0.99, 0.999], # list of confidence scores for each detected object
239
+ 'labels': [25, 25], # list of COCO class labels for each detected object
240
+ 'label_text': ['giraffe', 'giraffe'], # corresponding text names of class labels
241
+ 'boxes': [[51.942, 356.174, 181.481, 413.975], [383.225, 58.66, 605.64, 361.346]]
242
+ # list of bounding boxes for each detected object, as [x1, y1, x2, y2]
243
+ }
244
+ ```
245
+
246
+ Examples:
247
+ Add a computed column that applies the model `facebook/detr-resnet-50` to an existing
248
+ Pixeltable column `tbl.image` of the table `tbl`:
249
+
250
+ >>> tbl['detections'] = detr_for_object_detection(
251
+ ... tbl.image,
252
+ ... model_id='facebook/detr-resnet-50',
253
+ ... threshold=0.8
254
+ ... )
255
+ """
124
256
  env.Env.get().require_package('transformers')
125
257
  device = resolve_torch_device('auto')
126
258
  import torch
@@ -152,6 +284,22 @@ def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, t
152
284
 
153
285
  @pxt.udf
154
286
  def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str, Any]:
287
+ """
288
+ Converts the output of a DETR object detection model to COCO format.
289
+
290
+ Args:
291
+ image: The image for which detections were computed.
292
+ detr_info: The output of a DETR object detection model, as returned by `detr_for_object_detection`.
293
+
294
+ Returns:
295
+ A dictionary containing the data from `detr_info`, converted to COCO format.
296
+
297
+ Examples:
298
+ Add a computed column that converts the output `tbl.detections` to COCO format, where `tbl.image`
299
+ is the image for which detections were computed:
300
+
301
+ >>> tbl['detections_coco'] = detr_to_coco(tbl.image, tbl.detections)
302
+ """
155
303
  bboxes, labels = detr_info['boxes'], detr_info['labels']
156
304
  annotations = [
157
305
  {'bbox': [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]], 'category': label}