pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (48)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/insertable_table.py +2 -2
  4. pixeltable/catalog/schema_object.py +28 -2
  5. pixeltable/catalog/table.py +68 -30
  6. pixeltable/catalog/table_version.py +14 -43
  7. pixeltable/catalog/view.py +2 -2
  8. pixeltable/dataframe.py +8 -7
  9. pixeltable/exec/expr_eval_node.py +8 -1
  10. pixeltable/exec/sql_scan_node.py +1 -1
  11. pixeltable/exprs/__init__.py +0 -1
  12. pixeltable/exprs/column_ref.py +2 -7
  13. pixeltable/exprs/comparison.py +5 -5
  14. pixeltable/exprs/compound_predicate.py +12 -12
  15. pixeltable/exprs/expr.py +32 -0
  16. pixeltable/exprs/in_predicate.py +3 -3
  17. pixeltable/exprs/is_null.py +5 -5
  18. pixeltable/exprs/similarity_expr.py +27 -16
  19. pixeltable/func/aggregate_function.py +10 -4
  20. pixeltable/func/callable_function.py +4 -0
  21. pixeltable/func/function_registry.py +2 -0
  22. pixeltable/functions/globals.py +36 -1
  23. pixeltable/functions/huggingface.py +62 -4
  24. pixeltable/functions/image.py +17 -0
  25. pixeltable/functions/openai.py +1 -1
  26. pixeltable/functions/string.py +622 -7
  27. pixeltable/functions/video.py +26 -8
  28. pixeltable/globals.py +54 -50
  29. pixeltable/index/embedding_index.py +28 -27
  30. pixeltable/io/external_store.py +2 -2
  31. pixeltable/io/globals.py +54 -5
  32. pixeltable/io/label_studio.py +45 -5
  33. pixeltable/io/pandas.py +18 -7
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_17.py +26 -0
  36. pixeltable/plan.py +6 -6
  37. pixeltable/tool/create_test_db_dump.py +2 -2
  38. pixeltable/tool/doc_plugins/griffe.py +77 -0
  39. pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
  40. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
  41. pixeltable/utils/s3.py +1 -1
  42. pixeltable-0.2.13.dist-info/METADATA +206 -0
  43. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
  44. pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
  45. pixeltable/exprs/predicate.py +0 -44
  46. pixeltable-0.2.11.dist-info/METADATA +0 -137
  47. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
  48. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/exprs/expr.py CHANGED
@@ -518,6 +518,38 @@ class Expr(abc.ABC):
518
518
  return ArithmeticExpr(op, self, Literal(other)) # type: ignore[arg-type]
519
519
  raise TypeError(f'Other must be Expr or literal: {type(other)}')
520
520
 
521
+ def __and__(self, other: object) -> Expr:
522
+ if not isinstance(other, Expr):
523
+ raise TypeError(f'Other needs to be an expression: {type(other)}')
524
+ if not other.col_type.is_bool_type():
525
+ raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
526
+ from .compound_predicate import CompoundPredicate
527
+ return CompoundPredicate(LogicalOperator.AND, [self, other])
528
+
529
+ def __or__(self, other: object) -> Expr:
530
+ if not isinstance(other, Expr):
531
+ raise TypeError(f'Other needs to be an expression: {type(other)}')
532
+ if not other.col_type.is_bool_type():
533
+ raise TypeError(f'Other needs to be an expression that returns a boolean: {other.col_type}')
534
+ from .compound_predicate import CompoundPredicate
535
+ return CompoundPredicate(LogicalOperator.OR, [self, other])
536
+
537
+ def __invert__(self) -> Expr:
538
+ from .compound_predicate import CompoundPredicate
539
+ return CompoundPredicate(LogicalOperator.NOT, [self])
540
+
541
+ def split_conjuncts(
542
+ self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
543
+ """
544
+ Returns clauses of a conjunction that meet condition in the first element.
545
+ The second element contains remaining clauses, rolled into a conjunction.
546
+ """
547
+ assert self.col_type.is_bool_type() # only valid for predicates
548
+ if condition(self):
549
+ return [self], None
550
+ else:
551
+ return [], self
552
+
521
553
  def _make_applicator_function(self, fn: Callable, col_type: Optional[ts.ColumnType]) -> 'pixeltable.func.Function':
522
554
  """
523
555
  Creates a unary pixeltable `Function` that encapsulates a python `Callable`. The result type of
@@ -5,20 +5,20 @@ from typing import Optional, List, Any, Dict, Tuple, Iterable
5
5
  import sqlalchemy as sql
6
6
 
7
7
  import pixeltable.exceptions as excs
8
+ import pixeltable.type_system as ts
8
9
  from .data_row import DataRow
9
10
  from .expr import Expr
10
- from .predicate import Predicate
11
11
  from .row_builder import RowBuilder
12
12
 
13
13
 
14
- class InPredicate(Predicate):
14
+ class InPredicate(Expr):
15
15
  """Predicate corresponding to the SQL IN operator."""
16
16
 
17
17
  def __init__(self, lhs: Expr, value_set_literal: Optional[Iterable] = None, value_set_expr: Optional[Expr] = None):
18
18
  assert (value_set_literal is None) != (value_set_expr is None)
19
19
  if not lhs.col_type.is_scalar_type():
20
20
  raise excs.Error(f'isin(): only supported for scalar types, not {lhs.col_type}')
21
- super().__init__()
21
+ super().__init__(ts.BoolType())
22
22
 
23
23
  self.value_list: Optional[list] = None # only contains values of the correct type
24
24
  if value_set_expr is not None:
@@ -1,18 +1,18 @@
1
1
  from __future__ import annotations
2
+
2
3
  from typing import Optional, List, Dict
3
4
 
4
5
  import sqlalchemy as sql
5
6
 
6
- from .predicate import Predicate
7
- from .expr import Expr
7
+ import pixeltable.type_system as ts
8
8
  from .data_row import DataRow
9
+ from .expr import Expr
9
10
  from .row_builder import RowBuilder
10
- import pixeltable.catalog as catalog
11
11
 
12
12
 
13
- class IsNull(Predicate):
13
+ class IsNull(Expr):
14
14
  def __init__(self, e: Expr):
15
- super().__init__()
15
+ super().__init__(ts.BoolType())
16
16
  self.components = [e]
17
17
  self.id = self._create_id()
18
18
 
@@ -1,4 +1,4 @@
1
- from typing import Optional, List
1
+ from typing import Optional, List, Any
2
2
 
3
3
  import sqlalchemy as sql
4
4
  import PIL.Image
@@ -14,33 +14,44 @@ from .row_builder import RowBuilder
14
14
 
15
15
  class SimilarityExpr(Expr):
16
16
 
17
- def __init__(self, col_ref: ColumnRef, item: Expr):
17
+ def __init__(self, col_ref: ColumnRef, item: Any, idx_name: Optional[str] = None):
18
18
  super().__init__(ts.FloatType())
19
- self.components = [col_ref, item]
19
+ item_expr = Expr.from_object(item)
20
+ if item_expr is None or not(item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()):
21
+ raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not a {type(item)}')
22
+ assert item_expr.col_type.is_string_type() or item_expr.col_type.is_image_type()
23
+
24
+ self.components = [col_ref, item_expr]
20
25
  self.id = self._create_id()
21
- assert item.col_type.is_string_type() or item.col_type.is_image_type()
22
26
 
23
27
  # determine index to use
24
28
  idx_info = col_ref.col.get_idx_info()
25
29
  import pixeltable.index as index
26
- embedding_idx_info = [info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)]
30
+ embedding_idx_info = {
31
+ info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
32
+ }
27
33
  if len(embedding_idx_info) == 0:
28
- raise excs.Error(f'No index found for column {col_ref.col}')
34
+ raise excs.Error(f'No index found for column {col_ref.col!r}')
35
+ if idx_name is not None and idx_name not in embedding_idx_info:
36
+ raise excs.Error(f'Index {idx_name!r} not found for column {col_ref.col.name!r}')
29
37
  if len(embedding_idx_info) > 1:
30
- raise excs.Error(
31
- f'Column {col_ref.col.name} has multiple indices; use the index name to disambiguate, '
32
- f'e.g., `{col_ref.col.name}.<index-name>.similarity(...)`')
33
- self.idx_info = embedding_idx_info[0]
38
+ if idx_name is None:
39
+ raise excs.Error(
40
+ f'Column {col_ref.col.name!r} has multiple indices; use the index name to disambiguate: '
41
+ f'`{col_ref.col.name}.similarity(..., idx=<name>)`')
42
+ self.idx_info = embedding_idx_info[idx_name]
43
+ else:
44
+ self.idx_info = next(iter(embedding_idx_info.values()))
34
45
  idx = self.idx_info.idx
35
46
 
36
- if item.col_type.is_string_type() and idx.txt_embed is None:
47
+ if item_expr.col_type.is_string_type() and idx.string_embed is None:
37
48
  raise excs.Error(
38
- f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
39
- f'text_embed parameter and does not support text queries')
40
- if item.col_type.is_image_type() and idx.img_embed is None:
49
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
50
+ f"'string_embed' parameter and does not support string queries")
51
+ if item_expr.col_type.is_image_type() and idx.image_embed is None:
41
52
  raise excs.Error(
42
- f'Embedding index {self.idx_info.name} on column {self.idx_info.col.name} was created without the '
43
- f'img_embed parameter and does not support image queries')
53
+ f'Embedding index {self.idx_info.name!r} on column {self.idx_info.col.name!r} was created without the '
54
+ f"'image_embed' parameter and does not support image queries")
44
55
 
45
56
  def __str__(self) -> str:
46
57
  return f'{self.components[0]}.similarity({self.components[1]})'
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
- import importlib
5
4
  import inspect
6
- from typing import Optional, Any, Type, List, Dict, Callable
7
- import itertools
5
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type
8
6
 
9
7
  import pixeltable.exceptions as excs
10
8
  import pixeltable.type_system as ts
9
+
11
10
  from .function import Function
12
- from .signature import Signature, Parameter
13
11
  from .globals import validate_symbol_path
12
+ from .signature import Parameter, Signature
13
+
14
+ if TYPE_CHECKING:
15
+ import pixeltable
14
16
 
15
17
 
16
18
  class Aggregator(abc.ABC):
@@ -40,6 +42,7 @@ class AggregateFunction(Function):
40
42
  self.requires_order_by = requires_order_by
41
43
  self.allows_std_agg = allows_std_agg
42
44
  self.allows_window = allows_window
45
+ self.__doc__ = aggregator_class.__doc__
43
46
 
44
47
  # our signature is the signature of 'update', but without self,
45
48
  # plus the parameters of 'init' as keyword-only parameters
@@ -135,6 +138,9 @@ class AggregateFunction(Function):
135
138
  f'expression'
136
139
  )
137
140
 
141
+ def __repr__(self) -> str:
142
+ return f'<Pixeltable Aggregator {self.name}>'
143
+
138
144
 
139
145
  def uda(
140
146
  *,
@@ -25,6 +25,7 @@ class CallableFunction(Function):
25
25
  self.py_fn = py_fn
26
26
  self.self_name = self_name
27
27
  self.batch_size = batch_size
28
+ self.__doc__ = py_fn.__doc__
28
29
  super().__init__(signature, self_path=self_path)
29
30
 
30
31
  @property
@@ -113,3 +114,6 @@ class CallableFunction(Function):
113
114
  f'{self.display_name}(): '
114
115
  f'parameter {param.name} must be a constant value, not a Pixeltable expression'
115
116
  )
117
+
118
+ def __repr__(self) -> str:
119
+ return f'<Pixeltable UDF {self.name}>'
@@ -66,6 +66,8 @@ class FunctionRegistry:
66
66
  # self.module_fns[fn_path] = obj
67
67
 
68
68
  def register_function(self, fqn: str, fn: Function) -> None:
69
+ if fqn in self.module_fns:
70
+ raise excs.Error(f'A UDF with that name already exists: {fqn}')
69
71
  self.module_fns[fqn] = fn
70
72
 
71
73
  def list_functions(self) -> List[Function]:
@@ -1,4 +1,4 @@
1
- from typing import Union
1
+ from typing import Optional, Union
2
2
 
3
3
  import pixeltable.func as func
4
4
  import pixeltable.type_system as ts
@@ -14,6 +14,7 @@ def cast(expr: exprs.Expr, target_type: ts.ColumnType) -> exprs.Expr:
14
14
 
15
15
  @func.uda(update_types=[ts.IntType()], value_type=ts.IntType(), allows_window=True, requires_order_by=False)
16
16
  class sum(func.Aggregator):
17
+ """Sums the selected integers or floats."""
17
18
  def __init__(self):
18
19
  self.sum: Union[int, float] = 0
19
20
 
@@ -38,6 +39,40 @@ class count(func.Aggregator):
38
39
  return self.count
39
40
 
40
41
 
42
+ @func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
43
+ class max(func.Aggregator):
44
+ def __init__(self):
45
+ self.val = None
46
+
47
+ def update(self, val: Optional[float]) -> None:
48
+ if val is not None:
49
+ if self.val is None:
50
+ self.val = val
51
+ else:
52
+ import builtins
53
+ self.val = builtins.max(self.val, val)
54
+
55
+ def value(self) -> Optional[float]:
56
+ return self.val
57
+
58
+
59
+ @func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
60
+ class min(func.Aggregator):
61
+ def __init__(self):
62
+ self.val = None
63
+
64
+ def update(self, val: Optional[float]) -> None:
65
+ if val is not None:
66
+ if self.val is None:
67
+ self.val = val
68
+ else:
69
+ import builtins
70
+ self.val = builtins.min(self.val, val)
71
+
72
+ def value(self) -> Optional[float]:
73
+ return self.val
74
+
75
+
41
76
  @func.uda(update_types=[ts.IntType()], value_type=ts.FloatType(), allows_window=False, requires_order_by=False)
42
77
  class mean(func.Aggregator):
43
78
  def __init__(self):
@@ -1,3 +1,12 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wrap various models from the Hugging Face `transformers` package.
4
+
5
+ These UDFs will cause Pixeltable to invoke the relevant models locally. In order to use them, you must
6
+ first `pip install transformers` (or in some cases, `sentence-transformers`, as noted in the specific
7
+ UDFs).
8
+ """
9
+
1
10
  from typing import Callable, TypeVar, Optional, Any
2
11
 
3
12
  import PIL.Image
@@ -13,15 +22,39 @@ from pixeltable.utils.code import local_public_names
13
22
 
14
23
  @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
15
24
  def sentence_transformer(
16
- sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False
25
+ sentence: Batch[str], *, model_id: str, normalize_embeddings: bool = False
17
26
  ) -> Batch[np.ndarray]:
18
- """Runs the specified sentence transformer model."""
27
+ """
28
+ Runs the specified pretrained sentence-transformers model. `model_id` should be a pretrained model, as described
29
+ in the [Sentence Transformers Pretrained Models](https://sbert.net/docs/sentence_transformer/pretrained_models.html)
30
+ documentation.
31
+
32
+ __Requirements:__
33
+
34
+ - `pip install sentence-transformers`
35
+
36
+ Args:
37
+ sentence: The sentence to embed.
38
+ model_id: The pretrained model to use for the encoding.
39
+ normalize_embeddings: If `True`, normalizes embeddings to length 1; see the
40
+ [Sentence Transformers API Docs](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html)
41
+ for more details
42
+
43
+ Returns:
44
+ An array containing the output of the embedding model.
45
+
46
+ Examples:
47
+ Add a computed column that applies the model `all-mpnet-base-v2` to an existing Pixeltable column `tbl.sentence`
48
+ of the table `tbl`:
49
+
50
+ >>> tbl['result'] = sentence_transformer(tbl.sentence, model_id='all-mpnet-base-v2')
51
+ """
19
52
  env.Env.get().require_package('sentence_transformers')
20
53
  from sentence_transformers import SentenceTransformer
21
54
 
22
55
  model = _lookup_model(model_id, SentenceTransformer)
23
56
 
24
- array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
57
+ array = model.encode(sentence, normalize_embeddings=normalize_embeddings)
25
58
  return [array[i] for i in range(array.shape[0])]
26
59
 
27
60
 
@@ -49,7 +82,32 @@ def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embed
49
82
 
50
83
  @pxt.udf(batch_size=32)
51
84
  def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
52
- """Runs the specified cross-encoder model."""
85
+ """
86
+ Runs the specified cross-encoder model to compute similarity scores for pairs of sentences.
87
+ `model_id` should be a pretrained model, as described in the
88
+ [Cross-Encoder Pretrained Models](https://www.sbert.net/docs/cross_encoder/pretrained_models.html)
89
+ documentation.
90
+
91
+ __Requirements:__
92
+
93
+ - `pip install sentence-transformers`
94
+
95
+ Parameters:
96
+ sentences1: The first sentence to be paired.
97
+ sentences2: The second sentence to be paired.
98
+ model_id: The identifier of the cross-encoder model to use.
99
+
100
+ Returns:
101
+ The similarity score between the inputs.
102
+
103
+ Examples:
104
+ Add a computed column that applies the model `ms-marco-MiniLM-L-4-v2` to the sentences in
105
+ columns `tbl.sentence1` and `tbl.sentence2`:
106
+
107
+ >>> tbl['result'] = cross_encoder(
108
+ tbl.sentence1, tbl.sentence2, model_id='ms-marco-MiniLM-L-4-v2'
109
+ )
110
+ """
53
111
  env.Env.get().require_package('sentence_transformers')
54
112
  from sentence_transformers import CrossEncoder
55
113
 
@@ -1,3 +1,15 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `ImageType`.
3
+
4
+ Example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.img_col.convert('L')).collect()
10
+ ```
11
+ """
12
+
1
13
  import base64
2
14
  from typing import Optional, Tuple
3
15
 
@@ -41,6 +53,11 @@ def composite(image1: PIL.Image.Image, image2: PIL.Image.Image, mask: PIL.Image.
41
53
  # Image.convert()
42
54
  @func.udf
43
55
  def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
56
+ """
57
+ Convert the image to a different mode.
58
+
59
+ Equivalent to [`PIL.Image.Image.convert()`](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert).
60
+ """
44
61
  return self.convert(mode)
45
62
 
46
63
 
@@ -141,7 +141,7 @@ def chat_completions(
141
141
 
142
142
 
143
143
  @pxt.udf
144
- def vision(prompt: str, image: PIL.Image.Image, *, model: str = 'gpt-4-vision-preview') -> str:
144
+ def vision(prompt: str, image: PIL.Image.Image, *, model: str) -> str:
145
145
  # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
146
146
  bytes_arr = io.BytesIO()
147
147
  image.save(bytes_arr, format='png')