pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (120)
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/column.py +37 -11
  5. pixeltable/catalog/globals.py +21 -0
  6. pixeltable/catalog/insertable_table.py +6 -4
  7. pixeltable/catalog/table.py +227 -148
  8. pixeltable/catalog/table_version.py +66 -28
  9. pixeltable/catalog/table_version_path.py +0 -8
  10. pixeltable/catalog/view.py +18 -19
  11. pixeltable/dataframe.py +16 -32
  12. pixeltable/env.py +6 -1
  13. pixeltable/exec/__init__.py +1 -2
  14. pixeltable/exec/aggregation_node.py +27 -17
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/data_row_batch.py +9 -26
  17. pixeltable/exec/exec_node.py +36 -7
  18. pixeltable/exec/expr_eval_node.py +19 -11
  19. pixeltable/exec/in_memory_data_node.py +14 -11
  20. pixeltable/exec/sql_node.py +266 -138
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/arithmetic_expr.py +3 -1
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +93 -14
  26. pixeltable/exprs/comparison.py +5 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +56 -36
  29. pixeltable/exprs/expr.py +65 -63
  30. pixeltable/exprs/expr_dict.py +55 -0
  31. pixeltable/exprs/expr_set.py +26 -15
  32. pixeltable/exprs/function_call.py +53 -24
  33. pixeltable/exprs/globals.py +4 -1
  34. pixeltable/exprs/in_predicate.py +8 -7
  35. pixeltable/exprs/inline_expr.py +4 -4
  36. pixeltable/exprs/is_null.py +4 -4
  37. pixeltable/exprs/json_mapper.py +11 -12
  38. pixeltable/exprs/json_path.py +5 -10
  39. pixeltable/exprs/literal.py +5 -5
  40. pixeltable/exprs/method_ref.py +5 -4
  41. pixeltable/exprs/object_ref.py +2 -1
  42. pixeltable/exprs/row_builder.py +88 -36
  43. pixeltable/exprs/rowid_ref.py +14 -13
  44. pixeltable/exprs/similarity_expr.py +12 -7
  45. pixeltable/exprs/sql_element_cache.py +12 -6
  46. pixeltable/exprs/type_cast.py +8 -6
  47. pixeltable/exprs/variable.py +5 -4
  48. pixeltable/ext/functions/whisperx.py +7 -2
  49. pixeltable/func/aggregate_function.py +1 -1
  50. pixeltable/func/callable_function.py +2 -2
  51. pixeltable/func/function.py +11 -10
  52. pixeltable/func/function_registry.py +6 -7
  53. pixeltable/func/query_template_function.py +11 -12
  54. pixeltable/func/signature.py +17 -15
  55. pixeltable/func/udf.py +0 -4
  56. pixeltable/functions/__init__.py +2 -2
  57. pixeltable/functions/audio.py +4 -6
  58. pixeltable/functions/globals.py +84 -42
  59. pixeltable/functions/huggingface.py +31 -34
  60. pixeltable/functions/image.py +59 -45
  61. pixeltable/functions/json.py +0 -1
  62. pixeltable/functions/llama_cpp.py +106 -0
  63. pixeltable/functions/mistralai.py +2 -2
  64. pixeltable/functions/ollama.py +147 -0
  65. pixeltable/functions/openai.py +22 -25
  66. pixeltable/functions/replicate.py +72 -0
  67. pixeltable/functions/string.py +59 -50
  68. pixeltable/functions/timestamp.py +20 -20
  69. pixeltable/functions/together.py +2 -2
  70. pixeltable/functions/video.py +11 -20
  71. pixeltable/functions/whisper.py +2 -20
  72. pixeltable/globals.py +65 -74
  73. pixeltable/index/base.py +2 -2
  74. pixeltable/index/btree.py +20 -7
  75. pixeltable/index/embedding_index.py +12 -14
  76. pixeltable/io/__init__.py +1 -2
  77. pixeltable/io/external_store.py +11 -5
  78. pixeltable/io/fiftyone.py +178 -0
  79. pixeltable/io/globals.py +98 -2
  80. pixeltable/io/hf_datasets.py +1 -1
  81. pixeltable/io/label_studio.py +6 -6
  82. pixeltable/io/parquet.py +14 -13
  83. pixeltable/iterators/base.py +3 -2
  84. pixeltable/iterators/document.py +10 -8
  85. pixeltable/iterators/video.py +126 -60
  86. pixeltable/metadata/__init__.py +4 -3
  87. pixeltable/metadata/converters/convert_14.py +4 -2
  88. pixeltable/metadata/converters/convert_15.py +1 -1
  89. pixeltable/metadata/converters/convert_19.py +1 -0
  90. pixeltable/metadata/converters/convert_20.py +1 -1
  91. pixeltable/metadata/converters/convert_21.py +34 -0
  92. pixeltable/metadata/converters/util.py +54 -12
  93. pixeltable/metadata/notes.py +1 -0
  94. pixeltable/metadata/schema.py +40 -21
  95. pixeltable/plan.py +149 -165
  96. pixeltable/py.typed +0 -0
  97. pixeltable/store.py +57 -37
  98. pixeltable/tool/create_test_db_dump.py +6 -6
  99. pixeltable/tool/create_test_video.py +1 -1
  100. pixeltable/tool/doc_plugins/griffe.py +3 -34
  101. pixeltable/tool/embed_udf.py +1 -1
  102. pixeltable/tool/mypy_plugin.py +55 -0
  103. pixeltable/type_system.py +260 -61
  104. pixeltable/utils/arrow.py +10 -9
  105. pixeltable/utils/coco.py +4 -4
  106. pixeltable/utils/documents.py +16 -2
  107. pixeltable/utils/filecache.py +9 -9
  108. pixeltable/utils/formatter.py +10 -11
  109. pixeltable/utils/http_server.py +2 -5
  110. pixeltable/utils/media_store.py +6 -6
  111. pixeltable/utils/pytorch.py +10 -11
  112. pixeltable/utils/sql.py +2 -1
  113. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
  114. pixeltable-0.2.22.dist-info/RECORD +153 -0
  115. pixeltable/exec/media_validation_node.py +0 -43
  116. pixeltable/utils/help.py +0 -11
  117. pixeltable-0.2.20.dist-info/RECORD +0 -147
  118. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
  119. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
  120. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
@@ -1,15 +1,17 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
3
- from .sql_element_cache import SqlElementCache
2
+
3
+ from typing import Any, Optional
4
4
  from uuid import UUID
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- from .expr import Expr
8
+ import pixeltable.catalog as catalog
9
+ import pixeltable.type_system as ts
10
+
9
11
  from .data_row import DataRow
12
+ from .expr import Expr
10
13
  from .row_builder import RowBuilder
11
- import pixeltable.type_system as ts
12
- import pixeltable.catalog as catalog
14
+ from .sql_element_cache import SqlElementCache
13
15
 
14
16
 
15
17
  class RowidRef(Expr):
@@ -49,14 +51,14 @@ class RowidRef(Expr):
49
51
  return self.normalized_base_id == other.normalized_base_id \
50
52
  and self.rowid_component_idx == other.rowid_component_idx
51
53
 
52
- def _id_attrs(self) -> List[Tuple[str, Any]]:
54
+ def _id_attrs(self) -> list[tuple[str, Any]]:
53
55
  return super()._id_attrs() +\
54
56
  [('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
55
57
 
56
58
  def __str__(self) -> str:
57
59
  # check if this is the pos column of a component view
58
60
  tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
59
- if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx:
61
+ if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
60
62
  return catalog.globals._POS_COLUMN_NAME
61
63
  return ''
62
64
 
@@ -68,12 +70,12 @@ class RowidRef(Expr):
68
70
  """
69
71
  if self.tbl_id == tbl.tbl_version.id:
70
72
  return
71
- tbl_version_ids = [tbl_version.id for tbl_version in tbl.get_tbl_versions()]
72
- assert self.tbl_id in tbl_version_ids
73
+ base_ids = [tbl_version.id for tbl_version in tbl.get_tbl_versions()]
74
+ assert self.tbl_id in base_ids # our current TableVersion is a base of the new TableVersion
73
75
  self.tbl = tbl.tbl_version
74
76
  self.tbl_id = self.tbl.id
75
77
 
76
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
78
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
77
79
  tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
78
80
  rowid_cols = tbl.store_tbl.rowid_columns()
79
81
  return rowid_cols[self.rowid_component_idx]
@@ -81,7 +83,7 @@ class RowidRef(Expr):
81
83
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
82
84
  data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
83
85
 
84
- def _as_dict(self) -> Dict:
86
+ def _as_dict(self) -> dict:
85
87
  return {
86
88
  'tbl_id': str(self.tbl_id),
87
89
  'normalized_base_id': str(self.normalized_base_id),
@@ -89,7 +91,6 @@ class RowidRef(Expr):
89
91
  }
90
92
 
91
93
  @classmethod
92
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
94
+ def _from_dict(cls, d: dict, components: list[Expr]) -> RowidRef:
93
95
  tbl_id, normalized_base_id, idx = UUID(d['tbl_id']), UUID(d['normalized_base_id']), d['idx']
94
96
  return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
95
-
@@ -1,16 +1,16 @@
1
- from typing import Optional, List, Any
2
- from .sql_element_cache import SqlElementCache
1
+ from typing import Any, Optional
3
2
 
4
3
  import sqlalchemy as sql
5
- import PIL.Image
6
4
 
7
5
  import pixeltable.exceptions as excs
8
6
  import pixeltable.type_system as ts
7
+
9
8
  from .column_ref import ColumnRef
10
9
  from .data_row import DataRow
11
10
  from .expr import Expr
12
11
  from .literal import Literal
13
12
  from .row_builder import RowBuilder
13
+ from .sql_element_cache import SqlElementCache
14
14
 
15
15
 
16
16
  class SimilarityExpr(Expr):
@@ -27,7 +27,7 @@ class SimilarityExpr(Expr):
27
27
 
28
28
  # determine index to use
29
29
  idx_info = col_ref.col.get_idx_info()
30
- import pixeltable.index as index
30
+ from pixeltable import index
31
31
  embedding_idx_info = {
32
32
  info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
33
33
  }
@@ -44,6 +44,7 @@ class SimilarityExpr(Expr):
44
44
  else:
45
45
  self.idx_info = next(iter(embedding_idx_info.values()))
46
46
  idx = self.idx_info.idx
47
+ assert isinstance(idx, index.EmbeddingIndex)
47
48
 
48
49
  if item_expr.col_type.is_string_type() and idx.string_embed is None:
49
50
  raise excs.Error(
@@ -57,16 +58,20 @@ class SimilarityExpr(Expr):
57
58
  def __str__(self) -> str:
58
59
  return f'{self.components[0]}.similarity({self.components[1]})'
59
60
 
60
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
61
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
61
62
  if not isinstance(self.components[1], Literal):
62
63
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
63
64
  item = self.components[1].val
65
+ from pixeltable import index
66
+ assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
64
67
  return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
65
68
 
66
- def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ClauseElement]:
69
+ def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ColumnElement]:
67
70
  if not isinstance(self.components[1], Literal):
68
71
  raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
69
72
  item = self.components[1].val
73
+ from pixeltable import index
74
+ assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
70
75
  return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
71
76
 
72
77
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -74,7 +79,7 @@ class SimilarityExpr(Expr):
74
79
  assert False
75
80
 
76
81
  @classmethod
77
- def _from_dict(cls, d: dict, components: List[Expr]) -> Expr:
82
+ def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
78
83
  assert len(components) == 2
79
84
  assert isinstance(components[0], ColumnRef)
80
85
  return cls(components[0], components[1])
@@ -1,8 +1,9 @@
1
- from typing import Iterable, Union, Optional
1
+ from typing import Iterable, Union, Optional, cast
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
5
  from .expr import Expr
6
+ from .expr_dict import ExprDict
6
7
 
7
8
 
8
9
  class SqlElementCache:
@@ -10,8 +11,11 @@ class SqlElementCache:
10
11
 
11
12
  cache: dict[int, Optional[sql.ColumnElement]] # key: Expr.id
12
13
 
13
- def __init__(self):
14
+ def __init__(self, elements: Optional[ExprDict[sql.ColumnElement]] = None):
14
15
  self.cache = {}
16
+ if elements is not None:
17
+ for e, el in elements.items():
18
+ self.cache[e.id] = el
15
19
 
16
20
  def get(self, e: Expr) -> Optional[sql.ColumnElement]:
17
21
  """Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
@@ -23,8 +27,10 @@ class SqlElementCache:
23
27
  self.cache[e.id] = el
24
28
  return el
25
29
 
26
- def contains(self, items: Union[Expr, Iterable[Expr]]) -> bool:
27
- """Returns True if every item has a (non-None) sql.ColumnElement."""
28
- if isinstance(items, Expr):
29
- return self.get(items) is not None
30
+ def contains(self, item: Expr) -> bool:
31
+ """Returns True if the cache contains a (non-None) value for the given Expr."""
32
+ return self.get(item) is not None
33
+
34
+ def contains_all(self, items: Iterable[Expr]) -> bool:
35
+ """Returns True if the cache contains a (non-None) value for every item in the collection of Exprs."""
30
36
  return all(self.get(e) is not None for e in items)
@@ -1,8 +1,9 @@
1
- from typing import Optional, Dict, List, Tuple, Any
1
+ from typing import Any, Optional
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
5
  import pixeltable.type_system as ts
6
+
6
7
  from .expr import DataRow, Expr
7
8
  from .row_builder import RowBuilder
8
9
  from .sql_element_cache import SqlElementCache
@@ -15,7 +16,7 @@ class TypeCast(Expr):
15
16
  """
16
17
  def __init__(self, underlying: Expr, new_type: ts.ColumnType):
17
18
  super().__init__(new_type)
18
- self.components: List[Expr] = [underlying]
19
+ self.components: list[Expr] = [underlying]
19
20
  self.id: Optional[int] = self._create_id()
20
21
 
21
22
  @property
@@ -26,10 +27,10 @@ class TypeCast(Expr):
26
27
  # `TypeCast` has no properties beyond those captured by `Expr`.
27
28
  return True
28
29
 
29
- def _id_attrs(self) -> List[Tuple[str, Any]]:
30
+ def _id_attrs(self) -> list[tuple[str, Any]]:
30
31
  return super()._id_attrs() + [('new_type', self.col_type)]
31
32
 
32
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
33
+ def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
33
34
  """
34
35
  sql_expr() is unimplemented for now, in order to sidestep potentially thorny
35
36
  questions about consistency of doing type conversions in both Python and Postgres.
@@ -40,11 +41,12 @@ class TypeCast(Expr):
40
41
  original_val = data_row[self._underlying.slot_idx]
41
42
  data_row[self.slot_idx] = self.col_type.create_literal(original_val)
42
43
 
43
- def _as_dict(self) -> Dict:
44
+
45
+ def _as_dict(self) -> dict:
44
46
  return {'new_type': self.col_type.as_dict(), **super()._as_dict()}
45
47
 
46
48
  @classmethod
47
- def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
49
+ def _from_dict(cls, d: dict, components: list[Expr]) -> 'TypeCast':
48
50
  assert 'new_type' in d
49
51
  assert len(components) == 1
50
52
  return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import List, Tuple, Any, Dict, NoReturn
3
+ from typing import Any, NoReturn
4
4
 
5
5
  import pixeltable.type_system as ts
6
+
6
7
  from .data_row import DataRow
7
8
  from .expr import Expr
8
9
  from .row_builder import RowBuilder
@@ -20,7 +21,7 @@ class Variable(Expr):
20
21
  self.name = name
21
22
  self.id = self._create_id()
22
23
 
23
- def _id_attrs(self) -> List[Tuple[str, Any]]:
24
+ def _id_attrs(self) -> list[tuple[str, Any]]:
24
25
  return super()._id_attrs() + [('name', self.name)]
25
26
 
26
27
  def default_column_name(self) -> NoReturn:
@@ -38,9 +39,9 @@ class Variable(Expr):
38
39
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
39
40
  raise NotImplementedError()
40
41
 
41
- def _as_dict(self) -> Dict:
42
+ def _as_dict(self) -> dict:
42
43
  return {'name': self.name, 'type': self.col_type.as_dict(), **super()._as_dict()}
43
44
 
44
45
  @classmethod
45
- def _from_dict(cls, d: Dict, _: List[Expr]) -> Expr:
46
+ def _from_dict(cls, d: dict, _: list[Expr]) -> Variable:
46
47
  return cls(d['name'], ts.ColumnType.from_dict(d['type']))
@@ -8,9 +8,14 @@ if TYPE_CHECKING:
8
8
  import pixeltable as pxt
9
9
 
10
10
 
11
- @pxt.udf(param_types=[pxt.AudioType(), pxt.StringType(), pxt.StringType(), pxt.StringType(), pxt.IntType()])
11
+ @pxt.udf
12
12
  def transcribe(
13
- audio: str, *, model: str, compute_type: Optional[str] = None, language: Optional[str] = None, chunk_size: int = 30
13
+ audio: pxt.Audio,
14
+ *,
15
+ model: str,
16
+ compute_type: Optional[str] = None,
17
+ language: Optional[str] = None,
18
+ chunk_size: int = 30
14
19
  ) -> dict:
15
20
  """
16
21
  Transcribe an audio file using WhisperX.
@@ -86,7 +86,7 @@ class AggregateFunction(Function):
86
86
  res += '\n\n' + inspect.getdoc(self.agg_cls.update)
87
87
  return res
88
88
 
89
- def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.Expr':
89
+ def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.FunctionCall':
90
90
  from pixeltable import exprs
91
91
 
92
92
  # perform semantic analysis of special parameters 'order_by' and 'group_by'
@@ -4,7 +4,7 @@ import inspect
4
4
  from typing import Any, Callable, Optional
5
5
  from uuid import UUID
6
6
 
7
- import cloudpickle
7
+ import cloudpickle # type: ignore[import-untyped]
8
8
 
9
9
  from .function import Function
10
10
  from .signature import Signature
@@ -108,7 +108,7 @@ class CallableFunction(Function):
108
108
  @classmethod
109
109
  def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
110
110
  py_fn = cloudpickle.loads(binary_obj)
111
- assert isinstance(py_fn, Callable)
111
+ assert callable(py_fn)
112
112
  sig = Signature.from_dict(md['signature'])
113
113
  batch_size = md['batch_size']
114
114
  return CallableFunction(sig, py_fn, self_name=name, batch_size=batch_size)
@@ -3,12 +3,13 @@ from __future__ import annotations
3
3
  import abc
4
4
  import importlib
5
5
  import inspect
6
- from typing import Any, Callable, Dict, Optional, Tuple
6
+ from typing import Any, Callable, Optional
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
10
- import pixeltable
10
+ import pixeltable as pxt
11
11
  import pixeltable.type_system as ts
12
+
12
13
  from .globals import resolve_symbol
13
14
  from .signature import Signature
14
15
 
@@ -66,13 +67,13 @@ class Function(abc.ABC):
66
67
  def help_str(self) -> str:
67
68
  return self.display_name + str(self.signature)
68
69
 
69
- def __call__(self, *args: Any, **kwargs: Any) -> 'pixeltable.exprs.Expr':
70
+ def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
70
71
  from pixeltable import exprs
71
72
  bound_args = self.signature.py_signature.bind(*args, **kwargs)
72
73
  self.validate_call(bound_args.arguments)
73
74
  return exprs.FunctionCall(self, bound_args.arguments)
74
75
 
75
- def validate_call(self, bound_args: Dict[str, Any]) -> None:
76
+ def validate_call(self, bound_args: dict[str, Any]) -> None:
76
77
  """Override this to do custom validation of the arguments"""
77
78
  pass
78
79
 
@@ -121,7 +122,7 @@ class Function(abc.ABC):
121
122
  """Print source code"""
122
123
  print('source not available')
123
124
 
124
- def as_dict(self) -> Dict:
125
+ def as_dict(self) -> dict:
125
126
  """
126
127
  Return a serialized reference to the instance that can be passed to json.dumps() and converted back
127
128
  to an instance with from_dict().
@@ -130,13 +131,13 @@ class Function(abc.ABC):
130
131
  classpath = f'{self.__class__.__module__}.{self.__class__.__qualname__}'
131
132
  return {'_classpath': classpath, **self._as_dict()}
132
133
 
133
- def _as_dict(self) -> Dict:
134
+ def _as_dict(self) -> dict:
134
135
  """Default serialization: store the path to self (which includes the module path)"""
135
136
  assert self.self_path is not None
136
137
  return {'path': self.self_path}
137
138
 
138
139
  @classmethod
139
- def from_dict(cls, d: Dict) -> Function:
140
+ def from_dict(cls, d: dict) -> Function:
140
141
  """
141
142
  Turn dict that was produced by calling as_dict() into an instance of the correct Function subclass.
142
143
  """
@@ -147,14 +148,14 @@ class Function(abc.ABC):
147
148
  return func_class._from_dict(d)
148
149
 
149
150
  @classmethod
150
- def _from_dict(cls, d: Dict) -> Function:
151
+ def _from_dict(cls, d: dict) -> Function:
151
152
  """Default deserialization: load the symbol indicated by the stored symbol_path"""
152
153
  assert 'path' in d and d['path'] is not None
153
154
  instance = resolve_symbol(d['path'])
154
155
  assert isinstance(instance, Function)
155
156
  return instance
156
157
 
157
- def to_store(self) -> Tuple[Dict, bytes]:
158
+ def to_store(self) -> tuple[dict, bytes]:
158
159
  """
159
160
  Serialize the function to a format that can be stored in the Pixeltable store
160
161
  Returns:
@@ -165,7 +166,7 @@ class Function(abc.ABC):
165
166
  raise NotImplementedError()
166
167
 
167
168
  @classmethod
168
- def from_store(cls, name: Optional[str], md: Dict, binary_obj: bytes) -> Function:
169
+ def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
169
170
  """
170
171
  Create a Function instance from the serialized representation returned by to_store()
171
172
  """
@@ -4,7 +4,7 @@ import dataclasses
4
4
  import importlib
5
5
  import logging
6
6
  import sys
7
- from typing import Optional, Dict, List
7
+ from typing import Optional
8
8
  from uuid import UUID
9
9
 
10
10
  import sqlalchemy as sql
@@ -14,7 +14,6 @@ import pixeltable.exceptions as excs
14
14
  import pixeltable.type_system as ts
15
15
  from pixeltable.metadata import schema
16
16
  from .function import Function
17
- from .globals import get_caller_module_path
18
17
 
19
18
  _logger = logging.getLogger('pixeltable')
20
19
 
@@ -32,15 +31,15 @@ class FunctionRegistry:
32
31
  return cls._instance
33
32
 
34
33
  def __init__(self):
35
- self.stored_fns_by_id: Dict[UUID, Function] = {}
36
- self.module_fns: Dict[str, Function] = {} # fqn -> Function
34
+ self.stored_fns_by_id: dict[UUID, Function] = {}
35
+ self.module_fns: dict[str, Function] = {} # fqn -> Function
37
36
  self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
38
37
 
39
38
  def clear_cache(self) -> None:
40
39
  """
41
40
  Useful during testing
42
41
  """
43
- self.stored_fns_by_id: Dict[UUID, Function] = {}
42
+ self.stored_fns_by_id = {}
44
43
 
45
44
  # def register_std_modules(self) -> None:
46
45
  # """Register all submodules of pixeltable.functions"""
@@ -76,7 +75,7 @@ class FunctionRegistry:
76
75
  raise excs.Error(f'Duplicate method name for type {base_type}: {fn.name}')
77
76
  self.type_methods[base_type][fn.name] = fn
78
77
 
79
- def list_functions(self) -> List[Function]:
78
+ def list_functions(self) -> list[Function]:
80
79
  # retrieve Function.Metadata data for all existing stored functions from store directly
81
80
  # (self.stored_fns_by_id isn't guaranteed to contain all functions)
82
81
  # TODO: have the client do this, once the client takes over the Db functionality
@@ -85,7 +84,7 @@ class FunctionRegistry:
85
84
  # schema.Db.name, schema.Dir.path, sql_func.length(schema.Function.init_obj))\
86
85
  # .where(schema.Function.db_id == schema.Db.id)\
87
86
  # .where(schema.Function.dir_id == schema.Dir.id)
88
- # stored_fn_md: List[Function.Metadata] = []
87
+ # stored_fn_md: list[Function.Metadata] = []
89
88
  # with Env.get().engine.begin() as conn:
90
89
  # rows = conn.execute(stmt)
91
90
  # for name, md_dict, db_name, dir_path, init_obj_len in rows:
@@ -1,14 +1,15 @@
1
1
  from __future__ import annotations
2
+
2
3
  import inspect
3
- from typing import Dict, Optional, Any, Callable
4
+ from typing import Any, Callable, Optional
4
5
 
5
6
  import sqlalchemy as sql
6
7
 
7
- import pixeltable
8
- import pixeltable.exceptions as excs
9
- import pixeltable.type_system as ts
8
+ import pixeltable as pxt
9
+ from pixeltable import exprs
10
+
10
11
  from .function import Function
11
- from .signature import Signature, Parameter
12
+ from .signature import Signature
12
13
 
13
14
 
14
15
  class QueryTemplateFunction(Function):
@@ -16,24 +17,23 @@ class QueryTemplateFunction(Function):
16
17
 
17
18
  @classmethod
18
19
  def create(
19
- cls, template_callable: Callable, param_types: Optional[list[ts.ColumnType]], path: str, name: str
20
+ cls, template_callable: Callable, param_types: Optional[list[pxt.ColumnType]], path: str, name: str
20
21
  ) -> QueryTemplateFunction:
21
22
  # we need to construct a template df and a signature
22
23
  py_sig = inspect.signature(template_callable)
23
24
  py_params = list(py_sig.parameters.values())
24
25
  params = Signature.create_parameters(py_params=py_params, param_types=param_types)
25
26
  # invoke template_callable with parameter expressions to construct a DataFrame with parameters
26
- import pixeltable.exprs as exprs
27
27
  var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
28
28
  template_df = template_callable(*var_exprs)
29
29
  from pixeltable import DataFrame
30
30
  assert isinstance(template_df, DataFrame)
31
31
  # we take params and return json
32
- sig = Signature(return_type=ts.JsonType(), parameters=params)
32
+ sig = Signature(return_type=pxt.JsonType(), parameters=params)
33
33
  return QueryTemplateFunction(template_df, sig, path=path, name=name)
34
34
 
35
35
  def __init__(
36
- self, template_df: Optional['pixeltable.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
36
+ self, template_df: Optional['pxt.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
37
37
  name: Optional[str] = None,
38
38
  ):
39
39
  super().__init__(sig, self_path=path)
@@ -46,7 +46,6 @@ class QueryTemplateFunction(Function):
46
46
  self.conn: Optional[sql.engine.Connection] = None
47
47
 
48
48
  # convert defaults to Literals
49
- import pixeltable.exprs as exprs
50
49
  self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
51
50
  param_types = self.template_df.parameters()
52
51
  for param in [p for p in self.signature.parameters.values() if p.has_default()]:
@@ -75,10 +74,10 @@ class QueryTemplateFunction(Function):
75
74
  def name(self) -> str:
76
75
  return self.self_name
77
76
 
78
- def _as_dict(self) -> Dict:
77
+ def _as_dict(self) -> dict:
79
78
  return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
80
79
 
81
80
  @classmethod
82
- def _from_dict(cls, d: Dict) -> Function:
81
+ def _from_dict(cls, d: dict) -> Function:
83
82
  from pixeltable.dataframe import DataFrame
84
83
  return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
@@ -1,12 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- import json
4
3
  import dataclasses
5
4
  import enum
6
5
  import inspect
6
+ import json
7
7
  import logging
8
8
  import typing
9
- from typing import Optional, Callable, Dict, List, Any, Union, Tuple
9
+ from typing import Any, Callable, Optional, Union
10
10
 
11
11
  import pixeltable.exceptions as excs
12
12
  import pixeltable.type_system as ts
@@ -18,7 +18,7 @@ _logger = logging.getLogger('pixeltable')
18
18
  class Parameter:
19
19
  name: str
20
20
  col_type: Optional[ts.ColumnType] # None for variable parameters
21
- kind: enum.Enum # inspect.Parameter.kind; inspect._ParameterKind is private
21
+ kind: inspect._ParameterKind
22
22
  # for some reason, this needs to precede is_batched in the dataclass definition,
23
23
  # otherwise Python complains that an argument with a default is followed by an argument without a default
24
24
  default: Any = inspect.Parameter.empty # default value for the parameter
@@ -82,7 +82,7 @@ class Signature:
82
82
  """
83
83
  SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
84
84
 
85
- def __init__(self, return_type: ts.ColumnType, parameters: List[Parameter], is_batched: bool = False):
85
+ def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
86
86
  assert isinstance(return_type, ts.ColumnType)
87
87
  self.return_type = return_type
88
88
  self.is_batched = is_batched
@@ -97,7 +97,7 @@ class Signature:
97
97
  assert isinstance(self.return_type, ts.ColumnType)
98
98
  return self.return_type
99
99
 
100
- def as_dict(self) -> Dict[str, Any]:
100
+ def as_dict(self) -> dict[str, Any]:
101
101
  result = {
102
102
  'return_type': self.get_return_type().as_dict(),
103
103
  'parameters': [p.as_dict() for p in self.parameters.values()],
@@ -106,11 +106,13 @@ class Signature:
106
106
  return result
107
107
 
108
108
  @classmethod
109
- def from_dict(cls, d: Dict[str, Any]) -> Signature:
109
+ def from_dict(cls, d: dict[str, Any]) -> Signature:
110
110
  parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
111
111
  return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
112
112
 
113
- def __eq__(self, other: Signature) -> bool:
113
+ def __eq__(self, other: object) -> bool:
114
+ if not isinstance(other, Signature):
115
+ return False
114
116
  if self.get_return_type() != other.get_return_type():
115
117
  return False
116
118
  if len(self.parameters) != len(other.parameters):
@@ -122,7 +124,7 @@ class Signature:
122
124
  return True
123
125
 
124
126
  def __str__(self) -> str:
125
- param_strs: List[str] = []
127
+ param_strs: list[str] = []
126
128
  for p in self.parameters.values():
127
129
  if p.kind == inspect.Parameter.VAR_POSITIONAL:
128
130
  param_strs.append(f'*{p.name}')
@@ -133,7 +135,7 @@ class Signature:
133
135
  return f'({", ".join(param_strs)}) -> {str(self.get_return_type())}'
134
136
 
135
137
  @classmethod
136
- def _infer_type(cls, annotation: Optional[type]) -> Tuple[Optional[ts.ColumnType], Optional[bool]]:
138
+ def _infer_type(cls, annotation: Optional[type]) -> tuple[Optional[ts.ColumnType], Optional[bool]]:
137
139
  """Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
138
140
  if annotation is None:
139
141
  return (None, None)
@@ -154,13 +156,13 @@ class Signature:
154
156
  @classmethod
155
157
  def create_parameters(
156
158
  cls, py_fn: Optional[Callable] = None, py_params: Optional[list[inspect.Parameter]] = None,
157
- param_types: Optional[List[ts.ColumnType]] = None
158
- ) -> List[Parameter]:
159
+ param_types: Optional[list[ts.ColumnType]] = None
160
+ ) -> list[Parameter]:
159
161
  assert (py_fn is None) != (py_params is None)
160
162
  if py_fn is not None:
161
163
  sig = inspect.signature(py_fn)
162
164
  py_params = list(sig.parameters.values())
163
- parameters: List[Parameter] = []
165
+ parameters: list[Parameter] = []
164
166
 
165
167
  for idx, param in enumerate(py_params):
166
168
  if param.name in cls.SPECIAL_PARAM_NAMES:
@@ -187,9 +189,9 @@ class Signature:
187
189
 
188
190
  @classmethod
189
191
  def create(
190
- cls, py_fn: Callable,
191
- param_types: Optional[List[ts.ColumnType]] = None,
192
- return_type: Optional[Union[ts.ColumnType, Callable]] = None
192
+ cls, py_fn: Callable,
193
+ param_types: Optional[list[ts.ColumnType]] = None,
194
+ return_type: Optional[ts.ColumnType] = None
193
195
  ) -> Signature:
194
196
  """Create a signature for the given Callable.
195
197
  Infer the parameter and return types, if none are specified.
pixeltable/func/udf.py CHANGED
@@ -38,10 +38,6 @@ def udf(*args, **kwargs):
38
38
  >>> @pxt.udf
39
39
  ... def my_function(x: int) -> int:
40
40
  ... return x + 1
41
-
42
- >>> @pxt.udf(param_types=[pxt.IntType()], return_type=pxt.IntType())
43
- ... def my_function(x):
44
- ... return x + 1
45
41
  """
46
42
  if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
47
43
 
@@ -1,7 +1,7 @@
1
1
  from pixeltable.utils.code import local_public_names
2
2
 
3
- from . import (anthropic, audio, fireworks, huggingface, image, json, mistralai, openai, string, timestamp, together,
4
- video, vision)
3
+ from . import (anthropic, audio, fireworks, huggingface, image, json, llama_cpp, mistralai, ollama, openai, string,
4
+ timestamp, together, video, vision, whisper)
5
5
  from .globals import *
6
6
 
7
7
  __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
@@ -11,18 +11,16 @@ t.select(pxtf.audio.get_metadata()).collect()
11
11
  ```
12
12
  """
13
13
 
14
- import pixeltable.func as func
15
- import pixeltable.type_system as ts
14
+ import pixeltable as pxt
16
15
  from pixeltable.utils.code import local_public_names
17
16
 
18
17
 
19
- @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.AudioType(nullable=False)], is_method=True)
20
- def get_metadata(audio: str) -> dict:
18
+ @pxt.udf(is_method=True)
19
+ def get_metadata(audio: pxt.Audio) -> dict:
21
20
  """
22
21
  Gets various metadata associated with an audio file and returns it as a dictionary.
23
22
  """
24
- import pixeltable.functions as pxtf
25
- return pxtf.video._get_metadata(audio)
23
+ return pxt.functions.video._get_metadata(audio)
26
24
 
27
25
 
28
26
  __all__ = local_public_names(__name__)