pixeltable 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (76) hide show
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +28 -16
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +298 -204
  10. pixeltable/catalog/table_version.py +104 -139
  11. pixeltable/catalog/table_version_path.py +22 -4
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +128 -25
  14. pixeltable/env.py +21 -14
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +78 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +50 -22
  50. pixeltable/{datatransfer → io}/label_studio.py +279 -166
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +40 -25
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +11 -8
  62. pixeltable/metadata/schema.py +3 -6
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +145 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/METADATA +2 -2
  70. pixeltable-0.2.9.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/remote.py +0 -113
  73. pixeltable/functions/pil/image.py +0 -147
  74. pixeltable-0.2.7.dist-info/RECORD +0 -126
  75. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/LICENSE +0 -0
  76. {pixeltable-0.2.7.dist-info → pixeltable-0.2.9.dist-info}/WHEEL +0 -0
pixeltable/__init__.py CHANGED
@@ -1,11 +1,10 @@
1
1
  from .catalog import Column, Table, InsertableTable, View
2
2
  from .dataframe import DataFrame
3
- from .datatransfer import Remote
4
- from .catalog import Column, Table, InsertableTable, View
5
- from .exceptions import Error, Error
3
+ from .exceptions import Error
6
4
  from .exprs import RELATIVE_PATH_ROOT
7
- from .func import Function, udf, uda, Aggregator, expr_udf
8
- from .globals import *
5
+ from .func import Function, udf, Aggregator, uda, expr_udf
6
+ from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, rm_dir, \
7
+ list_dirs, list_functions, get_path, configure_logging
9
8
  from .type_system import (
10
9
  ColumnType,
11
10
  StringType,
@@ -22,34 +21,17 @@ from .type_system import (
22
21
  )
23
22
  from .utils.help import help
24
23
 
25
- # noinspection PyUnresolvedReferences
26
24
  from . import functions, io, iterators
27
25
  from .__version__ import __version__, __version_tuple__
28
26
 
29
- __all__ = [
30
- 'DataFrame',
31
- 'Column',
32
- 'Table',
33
- 'InsertableTable',
34
- 'View',
35
- 'Error',
36
- 'ColumnType',
37
- 'StringType',
38
- 'IntType',
39
- 'FloatType',
40
- 'BoolType',
41
- 'TimestampType',
42
- 'JsonType',
43
- 'RELATIVE_PATH_ROOT',
44
- 'ArrayType',
45
- 'ImageType',
46
- 'VideoType',
47
- 'AudioType',
48
- 'DocumentType',
49
- 'Function',
50
- 'help',
51
- 'udf',
52
- 'Aggregator',
53
- 'uda',
54
- 'expr_udf',
55
- ]
27
+ # This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that
28
+ # we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
29
+
30
+ __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
31
+ __removed_symbols = {'catalog', 'dataframe', 'env', 'exceptions', 'exec', 'exprs', 'func', 'globals', 'index',
32
+ 'metadata', 'plan', 'type_system', 'utils'}
33
+ __all__ = sorted(list(__default_dir - __removed_symbols))
34
+
35
+
36
+ def __dir__():
37
+ return __all__
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.7"
3
- __version_tuple__ = (0, 2, 7)
2
+ __version__ = "0.2.9"
3
+ __version_tuple__ = (0, 2, 9)
pixeltable/catalog/catalog.py CHANGED
@@ -39,7 +39,7 @@ class Catalog:
39
39
  # key: [id, version]
40
40
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
41
41
  # - snapshot versions: records the version of the snapshot
42
- self.tbl_versions: Dict[Tuple[UUID, int], TableVersion] = {}
42
+ self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}
43
43
 
44
44
  self.tbls: Dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
45
45
  self.tbl_dependents: Dict[UUID, List[Table]] = {}
pixeltable/catalog/column.py CHANGED
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Optional, Union, Callable, Set
4
+ from typing import Optional, Union, Callable, Any
5
+ from uuid import UUID
5
6
 
6
7
  import sqlalchemy as sql
7
8
 
@@ -23,7 +24,7 @@ class Column:
23
24
  is_pk: bool = False, stored: Optional[bool] = None,
24
25
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
25
26
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
26
- records_errors: Optional[bool] = None
27
+ records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
27
28
  ):
28
29
  """Column constructor.
29
30
 
@@ -56,8 +57,9 @@ class Column:
56
57
  if col_type is None and computed_with is None:
57
58
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
58
59
 
59
- self.value_expr: Optional['Expr'] = None
60
+ self._value_expr: Optional['Expr'] = None
60
61
  self.compute_func: Optional[Callable] = None
62
+ self.value_expr_dict = value_expr_dict
61
63
  from pixeltable import exprs
62
64
  if computed_with is not None:
63
65
  value_expr = exprs.Expr.from_object(computed_with)
@@ -73,8 +75,8 @@ class Column:
73
75
  # column name references and for that we need to wait until we're assigned to a Table
74
76
  self.compute_func = computed_with
75
77
  else:
76
- self.value_expr = value_expr.copy()
77
- self.col_type = self.value_expr.col_type
78
+ self._value_expr = value_expr.copy()
79
+ self.col_type = self._value_expr.col_type
78
80
 
79
81
  if col_type is not None:
80
82
  self.col_type = col_type
@@ -87,11 +89,6 @@ class Column:
87
89
  self.schema_version_add = schema_version_add
88
90
  self.schema_version_drop = schema_version_drop
89
91
 
90
- # stored_proxy may be set later if this is a non-stored column.
91
- # if col1.stored_proxy == col2, then also col1 == col2.proxy_base.
92
- self.stored_proxy: Optional[Column] = None
93
- self.proxy_base: Optional[Column] = None
94
-
95
92
  self._records_errors = records_errors
96
93
 
97
94
  # column in the stored table for the values of this Column
@@ -105,12 +102,22 @@ class Column:
105
102
  from .table_version import TableVersion
106
103
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
107
104
 
108
- def __hash__(self) -> int:
109
- assert self.tbl is not None
110
- return hash((self.tbl.id, self.id))
105
+ @property
106
+ def value_expr(self) -> Optional['Expr']:
107
+ """Instantiate value_expr on-demand"""
108
+ # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
109
+ # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
110
+ if self.value_expr_dict is not None and self._value_expr is None:
111
+ from pixeltable import exprs
112
+ self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
113
+ return self._value_expr
114
+
115
+ def set_value_expr(self, value_expr: 'Expr') -> None:
116
+ self._value_expr = value_expr
117
+ self.value_expr_dict = None
111
118
 
112
119
  def check_value_expr(self) -> None:
113
- assert self.value_expr is not None
120
+ assert self._value_expr is not None
114
121
  if self.stored == False and self.is_computed and self.has_window_fn_call():
115
122
  raise excs.Error(
116
123
  f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
@@ -129,7 +136,7 @@ class Column:
129
136
 
130
137
  @property
131
138
  def is_computed(self) -> bool:
132
- return self.compute_func is not None or self.value_expr is not None
139
+ return self.compute_func is not None or self._value_expr is not None or self.value_expr_dict is not None
133
140
 
134
141
  @property
135
142
  def is_stored(self) -> bool:
@@ -184,10 +191,15 @@ class Column:
184
191
  def __str__(self) -> str:
185
192
  return f'{self.name}: {self.col_type}'
186
193
 
194
+ def __hash__(self) -> int:
195
+ # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
196
+ # abstraction (perhaps separating out the version-dependent properties into a different abstraction).
197
+ assert self.tbl is not None
198
+ return hash((self.tbl.id, self.id))
199
+
187
200
  def __eq__(self, other: object) -> bool:
188
201
  if not isinstance(other, Column):
189
202
  return False
190
203
  assert self.tbl is not None
191
204
  assert other.tbl is not None
192
205
  return self.tbl.id == other.tbl.id and self.id == other.id
193
-
pixeltable/catalog/dir.py CHANGED
@@ -21,8 +21,8 @@ class Dir(SchemaObject):
21
21
  def display_name(cls) -> str:
22
22
  return 'directory'
23
23
 
24
- def move(self, new_name: str, new_dir_id: UUID) -> None:
25
- super().move(new_name, new_dir_id)
24
+ def _move(self, new_name: str, new_dir_id: UUID) -> None:
25
+ super()._move(new_name, new_dir_id)
26
26
  with Env.get().engine.begin() as conn:
27
27
  dir_md = schema.DirMd(name=new_name)
28
28
  conn.execute(
pixeltable/catalog/insertable_table.py CHANGED
@@ -71,56 +71,6 @@ class InsertableTable(Table):
71
71
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
72
72
  fail_on_exception: bool = True, **kwargs: Any
73
73
  ) -> UpdateStatus:
74
- """Inserts rows into this table. There are two mutually exclusive call patterns:
75
-
76
- To insert multiple rows at a time:
77
- ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
78
-
79
- To insert just a single row, you can use the more convenient syntax:
80
- ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
81
-
82
- Args:
83
- rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
84
- names to values.
85
- kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
86
- print_stats: If ``True``, print statistics about the cost of computed columns.
87
- fail_on_exception:
88
- Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
89
- are handled.
90
- If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
91
- for those cases, but continue inserting rows.
92
- If ``True``, raise an exception that aborts the insert.
93
-
94
- Returns:
95
- execution status
96
-
97
- Raises:
98
- Error: if a row does not match the table schema or contains values for computed columns
99
-
100
- Examples:
101
- Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
102
-
103
- >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
104
-
105
- Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
106
-
107
- >>> tbl.insert(a=1, b=1, c=1)
108
- """
109
- # The commented code is the intended implementation, with signature (*args, **kwargs).
110
- # That signature cannot be used currently, due to a present limitation in mkdocs.
111
- # See: https://github.com/mkdocstrings/mkdocstrings/issues/669
112
-
113
- # print_stats = kwargs.pop('print_stats', False)
114
- # fail_on_exception = kwargs.pop('fail_on_exception', True)
115
- # if len(args) > 0:
116
- # # There's a positional argument; this means `rows` is expressed as a
117
- # # list of dicts (multi-insert)
118
- # rows = list(args[0])
119
- # else:
120
- # # No positional argument; this means we're inserting a single row
121
- # # using kwargs syntax
122
- # rows = [kwargs]
123
-
124
74
  if rows is None:
125
75
  rows = [kwargs]
126
76
  else:
@@ -136,7 +86,7 @@ class InsertableTable(Table):
136
86
  if not isinstance(row, dict):
137
87
  raise excs.Error('rows must be a list of dictionaries')
138
88
  self._validate_input_rows(rows)
139
- result = self.tbl_version.insert(rows, print_stats=print_stats, fail_on_exception=fail_on_exception)
89
+ result = self._tbl_version.insert(rows, print_stats=print_stats, fail_on_exception=fail_on_exception)
140
90
 
141
91
  if result.num_excs == 0:
142
92
  cols_with_excs_str = ''
@@ -155,8 +105,8 @@ class InsertableTable(Table):
155
105
  def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
156
106
  """Verify that the input rows match the table schema"""
157
107
  valid_col_names = set(self.column_names())
158
- reqd_col_names = set(self.tbl_version_path.tbl_version.get_required_col_names())
159
- computed_col_names = set(self.tbl_version_path.tbl_version.get_computed_col_names())
108
+ reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
109
+ computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
160
110
  for row in rows:
161
111
  assert isinstance(row, dict)
162
112
  col_names = set(row.keys())
@@ -170,7 +120,7 @@ class InsertableTable(Table):
170
120
  raise excs.Error(f'Value for computed column {col_name} in row {row}')
171
121
 
172
122
  # validate data
173
- col = self.tbl_version_path.get_column(col_name)
123
+ col = self._tbl_version_path.get_column(col_name)
174
124
  try:
175
125
  # basic sanity checks here
176
126
  checked_val = col.col_type.create_literal(val)
@@ -199,7 +149,7 @@ class InsertableTable(Table):
199
149
  if where is not None:
200
150
  if not isinstance(where, Predicate):
201
151
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
202
- analysis_info = Planner.analyze(self.tbl_version_path, where)
152
+ analysis_info = Planner.analyze(self._tbl_version_path, where)
203
153
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
204
154
  if analysis_info.filter is not None:
205
155
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
pixeltable/catalog/named_function.py CHANGED
@@ -25,8 +25,8 @@ class NamedFunction(SchemaObject):
25
25
  def display_name(cls) -> str:
26
26
  return 'function'
27
27
 
28
- def move(self, new_name: str, new_dir_id: UUID) -> None:
29
- super().move(new_name, new_dir_id)
28
+ def _move(self, new_name: str, new_dir_id: UUID) -> None:
29
+ super()._move(new_name, new_dir_id)
30
30
  with Env.get().engine.begin() as conn:
31
31
  stmt = sql.text((
32
32
  f"UPDATE {schema.Function.__table__} "
pixeltable/catalog/schema_object.py CHANGED
@@ -14,7 +14,7 @@ class SchemaObject:
14
14
  self._name = name
15
15
  self._dir_id = dir_id
16
16
 
17
- def get_id(self) -> UUID:
17
+ def _get_id(self) -> UUID:
18
18
  return self._id
19
19
 
20
20
  def get_name(self) -> str:
@@ -28,12 +28,7 @@ class SchemaObject:
28
28
  """
29
29
  pass
30
30
 
31
- @property
32
- def fqn(self) -> str:
33
- return f'{self.parent_dir().fqn}.{self._name}'
34
-
35
- def move(self, new_name: str, new_dir_id: UUID) -> None:
31
+ def _move(self, new_name: str, new_dir_id: UUID) -> None:
36
32
  """Subclasses need to override this to make the change persistent"""
37
33
  self._name = new_name
38
34
  self._dir_id = new_dir_id
39
-