pixeltable 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/column.py +41 -29
  5. pixeltable/catalog/globals.py +18 -0
  6. pixeltable/catalog/insertable_table.py +30 -10
  7. pixeltable/catalog/table.py +198 -86
  8. pixeltable/catalog/table_version.py +47 -53
  9. pixeltable/catalog/table_version_path.py +2 -2
  10. pixeltable/catalog/view.py +17 -18
  11. pixeltable/dataframe.py +27 -36
  12. pixeltable/env.py +7 -0
  13. pixeltable/exec/__init__.py +0 -1
  14. pixeltable/exec/aggregation_node.py +6 -3
  15. pixeltable/exec/cache_prefetch_node.py +189 -43
  16. pixeltable/exec/data_row_batch.py +5 -22
  17. pixeltable/exec/exec_context.py +2 -2
  18. pixeltable/exec/exec_node.py +3 -2
  19. pixeltable/exec/expr_eval_node.py +23 -16
  20. pixeltable/exec/in_memory_data_node.py +6 -3
  21. pixeltable/exec/sql_node.py +24 -25
  22. pixeltable/exprs/arithmetic_expr.py +12 -5
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +97 -14
  26. pixeltable/exprs/comparison.py +10 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +27 -18
  29. pixeltable/exprs/expr.py +53 -52
  30. pixeltable/exprs/expr_set.py +5 -0
  31. pixeltable/exprs/function_call.py +32 -16
  32. pixeltable/exprs/globals.py +4 -1
  33. pixeltable/exprs/in_predicate.py +8 -7
  34. pixeltable/exprs/inline_expr.py +4 -4
  35. pixeltable/exprs/is_null.py +4 -4
  36. pixeltable/exprs/json_mapper.py +11 -12
  37. pixeltable/exprs/json_path.py +6 -11
  38. pixeltable/exprs/literal.py +5 -5
  39. pixeltable/exprs/method_ref.py +5 -4
  40. pixeltable/exprs/object_ref.py +2 -1
  41. pixeltable/exprs/row_builder.py +88 -36
  42. pixeltable/exprs/rowid_ref.py +12 -11
  43. pixeltable/exprs/similarity_expr.py +12 -7
  44. pixeltable/exprs/sql_element_cache.py +7 -5
  45. pixeltable/exprs/type_cast.py +8 -6
  46. pixeltable/exprs/variable.py +5 -4
  47. pixeltable/func/aggregate_function.py +9 -9
  48. pixeltable/func/expr_template_function.py +6 -5
  49. pixeltable/func/function.py +11 -10
  50. pixeltable/func/udf.py +6 -11
  51. pixeltable/functions/__init__.py +2 -2
  52. pixeltable/functions/globals.py +5 -7
  53. pixeltable/functions/huggingface.py +155 -45
  54. pixeltable/functions/llama_cpp.py +107 -0
  55. pixeltable/functions/mistralai.py +1 -1
  56. pixeltable/functions/ollama.py +147 -0
  57. pixeltable/functions/openai.py +1 -1
  58. pixeltable/functions/replicate.py +72 -0
  59. pixeltable/functions/string.py +9 -0
  60. pixeltable/functions/together.py +1 -1
  61. pixeltable/functions/util.py +5 -2
  62. pixeltable/globals.py +67 -26
  63. pixeltable/index/btree.py +16 -3
  64. pixeltable/index/embedding_index.py +4 -4
  65. pixeltable/io/__init__.py +1 -2
  66. pixeltable/io/fiftyone.py +178 -0
  67. pixeltable/io/globals.py +96 -2
  68. pixeltable/iterators/base.py +3 -2
  69. pixeltable/iterators/document.py +1 -1
  70. pixeltable/iterators/video.py +120 -63
  71. pixeltable/metadata/__init__.py +1 -1
  72. pixeltable/metadata/converters/convert_21.py +34 -0
  73. pixeltable/metadata/converters/util.py +45 -4
  74. pixeltable/metadata/notes.py +1 -0
  75. pixeltable/metadata/schema.py +8 -0
  76. pixeltable/plan.py +17 -15
  77. pixeltable/py.typed +0 -0
  78. pixeltable/store.py +7 -2
  79. pixeltable/tool/create_test_db_dump.py +1 -1
  80. pixeltable/tool/create_test_video.py +1 -1
  81. pixeltable/tool/embed_udf.py +1 -1
  82. pixeltable/tool/mypy_plugin.py +28 -5
  83. pixeltable/type_system.py +100 -36
  84. pixeltable/utils/coco.py +5 -5
  85. pixeltable/utils/documents.py +15 -1
  86. pixeltable/utils/formatter.py +12 -13
  87. pixeltable/utils/s3.py +6 -3
  88. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/METADATA +158 -49
  89. pixeltable-0.2.23.dist-info/RECORD +153 -0
  90. pixeltable/exec/media_validation_node.py +0 -43
  91. pixeltable-0.2.21.dist-info/RECORD +0 -148
  92. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.2.21.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -3,8 +3,8 @@ from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Aggregator, Function, expr_udf, uda, udf
6
- from .globals import (array, configure_logging, create_dir, create_table, create_view, drop_dir, drop_table, get_table,
7
- init, list_dirs, list_functions, list_tables, move)
6
+ from .globals import (array, configure_logging, create_dir, create_snapshot, create_table, create_view, drop_dir,
7
+ drop_table, get_table, init, list_dirs, list_functions, list_tables, move)
8
8
  from .type_system import (Array, ArrayType, Audio, AudioType, Bool, BoolType, ColumnType, Document, DocumentType, Float,
9
9
  FloatType, Image, ImageType, Int, IntType, Json, JsonType, Required, String, StringType,
10
10
  Timestamp, TimestampType, Video, VideoType)
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.21"
3
- __version_tuple__ = (0, 2, 21)
2
+ __version__ = "0.2.23"
3
+ __version_tuple__ = (0, 2, 23)
pixeltable/catalog/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from .catalog import Catalog
2
2
  from .column import Column
3
3
  from .dir import Dir
4
- from .globals import UpdateStatus, is_valid_identifier, is_valid_path
4
+ from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
5
5
  from .insertable_table import InsertableTable
6
6
  from .named_function import NamedFunction
7
7
  from .path import Path
pixeltable/catalog/column.py CHANGED
@@ -1,31 +1,49 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import TYPE_CHECKING, Any, Callable, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
10
  from pixeltable import exprs
11
-
12
- from .globals import is_valid_identifier
11
+ from .globals import is_valid_identifier, MediaValidation
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from .table_version import TableVersion
16
15
 
17
16
  _logger = logging.getLogger('pixeltable')
18
17
 
18
+
19
19
  class Column:
20
20
  """Representation of a column in the schema of a Table/DataFrame.
21
21
 
22
22
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
23
23
  table/view.
24
24
  """
25
+ name: str
26
+ id: Optional[int]
27
+ col_type: ts.ColumnType
28
+ stored: bool
29
+ is_pk: bool
30
+ _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
31
+ schema_version_add: Optional[int]
32
+ schema_version_drop: Optional[int]
33
+ _records_errors: Optional[bool]
34
+ sa_col: Optional[sql.schema.Column]
35
+ sa_col_type: Optional[sql.sqltypes.TypeEngine]
36
+ sa_errormsg_col: Optional[sql.schema.Column]
37
+ sa_errortype_col: Optional[sql.schema.Column]
38
+ _value_expr: Optional[exprs.Expr]
39
+ value_expr_dict: Optional[dict[str, Any]]
40
+ dependent_cols: set[Column]
41
+ tbl: Optional[TableVersion]
42
+
25
43
  def __init__(
26
44
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
27
- computed_with: Optional[Union[exprs.Expr, Callable]] = None,
28
- is_pk: bool = False, stored: bool = True,
45
+ computed_with: Optional[exprs.Expr] = None,
46
+ is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
29
47
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
30
48
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
31
49
  records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
@@ -35,7 +53,7 @@ class Column:
35
53
  Args:
36
54
  name: column name; None for system columns (eg, index columns)
37
55
  col_type: column type; can be None if the type can be derived from ``computed_with``
38
- computed_with: a callable or an Expr object that computes the column value
56
+ computed_with: an Expr that computes the column value
39
57
  is_pk: if True, this column is part of the primary key
40
58
  stored: determines whether a computed column is present in the stored table or recomputed on demand
41
59
  col_id: column ID (only used internally)
@@ -45,11 +63,6 @@ class Column:
45
63
  col_type is None
46
64
  - when loaded from md store: ``computed_with`` is set and col_type is set
47
65
 
48
- ``computed_with`` is a Callable:
49
- - the callable's parameter names must correspond to existing columns in the table for which this Column
50
- is being used
51
- - ``col_type`` needs to be set to the callable's return type
52
-
53
66
  ``stored`` (only valid for computed image columns):
54
67
  - if True: the column is present in the stored table
55
68
  - if False: the column is not present in the stored table and recomputed during a query
@@ -62,21 +75,13 @@ class Column:
62
75
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
63
76
 
64
77
  self._value_expr: Optional[exprs.Expr] = None
65
- self.compute_func: Optional[Callable] = None
66
78
  self.value_expr_dict = value_expr_dict
67
79
  if computed_with is not None:
68
80
  value_expr = exprs.Expr.from_object(computed_with)
69
81
  if value_expr is None:
70
- # computed_with needs to be a Callable
71
- if not callable(computed_with):
72
- raise excs.Error(
73
- f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
74
- f'but it is a {type(computed_with)}')
75
- if col_type is None:
76
- raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
77
- # we need to turn the computed_with function into an Expr, but this requires resolving
78
- # column name references and for that we need to wait until we're assigned to a Table
79
- self.compute_func = computed_with
82
+ raise excs.Error(
83
+ f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
84
+ f'but it is a {type(computed_with)}')
80
85
  else:
81
86
  self._value_expr = value_expr.copy()
82
87
  self.col_type = self._value_expr.col_type
@@ -86,24 +91,24 @@ class Column:
86
91
  assert self.col_type is not None
87
92
 
88
93
  self.stored = stored
89
- self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
94
+ self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
90
95
  self.id = col_id
91
96
  self.is_pk = is_pk
97
+ self._media_validation = media_validation
92
98
  self.schema_version_add = schema_version_add
93
99
  self.schema_version_drop = schema_version_drop
94
100
 
95
101
  self._records_errors = records_errors
96
102
 
97
103
  # column in the stored table for the values of this Column
98
- self.sa_col: Optional[sql.schema.Column] = None
104
+ self.sa_col = None
99
105
  self.sa_col_type = sa_col_type
100
106
 
101
107
  # computed cols also have storage columns for the exception string and type
102
- self.sa_errormsg_col: Optional[sql.schema.Column] = None
103
- self.sa_errortype_col: Optional[sql.schema.Column] = None
108
+ self.sa_errormsg_col = None
109
+ self.sa_errortype_col = None
104
110
 
105
- from .table_version import TableVersion
106
- self.tbl: Optional[TableVersion] = None # set by owning TableVersion
111
+ self.tbl = None # set by owning TableVersion
107
112
 
108
113
  @property
109
114
  def value_expr(self) -> Optional[exprs.Expr]:
@@ -139,7 +144,7 @@ class Column:
139
144
 
140
145
  @property
141
146
  def is_computed(self) -> bool:
142
- return self.compute_func is not None or self._value_expr is not None or self.value_expr_dict is not None
147
+ return self._value_expr is not None or self.value_expr_dict is not None
143
148
 
144
149
  @property
145
150
  def is_stored(self) -> bool:
@@ -160,6 +165,13 @@ class Column:
160
165
  assert self.tbl is not None
161
166
  return f'{self.tbl.name}.{self.name}'
162
167
 
168
+ @property
169
+ def media_validation(self) -> MediaValidation:
170
+ if self._media_validation is not None:
171
+ return self._media_validation
172
+ assert self.tbl is not None
173
+ return self.tbl.media_validation
174
+
163
175
  def source(self) -> None:
164
176
  """
165
177
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
pixeltable/catalog/globals.py CHANGED
@@ -1,8 +1,12 @@
1
+ from __future__ import annotations
1
2
  import dataclasses
3
+ import enum
2
4
  import itertools
3
5
  import logging
4
6
  from typing import Optional
5
7
 
8
+ import pixeltable.exceptions as excs
9
+
6
10
  _logger = logging.getLogger('pixeltable')
7
11
 
8
12
  # name of the position column in a component view
@@ -34,6 +38,20 @@ class UpdateStatus:
34
38
  self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
35
39
  return self
36
40
 
41
+
42
+ class MediaValidation(enum.Enum):
43
+ ON_READ = 0
44
+ ON_WRITE = 1
45
+
46
+ @classmethod
47
+ def validated(cls, name: str, error_prefix: str) -> MediaValidation:
48
+ try:
49
+ return cls[name.upper()]
50
+ except KeyError:
51
+ val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
52
+ raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
53
+
54
+
37
55
  def is_valid_identifier(name: str) -> bool:
38
56
  return name.isidentifier() and not name.startswith('_')
39
57
 
pixeltable/catalog/insertable_table.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Dict, Iterable, List, Optional, overload
4
+ from typing import Any, Iterable, Literal, Optional, overload
5
5
  from uuid import UUID
6
6
 
7
7
  import sqlalchemy.orm as orm
@@ -13,7 +13,7 @@ from pixeltable.env import Env
13
13
  from pixeltable.utils.filecache import FileCache
14
14
 
15
15
  from .catalog import Catalog
16
- from .globals import UpdateStatus
16
+ from .globals import MediaValidation, UpdateStatus
17
17
  from .table import Table
18
18
  from .table_version import TableVersion
19
19
  from .table_version_path import TableVersionPath
@@ -35,8 +35,8 @@ class InsertableTable(Table):
35
35
  # MODULE-LOCAL, NOT PUBLIC
36
36
  @classmethod
37
37
  def _create(
38
- cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame], primary_key: List[str],
39
- num_retained_versions: int, comment: str
38
+ cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
39
+ primary_key: list[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
40
40
  ) -> InsertableTable:
41
41
  columns = cls._create_columns(schema)
42
42
  cls._verify_schema(columns)
@@ -50,7 +50,9 @@ class InsertableTable(Table):
50
50
  col.is_pk = True
51
51
 
52
52
  with orm.Session(Env.get().engine, future=True) as session:
53
- _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
53
+ _, tbl_version = TableVersion.create(
54
+ session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
55
+ media_validation=media_validation)
54
56
  tbl = cls(dir_id, tbl_version)
55
57
  # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
56
58
  # when the table metadata gets updated. Once we have a notion of user-defined transactions in
@@ -77,15 +79,31 @@ class InsertableTable(Table):
77
79
 
78
80
  @overload
79
81
  def insert(
80
- self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
82
+ self,
83
+ rows: Iterable[dict[str, Any]],
84
+ /,
85
+ *,
86
+ print_stats: bool = False,
87
+ on_error: Literal['abort', 'ignore'] = 'abort'
81
88
  ) -> UpdateStatus: ...
82
89
 
83
90
  @overload
84
- def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
91
+ def insert(
92
+ self,
93
+ *,
94
+ print_stats: bool = False,
95
+ on_error: Literal['abort', 'ignore'] = 'abort',
96
+ **kwargs: Any
97
+ ) -> UpdateStatus: ...
85
98
 
86
99
  def insert( # type: ignore[misc]
87
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
88
- fail_on_exception: bool = True, **kwargs: Any
100
+ self,
101
+ rows: Optional[Iterable[dict[str, Any]]] = None,
102
+ /,
103
+ *,
104
+ print_stats: bool = False,
105
+ on_error: Literal['abort', 'ignore'] = 'abort',
106
+ **kwargs: Any
89
107
  ) -> UpdateStatus:
90
108
  if rows is None:
91
109
  rows = [kwargs]
@@ -94,6 +112,8 @@ class InsertableTable(Table):
94
112
  if len(kwargs) > 0:
95
113
  raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
96
114
 
115
+ fail_on_exception = on_error == 'abort'
116
+
97
117
  if not isinstance(rows, list):
98
118
  raise excs.Error('rows must be a list of dictionaries')
99
119
  if len(rows) == 0:
@@ -119,7 +139,7 @@ class InsertableTable(Table):
119
139
  FileCache.get().emit_eviction_warnings()
120
140
  return status
121
141
 
122
- def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
142
+ def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
123
143
  """Verify that the input rows match the table schema"""
124
144
  valid_col_names = set(self._schema.keys())
125
145
  reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())