pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (120)
  1. pixeltable/__init__.py +7 -19
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +7 -7
  4. pixeltable/catalog/column.py +37 -11
  5. pixeltable/catalog/globals.py +21 -0
  6. pixeltable/catalog/insertable_table.py +6 -4
  7. pixeltable/catalog/table.py +227 -148
  8. pixeltable/catalog/table_version.py +66 -28
  9. pixeltable/catalog/table_version_path.py +0 -8
  10. pixeltable/catalog/view.py +18 -19
  11. pixeltable/dataframe.py +16 -32
  12. pixeltable/env.py +6 -1
  13. pixeltable/exec/__init__.py +1 -2
  14. pixeltable/exec/aggregation_node.py +27 -17
  15. pixeltable/exec/cache_prefetch_node.py +1 -1
  16. pixeltable/exec/data_row_batch.py +9 -26
  17. pixeltable/exec/exec_node.py +36 -7
  18. pixeltable/exec/expr_eval_node.py +19 -11
  19. pixeltable/exec/in_memory_data_node.py +14 -11
  20. pixeltable/exec/sql_node.py +266 -138
  21. pixeltable/exprs/__init__.py +1 -0
  22. pixeltable/exprs/arithmetic_expr.py +3 -1
  23. pixeltable/exprs/array_slice.py +7 -7
  24. pixeltable/exprs/column_property_ref.py +37 -10
  25. pixeltable/exprs/column_ref.py +93 -14
  26. pixeltable/exprs/comparison.py +5 -5
  27. pixeltable/exprs/compound_predicate.py +8 -7
  28. pixeltable/exprs/data_row.py +56 -36
  29. pixeltable/exprs/expr.py +65 -63
  30. pixeltable/exprs/expr_dict.py +55 -0
  31. pixeltable/exprs/expr_set.py +26 -15
  32. pixeltable/exprs/function_call.py +53 -24
  33. pixeltable/exprs/globals.py +4 -1
  34. pixeltable/exprs/in_predicate.py +8 -7
  35. pixeltable/exprs/inline_expr.py +4 -4
  36. pixeltable/exprs/is_null.py +4 -4
  37. pixeltable/exprs/json_mapper.py +11 -12
  38. pixeltable/exprs/json_path.py +5 -10
  39. pixeltable/exprs/literal.py +5 -5
  40. pixeltable/exprs/method_ref.py +5 -4
  41. pixeltable/exprs/object_ref.py +2 -1
  42. pixeltable/exprs/row_builder.py +88 -36
  43. pixeltable/exprs/rowid_ref.py +14 -13
  44. pixeltable/exprs/similarity_expr.py +12 -7
  45. pixeltable/exprs/sql_element_cache.py +12 -6
  46. pixeltable/exprs/type_cast.py +8 -6
  47. pixeltable/exprs/variable.py +5 -4
  48. pixeltable/ext/functions/whisperx.py +7 -2
  49. pixeltable/func/aggregate_function.py +1 -1
  50. pixeltable/func/callable_function.py +2 -2
  51. pixeltable/func/function.py +11 -10
  52. pixeltable/func/function_registry.py +6 -7
  53. pixeltable/func/query_template_function.py +11 -12
  54. pixeltable/func/signature.py +17 -15
  55. pixeltable/func/udf.py +0 -4
  56. pixeltable/functions/__init__.py +2 -2
  57. pixeltable/functions/audio.py +4 -6
  58. pixeltable/functions/globals.py +84 -42
  59. pixeltable/functions/huggingface.py +31 -34
  60. pixeltable/functions/image.py +59 -45
  61. pixeltable/functions/json.py +0 -1
  62. pixeltable/functions/llama_cpp.py +106 -0
  63. pixeltable/functions/mistralai.py +2 -2
  64. pixeltable/functions/ollama.py +147 -0
  65. pixeltable/functions/openai.py +22 -25
  66. pixeltable/functions/replicate.py +72 -0
  67. pixeltable/functions/string.py +59 -50
  68. pixeltable/functions/timestamp.py +20 -20
  69. pixeltable/functions/together.py +2 -2
  70. pixeltable/functions/video.py +11 -20
  71. pixeltable/functions/whisper.py +2 -20
  72. pixeltable/globals.py +65 -74
  73. pixeltable/index/base.py +2 -2
  74. pixeltable/index/btree.py +20 -7
  75. pixeltable/index/embedding_index.py +12 -14
  76. pixeltable/io/__init__.py +1 -2
  77. pixeltable/io/external_store.py +11 -5
  78. pixeltable/io/fiftyone.py +178 -0
  79. pixeltable/io/globals.py +98 -2
  80. pixeltable/io/hf_datasets.py +1 -1
  81. pixeltable/io/label_studio.py +6 -6
  82. pixeltable/io/parquet.py +14 -13
  83. pixeltable/iterators/base.py +3 -2
  84. pixeltable/iterators/document.py +10 -8
  85. pixeltable/iterators/video.py +126 -60
  86. pixeltable/metadata/__init__.py +4 -3
  87. pixeltable/metadata/converters/convert_14.py +4 -2
  88. pixeltable/metadata/converters/convert_15.py +1 -1
  89. pixeltable/metadata/converters/convert_19.py +1 -0
  90. pixeltable/metadata/converters/convert_20.py +1 -1
  91. pixeltable/metadata/converters/convert_21.py +34 -0
  92. pixeltable/metadata/converters/util.py +54 -12
  93. pixeltable/metadata/notes.py +1 -0
  94. pixeltable/metadata/schema.py +40 -21
  95. pixeltable/plan.py +149 -165
  96. pixeltable/py.typed +0 -0
  97. pixeltable/store.py +57 -37
  98. pixeltable/tool/create_test_db_dump.py +6 -6
  99. pixeltable/tool/create_test_video.py +1 -1
  100. pixeltable/tool/doc_plugins/griffe.py +3 -34
  101. pixeltable/tool/embed_udf.py +1 -1
  102. pixeltable/tool/mypy_plugin.py +55 -0
  103. pixeltable/type_system.py +260 -61
  104. pixeltable/utils/arrow.py +10 -9
  105. pixeltable/utils/coco.py +4 -4
  106. pixeltable/utils/documents.py +16 -2
  107. pixeltable/utils/filecache.py +9 -9
  108. pixeltable/utils/formatter.py +10 -11
  109. pixeltable/utils/http_server.py +2 -5
  110. pixeltable/utils/media_store.py +6 -6
  111. pixeltable/utils/pytorch.py +10 -11
  112. pixeltable/utils/sql.py +2 -1
  113. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
  114. pixeltable-0.2.22.dist-info/RECORD +153 -0
  115. pixeltable/exec/media_validation_node.py +0 -43
  116. pixeltable/utils/help.py +0 -11
  117. pixeltable-0.2.20.dist-info/RECORD +0 -147
  118. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
  119. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
  120. {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -1,25 +1,13 @@
1
- from .catalog import Column, Table, InsertableTable, View
1
+ from .catalog import Column, InsertableTable, Table, UpdateStatus, View
2
2
  from .dataframe import DataFrame
3
3
  from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
- from .func import Function, udf, Aggregator, uda, expr_udf
6
- from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
7
- list_dirs, list_functions, configure_logging, array
8
- from .type_system import (
9
- ColumnType,
10
- StringType,
11
- IntType,
12
- FloatType,
13
- BoolType,
14
- TimestampType,
15
- JsonType,
16
- ArrayType,
17
- ImageType,
18
- VideoType,
19
- AudioType,
20
- DocumentType,
21
- )
22
- from .utils.help import help
5
+ from .func import Aggregator, Function, expr_udf, uda, udf
6
+ from .globals import (array, configure_logging, create_dir, create_table, create_view, drop_dir, drop_table, get_table,
7
+ init, list_dirs, list_functions, list_tables, move)
8
+ from .type_system import (Array, ArrayType, Audio, AudioType, Bool, BoolType, ColumnType, Document, DocumentType, Float,
9
+ FloatType, Image, ImageType, Int, IntType, Json, JsonType, Required, String, StringType,
10
+ Timestamp, TimestampType, Video, VideoType)
23
11
 
24
12
  from . import ext, functions, io, iterators
25
13
  from .__version__ import __version__, __version_tuple__
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.20"
3
- __version_tuple__ = (0, 2, 20)
2
+ __version__ = "0.2.22"
3
+ __version_tuple__ = (0, 2, 22)
pixeltable/catalog/__init__.py CHANGED
@@ -1,13 +1,13 @@
1
1
  from .catalog import Catalog
2
2
  from .column import Column
3
- from .table_version_path import TableVersionPath
4
- from .table_version import TableVersion
5
- from .schema_object import SchemaObject
6
- from .named_function import NamedFunction
7
3
  from .dir import Dir
8
- from .table import Table
4
+ from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
9
5
  from .insertable_table import InsertableTable
10
- from .view import View
6
+ from .named_function import NamedFunction
11
7
  from .path import Path
12
8
  from .path_dict import PathDict
13
- from .globals import is_valid_identifier, is_valid_path
9
+ from .schema_object import SchemaObject
10
+ from .table import Table
11
+ from .table_version import TableVersion
12
+ from .table_version_path import TableVersionPath
13
+ from .view import View
pixeltable/catalog/column.py CHANGED
@@ -8,24 +8,43 @@ import sqlalchemy as sql
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
10
  from pixeltable import exprs
11
-
12
- from .globals import is_valid_identifier
11
+ from .globals import is_valid_identifier, MediaValidation
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from .table_version import TableVersion
16
15
 
17
16
  _logger = logging.getLogger('pixeltable')
18
17
 
18
+
19
19
  class Column:
20
20
  """Representation of a column in the schema of a Table/DataFrame.
21
21
 
22
22
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
23
23
  table/view.
24
24
  """
25
+ name: str
26
+ id: Optional[int]
27
+ col_type: ts.ColumnType
28
+ stored: bool
29
+ is_pk: bool
30
+ _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
31
+ schema_version_add: Optional[int]
32
+ schema_version_drop: Optional[int]
33
+ _records_errors: Optional[bool]
34
+ sa_col: Optional[sql.schema.Column]
35
+ sa_col_type: Optional[sql.sqltypes.TypeEngine]
36
+ sa_errormsg_col: Optional[sql.schema.Column]
37
+ sa_errortype_col: Optional[sql.schema.Column]
38
+ compute_func: Optional[Callable]
39
+ _value_expr: Optional[exprs.Expr]
40
+ value_expr_dict: Optional[dict[str, Any]]
41
+ dependent_cols: set[Column]
42
+ tbl: Optional[TableVersion]
43
+
25
44
  def __init__(
26
45
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
27
46
  computed_with: Optional[Union[exprs.Expr, Callable]] = None,
28
- is_pk: bool = False, stored: bool = True,
47
+ is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
29
48
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
30
49
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
31
50
  records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
@@ -61,8 +80,8 @@ class Column:
61
80
  if col_type is None and computed_with is None:
62
81
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
63
82
 
64
- self._value_expr: Optional[exprs.Expr] = None
65
- self.compute_func: Optional[Callable] = None
83
+ self._value_expr = None
84
+ self.compute_func = None
66
85
  self.value_expr_dict = value_expr_dict
67
86
  if computed_with is not None:
68
87
  value_expr = exprs.Expr.from_object(computed_with)
@@ -86,24 +105,24 @@ class Column:
86
105
  assert self.col_type is not None
87
106
 
88
107
  self.stored = stored
89
- self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
108
+ self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
90
109
  self.id = col_id
91
110
  self.is_pk = is_pk
111
+ self._media_validation = media_validation
92
112
  self.schema_version_add = schema_version_add
93
113
  self.schema_version_drop = schema_version_drop
94
114
 
95
115
  self._records_errors = records_errors
96
116
 
97
117
  # column in the stored table for the values of this Column
98
- self.sa_col: Optional[sql.schema.Column] = None
118
+ self.sa_col = None
99
119
  self.sa_col_type = sa_col_type
100
120
 
101
121
  # computed cols also have storage columns for the exception string and type
102
- self.sa_errormsg_col: Optional[sql.schema.Column] = None
103
- self.sa_errortype_col: Optional[sql.schema.Column] = None
122
+ self.sa_errormsg_col = None
123
+ self.sa_errortype_col = None
104
124
 
105
- from .table_version import TableVersion
106
- self.tbl: Optional[TableVersion] = None # set by owning TableVersion
125
+ self.tbl = None # set by owning TableVersion
107
126
 
108
127
  @property
109
128
  def value_expr(self) -> Optional[exprs.Expr]:
@@ -160,6 +179,13 @@ class Column:
160
179
  assert self.tbl is not None
161
180
  return f'{self.tbl.name}.{self.name}'
162
181
 
182
+ @property
183
+ def media_validation(self) -> MediaValidation:
184
+ if self._media_validation is not None:
185
+ return self._media_validation
186
+ assert self.tbl is not None
187
+ return self.tbl.media_validation
188
+
163
189
  def source(self) -> None:
164
190
  """
165
191
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
pixeltable/catalog/globals.py CHANGED
@@ -1,8 +1,12 @@
1
+ from __future__ import annotations
1
2
  import dataclasses
3
+ import enum
2
4
  import itertools
3
5
  import logging
4
6
  from typing import Optional
5
7
 
8
+ import pixeltable.exceptions as excs
9
+
6
10
  _logger = logging.getLogger('pixeltable')
7
11
 
8
12
  # name of the position column in a component view
@@ -16,6 +20,9 @@ _PREDEF_SYMBOLS: Optional[set[str]] = None
16
20
 
17
21
  @dataclasses.dataclass
18
22
  class UpdateStatus:
23
+ """
24
+ Information about updates that resulted from a table operation.
25
+ """
19
26
  num_rows: int = 0
20
27
  # TODO: disambiguate what this means: # of slots computed or # of columns computed?
21
28
  num_computed_values: int = 0
@@ -31,6 +38,20 @@ class UpdateStatus:
31
38
  self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
32
39
  return self
33
40
 
41
+
42
+ class MediaValidation(enum.Enum):
43
+ ON_READ = 0
44
+ ON_WRITE = 1
45
+
46
+ @classmethod
47
+ def validated(cls, name: str, error_prefix: str) -> MediaValidation:
48
+ try:
49
+ return cls[name.upper()]
50
+ except KeyError:
51
+ val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
52
+ raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
53
+
54
+
34
55
  def is_valid_identifier(name: str) -> bool:
35
56
  return name.isidentifier() and not name.startswith('_')
36
57
 
pixeltable/catalog/insertable_table.py CHANGED
@@ -13,7 +13,7 @@ from pixeltable.env import Env
13
13
  from pixeltable.utils.filecache import FileCache
14
14
 
15
15
  from .catalog import Catalog
16
- from .globals import UpdateStatus
16
+ from .globals import UpdateStatus, MediaValidation
17
17
  from .table import Table
18
18
  from .table_version import TableVersion
19
19
  from .table_version_path import TableVersionPath
@@ -35,8 +35,8 @@ class InsertableTable(Table):
35
35
  # MODULE-LOCAL, NOT PUBLIC
36
36
  @classmethod
37
37
  def _create(
38
- cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame], primary_key: List[str],
39
- num_retained_versions: int, comment: str
38
+ cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
39
+ primary_key: List[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
40
40
  ) -> InsertableTable:
41
41
  columns = cls._create_columns(schema)
42
42
  cls._verify_schema(columns)
@@ -50,7 +50,9 @@ class InsertableTable(Table):
50
50
  col.is_pk = True
51
51
 
52
52
  with orm.Session(Env.get().engine, future=True) as session:
53
- _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
53
+ _, tbl_version = TableVersion.create(
54
+ session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
55
+ media_validation=media_validation)
54
56
  tbl = cls(dir_id, tbl_version)
55
57
  # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
56
58
  # when the table metadata gets updated. Once we have a notion of user-defined transactions in