pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,35 +1,38 @@
1
1
  import dataclasses
2
+ import types
2
3
  import typing
3
4
  import uuid
4
- from typing import Any, Optional, TypeVar, Union, get_type_hints
5
+ from enum import Enum
6
+ from typing import Any, TypeVar, Union, get_type_hints
5
7
 
6
8
  import sqlalchemy as sql
7
- import sqlalchemy.orm as orm
8
- from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary
9
+ from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
9
10
  from sqlalchemy.dialects.postgresql import JSONB, UUID
10
- from sqlalchemy.orm import declarative_base
11
11
  from sqlalchemy.orm.decl_api import DeclarativeMeta
12
12
 
13
+ from ..catalog.update_status import UpdateStatus
14
+
13
15
  # Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
14
16
  # a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
15
17
  # outside of the module in a typesafe way.
16
- Base: type = declarative_base()
18
+ Base: type = orm.declarative_base()
17
19
  assert isinstance(Base, DeclarativeMeta)
18
20
  base_metadata = Base.metadata
19
21
 
20
22
  T = TypeVar('T')
21
23
 
22
- def md_from_dict(data_class_type: type[T], data: Any) -> T:
24
+
25
+ def md_from_dict(type_: type[T], data: Any) -> T:
23
26
  """Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
24
- if dataclasses.is_dataclass(data_class_type):
25
- fieldtypes = {f: t for f, t in get_type_hints(data_class_type).items()}
26
- return data_class_type(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data}) # type: ignore[return-value]
27
+ if dataclasses.is_dataclass(type_):
28
+ fieldtypes = get_type_hints(type_)
29
+ return type_(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
27
30
 
28
- origin = typing.get_origin(data_class_type)
31
+ origin = typing.get_origin(type_)
29
32
  if origin is not None:
30
- type_args = typing.get_args(data_class_type)
31
- if origin is Union and type(None) in type_args:
32
- # Handling Optional types
33
+ type_args = typing.get_args(type_)
34
+ if (origin is Union or origin is types.UnionType) and type(None) in type_args:
35
+ # handling Optional[T] / Union[T, None] as well as T | None
33
36
  non_none_args = [arg for arg in type_args if arg is not type(None)]
34
37
  assert len(non_none_args) == 1
35
38
  return md_from_dict(non_none_args[0], data) if data is not None else None
@@ -42,11 +45,19 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
42
45
  elif origin is tuple:
43
46
  return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(type_args, data)) # type: ignore[return-value]
44
47
  else:
45
- assert False
48
+ raise AssertionError(origin)
49
+ elif isinstance(type_, type) and issubclass(type_, Enum):
50
+ return type_(data)
46
51
  else:
47
52
  return data
48
53
 
49
54
 
55
+ def _md_dict_factory(data: list[tuple[str, Any]]) -> dict:
56
+ """Use this to serialize <>Md instances with dataclasses.asdict()"""
57
+ # serialize enums to their values
58
+ return {k: v.value if isinstance(v, Enum) else v for k, v in data}
59
+
60
+
50
61
  # structure of the stored metadata:
51
62
  # - each schema entity that grows somehow proportionally to the data (# of output_rows, total insert operations,
52
63
  # number of schema changes) gets its own table
@@ -56,6 +67,7 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
56
67
  # schema easier (the goal is not to have to rely on some schema migration framework; if that breaks for some user,
57
68
  # it would be very difficult to patch up)
58
69
 
70
+
59
71
  @dataclasses.dataclass
60
72
  class SystemInfoMd:
61
73
  schema_version: int
@@ -63,7 +75,9 @@ class SystemInfoMd:
63
75
 
64
76
  class SystemInfo(Base):
65
77
  """A single-row table that contains system-wide metadata."""
78
+
66
79
  __tablename__ = 'systeminfo'
80
+
67
81
  dummy = sql.Column(Integer, primary_key=True, default=0, nullable=False)
68
82
  md = sql.Column(JSONB, nullable=False) # SystemInfoMd
69
83
 
@@ -71,14 +85,22 @@ class SystemInfo(Base):
71
85
  @dataclasses.dataclass
72
86
  class DirMd:
73
87
  name: str
88
+ user: str | None
89
+ additional_md: dict[str, Any] # deprecated
74
90
 
75
91
 
76
92
  class Dir(Base):
77
93
  __tablename__ = 'dirs'
78
94
 
79
- id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False)
95
+ id: orm.Mapped[uuid.UUID] = orm.mapped_column(
96
+ UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False
97
+ )
80
98
  parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
81
- md = sql.Column(JSONB, nullable=False)
99
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
100
+ additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
101
+
102
+ # used to force acquisition of an X-lock via an Update stmt
103
+ lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
82
104
 
83
105
 
84
106
  @dataclasses.dataclass
@@ -89,20 +111,24 @@ class ColumnMd:
89
111
  - when a column was added/dropped, which is needed to GC unreachable storage columns
90
112
  (a column that was added after table snapshot n and dropped before table snapshot n+1 can be removed
91
113
  from the stored table).
92
- """
114
+ """
115
+
93
116
  id: int
94
117
  schema_version_add: int
95
- schema_version_drop: Optional[int]
118
+ schema_version_drop: int | None
96
119
  col_type: dict
97
120
 
98
121
  # if True, is part of the primary key
99
122
  is_pk: bool
100
123
 
101
124
  # if set, this is a computed column
102
- value_expr: Optional[dict]
125
+ value_expr: dict | None
103
126
 
104
127
  # if True, the column is present in the stored table
105
- stored: Optional[bool]
128
+ stored: bool | None
129
+
130
+ # If present, the URI for the destination for column values
131
+ destination: str | None = None
106
132
 
107
133
 
108
134
  @dataclasses.dataclass
@@ -110,37 +136,71 @@ class IndexMd:
110
136
  """
111
137
  Metadata needed to instantiate an EmbeddingIndex
112
138
  """
139
+
113
140
  id: int
114
141
  name: str
142
+ indexed_col_tbl_id: str # UUID of the table (as string) that contains column being indexed
115
143
  indexed_col_id: int # column being indexed
116
144
  index_val_col_id: int # column holding the values to be indexed
117
145
  index_val_undo_col_id: int # column holding index values for deleted rows
118
146
  schema_version_add: int
119
- schema_version_drop: Optional[int]
147
+ schema_version_drop: int | None
120
148
  class_fqn: str
121
149
  init_args: dict[str, Any]
122
150
 
123
151
 
152
+ # a stored table version path is a list of (table id as str, effective table version)
153
+ TableVersionPath = list[tuple[str, int | None]]
154
+
155
+
124
156
  @dataclasses.dataclass
125
157
  class ViewMd:
126
158
  is_snapshot: bool
159
+ include_base_columns: bool
127
160
 
128
161
  # (table id, version); for mutable views, all versions are None
129
- base_versions: list[tuple[str, Optional[int]]]
162
+ base_versions: TableVersionPath
130
163
 
131
164
  # filter predicate applied to the base table; view-only
132
- predicate: Optional[dict[str, Any]]
165
+ predicate: dict[str, Any] | None
166
+
167
+ # sampling predicate applied to the base table; view-only
168
+ sample_clause: dict[str, Any] | None
133
169
 
134
170
  # ComponentIterator subclass; only for component views
135
- iterator_class_fqn: Optional[str]
171
+ iterator_class_fqn: str | None
136
172
 
137
173
  # args to pass to the iterator class constructor; only for component views
138
- iterator_args: Optional[dict[str, Any]]
174
+ iterator_args: dict[str, Any] | None
175
+
176
+
177
+ class TableState(Enum):
178
+ """The operational state of the table"""
179
+
180
+ LIVE = 0
181
+ ROLLFORWARD = 1 # finalizing pending table ops
182
+ ROLLBACK = 2 # rolling back pending table ops
183
+
184
+
185
+ class TableStatement(Enum):
186
+ """The top-level DDL/DML operation (corresponding to a statement in SQL; not: a TableOp) currently being executed"""
187
+
188
+ CREATE_TABLE = 0
189
+ CREATE_VIEW = 1
190
+ DROP_TABLE = 2
191
+ ADD_COLUMNS = 3
192
+ DROP_COLUMNS = 4
193
+ ADD_INDEX = 5
194
+ DROP_INDEX = 6
139
195
 
140
196
 
141
197
  @dataclasses.dataclass
142
198
  class TableMd:
199
+ tbl_id: str # uuid.UUID
143
200
  name: str
201
+ is_replica: bool
202
+
203
+ user: str | None
144
204
 
145
205
  # monotonically increasing w/in Table for both data and schema changes, starting at 0
146
206
  current_version: int
@@ -154,13 +214,52 @@ class TableMd:
154
214
  # - every row is assigned a unique and immutable rowid on insertion
155
215
  next_row_id: int
156
216
 
217
+ # sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
218
+ # - incremented for each add/drop of a mutable view
219
+ # - only maintained for mutable tables
220
+ # TODO: replace with mutable_views: list[UUID] to help with debugging
221
+ view_sn: int
222
+
157
223
  # Metadata format for external stores:
158
224
  # {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
159
225
  external_stores: list[dict[str, Any]]
160
226
 
161
227
  column_md: dict[int, ColumnMd] # col_id -> ColumnMd
162
228
  index_md: dict[int, IndexMd] # index_id -> IndexMd
163
- view_md: Optional[ViewMd]
229
+ view_md: ViewMd | None
230
+ # TODO: Remove additional_md from this and other Md dataclasses (and switch to using the separate additional_md
231
+ # column in all cases)
232
+ additional_md: dict[str, Any] # deprecated
233
+
234
+ # deprecated
235
+ has_pending_ops: bool = False
236
+
237
+ tbl_state: TableState = TableState.LIVE
238
+ pending_stmt: TableStatement | None = None
239
+
240
+ @property
241
+ def is_snapshot(self) -> bool:
242
+ return self.view_md is not None and self.view_md.is_snapshot
243
+
244
+ @property
245
+ def is_mutable(self) -> bool:
246
+ return not self.is_snapshot and not self.is_replica
247
+
248
+ @property
249
+ def is_pure_snapshot(self) -> bool:
250
+ return (
251
+ self.view_md is not None
252
+ and self.view_md.is_snapshot
253
+ and self.view_md.sample_clause is None
254
+ and self.view_md.predicate is None
255
+ and len(self.column_md) == 0
256
+ )
257
+
258
+ @property
259
+ def ancestors(self) -> TableVersionPath:
260
+ if self.view_md is None:
261
+ return []
262
+ return self.view_md.base_versions
164
263
 
165
264
 
166
265
  class Table(Base):
@@ -170,28 +269,46 @@ class Table(Base):
170
269
  Views are in essence a subclass of tables, because they also store materialized columns. The differences are:
171
270
  - views have a base, which is either a (live) table or a snapshot
172
271
  - views can have a filter predicate
272
+
273
+ dir_id: NULL for dropped tables
173
274
  """
275
+
174
276
  __tablename__ = 'tables'
175
277
 
176
278
  MAX_VERSION = 9223372036854775807 # 2^63 - 1
177
279
 
178
280
  id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
179
- dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
180
- md = sql.Column(JSONB, nullable=False) # TableMd
281
+ dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
282
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableMd
283
+ additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
284
+
285
+ # used to force acquisition of an X-lock via an Update stmt
286
+ lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
181
287
 
182
288
 
183
289
  @dataclasses.dataclass
184
- class TableVersionMd:
290
+ class VersionMd:
291
+ tbl_id: str # uuid.UUID
185
292
  created_at: float # time.time()
186
293
  version: int
187
294
  schema_version: int
295
+ user: str | None = None # User that created this version
296
+ update_status: UpdateStatus | None = None # UpdateStatus of the change that created this version
297
+ # A version fragment cannot be queried or instantiated via get_table(). A fragment represents a version of a
298
+ # replica table that has incomplete data, and exists only to provide base table support for a dependent view.
299
+ is_fragment: bool = False
300
+ additional_md: dict[str, Any] = dataclasses.field(default_factory=dict) # deprecated
188
301
 
189
302
 
190
303
  class TableVersion(Base):
191
304
  __tablename__ = 'tableversions'
192
- tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False)
193
- version = sql.Column(BigInteger, primary_key=True, nullable=False)
194
- md = sql.Column(JSONB, nullable=False) # TableVersionMd
305
+
306
+ tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
307
+ UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
308
+ )
309
+ version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
310
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
311
+ additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
195
312
 
196
313
 
197
314
  @dataclasses.dataclass
@@ -199,21 +316,24 @@ class SchemaColumn:
199
316
  """
200
317
  Records the versioned metadata of a column.
201
318
  """
319
+
202
320
  pos: int
203
321
  name: str
204
322
 
205
323
  # media validation strategy of this particular media column; if not set, TableMd.media_validation applies
206
324
  # stores column.MediaValidation.name.lower()
207
- media_validation: Optional[str]
325
+ media_validation: str | None
208
326
 
209
327
 
210
328
  @dataclasses.dataclass
211
- class TableSchemaVersionMd:
329
+ class SchemaVersionMd:
212
330
  """
213
331
  Records all versioned table metadata.
214
332
  """
333
+
334
+ tbl_id: str # uuid.UUID
215
335
  schema_version: int
216
- preceding_schema_version: Optional[int]
336
+ preceding_schema_version: int | None
217
337
  columns: dict[int, SchemaColumn] # col_id -> SchemaColumn
218
338
  num_retained_versions: int
219
339
  comment: str
@@ -221,15 +341,35 @@ class TableSchemaVersionMd:
221
341
  # default validation strategy for any media column of this table
222
342
  # stores column.MediaValidation.name.lower()
223
343
  media_validation: str
344
+ additional_md: dict[str, Any] # deprecated
224
345
 
225
346
 
226
347
  # versioning: each table schema change results in a new record
227
348
  class TableSchemaVersion(Base):
228
349
  __tablename__ = 'tableschemaversions'
229
350
 
230
- tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False)
231
- schema_version = sql.Column(BigInteger, primary_key=True, nullable=False)
232
- md = sql.Column(JSONB, nullable=False) # TableSchemaVersionMd
351
+ tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
352
+ UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
353
+ )
354
+ schema_version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
355
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # SchemaVersionMd
356
+ additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
357
+
358
+
359
+ class PendingTableOp(Base):
360
+ """
361
+ Table operation that needs to be completed before the table can be used.
362
+
363
+ Operations need to be completed in order of increasing seq_num.
364
+ """
365
+
366
+ __tablename__ = 'pendingtableops'
367
+
368
+ tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
369
+ UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
370
+ )
371
+ op_sn: orm.Mapped[int] = orm.mapped_column(Integer, primary_key=True, nullable=False) # catalog.TableOp.op_sn
372
+ op: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # catalog.TableOp
233
373
 
234
374
 
235
375
  @dataclasses.dataclass
@@ -249,9 +389,12 @@ class Function(Base):
249
389
  We store the Python version under which a Function was created (and the callable pickled) in order to warn
250
390
  against version mismatches.
251
391
  """
392
+
252
393
  __tablename__ = 'functions'
253
394
 
254
- id = sql.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False)
255
- dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
256
- md = sql.Column(JSONB, nullable=False) # FunctionMd
257
- binary_obj = sql.Column(LargeBinary, nullable=True)
395
+ id: orm.Mapped[uuid.UUID] = orm.mapped_column(
396
+ UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False
397
+ )
398
+ dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
399
+ md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # FunctionMd
400
+ binary_obj: orm.Mapped[bytes | None] = orm.mapped_column(LargeBinary, nullable=True)
@@ -0,0 +1,74 @@
1
+ from __future__ import annotations
2
+
3
+ from pixeltable.metadata import schema
4
+
5
+
6
+ class MetadataUtils:
7
+ @classmethod
8
+ def _diff_md(
9
+ cls, old_md: dict[int, schema.SchemaColumn] | None, new_md: dict[int, schema.SchemaColumn] | None
10
+ ) -> str:
11
+ """Return a string reporting the differences in a specific entry in two dictionaries
12
+
13
+ Results are formatted as follows:
14
+ - If `old_md` is `None`, returns 'Initial Version'.
15
+ - If `old_md` and `new_md` are the same, returns an empty string.
16
+ - If there are additions, changes, or deletions, returns a string summarizing the changes.
17
+ """
18
+ assert new_md is not None
19
+ if old_md is None:
20
+ return 'Initial Version'
21
+ if old_md == new_md:
22
+ return ''
23
+ added = {k: v.name for k, v in new_md.items() if k not in old_md}
24
+ changed = {
25
+ k: f'{old_md[k].name!r} to {v.name!r}'
26
+ for k, v in new_md.items()
27
+ if k in old_md and old_md[k].name != v.name
28
+ }
29
+ deleted = {k: v.name for k, v in old_md.items() if k not in new_md}
30
+ if len(added) == 0 and len(changed) == 0 and len(deleted) == 0:
31
+ return ''
32
+ # Format the result
33
+ t = []
34
+ if len(added) > 0:
35
+ t.append('Added: ' + ', '.join(added.values()))
36
+ if len(changed) > 0:
37
+ t.append('Renamed: ' + ', '.join(changed.values()))
38
+ if len(deleted) > 0:
39
+ t.append('Deleted: ' + ', '.join(deleted.values()))
40
+ r = ', '.join(t)
41
+ return r
42
+
43
+ @classmethod
44
+ def _create_md_change_dict(cls, md_list: list[tuple[int, dict[int, schema.SchemaColumn]]] | None) -> dict[int, str]:
45
+ """Return a dictionary of schema changes by version
46
+ Args:
47
+ md_list: a list of tuples, each containing a version number and a metadata dictionary.
48
+ """
49
+ r: dict[int, str] = {}
50
+ if md_list is None or len(md_list) == 0:
51
+ return r
52
+
53
+ # Sort the list in place by version number
54
+ md_list.sort()
55
+
56
+ first_retrieved_version = md_list[0][0]
57
+ if first_retrieved_version == 0:
58
+ prev_md = None
59
+ prev_ver = -1
60
+ start = 0
61
+ else:
62
+ prev_md = md_list[0][1]
63
+ prev_ver = first_retrieved_version
64
+ start = 1
65
+
66
+ for ver, curr_md in md_list[start:]:
67
+ if ver == prev_ver:
68
+ continue
69
+ assert ver > prev_ver
70
+ tf = cls._diff_md(prev_md, curr_md)
71
+ if tf != '':
72
+ r[ver] = tf
73
+ prev_md = curr_md
74
+ return r
@@ -0,0 +1,3 @@
1
+ from .mypy_plugin import plugin
2
+
3
+ __all__ = ['plugin']
@@ -0,0 +1,123 @@
1
+ from typing import Callable, ClassVar
2
+
3
+ from mypy import nodes
4
+ from mypy.plugin import AnalyzeTypeContext, ClassDefContext, FunctionContext, MethodSigContext, Plugin
5
+ from mypy.plugins.common import add_attribute_to_class, add_method_to_class
6
+ from mypy.types import AnyType, FunctionLike, Instance, NoneType, Type, TypeOfAny
7
+
8
+ import pixeltable as pxt
9
+ from pixeltable import exprs
10
+
11
+
12
class PxtPlugin(Plugin):
    """Mypy plugin that wires the Pixeltable-specific hooks of this module into the type checker."""

    # Fully qualified names of the Pixeltable objects that receive special handling.
    __UDA_FULLNAME = f'{pxt.uda.__module__}.{pxt.uda.__name__}'
    __ARRAY_GETITEM_FULLNAME = f'{pxt.Array.__module__}.{pxt.Array.__name__}.__class_getitem__'
    __ADD_COLUMN_FULLNAME = f'{pxt.Table.__module__}.{pxt.Table.__name__}.{pxt.Table.add_column.__name__}'
    __ADD_COMPUTED_COLUMN_FULLNAME = (
        f'{pxt.Table.__module__}.{pxt.Table.__name__}.{pxt.Table.add_computed_column.__name__}'
    )

    # Pixeltable type -> fully qualified name of the type that mypy should see in its place.
    __TYPE_MAP: ClassVar[dict] = {
        pxt.Json: 'typing.Any',
        pxt.Array: 'numpy.ndarray',
        pxt.Image: 'PIL.Image.Image',
        pxt.Video: 'builtins.str',
        pxt.Audio: 'builtins.str',
        pxt.Document: 'builtins.str',
    }

    # Same mapping, keyed by the fully qualified name of the Pixeltable type.
    __FULLNAME_MAP: ClassVar[dict] = {
        f'{pxt_type.__module__}.{pxt_type.__name__}': std_name for pxt_type, std_name in __TYPE_MAP.items()
    }

    def get_function_hook(self, fullname: str) -> Callable[[FunctionContext], Type] | None:
        """Return `adjust_uda_type` for every function; the hook itself decides whether to change anything."""
        return adjust_uda_type

    def get_type_analyze_hook(self, fullname: str) -> Callable[[AnalyzeTypeContext], Type] | None:
        """Return a hook that substitutes the mapped type for `fullname`, or None if it is not mapped."""
        replacement = self.__FULLNAME_MAP.get(fullname)
        if replacement is None:
            return None

        def substitute(ctx: AnalyzeTypeContext) -> Type:
            return adjust_pxt_type(ctx, replacement)

        return substitute

    def get_method_signature_hook(self, fullname: str) -> Callable[[MethodSigContext], FunctionLike] | None:
        """Return `adjust_kwargs` for `Table.add_column` and `Table.add_computed_column`, else None."""
        relaxed_methods = (self.__ADD_COLUMN_FULLNAME, self.__ADD_COMPUTED_COLUMN_FULLNAME)
        return adjust_kwargs if fullname in relaxed_methods else None

    def get_class_decorator_hook_2(self, fullname: str) -> Callable[[ClassDefContext], bool] | None:
        """Return `adjust_uda_methods` for the `uda` decorator, else None."""
        return adjust_uda_methods if fullname == self.__UDA_FULLNAME else None
47
+
48
+
49
def plugin(version: str) -> type:
    """Return the plugin class for mypy to instantiate; `version` is accepted but not inspected."""
    return PxtPlugin
51
+
52
+
53
# Fully qualified name of the `Aggregator` class, used to recognize aggregator return types.
_AGGREGATOR_FULLNAME = f'{pxt.Aggregator.__module__}.{pxt.Aggregator.__name__}'
# NOTE(review): despite its name, this is built from `exprs.Expr`, and nothing in the visible code reads it.
_FN_CALL_FULLNAME = f'{exprs.Expr.__module__}.{exprs.Expr.__name__}'


def adjust_uda_type(ctx: FunctionContext) -> Type:
    """
    Mypy keeps treating a class decorated with @uda as a class, so it infers that a call such as
    sum(expr) returns an instance of sum. To correct this, a call whose inferred return type is
    `Aggregator` itself, or a class listing `Aggregator` among its direct bases, is given the return
    type `Any` instead. Every other return type is passed through unchanged.
    """
    inferred = ctx.default_return_type
    if not isinstance(inferred, Instance):
        return inferred

    info = inferred.type
    if info.fullname == _AGGREGATOR_FULLNAME:
        return AnyType(TypeOfAny.special_form)

    # NOTE(review): only the direct bases are inspected here, not the full MRO.
    for base in info.bases:
        if base.type.fullname == _AGGREGATOR_FULLNAME:
            return AnyType(TypeOfAny.special_form)
    return inferred
70
+
71
+
72
def adjust_pxt_type(ctx: AnalyzeTypeContext, subst_name: str) -> Type:
    """
    Substitutes a standard type for one of the special Pixeltable classes (np.ndarray for pxt.Array, for example).

    `subst_name` is the fully qualified name of the replacement type. 'typing.Any' is special-cased and
    yields an `Any` type; every other name is resolved through the analyzer API with no type arguments.
    """
    if subst_name != 'typing.Any':
        return ctx.api.named_type(subst_name, [])
    return AnyType(TypeOfAny.special_form)
79
+
80
+
81
def adjust_kwargs(ctx: MethodSigContext) -> FunctionLike:
    """
    Relaxes the signature of add_column and add_computed_column so that bare kwargs type-check cleanly.

    When a method declared as
    ```
    def my_func(*, arg1: int, arg2: str, **kwargs: Expr)
    ```
    (for example) is called with bare kwargs, as in
    ```
    my_func(my_kwarg=value)
    ```
    mypy reports multiple warnings. To avoid them, the signature is rewritten so that mypy sees only
    ```
    def my_func(**kwargs: Any)
    ```
    i.e. only the last declared argument is kept, and it is typed as `Any`.
    For details, see: <https://github.com/python/mypy/issues/18481>
    """
    original = ctx.default_signature
    return original.copy_modified(
        # Keep the name and kind of the final argument only (the `**kwargs` in the example above).
        arg_names=original.arg_names[-1:],
        arg_types=[AnyType(TypeOfAny.special_form)],
        arg_kinds=original.arg_kinds[-1:],
    )
103
+
104
+
105
def adjust_uda_methods(ctx: ClassDefContext) -> bool:
    """
    Mypy continues to treat a class decorated with `@pxt.uda` as a class, even though the decorator replaces
    it with an `AggregateFunction`. To let such classes type-check, this hook adds to the decorated class:

    - an `__init__` accepting `*args` and `**kwargs` of any type and returning None
    - static methods `to_sql` and `overload`, each taking one positional argument `fn` and returning `Any`
    - a class variable `signatures` of type `list[Any]`

    Always returns True.
    """
    api, cls_def = ctx.api, ctx.cls
    signatures_type = api.named_type('builtins.list', [AnyType(TypeOfAny.special_form)])

    def any_argument(name: str, kind: nodes.ArgKind) -> nodes.Argument:
        # An argument of type `Any` with no default value.
        return nodes.Argument(nodes.Var(name), AnyType(TypeOfAny.special_form), None, kind)

    fn_param = any_argument('fn', nodes.ARG_POS)
    star_args = any_argument('args', nodes.ARG_STAR)
    star_kwargs = any_argument('kwargs', nodes.ARG_STAR2)

    add_method_to_class(api, cls_def, '__init__', args=[star_args, star_kwargs], return_type=NoneType())
    for method_name in ('to_sql', 'overload'):
        add_method_to_class(
            api,
            cls_def,
            method_name,
            args=[fn_param],
            return_type=AnyType(TypeOfAny.special_form),
            is_staticmethod=True,
        )
    add_attribute_to_class(api, cls_def, 'signatures', typ=signatures_type, is_classvar=True)
    return True