pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (127)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +22 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +193 -158
  11. pixeltable/catalog/table_version.py +191 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +89 -89
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/__init__.py +2 -0
  29. pixeltable/exprs/arithmetic_expr.py +7 -11
  30. pixeltable/exprs/array_slice.py +1 -1
  31. pixeltable/exprs/column_property_ref.py +3 -3
  32. pixeltable/exprs/column_ref.py +12 -13
  33. pixeltable/exprs/comparison.py +3 -6
  34. pixeltable/exprs/compound_predicate.py +4 -4
  35. pixeltable/exprs/expr.py +31 -22
  36. pixeltable/exprs/expr_dict.py +3 -3
  37. pixeltable/exprs/expr_set.py +1 -1
  38. pixeltable/exprs/function_call.py +110 -80
  39. pixeltable/exprs/globals.py +3 -3
  40. pixeltable/exprs/in_predicate.py +1 -1
  41. pixeltable/exprs/inline_expr.py +3 -3
  42. pixeltable/exprs/is_null.py +1 -1
  43. pixeltable/exprs/json_mapper.py +2 -2
  44. pixeltable/exprs/json_path.py +17 -10
  45. pixeltable/exprs/literal.py +1 -1
  46. pixeltable/exprs/method_ref.py +2 -2
  47. pixeltable/exprs/row_builder.py +8 -17
  48. pixeltable/exprs/rowid_ref.py +21 -10
  49. pixeltable/exprs/similarity_expr.py +5 -5
  50. pixeltable/exprs/sql_element_cache.py +1 -1
  51. pixeltable/exprs/type_cast.py +2 -3
  52. pixeltable/exprs/variable.py +2 -2
  53. pixeltable/ext/__init__.py +2 -0
  54. pixeltable/ext/functions/__init__.py +2 -0
  55. pixeltable/ext/functions/yolox.py +3 -3
  56. pixeltable/func/__init__.py +3 -1
  57. pixeltable/func/aggregate_function.py +9 -9
  58. pixeltable/func/callable_function.py +3 -4
  59. pixeltable/func/expr_template_function.py +6 -16
  60. pixeltable/func/function.py +48 -14
  61. pixeltable/func/function_registry.py +1 -3
  62. pixeltable/func/query_template_function.py +5 -12
  63. pixeltable/func/signature.py +23 -22
  64. pixeltable/func/tools.py +3 -3
  65. pixeltable/func/udf.py +6 -4
  66. pixeltable/functions/__init__.py +2 -0
  67. pixeltable/functions/fireworks.py +7 -4
  68. pixeltable/functions/globals.py +4 -5
  69. pixeltable/functions/huggingface.py +1 -5
  70. pixeltable/functions/image.py +17 -7
  71. pixeltable/functions/llama_cpp.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +4 -4
  74. pixeltable/functions/openai.py +19 -19
  75. pixeltable/functions/string.py +23 -30
  76. pixeltable/functions/timestamp.py +11 -6
  77. pixeltable/functions/together.py +14 -12
  78. pixeltable/functions/util.py +1 -1
  79. pixeltable/functions/video.py +5 -4
  80. pixeltable/functions/vision.py +6 -9
  81. pixeltable/functions/whisper.py +3 -3
  82. pixeltable/globals.py +246 -260
  83. pixeltable/index/__init__.py +2 -0
  84. pixeltable/index/base.py +1 -1
  85. pixeltable/index/btree.py +3 -1
  86. pixeltable/index/embedding_index.py +11 -5
  87. pixeltable/io/external_store.py +11 -12
  88. pixeltable/io/label_studio.py +4 -3
  89. pixeltable/io/parquet.py +57 -56
  90. pixeltable/iterators/__init__.py +4 -2
  91. pixeltable/iterators/audio.py +11 -11
  92. pixeltable/iterators/document.py +10 -10
  93. pixeltable/iterators/string.py +1 -2
  94. pixeltable/iterators/video.py +14 -15
  95. pixeltable/metadata/__init__.py +9 -5
  96. pixeltable/metadata/converters/convert_10.py +0 -1
  97. pixeltable/metadata/converters/convert_15.py +0 -2
  98. pixeltable/metadata/converters/convert_23.py +0 -2
  99. pixeltable/metadata/converters/convert_24.py +3 -3
  100. pixeltable/metadata/converters/convert_25.py +1 -1
  101. pixeltable/metadata/converters/convert_27.py +0 -2
  102. pixeltable/metadata/converters/convert_28.py +0 -2
  103. pixeltable/metadata/converters/convert_29.py +7 -8
  104. pixeltable/metadata/converters/util.py +7 -7
  105. pixeltable/metadata/schema.py +27 -19
  106. pixeltable/plan.py +68 -40
  107. pixeltable/share/__init__.py +2 -0
  108. pixeltable/share/packager.py +15 -12
  109. pixeltable/share/publish.py +3 -5
  110. pixeltable/store.py +37 -38
  111. pixeltable/type_system.py +41 -28
  112. pixeltable/utils/coco.py +4 -4
  113. pixeltable/utils/console_output.py +1 -3
  114. pixeltable/utils/description_helper.py +1 -1
  115. pixeltable/utils/documents.py +3 -3
  116. pixeltable/utils/filecache.py +20 -9
  117. pixeltable/utils/formatter.py +2 -3
  118. pixeltable/utils/media_store.py +1 -1
  119. pixeltable/utils/pytorch.py +1 -1
  120. pixeltable/utils/sql.py +4 -4
  121. pixeltable/utils/transactional_directory.py +2 -1
  122. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
  123. pixeltable-0.3.8.dist-info/RECORD +174 -0
  124. pixeltable-0.3.6.dist-info/RECORD +0 -172
  125. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
  126. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
  127. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
+ # ruff: noqa: F401
2
+
1
3
  from .__version__ import __version__, __version_tuple__
2
4
  from .catalog import Column, InsertableTable, Table, UpdateStatus, View
3
5
  from .dataframe import DataFrame
4
- from .exceptions import Error
6
+ from .exceptions import Error, PixeltableWarning
5
7
  from .exprs import RELATIVE_PATH_ROOT
6
8
  from .func import Aggregator, Function, expr_udf, query, uda, udf
7
9
  from .globals import (
@@ -56,7 +58,7 @@ from . import ext, functions, io, iterators # isort: skip
56
58
  # This is the safest / most maintainable way to construct __all__: start with the default and "blacklist"
57
59
  # stuff that we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
58
60
 
59
- __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
61
+ __default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
60
62
  __removed_symbols = {
61
63
  'catalog',
62
64
  'dataframe',
@@ -72,7 +74,7 @@ __removed_symbols = {
72
74
  'type_system',
73
75
  'utils',
74
76
  }
75
- __all__ = sorted(list(__default_dir - __removed_symbols))
77
+ __all__ = sorted(__default_dir - __removed_symbols)
76
78
 
77
79
 
78
80
  def __dir__():
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.3.6'
3
- __version_tuple__ = (0, 3, 6)
2
+ __version__ = '0.3.8'
3
+ __version_tuple__ = (0, 3, 8)
pixeltable/catalog/__init__.py CHANGED
@@ -9,5 +9,6 @@ from .path_dict import PathDict
9
9
  from .schema_object import SchemaObject
10
10
  from .table import Table
11
11
  from .table_version import TableVersion
12
+ from .table_version_handle import TableVersionHandle
12
13
  from .table_version_path import TableVersionPath
13
14
  from .view import View
pixeltable/catalog/catalog.py CHANGED
@@ -2,37 +2,51 @@ from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
4
  import logging
5
- from typing import Optional
5
+ from typing import Optional, Type
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy as sql
9
- import sqlalchemy.orm as orm
10
9
 
10
+ import pixeltable.env as env
11
+ import pixeltable.exceptions as excs
11
12
  import pixeltable.metadata.schema as schema
13
+ from pixeltable.env import Env
12
14
 
13
- from .path_dict import PathDict
15
+ from .dir import Dir
16
+ from .schema_object import SchemaObject
14
17
  from .table import Table
15
18
  from .table_version import TableVersion
19
+ from .table_version_handle import TableVersionHandle
16
20
  from .table_version_path import TableVersionPath
17
21
 
18
- # This import must go last to avoid circular imports.
19
- import pixeltable.env as env # isort: skip
22
+ # from .. import InsertableTable
20
23
 
21
24
  _logger = logging.getLogger('pixeltable')
22
25
 
23
26
 
27
+ def _join_path(path: str, name: str) -> str:
28
+ """Append name to path, if path is not empty."""
29
+ return name if path == '' else f'{path}.{name}'
30
+
31
+
24
32
  class Catalog:
25
- """A repository of catalog objects"""
33
+ """The functional interface to getting access to catalog objects
34
+
35
+ All interface functions must be called in the context of a transaction, started with Env.begin().
36
+ """
26
37
 
27
38
  _instance: Optional[Catalog] = None
28
39
 
40
+ # key: [id, version]
41
+ # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
42
+ # - snapshot versions: records the version of the snapshot
43
+ _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
44
+ _tbls: dict[UUID, Table]
45
+
29
46
  @classmethod
30
47
  def get(cls) -> Catalog:
31
48
  if cls._instance is None:
32
49
  cls._instance = cls()
33
- with orm.Session(env.Env.get().engine, future=True) as session:
34
- cls._instance._load_table_versions(session)
35
- # cls._instance._load_functions(session)
36
50
  return cls._instance
37
51
 
38
52
  @classmethod
@@ -41,69 +55,280 @@ class Catalog:
41
55
  cls._instance = None
42
56
 
43
57
  def __init__(self) -> None:
44
- # key: [id, version]
45
- # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
46
- # - snapshot versions: records the version of the snapshot
47
- self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
58
+ self._tbl_versions = {}
59
+ self._tbls = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
60
+ self._init_store()
48
61
 
49
- self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
50
- self.tbl_dependents: dict[UUID, list[Table]] = {}
62
+ def get_dir_path(self, dir_id: UUID) -> str:
63
+ """Return path for directory with given id"""
64
+ session = env.Env.get().session
65
+ names: list[str] = []
66
+ while True:
67
+ dir = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one()
68
+ if dir.md['name'] == '':
69
+ break
70
+ names.insert(0, dir.md['name'])
71
+ dir_id = dir.parent_id
72
+ assert dir_id is not None
73
+ return '.'.join(names)
51
74
 
52
- self._init_store()
53
- self.paths = PathDict() # do this after _init_catalog()
75
+ def get_tbl_path(self, tbl_id: UUID) -> str:
76
+ """Return path for table with given id"""
77
+ session = env.Env.get().session
78
+ tbl = session.query(schema.Table).filter(schema.Table.id == tbl_id).one()
79
+ dir_path = self.get_dir_path(tbl.dir_id)
80
+ return _join_path(dir_path, tbl.md['name'])
54
81
 
55
- def _init_store(self) -> None:
56
- """One-time initialization of the stored catalog. Idempotent."""
57
- with orm.Session(env.Env.get().engine, future=True) as session:
58
- if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
59
- return
60
- # create a top-level directory, so that every schema object has a directory
61
- dir_md = schema.DirMd(name='', user=None, additional_md={})
62
- dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
63
- session.add(dir_record)
64
- session.flush()
65
- session.commit()
66
- _logger.info(f'Initialized catalog')
82
+ @dataclasses.dataclass
83
+ class DirEntry:
84
+ dir: Optional[schema.Dir]
85
+ dir_entries: dict[str, Catalog.DirEntry]
86
+ table: Optional[schema.Table]
67
87
 
68
- def _load_snapshot_version(
69
- self, tbl_id: UUID, version: int, base: Optional[TableVersion], session: orm.Session
70
- ) -> TableVersion:
71
- q = (
88
+ def get_dir_contents(self, dir_id: UUID, recursive: bool = False) -> dict[str, DirEntry]:
89
+ """Returns a dict mapping the entry names to DirEntry objects"""
90
+ session = env.Env.get().session
91
+ result: dict[str, Catalog.DirEntry] = {}
92
+
93
+ dirs = session.query(schema.Dir).filter(schema.Dir.parent_id == dir_id).all()
94
+ for dir in dirs:
95
+ dir_contents: dict[str, Catalog.DirEntry] = {}
96
+ if recursive:
97
+ dir_contents = self.get_dir_contents(dir.id, recursive=True)
98
+ result[dir.md['name']] = self.DirEntry(dir=dir, dir_entries=dir_contents, table=None)
99
+
100
+ tbls = session.query(schema.Table).filter(schema.Table.dir_id == dir_id).all()
101
+ for tbl in tbls:
102
+ result[tbl.md['name']] = self.DirEntry(dir=None, dir_entries={}, table=tbl)
103
+
104
+ return result
105
+
106
+ def drop_dir(self, dir_id: UUID) -> None:
107
+ """Delete the directory with the given id"""
108
+ session = env.Env.get().session
109
+ session.query(schema.Dir).filter(schema.Dir.id == dir_id).delete()
110
+
111
+ def get_schema_object(
112
+ self,
113
+ path: str,
114
+ expected: Optional[Type[SchemaObject]] = None,
115
+ raise_if_exists: bool = False,
116
+ raise_if_not_exists: bool = False,
117
+ ) -> Optional[SchemaObject]:
118
+ """Return the schema object at the given path, or None if it doesn't exist.
119
+
120
+ Raises Error if
121
+ - the parent directory doesn't exist'
122
+ - raise_if_exists is True and the path exists
123
+ - raise_if_not_exists is True and the path does not exist
124
+ - expected is not None and the existing object has a different type
125
+ """
126
+ session = env.Env.get().session
127
+ if path == '':
128
+ # the root dir
129
+ if expected is not None and expected is not Dir:
130
+ raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}')
131
+ dir = self._get_dir(path)
132
+ return Dir(dir.id, dir.parent_id, dir.md['name'])
133
+
134
+ components = path.split('.')
135
+ parent_path = '.'.join(components[:-1])
136
+ parent_dir = self._get_dir('.'.join(components[:-1]))
137
+ if parent_dir is None:
138
+ raise excs.Error(f'Directory {parent_path!r} does not exist')
139
+ name = components[-1]
140
+
141
+ # check if path points to a directory
142
+ obj: Optional[SchemaObject] = None
143
+ dir = (
144
+ session.query(schema.Dir)
145
+ .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
146
+ .one_or_none()
147
+ )
148
+ if dir is not None:
149
+ obj = Dir(dir.id, dir.parent_id, dir.md['name'])
150
+ else:
151
+ # check if it's a table
152
+ row = (
153
+ session.query(schema.Table.id)
154
+ .filter(schema.Table.dir_id == parent_dir.id, schema.Table.md['name'].astext == name)
155
+ .one_or_none()
156
+ )
157
+ if row is not None:
158
+ tbl_id = row[0]
159
+ if not tbl_id in self._tbls:
160
+ self._tbls[tbl_id] = self._load_tbl(tbl_id)
161
+ obj = self._tbls[tbl_id]
162
+
163
+ if obj is None and raise_if_not_exists:
164
+ raise excs.Error(f'Path {path!r} does not exist')
165
+ elif obj is not None and raise_if_exists:
166
+ raise excs.Error(f'Path {path!r} is an existing {type(obj)._display_name()}')
167
+ elif obj is not None and expected is not None and not isinstance(obj, expected):
168
+ raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}')
169
+ return obj
170
+
171
+ def get_tbl(self, tbl_id: UUID) -> Optional[Table]:
172
+ if not tbl_id in self._tbls:
173
+ tbl = self._load_tbl(tbl_id)
174
+ if tbl is None:
175
+ return None
176
+ self._tbls[tbl_id] = tbl
177
+ return self._tbls[tbl_id]
178
+
179
+ def add_tbl(self, tbl: Table) -> None:
180
+ """Explicitly add a Table"""
181
+ self._tbls[tbl._id] = tbl
182
+
183
+ def get_views(self, tbl_id: UUID) -> list[UUID]:
184
+ """Return the ids of views that directly reference the given table"""
185
+ session = env.Env.get().session
186
+ q = session.query(schema.Table.id).filter(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
187
+ result = [r[0] for r in q.all()]
188
+ return result
189
+
190
+ def remove_tbl(self, tbl_id: UUID) -> None:
191
+ assert tbl_id in self._tbls
192
+ del self._tbls[tbl_id]
193
+
194
+ def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
195
+ if (tbl_id, effective_version) not in self._tbl_versions:
196
+ self._tbl_versions[(tbl_id, effective_version)] = self._load_tbl_version(tbl_id, effective_version)
197
+ return self._tbl_versions[(tbl_id, effective_version)]
198
+
199
+ def add_tbl_version(self, tbl_version: TableVersion) -> None:
200
+ """Explicitly add a TableVersion"""
201
+ self._tbl_versions[(tbl_version.id, tbl_version.effective_version)] = tbl_version
202
+ # if this is a mutable view, also record it in the base
203
+ if tbl_version.is_view and tbl_version.effective_version is None:
204
+ base = tbl_version.base.get()
205
+ base.mutable_views.append(TableVersionHandle(tbl_version.id, tbl_version.effective_version))
206
+
207
+ def remove_tbl_version(self, tbl_version: TableVersion) -> None:
208
+ assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
209
+ del self._tbl_versions[(tbl_version.id, tbl_version.effective_version)]
210
+
211
+ def get_dir(self, dir_id: UUID) -> Optional[Dir]:
212
+ """Return the Dir with the given id, or None if it doesn't exist"""
213
+ session = env.Env.get().session
214
+ dir_record = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one_or_none()
215
+ if dir_record is None:
216
+ return None
217
+ return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
218
+
219
+ def _get_dir(self, path: str) -> Optional[schema.Dir]:
220
+ session = env.Env.get().session
221
+ assert session is not None
222
+ if path == '':
223
+ return session.query(schema.Dir).filter(schema.Dir.parent_id.is_(None)).one()
224
+ else:
225
+ components = path.split('.')
226
+ parent_path = '.'.join(components[:-1])
227
+ parent_dir = self._get_dir(parent_path)
228
+ if parent_dir is None:
229
+ return None
230
+ name = components[-1]
231
+ dir = (
232
+ session.query(schema.Dir)
233
+ .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
234
+ .one_or_none()
235
+ )
236
+ return dir
237
+
238
+ def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
239
+ _logger.info(f'Loading table {tbl_id}')
240
+ from .insertable_table import InsertableTable
241
+ from .view import View
242
+
243
+ session = env.Env.get().session
244
+ tbl_record, schema_version_record = (
72
245
  session.query(schema.Table, schema.TableSchemaVersion)
73
- .select_from(schema.Table)
74
- .join(schema.TableVersion)
75
246
  .join(schema.TableSchemaVersion)
76
- .where(schema.Table.id == tbl_id)
77
- .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {version}"))
247
+ .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
248
+ # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
78
249
  .where(
79
250
  sql.text(
80
- (
81
- f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
82
- f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
83
- )
251
+ f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
252
+ f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
84
253
  )
85
254
  )
255
+ .where(schema.Table.id == tbl_id)
256
+ .one_or_none()
86
257
  )
87
- tbl_record, schema_version_record = q.one()
258
+ if tbl_record is None:
259
+ return None
260
+
88
261
  tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
262
+ view_md = tbl_md.view_md
263
+ if view_md is None:
264
+ # this is a base table
265
+ if (tbl_id, None) not in self._tbl_versions:
266
+ self._tbl_versions[(tbl_id, None)] = self._load_tbl_version(tbl_id, None)
267
+ tbl = InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))
268
+ return tbl
269
+
270
+ # this is a view; determine the sequence of TableVersions to load
271
+ tbl_version_path: list[tuple[UUID, Optional[int]]] = []
89
272
  schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
90
- # we ignore tbl_record.base_tbl_id/base_snapshot_id and use 'base' instead: if the base is a snapshot
91
- # we'd have to look that up first
92
- return TableVersion(tbl_record.id, tbl_md, version, schema_version_md, is_snapshot=True, base=base)
273
+ pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
274
+ if pure_snapshot:
275
+ # this is a pure snapshot, without a physical table backing it; we only need the bases
276
+ pass
277
+ else:
278
+ effective_version = 0 if view_md.is_snapshot else None # snapshots only have version 0
279
+ tbl_version_path.append((tbl_id, effective_version))
280
+ tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)
93
281
 
94
- def _load_table_versions(self, session: orm.Session) -> None:
95
- from .insertable_table import InsertableTable
96
- from .view import View
282
+ # load TableVersions, starting at the root
283
+ base_path: Optional[TableVersionPath] = None
284
+ view_path: Optional[TableVersionPath] = None
285
+ for id, effective_version in tbl_version_path[::-1]:
286
+ if (id, effective_version) not in self._tbl_versions:
287
+ self._tbl_versions[(id, effective_version)] = self._load_tbl_version(id, effective_version)
288
+ view_path = TableVersionPath(TableVersionHandle(id, effective_version), base=base_path)
289
+ base_path = view_path
290
+ view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
291
+ # TODO: also load mutable views
292
+ return view
97
293
 
98
- # load tables/views;
99
- # do this in ascending order of creation ts so that we can resolve base references in one pass
294
+ def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
295
+ _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
296
+ session = env.Env.get().session
100
297
  q = (
101
298
  session.query(schema.Table, schema.TableSchemaVersion)
102
299
  .select_from(schema.Table)
103
- .join(schema.TableVersion)
300
+ .where(schema.Table.id == tbl_id)
104
301
  .join(schema.TableSchemaVersion)
105
- .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = 0"))
106
- .where(
302
+ .where(schema.TableSchemaVersion.tbl_id == tbl_id)
303
+ )
304
+
305
+ if effective_version is not None:
306
+ # we are loading a specific version
307
+ # SELECT *
308
+ # FROM Table t
309
+ # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
310
+ # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
311
+ # WHERE t.id = tbl_id
312
+ q = (
313
+ q.join(schema.TableVersion)
314
+ .where(schema.TableVersion.tbl_id == tbl_id)
315
+ .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}"))
316
+ .where(
317
+ sql.text(
318
+ (
319
+ f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
320
+ f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
321
+ )
322
+ )
323
+ )
324
+ )
325
+ else:
326
+ # we are loading the current version
327
+ # SELECT *
328
+ # FROM Table t
329
+ # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
330
+ # WHERE t.id = tbl_id
331
+ q = q.where(
107
332
  sql.text(
108
333
  (
109
334
  f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
@@ -111,80 +336,62 @@ class Catalog:
111
336
  )
112
337
  )
113
338
  )
114
- .order_by(sql.text(f"({schema.TableVersion.__table__}.md->>'created_at')::float"))
339
+
340
+ tbl_record, schema_version_record = q.one_or_none()
341
+ tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
342
+ schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
343
+ view_md = tbl_md.view_md
344
+
345
+ # load mutable view ids
346
+ q = session.query(schema.Table.id).filter(
347
+ sql.text(
348
+ f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
349
+ "AND md->'view_md'->'base_versions'->0->1 IS NULL"
350
+ )
115
351
  )
352
+ mutable_view_ids = [r[0] for r in q.all()]
353
+ mutable_views = [TableVersionHandle(id, None) for id in mutable_view_ids]
116
354
 
117
- for tbl_record, schema_version_record in q.all():
118
- tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
119
- schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
120
- view_md = tbl_md.view_md
121
-
122
- if view_md is not None:
123
- assert len(view_md.base_versions) > 0
124
- # construct a TableVersionPath for the view
125
- refd_versions = [(UUID(tbl_id), version) for tbl_id, version in view_md.base_versions]
126
- base_path: Optional[TableVersionPath] = None
127
- base: Optional[TableVersion] = None
128
- # go through the versions in reverse order, so we can construct TableVersionPaths
129
- for base_id, version in refd_versions[::-1]:
130
- base_version = self.tbl_versions.get((base_id, version), None)
131
- if base_version is None:
132
- if version is None:
133
- # debugging
134
- pass
135
- # if this is a reference to a mutable table, we should have loaded it already
136
- assert version is not None
137
- base_version = self._load_snapshot_version(base_id, version, base, session)
138
- base_path = TableVersionPath(base_version, base=base_path)
139
- base = base_version
140
- assert base_path is not None
141
-
142
- base_tbl_id = base_path.tbl_id()
143
- is_snapshot = view_md is not None and view_md.is_snapshot
144
- snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
145
- include_base_columns = view_md is None or view_md.include_base_columns
146
- if snapshot_only:
147
- # this is a pure snapshot, without a physical table backing it
148
- view_path = base_path
149
- else:
150
- tbl_version = TableVersion(
151
- tbl_record.id,
152
- tbl_md,
153
- tbl_md.current_version,
154
- schema_version_md,
155
- is_snapshot=is_snapshot,
156
- base=base_path.tbl_version if is_snapshot else None,
157
- base_path=base_path if not is_snapshot else None,
158
- )
159
- view_path = TableVersionPath(tbl_version, base=base_path)
160
-
161
- tbl: Table = View(
162
- tbl_record.id,
163
- tbl_record.dir_id,
164
- tbl_md.name,
165
- view_path,
166
- base_tbl_id,
167
- snapshot_only=snapshot_only,
168
- include_base_columns=include_base_columns,
169
- )
170
- self.tbl_dependents[base_tbl_id].append(tbl)
171
-
172
- else:
173
- tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
174
- tbl = InsertableTable(tbl_record.dir_id, tbl_version)
175
-
176
- self.tbls[tbl._id] = tbl
177
- self.tbl_dependents[tbl._id] = []
178
- self.paths.add_schema_obj(tbl._dir_id, tbl_md.name, tbl)
179
-
180
- # def _load_functions(self, session: orm.Session) -> None:
181
- # # load Function metadata; doesn't load the actual callable, which can be large and is only done on-demand by the
182
- # # FunctionRegistry
183
- # q = session.query(schema.Function.id, schema.Function.dir_id, schema.Function.md) \
184
- # .where(sql.text(f"({schema.Function.__table__}.md->>'name')::text IS NOT NULL"))
185
- # for id, dir_id, md in q.all():
186
- # assert 'name' in md
187
- # name = md['name']
188
- # assert name is not None
189
- # named_fn = NamedFunction(id, dir_id, name)
190
- # self.paths.add_schema_obj(dir_id, name, named_fn)
355
+ if view_md is None:
356
+ # this is a base table
357
+ tbl_version = TableVersion(
358
+ tbl_record.id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
359
+ )
360
+ return tbl_version
361
+
362
+ assert len(view_md.base_versions) > 0 # a view needs to have a base
363
+ pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
364
+ assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it
365
+
366
+ base: TableVersionHandle
367
+ base_path: Optional[TableVersionPath] = None # needed for live view
368
+ if view_md.is_snapshot:
369
+ base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
370
+ else:
371
+ base_path = TableVersionPath.from_md(tbl_md.view_md.base_versions)
372
+ base = base_path.tbl_version
373
+
374
+ tbl_version = TableVersion(
375
+ tbl_record.id,
376
+ tbl_md,
377
+ effective_version,
378
+ schema_version_md,
379
+ base_path=base_path,
380
+ base=base,
381
+ mutable_views=mutable_views,
382
+ )
383
+ return tbl_version
384
+
385
+ def _init_store(self) -> None:
386
+ """One-time initialization of the stored catalog. Idempotent."""
387
+ with env.Env.get().begin_xact():
388
+ session = env.Env.get().session
389
+ if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
390
+ return
391
+ # create a top-level directory, so that every schema object has a directory
392
+ dir_md = schema.DirMd(name='', user=None, additional_md={})
393
+ dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
394
+ session.add(dir_record)
395
+ session.flush()
396
+ session.commit()
397
+ _logger.info(f'Initialized catalog')
pixeltable/catalog/column.py CHANGED
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import warnings
5
+ from textwrap import dedent
4
6
  from typing import TYPE_CHECKING, Any, Optional
5
7
 
6
8
  import sqlalchemy as sql
@@ -13,6 +15,7 @@ from .globals import MediaValidation, is_valid_identifier
13
15
 
14
16
  if TYPE_CHECKING:
15
17
  from .table_version import TableVersion
18
+ from .table_version_handle import TableVersionHandle
16
19
 
17
20
  _logger = logging.getLogger('pixeltable')
18
21
 
@@ -40,7 +43,7 @@ class Column:
40
43
  _value_expr: Optional[exprs.Expr]
41
44
  value_expr_dict: Optional[dict[str, Any]]
42
45
  dependent_cols: set[Column]
43
- tbl: Optional[TableVersion]
46
+ tbl: Optional[TableVersionHandle]
44
47
 
45
48
  def __init__(
46
49
  self,
@@ -129,6 +132,20 @@ class Column:
129
132
  from pixeltable import exprs
130
133
 
131
134
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
135
+ self._value_expr.bind_rel_paths()
136
+ if not self._value_expr.is_valid:
137
+ message = (
138
+ dedent(
139
+ f"""
140
+ The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
141
+ {{validation_error}}
142
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
143
+ """
144
+ )
145
+ .strip()
146
+ .format(validation_error=self._value_expr.validation_error)
147
+ )
148
+ warnings.warn(message, category=excs.PixeltableWarning)
132
149
  return self._value_expr
133
150
 
134
151
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
@@ -153,7 +170,7 @@ class Column:
153
170
 
154
171
  def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
155
172
  assert self.tbl is not None
156
- return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
173
+ return {name: info for name, info in self.tbl.get().idxs_by_name.items() if info.col == self}
157
174
 
158
175
  @property
159
176
  def is_computed(self) -> bool:
@@ -176,14 +193,14 @@ class Column:
176
193
  @property
177
194
  def qualified_name(self) -> str:
178
195
  assert self.tbl is not None
179
- return f'{self.tbl.name}.{self.name}'
196
+ return f'{self.tbl.get().name}.{self.name}'
180
197
 
181
198
  @property
182
199
  def media_validation(self) -> MediaValidation:
183
200
  if self._media_validation is not None:
184
201
  return self._media_validation
185
202
  assert self.tbl is not None
186
- return self.tbl.media_validation
203
+ return self.tbl.get().media_validation
187
204
 
188
205
  def source(self) -> None:
189
206
  """
@@ -228,7 +245,7 @@ class Column:
228
245
  return f'{self.name}: {self.col_type}'
229
246
 
230
247
  def __repr__(self) -> str:
231
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
248
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.get().name!r})'
232
249
 
233
250
  def __hash__(self) -> int:
234
251
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column