pixeltable 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable has been flagged as possibly problematic.

pixeltable/catalog/dir.py CHANGED
@@ -1,10 +1,13 @@
 from __future__ import annotations
 
 import dataclasses
+import datetime
+import json
 import logging
 from uuid import UUID
 
 import sqlalchemy as sql
+from sqlalchemy.dialects.postgresql import JSONB
 
 from pixeltable.env import Env
 from pixeltable.metadata import schema
@@ -26,6 +29,7 @@ class Dir(SchemaObject):
         dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
         session.add(dir_record)
         session.flush()
+        # print(f'{datetime.datetime.now()} create dir {dir_record}')
         assert dir_record.id is not None
         assert isinstance(dir_record.id, UUID)
         dir = cls(dir_record.id, parent_id, name)
@@ -43,11 +47,16 @@ class Dir(SchemaObject):
         return super()._path()
 
     def _move(self, new_name: str, new_dir_id: UUID) -> None:
+        # print(
+        #     f'{datetime.datetime.now()} move dir name={self._name} parent={self._dir_id} new_name={new_name} new_dir_id={new_dir_id}'
+        # )
         super()._move(new_name, new_dir_id)
-        with Env.get().engine.begin() as conn:
-            dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
-            conn.execute(
-                sql.update(schema.Dir.__table__)
-                .values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
-                .where(schema.Dir.id == self._id)
+        stmt = sql.text(
+            (
+                f'UPDATE {schema.Dir.__table__} '
+                f'SET {schema.Dir.parent_id.name} = :new_dir_id, '
+                f" {schema.Dir.md.name}['name'] = :new_name "
+                f'WHERE {schema.Dir.id.name} = :id'
            )
+        )
+        Env.get().conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
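The rewritten _move() no longer replaces the whole md document; it updates a single key in place via PostgreSQL's JSONB subscripting (available in PostgreSQL 14 and later), which is why the bound value goes through json.dumps(). A minimal sketch of the same pattern outside pixeltable; the table name demo_dirs, its columns, and the connection URL are illustrative only:

import json
import sqlalchemy as sql

# illustrative stand-in: any Postgres table "demo_dirs" with an int "id" column and a JSONB "md" column
engine = sql.create_engine('postgresql+psycopg2://localhost/demo_db')
with engine.begin() as conn:
    # JSONB subscripting in SET requires PostgreSQL 14+; the bound value must itself be valid JSON,
    # which is why the plain Python string is wrapped in json.dumps() first
    stmt = sql.text("UPDATE demo_dirs SET md['name'] = :new_name WHERE id = :id")
    conn.execute(stmt, {'new_name': json.dumps('new_dir_name'), 'id': 1})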
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import logging
+from typing import Iterator
 
 from pixeltable import exceptions as excs
 
@@ -55,5 +56,19 @@ class Path:
         is_prefix = self.components == other.components[: self.len]
         return is_prefix and (self.len == (other.len - 1) or not is_parent)
 
+    def ancestors(self) -> Iterator[Path]:
+        """
+        Return all ancestors of this path in top-down order, including the root.
+        If this path is the root directory, which has no parent, nothing is yielded.
+        """
+        if self.is_root:
+            return
+        else:
+            for i in range(0, len(self.components)):
+                yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
+
     def __str__(self) -> str:
         return '.'.join(self.components)
+
+    def __lt__(self, other: Path) -> bool:
+        return str(self) < str(other)
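For a three-component path such as 'dir1.sub.tbl', ancestors() yields the root path first and then each proper prefix, and __lt__ makes paths sortable by their dotted string. A quick illustration; the module location of Path and construction from a dotted string are assumptions based on the code above, not shown elsewhere in this diff:

from pixeltable.catalog.path import Path  # assumed module location

p = Path('dir1.sub.tbl')
print([str(a) for a in p.ancestors()])             # ['', 'dir1', 'dir1.sub']
print([str(q) for q in sorted([p, Path('a.b')])])  # ['a.b', 'dir1.sub.tbl']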
@@ -2,7 +2,7 @@ from abc import abstractmethod
 from typing import TYPE_CHECKING, Any, Optional
 from uuid import UUID
 
-import pixeltable.env as env
+from pixeltable.env import Env
 
 if TYPE_CHECKING:
     from pixeltable import catalog
@@ -28,24 +28,19 @@ class SchemaObject:
         """Returns the parent directory of this schema object."""
         from .catalog import Catalog
 
-        with env.Env.get().begin_xact():
+        with Env.get().begin_xact():
             if self._dir_id is None:
                 return None
             return Catalog.get().get_dir(self._dir_id)
 
     def _path(self) -> str:
         """Returns the path to this schema object."""
-        with env.Env.get().begin_xact():
-            from .catalog import Catalog
+        from .catalog import Catalog
 
-            cat = Catalog.get()
-            dir_path = cat.get_dir_path(self._dir_id)
-            if dir_path == '':
-                # Either this is the root directory, with empty path, or its parent is the
-                # root directory. Either way, we return just the name.
-                return self._name
-            else:
-                return f'{dir_path}.{self._name}'
+        assert self._dir_id is not None
+        with Env.get().begin_xact():
+            path = Catalog.get().get_dir_path(self._dir_id)
+            return str(path.append(self._name))
 
     def get_metadata(self) -> dict[str, Any]:
         """Returns metadata associated with this schema object."""
@@ -171,8 +171,8 @@ class Table(SchemaObject):
 
     def _get_views(self, *, recursive: bool = True) -> list['Table']:
         cat = catalog.Catalog.get()
-        view_ids = cat.get_views(self._id)
-        views = [cat.get_tbl(id) for id in view_ids]
+        view_ids = cat.get_view_ids(self._id)
+        views = [cat.get_table_by_id(id) for id in view_ids]
         if recursive:
             views.extend([t for view in views for t in view._get_views(recursive=True)])
         return views
@@ -265,7 +265,7 @@ class Table(SchemaObject):
         if self._tbl_version_path.base is None:
             return None
         base_id = self._tbl_version_path.base.tbl_version.id
-        return catalog.Catalog.get().get_tbl(base_id)
+        return catalog.Catalog.get().get_table_by_id(base_id)
 
     @property
     def _bases(self) -> list['Table']:
@@ -369,11 +369,6 @@ class Table(SchemaObject):
             pd_rows.append(row)
         return pd.DataFrame(pd_rows)
 
-    def ensure_md_loaded(self) -> None:
-        """Ensure that table metadata is loaded."""
-        for col in self._tbl_version.get().cols_by_id.values():
-            _ = col.value_expr
-
     def describe(self) -> None:
         """
         Print the table schema.
@@ -387,13 +382,9 @@ class Table(SchemaObject):
         print(repr(self))
 
     def _drop(self) -> None:
-        cat = catalog.Catalog.get()
         self._check_is_dropped()
         self._tbl_version.get().drop()
         self._is_dropped = True
-        # update catalog
-        cat = catalog.Catalog.get()
-        cat.remove_tbl(self._id)
 
     # TODO Factor this out into a separate module.
     # The return type is unresolvable, but torch can't be imported since it's an optional dependency.
@@ -177,10 +177,6 @@ class TableVersion:
         # Init external stores (this needs to happen after the schema is created)
         self._init_external_stores(tbl_md)
 
-        # Force column metadata to load, in order to surface any invalid metadata now (as warnings)
-        for col in self.cols_by_id.values():
-            _ = col.value_expr
-
     def __hash__(self) -> int:
         return hash(self.id)
 
@@ -458,6 +454,11 @@ class TableVersion:
             )
         )
 
+    def ensure_md_loaded(self) -> None:
+        """Ensure that table metadata is loaded."""
+        for col in self.cols_by_id.values():
+            _ = col.value_expr
+
     def _store_idx_name(self, idx_id: int) -> str:
         """Return name of index in the store, which needs to be globally unique"""
         return f'idx_{self.id.hex}_{idx_id}'
@@ -237,15 +237,11 @@ class View(Table):
         )
 
     def _drop(self) -> None:
-        cat = catalog.Catalog.get()
         if self._snapshot_only:
             # there is no TableVersion to drop
             self._check_is_dropped()
             self.is_dropped = True
             TableVersion.delete_md(self._id)
-            # update catalog
-            cat = catalog.Catalog.get()
-            cat.remove_tbl(self._id)
         else:
             super()._drop()
 
pixeltable/env.py CHANGED
@@ -170,19 +170,25 @@ class Env:
         assert self._current_session is not None
         return self._current_session
 
+    def in_xact(self) -> bool:
+        return self._current_conn is not None
+
     @contextmanager
     def begin_xact(self) -> Iterator[sql.Connection]:
         """Return a context manager that yields a connection to the database. Idempotent."""
         if self._current_conn is None:
             assert self._current_session is None
-            with self.engine.begin() as conn, sql.orm.Session(conn) as session:
-                self._current_conn = conn
-                self._current_session = session
-                try:
+            try:
+                with self.engine.begin() as conn, sql.orm.Session(conn) as session:
+                    # TODO: remove print() once we're done with debugging the concurrent update behavior
+                    # print(f'{datetime.datetime.now()}: start xact')
+                    self._current_conn = conn
+                    self._current_session = session
                     yield conn
-                finally:
-                    self._current_session = None
-                    self._current_conn = None
+            finally:
+                self._current_session = None
+                self._current_conn = None
+                # print(f'{datetime.datetime.now()}: end xact')
         else:
             assert self._current_session is not None
             yield self._current_conn
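begin_xact() remains idempotent after this restructuring: only the outermost caller opens a transaction, nested calls are handed the current connection, and the new in_xact() reports whether one is open. A rough usage sketch, assuming a pixeltable environment has already been set up and that no transaction is open when it starts; the SELECT is an arbitrary placeholder:

import sqlalchemy as sql
from pixeltable.env import Env

env = Env.get()
with env.begin_xact() as conn:            # outermost call: opens the transaction
    assert env.in_xact()
    with env.begin_xact() as inner_conn:  # nested call: no new transaction, same connection is yielded
        assert inner_conn is conn
        inner_conn.execute(sql.text('SELECT 1'))
assert not env.in_xact()                  # connection and session are cleared in the finally block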
@@ -391,7 +397,7 @@ class Env:
     def _create_engine(self, time_zone_name: Optional[str], echo: bool = False) -> None:
         connect_args = {} if time_zone_name is None else {'options': f'-c timezone={time_zone_name}'}
         self._sa_engine = sql.create_engine(
-            self.db_url, echo=echo, future=True, isolation_level='REPEATABLE READ', connect_args=connect_args
+            self.db_url, echo=echo, isolation_level='REPEATABLE READ', connect_args=connect_args
         )
         self._logger.info(f'Created SQLAlchemy engine at: {self.db_url}')
         with self.engine.begin() as conn:
@@ -137,7 +137,7 @@ class ColumnRef(Expr):
         return self.col == other.col and self.perform_validation == other.perform_validation
 
     def _df(self) -> 'pxt.dataframe.DataFrame':
-        tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
+        tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
         return tbl.select(self)
 
     def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
@@ -165,7 +165,7 @@ class ColumnRef(Expr):
         return self._descriptors().to_html()
 
     def _descriptors(self) -> DescriptionHelper:
-        tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
+        tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
         helper = DescriptionHelper()
         helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
         helper.append(tbl._col_descriptor([self.col.name]))
pixeltable/exprs/expr.py CHANGED
@@ -90,14 +90,29 @@ class Expr(abc.ABC):
            result = c_scope
        return result
 
-    def bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
+    def bind_rel_paths(self) -> None:
         """
         Binds relative JsonPaths to mapper.
         This needs to be done in a separate phase after __init__(), because RelativeJsonPath()(-1) cannot be resolved
         by the immediately containing JsonMapper during initialization.
         """
+        self._bind_rel_paths()
+        assert not self._has_relative_path, self._expr_tree()
+
+    def _bind_rel_paths(self, mapper: Optional['exprs.JsonMapper'] = None) -> None:
+        for c in self.components:
+            c._bind_rel_paths(mapper)
+
+    def _expr_tree(self) -> str:
+        """Returns a string representation of this expression as a multi-line tree. Useful for debugging."""
+        buf: list[str] = []
+        self._expr_tree_r(0, buf)
+        return '\n'.join(buf)
+
+    def _expr_tree_r(self, indent: int, buf: list[str]) -> None:
+        buf.append(f'{" " * indent}{type(self).__name__}: {self}'.replace('\n', '\\n'))
         for c in self.components:
-            c.bind_rel_paths(mapper)
+            c._expr_tree_r(indent + 2, buf)
 
     def default_column_name(self) -> Optional[str]:
         """
@@ -355,6 +370,10 @@ class Expr(abc.ABC):
         except StopIteration:
             return False
 
+    @property
+    def _has_relative_path(self) -> bool:
+        return any(c._has_relative_path for c in self.components)
+
     def tbl_ids(self) -> set[UUID]:
         """Returns table ids referenced by this expr."""
         from .column_ref import ColumnRef
@@ -514,7 +533,7 @@ class Expr(abc.ABC):
 
     @classmethod
     def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
-        raise AssertionError('not implemented')
+        raise AssertionError(f'not implemented: {cls.__name__}')
 
     def isin(self, value_set: Any) -> 'exprs.InPredicate':
         from .in_predicate import InPredicate
@@ -360,10 +360,7 @@ class FunctionCall(Expr):
             return
         args, kwargs = args_kwargs
 
-        if isinstance(self.fn, func.CallableFunction) and not self.fn.is_batched:
-            # optimization: avoid additional level of indirection we'd get from calling Function.exec()
-            data_row[self.slot_idx] = self.fn.py_fn(*args, **kwargs)
-        elif self.is_window_fn_call:
+        if self.is_window_fn_call:
             assert isinstance(self.fn, func.AggregateFunction)
             agg_cls = self.fn.agg_class
             if self.has_group_by():
@@ -48,9 +48,9 @@ class JsonMapper(Expr):
         scope_anchor = ObjectRef(self.target_expr_scope, self)
         self.components.append(scope_anchor)
 
-    def bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
-        self._src_expr.bind_rel_paths(mapper)
-        self._target_expr.bind_rel_paths(self)
+    def _bind_rel_paths(self, mapper: Optional[JsonMapper] = None) -> None:
+        self._src_expr._bind_rel_paths(mapper)
+        self._target_expr._bind_rel_paths(self)
         self.parent_mapper = mapper
         parent_scope = _GLOBAL_SCOPE if mapper is None else mapper.target_expr_scope
         self.target_expr_scope.parent = parent_scope
@@ -80,11 +80,16 @@ class JsonPath(Expr):
     def is_relative_path(self) -> bool:
         return self._anchor is None
 
-    def bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
-        if not self.is_relative_path():
-            return
-        # TODO: take scope_idx into account
-        self.set_anchor(mapper.scope_anchor)
+    @property
+    def _has_relative_path(self) -> bool:
+        return self.is_relative_path() or super()._has_relative_path
+
+    def _bind_rel_paths(self, mapper: Optional['JsonMapper'] = None) -> None:
+        if self.is_relative_path():
+            # TODO: take scope_idx into account
+            self.set_anchor(mapper.scope_anchor)
+        else:
+            self._anchor._bind_rel_paths(mapper)
 
     def __call__(self, *args: object, **kwargs: object) -> 'JsonPath':
         """
@@ -8,6 +8,7 @@ from uuid import UUID
 import cloudpickle  # type: ignore[import-untyped]
 
 import pixeltable.exceptions as excs
+from pixeltable.utils.coroutine import run_coroutine_synchronously
 
 from .function import Function
 from .signature import Signature
@@ -93,13 +94,15 @@ class CallableFunction(Function):
             batched_kwargs = {k: [v] for k, v in kwargs.items() if k not in constant_param_names}
             result: list[Any]
             if inspect.iscoroutinefunction(self.py_fn):
-                result = asyncio.run(self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs))
+                # TODO: This is temporary (see note in utils/coroutine.py)
+                result = run_coroutine_synchronously(self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs))
             else:
                 result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
             assert len(result) == 1
             return result[0]
         elif inspect.iscoroutinefunction(self.py_fn):
-            return asyncio.run(self.py_fn(*args, **kwargs))
+            # TODO: This is temporary (see note in utils/coroutine.py)
+            return run_coroutine_synchronously(self.py_fn(*args, **kwargs))
         else:
             return self.py_fn(*args, **kwargs)
 
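CallableFunction now routes coroutine functions through run_coroutine_synchronously instead of calling asyncio.run() directly; the actual helper lives in utils/coroutine.py, which is not part of this diff. Shown below is only a hedged sketch of the usual way such a helper is written (use asyncio.run() when no event loop is active, otherwise run the coroutine on a fresh loop in a worker thread so an already-running loop, e.g. in a notebook, is not re-entered), not pixeltable's implementation:

import asyncio
import threading
from typing import Any, Coroutine

def run_coroutine_synchronously(coro: Coroutine[Any, Any, Any]) -> Any:
    # sketch only: not pixeltable's actual utils/coroutine.py
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # no event loop running in this thread: asyncio.run() is safe
        return asyncio.run(coro)
    # a loop is already running (e.g. Jupyter): run the coroutine on a fresh loop in a worker thread
    result: list[Any] = []
    error: list[BaseException] = []

    def _worker() -> None:
        try:
            result.append(asyncio.run(coro))
        except BaseException as exc:
            error.append(exc)

    thread = threading.Thread(target=_worker)
    thread.start()
    thread.join()
    if error:
        raise error[0]
    return result[0]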
@@ -17,8 +17,6 @@ class QueryTemplateFunction(Function):
 
     template_df: Optional['DataFrame']
     self_name: Optional[str]
-    # conn: Optional[sql.engine.Connection]
-    defaults: dict[str, exprs.Literal]
 
     @classmethod
     def create(
@@ -46,20 +44,6 @@ class QueryTemplateFunction(Function):
         self.self_name = name
         self.template_df = template_df
 
-        # if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
-        # we end up with a deadlock
-        # TODO: figure out a more general way to make execution state available
-        # self.conn = None
-
-        # convert defaults to Literals
-        self.defaults = {}  # key: param name, value: default value converted to a Literal
-        param_types = self.template_df.parameters()
-        for param in [p for p in sig.parameters.values() if p.has_default()]:
-            assert param.name in param_types
-            param_type = param_types[param.name]
-            literal_default = exprs.Literal(param.default, col_type=param_type)
-            self.defaults[param.name] = literal_default
-
     def _update_as_overload_resolution(self, signature_idx: int) -> None:
         pass  # only one signature supported for QueryTemplateFunction
 
@@ -72,7 +56,11 @@ class QueryTemplateFunction(Function):
         bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
         # apply defaults, otherwise we might have Parameters left over
         bound_args.update(
-            {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args}
+            {
+                param.name: param.default
+                for param in self.signature.parameters.values()
+                if param.has_default() and param.name not in bound_args
+            }
         )
         bound_df = self.template_df.bind(bound_args)
         result = await bound_df._acollect()
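With the precomputed Literal defaults gone, parameters that have defaults and were not bound by the caller are now filled in at call time straight from the signature. In isolation the comprehension behaves like this; the Param dataclass below is a toy stand-in, not pixeltable's Signature or Parameter classes:

from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class Param:  # toy stand-in for a signature parameter
    name: str
    default: Optional[Any] = None

    def has_default(self) -> bool:
        return self.default is not None

parameters = {'query': Param('query'), 'limit': Param('limit', 10)}
bound_args = {'query': 'cats'}
bound_args.update(
    {p.name: p.default for p in parameters.values() if p.has_default() and p.name not in bound_args}
)
print(bound_args)  # {'query': 'cats', 'limit': 10}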
@@ -87,7 +75,7 @@ class QueryTemplateFunction(Function):
         return self.self_name
 
     def _as_dict(self) -> dict:
-        return {'name': self.name, 'signature': self.signatures[0].as_dict(), 'df': self.template_df.as_dict()}
+        return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
 
     @classmethod
     def _from_dict(cls, d: dict) -> Function: