pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (94)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +21 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +195 -158
  11. pixeltable/catalog/table_version.py +187 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +90 -90
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/column_ref.py +9 -9
  29. pixeltable/exprs/comparison.py +1 -1
  30. pixeltable/exprs/data_row.py +4 -4
  31. pixeltable/exprs/expr.py +20 -5
  32. pixeltable/exprs/function_call.py +98 -58
  33. pixeltable/exprs/json_mapper.py +25 -8
  34. pixeltable/exprs/json_path.py +6 -5
  35. pixeltable/exprs/object_ref.py +16 -5
  36. pixeltable/exprs/row_builder.py +15 -15
  37. pixeltable/exprs/rowid_ref.py +21 -7
  38. pixeltable/func/__init__.py +1 -1
  39. pixeltable/func/function.py +38 -6
  40. pixeltable/func/query_template_function.py +3 -6
  41. pixeltable/func/tools.py +26 -26
  42. pixeltable/func/udf.py +1 -1
  43. pixeltable/functions/__init__.py +2 -0
  44. pixeltable/functions/anthropic.py +9 -3
  45. pixeltable/functions/fireworks.py +7 -4
  46. pixeltable/functions/globals.py +4 -5
  47. pixeltable/functions/huggingface.py +1 -5
  48. pixeltable/functions/image.py +17 -7
  49. pixeltable/functions/llama_cpp.py +1 -1
  50. pixeltable/functions/mistralai.py +1 -1
  51. pixeltable/functions/ollama.py +4 -4
  52. pixeltable/functions/openai.py +26 -23
  53. pixeltable/functions/string.py +23 -30
  54. pixeltable/functions/timestamp.py +11 -6
  55. pixeltable/functions/together.py +14 -12
  56. pixeltable/functions/util.py +1 -1
  57. pixeltable/functions/video.py +5 -4
  58. pixeltable/functions/vision.py +6 -9
  59. pixeltable/functions/whisper.py +3 -3
  60. pixeltable/globals.py +246 -260
  61. pixeltable/index/__init__.py +2 -0
  62. pixeltable/index/base.py +1 -1
  63. pixeltable/index/btree.py +3 -1
  64. pixeltable/index/embedding_index.py +11 -5
  65. pixeltable/io/external_store.py +11 -12
  66. pixeltable/io/label_studio.py +4 -3
  67. pixeltable/io/parquet.py +57 -56
  68. pixeltable/iterators/__init__.py +4 -2
  69. pixeltable/iterators/audio.py +11 -11
  70. pixeltable/iterators/document.py +10 -10
  71. pixeltable/iterators/string.py +1 -2
  72. pixeltable/iterators/video.py +14 -15
  73. pixeltable/metadata/__init__.py +9 -5
  74. pixeltable/metadata/converters/convert_10.py +0 -1
  75. pixeltable/metadata/converters/convert_15.py +0 -2
  76. pixeltable/metadata/converters/convert_23.py +0 -2
  77. pixeltable/metadata/converters/convert_24.py +3 -3
  78. pixeltable/metadata/converters/convert_25.py +1 -1
  79. pixeltable/metadata/converters/convert_27.py +0 -2
  80. pixeltable/metadata/converters/convert_28.py +0 -2
  81. pixeltable/metadata/converters/convert_29.py +7 -8
  82. pixeltable/metadata/converters/util.py +7 -7
  83. pixeltable/metadata/schema.py +27 -19
  84. pixeltable/plan.py +68 -40
  85. pixeltable/share/packager.py +12 -9
  86. pixeltable/store.py +37 -38
  87. pixeltable/type_system.py +41 -28
  88. pixeltable/utils/filecache.py +2 -1
  89. {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/METADATA +1 -1
  90. pixeltable-0.3.7.dist-info/RECORD +174 -0
  91. pixeltable-0.3.5.dist-info/RECORD +0 -172
  92. {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/LICENSE +0 -0
  93. {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/WHEEL +0 -0
  94. {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/dir.py CHANGED
@@ -18,16 +18,29 @@ class Dir(SchemaObject):
18
18
  def __init__(self, id: UUID, parent_id: UUID, name: str):
19
19
  super().__init__(id, name, parent_id)
20
20
 
21
+ @classmethod
22
+ def _create(cls, parent_id: UUID, name: str) -> Dir:
23
+ session = Env.get().session
24
+ assert session is not None
25
+ dir_md = schema.DirMd(name=name, user=None, additional_md={})
26
+ dir_record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(dir_md))
27
+ session.add(dir_record)
28
+ session.flush()
29
+ assert dir_record.id is not None
30
+ assert isinstance(dir_record.id, UUID)
31
+ dir = cls(dir_record.id, parent_id, name)
32
+ return dir
33
+
21
34
  @classmethod
22
35
  def _display_name(cls) -> str:
23
36
  return 'directory'
24
37
 
25
- @property
26
- def _has_dependents(self) -> bool:
27
- """Returns True if this directory has any children."""
28
- from pixeltable.catalog import Catalog, Path
29
-
30
- return len(Catalog.get().paths.get_children(Path(self._path), child_type=None, recursive=False)) > 0
38
+ def _path(self) -> str:
39
+ """Returns the path to this schema object."""
40
+ if self._dir_id is None:
41
+ # we're the root dir
42
+ return ''
43
+ return super()._path()
31
44
 
32
45
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
33
46
  super()._move(new_name, new_dir_id)
pixeltable/catalog/insertable_table.py CHANGED
@@ -4,18 +4,16 @@ import logging
4
4
  from typing import Any, Iterable, Literal, Optional, overload
5
5
  from uuid import UUID
6
6
 
7
- import sqlalchemy.orm as orm
8
-
9
7
  import pixeltable as pxt
10
8
  import pixeltable.type_system as ts
11
9
  from pixeltable import exceptions as excs
12
10
  from pixeltable.env import Env
13
11
  from pixeltable.utils.filecache import FileCache
14
12
 
15
- from .catalog import Catalog
16
13
  from .globals import MediaValidation, UpdateStatus
17
14
  from .table import Table
18
15
  from .table_version import TableVersion
16
+ from .table_version_handle import TableVersionHandle
19
17
  from .table_version_path import TableVersionPath
20
18
 
21
19
  _logger = logging.getLogger('pixeltable')
@@ -24,15 +22,14 @@ _logger = logging.getLogger('pixeltable')
24
22
  class InsertableTable(Table):
25
23
  """A `Table` that allows inserting and deleting rows."""
26
24
 
27
- def __init__(self, dir_id: UUID, tbl_version: TableVersion):
25
+ def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
28
26
  tbl_version_path = TableVersionPath(tbl_version)
29
- super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
27
+ super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
30
28
 
31
29
  @classmethod
32
30
  def _display_name(cls) -> str:
33
31
  return 'table'
34
32
 
35
- # MODULE-LOCAL, NOT PUBLIC
36
33
  @classmethod
37
34
  def _create(
38
35
  cls,
@@ -56,33 +53,29 @@ class InsertableTable(Table):
56
53
  raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
57
54
  col.is_pk = True
58
55
 
59
- with orm.Session(Env.get().engine, future=True) as session:
60
- _, tbl_version = TableVersion.create(
61
- session,
62
- dir_id,
63
- name,
64
- columns,
65
- num_retained_versions=num_retained_versions,
66
- comment=comment,
67
- media_validation=media_validation,
68
- )
69
- tbl = cls(dir_id, tbl_version)
70
- # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
71
- # when the table metadata gets updated. Once we have a notion of user-defined transactions in
72
- # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
73
- session.commit()
74
- if df is not None:
75
- # A DataFrame was provided, so insert its contents into the table
76
- # (using the same DB session as the table creation)
77
- tbl_version.insert(None, df, conn=session.connection(), fail_on_exception=True)
78
- session.commit()
79
- cat = Catalog.get()
80
- cat.tbl_dependents[tbl._id] = []
81
- cat.tbls[tbl._id] = tbl
82
-
83
- _logger.info(f'Created table `{name}`, id={tbl_version.id}')
84
- Env.get().console_logger.info(f'Created table `{name}`.')
85
- return tbl
56
+ _, tbl_version = TableVersion.create(
57
+ dir_id,
58
+ name,
59
+ columns,
60
+ num_retained_versions=num_retained_versions,
61
+ comment=comment,
62
+ media_validation=media_validation,
63
+ )
64
+ tbl = cls(dir_id, TableVersionHandle.create(tbl_version))
65
+ # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
66
+ # when the table metadata gets updated. Once we have a notion of user-defined transactions in
67
+ # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
68
+ session = Env.get().session
69
+ session.commit()
70
+ if df is not None:
71
+ # A DataFrame was provided, so insert its contents into the table
72
+ # (using the same DB session as the table creation)
73
+ tbl_version.insert(None, df, fail_on_exception=True)
74
+ session.commit()
75
+
76
+ _logger.info(f'Created table `{name}`, id={tbl_version.id}')
77
+ Env.get().console_logger.info(f'Created table `{name}`.')
78
+ return tbl
86
79
 
87
80
  def get_metadata(self) -> dict[str, Any]:
88
81
  md = super().get_metadata()
@@ -131,7 +124,10 @@ class InsertableTable(Table):
131
124
  if not isinstance(row, dict):
132
125
  raise excs.Error('rows must be a list of dictionaries')
133
126
  self._validate_input_rows(rows)
134
- status = self._tbl_version.insert(rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception)
127
+ with Env.get().begin_xact():
128
+ status = self._tbl_version.get().insert(
129
+ rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
130
+ )
135
131
 
136
132
  if status.num_excs == 0:
137
133
  cols_with_excs_str = ''
@@ -152,8 +148,8 @@ class InsertableTable(Table):
152
148
  def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
153
149
  """Verify that the input rows match the table schema"""
154
150
  valid_col_names = set(self._schema.keys())
155
- reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
156
- computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
151
+ reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
152
+ computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
157
153
  for row in rows:
158
154
  assert isinstance(row, dict)
159
155
  col_names = set(row.keys())
@@ -191,4 +187,5 @@ class InsertableTable(Table):
191
187
 
192
188
  >>> tbl.delete(tbl.a > 5)
193
189
  """
194
- return self._tbl_version.delete(where=where)
190
+ with Env.get().begin_xact():
191
+ return self._tbl_version.get().delete(where=where)
pixeltable/catalog/named_function.py CHANGED
@@ -27,10 +27,6 @@ class NamedFunction(SchemaObject):
27
27
  def _display_name(cls) -> str:
28
28
  return 'function'
29
29
 
30
- @property
31
- def _has_dependents(self) -> bool:
32
- return False
33
-
34
30
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
35
31
  super()._move(new_name, new_dir_id)
36
32
  with Env.get().engine.begin() as conn:
pixeltable/catalog/schema_object.py CHANGED
@@ -2,6 +2,8 @@ from abc import abstractmethod
2
2
  from typing import TYPE_CHECKING, Any, Optional
3
3
  from uuid import UUID
4
4
 
5
+ import pixeltable.env as env
6
+
5
7
  if TYPE_CHECKING:
6
8
  from pixeltable import catalog
7
9
 
@@ -12,53 +14,42 @@ class SchemaObject:
12
14
  Each object has an id, a name and a parent directory.
13
15
  """
14
16
 
17
+ _id: UUID
18
+ _name: str
19
+ _dir_id: Optional[UUID]
20
+
15
21
  def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
16
22
  # make these private so they don't collide with column names (id and name are fairly common)
17
- self.__id = obj_id
18
- self.__name = name
19
- self.__dir_id = dir_id
20
-
21
- @property
22
- def _id(self) -> UUID:
23
- return self.__id
24
-
25
- @property
26
- def _name(self) -> str:
27
- return self.__name
23
+ self._id = obj_id
24
+ self._name = name
25
+ self._dir_id = dir_id
28
26
 
29
- @property
30
- def _dir_id(self) -> Optional[UUID]:
31
- return self.__dir_id
32
-
33
- @property
34
27
  def _parent(self) -> Optional['catalog.Dir']:
35
28
  """Returns the parent directory of this schema object."""
36
- from pixeltable import catalog
29
+ from .catalog import Catalog
37
30
 
38
- if self._dir_id is None:
39
- return None
40
- dir = catalog.Catalog.get().paths.get_schema_obj(self._dir_id)
41
- assert isinstance(dir, catalog.Dir)
42
- return dir
31
+ with env.Env.get().begin_xact():
32
+ if self._dir_id is None:
33
+ return None
34
+ return Catalog.get().get_dir(self._dir_id)
43
35
 
44
- @property
45
36
  def _path(self) -> str:
46
37
  """Returns the path to this schema object."""
47
- parent = self._parent
48
- if parent is None or parent._parent is None:
49
- # Either this is the root directory, with empty path, or its parent is the
50
- # root directory. Either way, we return just the name.
51
- return self._name
52
- else:
53
- return f'{parent._path}.{self._name}'
38
+ with env.Env.get().begin_xact():
39
+ from .catalog import Catalog
40
+
41
+ cat = Catalog.get()
42
+ dir_path = cat.get_dir_path(self._dir_id)
43
+ if dir_path == '':
44
+ # Either this is the root directory, with empty path, or its parent is the
45
+ # root directory. Either way, we return just the name.
46
+ return self._name
47
+ else:
48
+ return f'{dir_path}.{self._name}'
54
49
 
55
50
  def get_metadata(self) -> dict[str, Any]:
56
51
  """Returns metadata associated with this schema object."""
57
- return {
58
- 'name': self._name,
59
- 'path': self._path,
60
- 'parent': self._parent._path if self._parent is not None else None,
61
- }
52
+ return {'name': self._name, 'path': self._path()}
62
53
 
63
54
  @classmethod
64
55
  @abstractmethod
@@ -68,12 +59,7 @@ class SchemaObject:
68
59
  """
69
60
  pass
70
61
 
71
- @property
72
- @abstractmethod
73
- def _has_dependents(self) -> bool:
74
- """Returns True if this object has dependents (e.g., children, views)"""
75
-
76
62
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
77
63
  """Subclasses need to override this to make the change persistent"""
78
- self.__name = new_name
79
- self.__dir_id = new_dir_id
64
+ self._name = new_name
65
+ self._dir_id = new_dir_id