pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140) hide show
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.0.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.0.dist-info/METADATA +117 -0
  124. pixeltable-0.2.0.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Optional, List, Union
5
+ from uuid import UUID
6
+
7
+ import pixeltable
8
+ from .column import Column
9
+ from .globals import POS_COLUMN_NAME
10
+ from .table_version import TableVersion
11
+
12
# module-level logger, obtained under the package-wide 'pixeltable' logger name
_logger = logging.getLogger('pixeltable')
13
+
14
class TableVersionPath:
    """
    A TableVersionPath represents the sequence of TableVersions from a base table to a particular view:
    - for a base table: only includes that TableVersion
    - for a view: includes the TableVersion for the view and all its bases
    - multiple snapshots can reference the same TableVersion, but with different bases, which means that the
      graph of TableVersions is a DAG, not a tree (which is why we cannot embed the DAG into TableVersion directly)

    TableVersionPath contains all metadata needed to execute queries and updates against a particular version of a
    table/view.

    The path is a singly-linked list: `tbl_version` is the head and `base` (another TableVersionPath or None)
    is the remainder of the chain.
    """

    def __init__(self, tbl_version: 'TableVersion', base: Optional['TableVersionPath'] = None):
        assert tbl_version is not None
        self.tbl_version = tbl_version
        self.base = base

    def tbl_id(self) -> UUID:
        """Return the id of the table/view that this path represents"""
        return self.tbl_version.id

    def version(self) -> int:
        """Return the version of the table/view that this path represents"""
        return self.tbl_version.version

    def tbl_name(self) -> str:
        """Return the name of the table/view that this path represents"""
        return self.tbl_version.name

    def path_len(self) -> int:
        """Return the length of the path (this TableVersion plus all of its bases)"""
        return 1 if self.base is None else 1 + self.base.path_len()

    def is_snapshot(self) -> bool:
        """Return True if this is a path of snapshot versions (every TableVersion in the chain is a snapshot)"""
        if not self.tbl_version.is_snapshot:
            return False
        return self.base.is_snapshot() if self.base is not None else True

    def is_view(self) -> bool:
        """Delegate to the head TableVersion's is_view()"""
        return self.tbl_version.is_view()

    def is_component_view(self) -> bool:
        """Delegate to the head TableVersion's is_component_view()"""
        return self.tbl_version.is_component_view()

    def is_insertable(self) -> bool:
        """Delegate to the head TableVersion's is_insertable()"""
        return self.tbl_version.is_insertable()

    def get_tbl_versions(self) -> List['TableVersion']:
        """Return all tbl versions, starting with this path's own TableVersion and followed by its bases"""
        if self.base is None:
            return [self.tbl_version]
        return [self.tbl_version] + self.base.get_tbl_versions()

    def get_bases(self) -> List['TableVersion']:
        """Return the tbl versions of all bases, i.e., everything in the path except its own TableVersion"""
        if self.base is None:
            return []
        return self.base.get_tbl_versions()

    def find_tbl_version(self, id: UUID) -> Optional['TableVersion']:
        """Return the matching TableVersion in the chain of TableVersions, starting with this one"""
        if self.tbl_version.id == id:
            return self.tbl_version
        if self.base is None:
            return None
        return self.base.find_tbl_version(id)

    def __getattr__(self, col_name: str) -> 'pixeltable.exprs.ColumnRef':
        """Return a ColumnRef for the given column name.

        The name is resolved against this TableVersion first and then against the bases. For a component view,
        POS_COLUMN_NAME resolves to a RowidRef instead.

        Raises:
            AttributeError: if no column of that name is found anywhere in the path, or if the instance has not
                been initialized yet.
        """
        # __getattr__ only runs after normal attribute lookup has failed. If our own attributes are not set yet
        # (instance created without __init__, which is what copy/pickle do before probing for __setstate__),
        # reading self.tbl_version/self.base below would re-enter this method until RecursionError; report a
        # regular missing attribute instead.
        state = self.__dict__
        if 'tbl_version' not in state or 'base' not in state:
            raise AttributeError(f'{type(self).__name__!r} object has no attribute {col_name!r}')

        from pixeltable.exprs import ColumnRef, RowidRef
        if col_name == POS_COLUMN_NAME and self.is_component_view():
            return RowidRef(self.tbl_version, self.tbl_version.store_tbl.pos_col_idx)
        if col_name not in self.tbl_version.cols_by_name:
            if self.base is None:
                raise AttributeError(f'Column {col_name} unknown')
            return getattr(self.base, col_name)
        col = self.tbl_version.cols_by_name[col_name]
        return ColumnRef(col)

    def __getitem__(self, index: object) -> Union['pixeltable.exprs.ColumnRef', 'pixeltable.dataframe.DataFrame']:
        """Return a ColumnRef for the given column name, or a DataFrame for the given slice.
        """
        if isinstance(index, str):
            # basically <tbl>.<colname>
            return self.__getattr__(index)
        from pixeltable.dataframe import DataFrame
        return DataFrame(self).__getitem__(index)

    def columns(self) -> List['Column']:
        """Return all columns visible in this tbl version path, including columns from bases"""
        result = self.tbl_version.cols.copy()
        if self.base is not None:
            base_cols = self.base.columns()
            # we only include base columns that don't conflict with one of our column names
            result.extend([c for c in base_cols if c.name not in self.tbl_version.cols_by_name])
        return result

    def get_column(self, name: str, include_bases: bool = True) -> Optional['Column']:
        """Return the column with the given name, or None if not found

        With include_bases=False only this path's own TableVersion is searched.
        """
        col = self.tbl_version.cols_by_name.get(name)
        if col is not None:
            return col
        elif self.base is not None and include_bases:
            return self.base.get_column(name)
        else:
            return None

    def has_column(self, col: 'Column', include_bases: bool = True) -> bool:
        """Return True if this table has the given column.

        A column matches when its table id and effective version equal those of a TableVersion in the path and
        its column id is registered in that TableVersion. With include_bases=False only this path's own
        TableVersion is checked.
        """
        assert col.tbl is not None
        if col.tbl.id == self.tbl_version.id and col.tbl.effective_version == self.tbl_version.effective_version \
                and col.id in self.tbl_version.cols_by_id:
            # the column is visible in this table version
            return True
        elif self.base is not None and include_bases:
            return self.base.has_column(col)
        else:
            return False
@@ -0,0 +1,203 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ from typing import List, Optional, Type, Dict, Set, Any
4
+ from uuid import UUID
5
+ import inspect
6
+
7
+ import sqlalchemy.orm as orm
8
+
9
+ from .table import Table
10
+ from .table_version import TableVersion
11
+ from .table_version_path import TableVersionPath
12
+ from .column import Column
13
+ from .catalog import Catalog
14
+ from .globals import POS_COLUMN_NAME
15
+ from pixeltable.env import Env
16
+ from pixeltable.iterators import ComponentIterator
17
+ from pixeltable.exceptions import Error
18
+ import pixeltable.func as func
19
+ import pixeltable.type_system as ts
20
+ import pixeltable.catalog as catalog
21
+ import pixeltable.metadata.schema as md_schema
22
+ from pixeltable.type_system import InvalidType, IntType
23
+ import pixeltable.exceptions as excs
24
+
25
+
26
# module-level logger, obtained under the package-wide 'pixeltable' logger name
_logger = logging.getLogger('pixeltable')
27
+
28
class View(Table):
    """A `Table` that presents a virtual view of another table (or view).

    A view is typically backed by a store table, which records the view's columns and is joined back to the bases
    at query execution time.
    The exception is a snapshot view without a predicate and without additional columns: in that case, the view
    is simply a reference to a specific set of base versions.
    """
    def __init__(
            self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base: Table,
            snapshot_only: bool):
        super().__init__(id, dir_id, name, tbl_version_path)
        self._base = base  # keep a reference to the base Table, so that we can keep track of its dependents
        # True if no TableVersion was created for this view (it only references its base versions)
        self._snapshot_only = snapshot_only

    @classmethod
    def display_name(cls) -> str:
        return 'view'

    @classmethod
    def create(
            cls, dir_id: UUID, name: str, base: Table, schema: Dict[str, Any],
            predicate: 'exprs.Predicate', is_snapshot: bool, num_retained_versions: int, comment: str,
            iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
    ) -> View:
        """Create a view over `base`, record it in the store and register it with the Catalog.

        Steps:
        - build and verify the columns described by `schema`
        - check that `predicate` and all computed-column value exprs are bound by the base's tbl version path
        - if `iterator_cls` is given: validate `iterator_args` against the iterator's constructor and input
          schema, and prepend the pos column and the iterator's output columns to the view's columns
        - if `is_snapshot`: switch to a snapshot path of the base and retarget all exprs to it
        - create the TableVersion; if one is returned, run the view load plan to populate the store table

        Raises:
            Error: if the predicate or a value expr cannot be computed in the context of the base, if the
                iterator cannot be instantiated with `iterator_args`, or if a column name duplicates a column
                of the iterator output schema.
        """
        columns = cls._create_columns(schema)
        cls._verify_schema(columns)

        # verify that filter can be evaluated in the context of the base
        if predicate is not None:
            if not predicate.is_bound_by(base.tbl_version_path):
                raise excs.Error(f'Filter cannot be computed in the context of the base {base._name}')
            # create a copy that we can modify and store
            predicate = predicate.copy()

        # same for value exprs
        for col in columns:
            if not col.is_computed:
                continue
            # make sure that the value can be computed in the context of the base
            if col.value_expr is not None and not col.value_expr.is_bound_by(base.tbl_version_path):
                raise excs.Error(
                    f'Column {col.name}: value expression cannot be computed in the context of the base {base._name}')

        if iterator_cls is not None:
            assert iterator_args is not None

            # validate iterator_args
            py_signature = inspect.signature(iterator_cls.__init__)
            try:
                # make sure iterator_args can be used to instantiate iterator_cls
                bound_args = py_signature.bind(None, **iterator_args).arguments  # None: arg for self
                # we ignore 'self'
                first_param_name = next(iter(py_signature.parameters))  # can't guarantee it's actually 'self'
                del bound_args[first_param_name]

                # construct Signature and type-check bound_args
                params = [
                    func.Parameter(param_name, param_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
                    for param_name, param_type in iterator_cls.input_schema().items()
                ]
                sig = func.Signature(InvalidType(), params)
                from pixeltable.exprs import FunctionCall
                FunctionCall.check_args(sig, bound_args)
            except TypeError as e:
                # chain the original TypeError so the traceback shows where binding/type-checking failed
                raise excs.Error(f'Cannot instantiate iterator with given arguments: {e}') from e

            # prepend pos and output_schema columns to cols:
            # a component view exposes the pos column of its rowid;
            # we create that column here, so it gets assigned a column id;
            # stored=False: it is not stored separately (it's already stored as part of the rowid)
            iterator_cols = [Column(POS_COLUMN_NAME, IntType(), stored=False)]
            output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
            iterator_cols.extend([
                Column(col_name, col_type, stored=col_name not in unstored_cols)
                for col_name, col_type in output_dict.items()
            ])

            iterator_col_names = {col.name for col in iterator_cols}
            for col in columns:
                if col.name in iterator_col_names:
                    raise excs.Error(
                        f'Duplicate name: column {col.name} is already present in the iterator output schema')
            columns = iterator_cols + columns

        with orm.Session(Env.get().engine, future=True) as session:
            from pixeltable.exprs import InlineDict
            iterator_args_expr = InlineDict(iterator_args) if iterator_args is not None else None
            iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
                else None
            base_version_path = cls._get_snapshot_path(base.tbl_version_path) if is_snapshot else base.tbl_version_path
            # (table id, version) per table in the base path; the version is only recorded for snapshots
            base_versions = [
                (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
                for tbl_version in base_version_path.get_tbl_versions()
            ]

            # if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
            if is_snapshot:
                predicate = predicate.retarget(base_version_path) if predicate is not None else None
                iterator_args_expr = iterator_args_expr.retarget(base_version_path) \
                    if iterator_args_expr is not None else None
                for col in columns:
                    if col.value_expr is not None:
                        col.value_expr = col.value_expr.retarget(base_version_path)

            view_md = md_schema.ViewMd(
                is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
                base_versions=base_versions,
                iterator_class_fqn=iterator_class_fqn,
                iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None)

            id, tbl_version = TableVersion.create(
                session, dir_id, name, columns, num_retained_versions, comment, base_path=base_version_path,
                view_md=view_md)
            if tbl_version is None:
                # this is purely a snapshot: we use the base's tbl version path
                view = cls(id, dir_id, name, base_version_path, base, snapshot_only=True)
                _logger.info(f'created snapshot {name}')
            else:
                view = cls(
                    id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base,
                    snapshot_only=False)
                _logger.info(f'Created view `{name}`, id={tbl_version.id}')

                # populate the view's store table; only possible when a TableVersion was created
                from pixeltable.plan import Planner
                plan, num_values_per_row = Planner.create_view_load_plan(view.tbl_version_path)
                num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
                    plan, session.connection(), v_min=tbl_version.version)
                print(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')

            session.commit()
            # register the new view and record it as a dependent of its base
            cat = Catalog.get()
            cat.tbl_dependents[view._id] = []
            cat.tbl_dependents[base._id].append(view)
            cat.tbls[view._id] = view
            return view

    @classmethod
    def _verify_column(cls, col: Column, existing_column_names: Set[str]) -> None:
        """Reject non-nullable, non-computed columns, then apply the checks of the superclass."""
        # non-computed view columns must be nullable
        if not col.col_type.nullable and not col.is_computed:
            raise excs.Error(f'Column {col.name}: non-computed columns in views must be nullable')
        super()._verify_column(col, existing_column_names)

    @classmethod
    def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
        """Returns snapshot of the given table version path.
        All TableVersions of that path will be snapshot versions. Creates new versions from mutable versions,
        if necessary.
        """
        if tbl_version_path.is_snapshot():
            return tbl_version_path
        tbl_version = tbl_version_path.tbl_version
        if not tbl_version.is_snapshot:
            # create and register snapshot version
            tbl_version = tbl_version.create_snapshot_copy()
            assert tbl_version.is_snapshot

        return TableVersionPath(
            tbl_version,
            base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None)

    def _drop(self) -> None:
        """Drop this view and remove it from the Catalog's dependency tracking.

        A snapshot-only view has no TableVersion of its own, so only its metadata record is deleted; every
        other view is dropped via the superclass.
        """
        cat = catalog.Catalog.get()
        if self._snapshot_only:
            # there is no TableVersion to drop
            self._check_is_dropped()
            self.is_dropped = True
            with Env.get().engine.begin() as conn:
                TableVersion.delete_md(self._id, conn)
            # update catalog
            del cat.tbls[self._id]
        else:
            super()._drop()
        cat.tbl_dependents[self._base._id].remove(self)
        del cat.tbl_dependents[self._id]
203
+