pixeltable 0.1.0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +34 -6
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +590 -30
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +359 -45
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +116 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +195 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +34 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +256 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +122 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +418 -182
- pixeltable/tests/conftest.py +146 -88
- pixeltable/tests/functions/test_fireworks.py +42 -0
- pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable/tests/functions/test_huggingface.py +158 -0
- pixeltable/tests/functions/test_openai.py +152 -0
- pixeltable/tests/functions/test_together.py +111 -0
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +370 -0
- pixeltable/tests/test_dataframe.py +439 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +120 -0
- pixeltable/tests/test_exprs.py +592 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1195 -263
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +151 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +320 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/tool/create_test_video.py +81 -0
- pixeltable/type_system.py +445 -124
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/arrow.py +98 -0
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/hf_datasets.py +157 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +167 -0
- pixeltable/utils/pytorch.py +91 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.4.dist-info/LICENSE +18 -0
- pixeltable-0.2.4.dist-info/METADATA +127 -0
- pixeltable-0.2.4.dist-info/RECORD +132 -0
- {pixeltable-0.1.0.dist-info → pixeltable-0.2.4.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_functions.py +0 -11
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.0.dist-info/METADATA +0 -34
- pixeltable-0.1.0.dist-info/RECORD +0 -36
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
from typing import Optional, List, Dict, get_type_hints, Type, Any, TypeVar, Tuple, Union
|
|
2
|
+
import platform
|
|
3
|
+
import uuid
|
|
4
|
+
import dataclasses
|
|
5
|
+
|
|
6
|
+
import sqlalchemy as sql
|
|
7
|
+
from sqlalchemy import Integer, String, Boolean, BigInteger, LargeBinary
|
|
8
|
+
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
|
9
|
+
from sqlalchemy import ForeignKey, UniqueConstraint, ForeignKeyConstraint
|
|
10
|
+
from sqlalchemy.orm import declarative_base
|
|
11
|
+
|
|
12
|
+
# Declarative base class that every ORM-mapped class in this module (SystemInfo, Dir, Table, ...) derives from.
Base = declarative_base()
|
|
13
|
+
|
|
14
|
+
T = TypeVar('T')


def md_from_dict(data_class_type: Type[T], data: Any) -> T:
    """Re-instantiate a dataclass instance that contains nested dataclasses from a dict.

    Walks `data` guided by the type annotation `data_class_type`:
    - dataclass: `data` is a dict of field name -> value; each value is converted according to the field's type hint
    - Optional[X]: None stays None, anything else is converted as X
    - List[X], Tuple[X, Y], Tuple[X, ...], Dict[K, V]: converted element-wise; dict keys are passed through K(),
      which turns keys that were stringified during serialization (e.g. int keys in JSON) back into K
    - anything else (plain types, unions with several non-None members, unparameterized generics): returned as-is

    Args:
        data_class_type: the type to reconstruct; typically a dataclass or a field annotation of one
        data: the deserialized (dict/list/scalar) representation

    Returns:
        `data` converted to an instance of `data_class_type`

    Raises:
        KeyError: if `data` has a key that is not an annotated field of the dataclass
    """
    if dataclasses.is_dataclass(data_class_type):
        fieldtypes = get_type_hints(data_class_type)
        return data_class_type(**{f: md_from_dict(fieldtypes[f], val) for f, val in data.items()})

    origin = getattr(data_class_type, '__origin__', None)
    # unparameterized aliases (e.g. a bare typing.List) don't necessarily expose __args__
    args = getattr(data_class_type, '__args__', None) or ()
    if origin is None or not args:
        # not a parameterized generic: nothing to convert
        return data

    if origin is Union:
        non_none_args = [arg for arg in args if arg is not type(None)]
        if len(non_none_args) == 1 and len(non_none_args) < len(args):
            # Optional[X]
            return None if data is None else md_from_dict(non_none_args[0], data)
        # we can't tell which union member applies; pass the value through instead of dropping it
        return data

    if origin is list:
        return [md_from_dict(args[0], elem) for elem in data]

    if origin is dict:
        key_type, val_type = args[0], args[1]
        return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()}

    if origin is tuple:
        if len(args) == 2 and args[1] is Ellipsis:
            # Tuple[X, ...]: homogeneous and variable-length; zipping against the args would truncate the data
            return tuple(md_from_dict(args[0], elem) for elem in data)
        return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(args, data))

    # generic type we don't know how to convert
    return data
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# structure of the stored metadata:
|
|
40
|
+
# - each schema entity that grows somehow proportionally to the data (# of output_rows, total insert operations,
|
|
41
|
+
# number of schema changes) gets its own table
|
|
42
|
+
# - each table has an 'md' column that basically contains the payload
|
|
43
|
+
# - exceptions to that are foreign keys without which lookups would be too slow (ex.: TableSchemaVersions.tbl_id)
|
|
44
|
+
# - the md column contains a dataclass serialized to json; this has the advantage of making changes to the metadata
|
|
45
|
+
# schema easier (the goal is not to have to rely on some schema migration framework; if that breaks for some user,
|
|
46
|
+
# it would be very difficult to patch up)
|
|
47
|
+
|
|
48
|
+
@dataclasses.dataclass
class SystemInfoMd:
    """Payload that is serialized into SystemInfo.md."""

    # NOTE(review): presumably the version of the metadata schema itself - confirm against the code that reads it
    schema_version: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SystemInfo(Base):
    """Holds system-wide metadata; this table is meant to contain a single row."""

    __tablename__ = 'systeminfo'

    # placeholder primary key (defaults to 0)
    dummy = sql.Column(Integer, nullable=False, default=0, primary_key=True)
    # JSON-serialized SystemInfoMd
    md = sql.Column(JSONB, nullable=False)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclasses.dataclass
class DirMd:
    """Metadata of a directory (presumably what is serialized into Dir.md - confirm)."""

    # name of the directory
    name: str
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class Dir(Base):
    """A directory record; directories reference their parent directory via parent_id."""

    __tablename__ = 'dirs'

    # client-side generated UUID
    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    # self-referencing foreign key; nullable (presumably NULL for a top-level directory - confirm)
    parent_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
    # JSON payload
    md = sql.Column(JSONB, nullable=False)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclasses.dataclass
class ColumnHistory:
    """
    Tracks the schema versions at which a column was added and dropped.

    This is what makes it possible to GC unreachable storage columns: a column that was added after table
    snapshot n and dropped again before table snapshot n+1 can be removed from the stored table.
    There is exactly one record per column, spanning all schema versions.
    """

    col_id: int
    # schema version in which the column was added
    schema_version_add: int
    # schema version in which the column was dropped (presumably None while the column still exists - confirm)
    schema_version_drop: Optional[int]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclasses.dataclass
class ViewMd:
    """Metadata that only applies to views (see TableMd.view_md)."""

    is_snapshot: bool

    # list of (table id, version) pairs; the version is None throughout for mutable views
    base_versions: List[Tuple[str, Optional[int]]]

    # view-only: the filter predicate that is applied to the base table
    predicate: Optional[Dict[str, Any]]

    # component views only: fully-qualified name of the ComponentIterator subclass
    iterator_class_fqn: Optional[str]

    # component views only: arguments for the constructor of the iterator class
    iterator_args: Optional[Dict[str, Any]]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclasses.dataclass
class TableMd:
    """Payload that is serialized into Table.md."""

    name: str

    # starts at 0 and increases monotonically within a Table; incremented by data as well as schema changes
    current_version: int
    # the schema version that corresponds to current_version (current_version >= current_schema_version)
    current_schema_version: int

    # source of Column.id values
    next_col_id: int

    # source of values for the rowid column of the storage table;
    # each row receives a unique rowid when it is inserted, and that rowid never changes
    next_row_id: int

    # maps col_id to the ColumnHistory of that column
    column_history: Dict[int, ColumnHistory]

    # presumably None for non-view tables - confirm
    view_md: Optional[ViewMd]
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class Table(Base):
    """
    Stores tables as well as views.

    A view is essentially a specialized table, since it stores materialized columns, too. What sets a view apart:
    - it has a base, which is a (live) table or a snapshot
    - it can have a filter predicate
    """

    __tablename__ = 'tables'

    # 9223372036854775807, the largest signed 64-bit integer
    MAX_VERSION = 2**63 - 1

    # client-side generated UUID
    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    # the directory that contains this table
    dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
    # JSON-serialized TableMd
    md = sql.Column(JSONB, nullable=False)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclasses.dataclass
class TableVersionMd:
    """Payload that is serialized into TableVersion.md."""

    # timestamp as returned by time.time()
    created_at: float
    version: int
    schema_version: int
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class TableVersion(Base):
    """One record per (table, version); the two columns together form the primary key."""

    __tablename__ = 'tableversions'

    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), nullable=False, primary_key=True)
    version = sql.Column(BigInteger, nullable=False, primary_key=True)
    # JSON-serialized TableVersionMd
    md = sql.Column(JSONB, nullable=False)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclasses.dataclass
class SchemaColumn:
    """
    Describes a column of the logical (user-visible) schema of a table.

    Every new schema version records its complete set of columns, ie, there is one record per
    (column x schema version).
    """

    pos: int
    name: str
    col_type: dict
    is_pk: bool
    value_expr: Optional[dict]
    stored: Optional[bool]
    # True: a vector index is created for this column
    is_indexed: bool
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclasses.dataclass
class TableSchemaVersionMd:
    """Payload that is serialized into TableSchemaVersion.md."""

    schema_version: int
    # presumably None for the first schema version of a table - confirm
    preceding_schema_version: Optional[int]
    # maps col_id to the SchemaColumn of that column
    columns: Dict[int, SchemaColumn]
    num_retained_versions: int
    comment: str
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# versioning: each table schema change results in a new record
|
|
181
|
+
class TableSchemaVersion(Base):
    """One record per (table, schema version); the two columns together form the primary key."""

    __tablename__ = 'tableschemaversions'

    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), nullable=False, primary_key=True)
    schema_version = sql.Column(BigInteger, nullable=False, primary_key=True)
    # JSON-serialized TableSchemaVersionMd
    md = sql.Column(JSONB, nullable=False)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclasses.dataclass
class FunctionMd:
    """Payload that is serialized into Function.md."""

    name: str
    # as reported by platform.python_version
    py_version: str
    # the name of the Function subclass
    class_name: str
    # a part of what Function.to_store() returns
    md: dict
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Function(Base):
    """
    A user-defined function that is not a module function (ie, one that isn't available at runtime as a symbol
    in a known module).

    A Function without a name is an anonymous function that is used in the definition of a computed column.
    A Function with a name is additionally assigned to a database and directory.
    The Python version under which a Function was created (and its callable pickled) is recorded, so that we
    can warn against version mismatches.
    """

    __tablename__ = 'functions'

    # client-side generated UUID
    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    # nullable foreign key into 'dirs'
    dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
    # JSON-serialized FunctionMd
    md = sql.Column(JSONB, nullable=False)
    # NOTE(review): presumably the pickled callable - confirm against the code that writes it
    binary_obj = sql.Column(LargeBinary, nullable=True)
|