pixeltable 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +21 -4
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -31
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -48
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -86
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1086 -258
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -133
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.0.dist-info/LICENSE +18 -0
- pixeltable-0.2.0.dist-info/METADATA +117 -0
- pixeltable-0.2.0.dist-info/RECORD +125 -0
- {pixeltable-0.1.2.dist-info → pixeltable-0.2.0.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2.dist-info/LICENSE +0 -201
- pixeltable-0.1.2.dist-info/METADATA +0 -89
- pixeltable-0.1.2.dist-info/RECORD +0 -37
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from uuid import UUID
|
|
6
|
+
|
|
7
|
+
import sqlalchemy as sql
|
|
8
|
+
|
|
9
|
+
from .schema_object import SchemaObject
|
|
10
|
+
from pixeltable.env import Env
|
|
11
|
+
from pixeltable.metadata import schema
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Logger for this module; obtained under the 'pixeltable' name rather than the module's own __name__.
_logger = logging.getLogger('pixeltable')
|
|
15
|
+
|
|
16
|
+
class NamedFunction(SchemaObject):
    """
    Catalog entry for a function that has a name and lives at a path.

    Only the identity (id, name, parent directory id) is held here; the Function
    itself is stored in the FunctionRegistry.
    """

    def __init__(self, id: UUID, dir_id: UUID, name: str):
        # the base-class constructor is called with (id, name, dir_id), i.e. a different order than ours
        super().__init__(id, name, dir_id)

    @classmethod
    def display_name(cls) -> str:
        """Return the word used for this kind of object in messages."""
        return 'function'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename and/or re-parent this function, in memory and in the stored Function record.

        Args:
            new_name: name the function has after the move
            new_dir_id: id of the directory that contains the function after the move
        """
        # update the in-memory name and directory id first
        super().move(new_name, new_dir_id)

        with Env.get().engine.begin() as conn:
            table = schema.Function.__table__
            dir_col = schema.Function.dir_id.name
            md_col = schema.Function.md.name
            id_col = schema.Function.id.name
            stmt = sql.text(
                f"UPDATE {table} "
                f"SET {dir_col} = :new_dir_id, {md_col}['name'] = :new_name "
                f"WHERE {id_col} = :id")
            # the 'name' entry of the md column is assigned the JSON-encoded name
            params = {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id}
            conn.execute(stmt, params)
|
|
36
|
+
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from pixeltable import exceptions as excs
|
|
6
|
+
from .globals import is_valid_path
|
|
7
|
+
|
|
8
|
+
# Logger for this module; obtained under the 'pixeltable' name rather than the module's own __name__.
_logger = logging.getLogger('pixeltable')
|
|
9
|
+
|
|
10
|
+
class Path:
    """
    A dotted catalog path, held as the list of its '.'-separated components.

    The root path is the empty string, i.e. a component list of [''].
    """

    def __init__(self, path: str, empty_is_valid: bool = False):
        """
        Args:
            path: the dotted path string
            empty_is_valid: forwarded to is_valid_path(); the parent property sets it when it builds Path('')

        Raises:
            excs.Error: if is_valid_path() rejects path
        """
        path_ok = is_valid_path(path, empty_is_valid)
        if not path_ok:
            raise excs.Error(f"Invalid path format: '{path}'")
        self.components = path.split('.')

    @property
    def len(self) -> int:
        """Number of components; the root path counts as 0."""
        if self.is_root:
            return 0
        return len(self.components)

    @property
    def name(self) -> str:
        """The last component ('' for the root path)."""
        assert len(self.components) > 0
        return self.components[-1]

    @property
    def is_root(self) -> bool:
        """True if the first component is the empty string."""
        first_component = self.components[0]
        return first_component == ''

    @property
    def parent(self) -> Path:
        """The path without its last component; the root path is its own parent."""
        if len(self.components) != 1:
            return Path('.'.join(self.components[:-1]))
        if self.is_root:
            return self
        # a single-component path sits directly under the root
        return Path('', empty_is_valid=True)

    def append(self, name: str) -> Path:
        """Return a new Path with name added as the last component; self is left unchanged."""
        if not self.is_root:
            return Path(f'{str(self)}.{name}')
        # appending to the root must not produce a leading '.'
        return Path(name)

    def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
        """
        True if self is an ancestor path of other.

        With is_parent=True, self must be the direct parent of other (exactly one component shorter).
        A path is never an ancestor of itself, and nothing is an ancestor of the root.
        """
        if self.len >= other.len or other.is_root:
            return False
        depth_matches = not is_parent or self.len == other.len - 1
        if self.is_root:
            # the root is a prefix of every non-root path, so only the depth requirement matters
            return depth_matches
        return depth_matches and self.components == other.components[:self.len]

    def __str__(self) -> str:
        """Return the dotted string form ('' for the root path)."""
        return '.'.join(self.components)
|
|
58
|
+
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Optional, List, Dict, Type
|
|
6
|
+
from uuid import UUID
|
|
7
|
+
|
|
8
|
+
import sqlalchemy.orm as orm
|
|
9
|
+
|
|
10
|
+
from pixeltable import exceptions as excs
|
|
11
|
+
from pixeltable.env import Env
|
|
12
|
+
from pixeltable.metadata import schema
|
|
13
|
+
from .dir import Dir
|
|
14
|
+
from .path import Path
|
|
15
|
+
from .schema_object import SchemaObject
|
|
16
|
+
|
|
17
|
+
# Logger for this module; obtained under the 'pixeltable' name rather than the module's own __name__.
_logger = logging.getLogger('pixeltable')
|
|
18
|
+
|
|
19
|
+
class PathDict:
    """Keep track of all paths in a Db instance.

    State (plain dicts, filled in __init__ and updated by the mutating methods):
        dir_contents: directory id -> {entry name -> object} for the entries directly inside that directory
        schema_objs: object id -> object
        root_dir: the single Dir whose parent id is None
    """

    def __init__(self):
        self.dir_contents: Dict[UUID, Dict[str, SchemaObject]] = {}
        self.schema_objs: Dict[UUID, SchemaObject] = {}

        # load dirs: a single query, every record becomes a Dir keyed by its id
        with orm.Session(Env.get().engine, future=True) as session:
            self.schema_objs = {
                dir_record.id: Dir(dir_record.id, dir_record.parent_id, schema.DirMd(**dir_record.md).name)
                for dir_record in session.query(schema.Dir).all()
            }

        # identify root dir
        root_dirs = [d for d in self.schema_objs.values() if d._dir_id is None]
        assert len(root_dirs) == 1
        self.root_dir = root_dirs[0]

        # build dir_contents
        def record_dir(d: Dir) -> None:
            if d._id in self.dir_contents:
                return
            self.dir_contents[d._id] = {}
            if d._dir_id is not None:
                # make sure the parent has an entry dict before registering d in it
                record_dir(self.schema_objs[d._dir_id])
                self.dir_contents[d._dir_id][d._name] = d

        for d in self.schema_objs.values():
            record_dir(d)

    def _resolve_path(self, path: Path) -> SchemaObject:
        """Walk path component by component, starting at the root dir, and return the object it names.

        Raises:
            excs.Error: if a component does not exist, or a non-final component is not a Dir
        """
        if path.is_root:
            return self.root_dir
        cur_dir = self.root_dir
        last_idx = len(path.components) - 1
        for i, component in enumerate(path.components):
            entries = self.dir_contents[cur_dir._id]
            if component not in entries:
                raise excs.Error(f'No such path: {".".join(path.components[:i + 1])}')
            schema_obj = entries[component]
            if i < last_idx:
                # every component except the last one has to be a directory we can descend into
                if not isinstance(schema_obj, Dir):
                    raise excs.Error(f'Not a directory: {".".join(path.components[:i + 1])}')
                cur_dir = schema_obj
        return schema_obj

    def __getitem__(self, path: Path) -> SchemaObject:
        """Return the object at path; raises excs.Error if the path cannot be resolved."""
        return self._resolve_path(path)

    def get_schema_obj(self, id: UUID) -> Optional[SchemaObject]:
        """Return the object with the given id, or None if no such object is registered."""
        return self.schema_objs.get(id)

    def add_schema_obj(self, dir_id: UUID, name: str, val: SchemaObject) -> None:
        """Register val under name in the directory with id dir_id (dir_id must already be in dir_contents)."""
        self.dir_contents[dir_id][name] = val
        self.schema_objs[val._id] = val

    def __setitem__(self, path: Path, val: SchemaObject) -> None:
        """Register val at path; the parent must resolve and must not already contain path.name."""
        parent_dir = self._resolve_path(path.parent)
        assert path.name not in self.dir_contents[parent_dir._id]
        self.schema_objs[val._id] = val
        self.dir_contents[parent_dir._id][path.name] = val
        if isinstance(val, Dir):
            # a new directory starts out empty
            self.dir_contents[val._id] = {}

    def __delitem__(self, path: Path) -> None:
        """Remove the object at path from both indexes (and, for a Dir, drop its own entry dict)."""
        parent_dir = self._resolve_path(path.parent)
        assert path.name in self.dir_contents[parent_dir._id]
        obj = self.dir_contents[parent_dir._id][path.name]
        del self.dir_contents[parent_dir._id][path.name]
        if isinstance(obj, Dir):
            del self.dir_contents[obj._id]
        del self.schema_objs[obj._id]

    def move(self, from_path: Path, to_path: Path) -> None:
        """Re-file the object at from_path under to_path.

        Only dir_contents is changed here; the object's own name and directory id are not touched.

        The destination is resolved after the source entry has been taken out, exactly as before, but if the
        destination cannot be resolved or its name is already taken, the source entry is put back before the
        error propagates. A failed move therefore no longer drops the object from the index.
        """
        from_dir = self._resolve_path(from_path.parent)
        assert isinstance(from_dir, Dir)
        from_entries = self.dir_contents[from_dir._id]
        assert from_path.name in from_entries
        obj = from_entries.pop(from_path.name)
        try:
            to_dir = self._resolve_path(to_path.parent)
            assert to_path.name not in self.dir_contents[to_dir._id]
        except Exception:
            # roll back the removal so the index stays consistent
            from_entries[from_path.name] = obj
            raise
        self.dir_contents[to_dir._id][to_path.name] = obj

    def check_is_valid(self, path: Path, expected: Optional[Type[SchemaObject]]) -> None:
        """Check that path is valid and that the object at path has the expected type.

        Args:
            path: path to check
            expected: expected type of object at path or None if object should not exist

        Raises:
            Error if path is invalid or object at path has wrong type
        """
        if expected is not None:
            # the object has to exist and be of the expected type
            schema_obj = self._resolve_path(path)
            if not isinstance(schema_obj, expected):
                raise excs.Error(
                    f'{str(path)} needs to be a {expected.display_name()} but is a {type(schema_obj).display_name()}')
        else:
            # the object must not exist yet, but its parent has to be an existing directory
            parent_obj = self._resolve_path(path.parent)
            if not isinstance(parent_obj, Dir):
                raise excs.Error(
                    f'{str(path.parent)} is a {type(parent_obj).display_name()}, not a {Dir.display_name()}')
            if path.name in self.dir_contents[parent_obj._id]:
                obj = self.dir_contents[parent_obj._id][path.name]
                raise excs.Error(f"{type(obj).display_name()} '{str(path)}' already exists")

    def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
        """Return the paths of the entries of directory parent.

        Args:
            parent: path of the directory to list
            child_type: if not None, only entries that are instances of this type are returned
            recursive: if True, the entries of all subdirectories are appended as well (after the direct matches)

        Raises:
            excs.Error: if parent cannot be resolved or is not a directory
        """
        parent_dir = self._resolve_path(parent)
        if not isinstance(parent_dir, Dir):
            raise excs.Error(f'{str(parent)} is a {type(parent_dir).display_name()}, not a directory')
        entries = list(self.dir_contents[parent_dir._id].values())
        # Path.append() returns a new Path, so parent itself is never modified
        result = [parent.append(obj._name) for obj in entries if child_type is None or isinstance(obj, child_type)]
        if recursive:
            for obj in entries:
                if isinstance(obj, Dir):
                    result.extend(self.get_children(parent.append(obj._name), child_type, recursive))
        return result
|
|
139
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SchemaObject:
    """
    Common base for every object that can be addressed inside a Db.

    An object carries an id, a name and the id of the directory that contains it.
    """

    def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
        # underscore-prefixed so they cannot clash with column names ('id' and 'name' are common ones)
        self._id, self._name, self._dir_id = obj_id, name, dir_id

    def get_id(self) -> UUID:
        """Return the object's id."""
        return self._id

    def get_name(self) -> str:
        """Return the object's current name."""
        return self._name

    @classmethod
    @abstractmethod
    def display_name(cls) -> str:
        """
        Return name displayed in error messages.

        NOTE(review): this class does not derive from abc.ABC, so @abstractmethod is not enforced when
        instantiating; this base implementation returns None.
        """

    @property
    def fqn(self) -> str:
        """Fully qualified name: the parent directory's fqn, a '.', then this object's name."""
        # NOTE(review): parent_dir() is not defined in this class; presumably subclasses provide it -- confirm
        parent_fqn = self.parent_dir().fqn
        return f'{parent_fqn}.{self._name}'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Subclasses need to override this to make the change persistent"""
        # in-memory update only
        self._name, self._dir_id = new_name, new_dir_id
|
|
39
|
+
|