pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119)
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from uuid import UUID
6
+
7
+ import sqlalchemy as sql
8
+
9
+ from .schema_object import SchemaObject
10
+ from pixeltable.env import Env
11
+ from pixeltable.metadata import schema
12
+
13
+
14
+ _logger = logging.getLogger('pixeltable')
15
+
16
class NamedFunction(SchemaObject):
    """
    Schema object for a function that has a name and lives at a path.

    Only the reference is kept here; the Function itself is stored in the FunctionRegistry.
    """
    def __init__(self, id: UUID, dir_id: UUID, name: str):
        # the base class takes its arguments in (id, name, dir_id) order
        super().__init__(id, name, dir_id)

    @classmethod
    def display_name(cls) -> str:
        """Return the name displayed for this kind of object in error messages."""
        return 'function'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename and/or re-parent this function and persist the change.

        Args:
            new_name: name of the function after the move
            new_dir_id: id of the directory the function is moved into
        """
        # update the in-memory state first, then write the same change to the store
        super().move(new_name, new_dir_id)

        fn_table = schema.Function
        update_sql = (
            f"UPDATE {fn_table.__table__} "
            f"SET {fn_table.dir_id.name} = :new_dir_id, {fn_table.md.name}['name'] = :new_name "
            f"WHERE {fn_table.id.name} = :id"
        )
        # new_name is JSON-encoded because it is assigned to the 'name' entry of the md column
        # (presumably a JSON-typed column -- confirm against the schema definition)
        bind_params = {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id}
        with Env.get().engine.begin() as conn:
            conn.execute(sql.text(update_sql), bind_params)
36
+
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from pixeltable import exceptions as excs
6
+ from .globals import is_valid_path
7
+
8
+ _logger = logging.getLogger('pixeltable')
9
+
10
class Path:
    """
    A dot-separated path, stored as the list of its components.

    A path whose first component is the empty string is treated as the root.
    """
    def __init__(self, path: str, empty_is_valid: bool = False):
        """Validate path and split it into components.

        Raises:
            Error if is_valid_path() rejects the path
        """
        path_ok = is_valid_path(path, empty_is_valid)
        if not path_ok:
            raise excs.Error(f"Invalid path format: '{path}'")
        self.components = path.split('.')

    @property
    def len(self) -> int:
        """Number of components; the root has length 0."""
        if self.is_root:
            return 0
        return len(self.components)

    @property
    def name(self) -> str:
        """The last component of the path."""
        parts = self.components
        assert len(parts) > 0
        return parts[-1]

    @property
    def is_root(self) -> bool:
        """True if the first component is the empty string."""
        first = self.components[0]
        return first == ''

    @property
    def parent(self) -> Path:
        """The path with its last component removed; the root is its own parent."""
        if len(self.components) > 1:
            return Path('.'.join(self.components[:-1]))
        # exactly one component: either we are the root, or our parent is the root
        return self if self.is_root else Path('', empty_is_valid=True)

    def append(self, name: str) -> Path:
        """Return a new Path that extends this one by name; self is left unchanged."""
        extended = name if self.is_root else f'{str(self)}.{name}'
        return Path(extended)

    def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
        """
        True if self is an ancestor path of other.
        If is_parent is True, self additionally needs to be the direct parent of other.
        """
        own_len, other_len = self.len, other.len
        # an ancestor is strictly shorter than its descendant, and nothing is an ancestor of the root
        if other.is_root or own_len >= other_len:
            return False
        any_distance = not is_parent
        if self.is_root and (any_distance or other_len == 1):
            return True
        if self.components != other.components[:own_len]:
            return False
        return any_distance or own_len == other_len - 1

    def __str__(self) -> str:
        separator = '.'
        return separator.join(self.components)
58
+
@@ -0,0 +1,139 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import logging
5
+ from typing import Optional, List, Dict, Type
6
+ from uuid import UUID
7
+
8
+ import sqlalchemy.orm as orm
9
+
10
+ from pixeltable import exceptions as excs
11
+ from pixeltable.env import Env
12
+ from pixeltable.metadata import schema
13
+ from .dir import Dir
14
+ from .path import Path
15
+ from .schema_object import SchemaObject
16
+
17
+ _logger = logging.getLogger('pixeltable')
18
+
19
class PathDict:
    """Keep track of all paths in a Db instance.

    Two indices are maintained:
    - dir_contents: for each directory id, that directory's children keyed by name
    - schema_objs: every registered object keyed by its id
    """
    def __init__(self):
        """Load all directories from the store and build the directory tree.

        Only Dir records are loaded here; other objects can be registered via add_schema_obj() or __setitem__().
        """
        self.dir_contents: Dict[UUID, Dict[str, SchemaObject]] = {}
        self.schema_objs: Dict[UUID, SchemaObject] = {}

        # load dirs (one query; the records are only needed to construct the Dir objects)
        with orm.Session(Env.get().engine, future=True) as session:
            self.schema_objs = {
                dir_record.id: Dir(dir_record.id, dir_record.parent_id, schema.DirMd(**dir_record.md).name)
                for dir_record in session.query(schema.Dir).all()
            }

        # identify root dir: the one directory that has no parent
        root_dirs = [dir_obj for dir_obj in self.schema_objs.values() if dir_obj._dir_id is None]
        assert len(root_dirs) == 1
        self.root_dir = root_dirs[0]

        # build dir_contents
        def record_dir(dir_obj: Dir) -> None:
            if dir_obj._id in self.dir_contents:
                return
            self.dir_contents[dir_obj._id] = {}
            if dir_obj._dir_id is not None:
                # make sure the parent has an entry before registering dir_obj as its child
                record_dir(self.schema_objs[dir_obj._dir_id])
                self.dir_contents[dir_obj._dir_id][dir_obj._name] = dir_obj

        for dir_obj in self.schema_objs.values():
            record_dir(dir_obj)

    def _resolve_path(self, path: Path) -> SchemaObject:
        """Return the object at path, walking down from the root directory.

        Raises:
            Error if a component does not exist or a non-final component is not a directory
        """
        if path.is_root:
            return self.root_dir
        cur_dir = self.root_dir
        for i, component in enumerate(path.components):
            if component not in self.dir_contents[cur_dir._id]:
                raise excs.Error(f'No such path: {".".join(path.components[:i + 1])}')
            schema_obj = self.dir_contents[cur_dir._id][component]
            if i < len(path.components) - 1:
                # every component except the last one has to be a directory we can descend into
                if not isinstance(schema_obj, Dir):
                    raise excs.Error(f'Not a directory: {".".join(path.components[:i + 1])}')
                cur_dir = schema_obj
        return schema_obj

    def __getitem__(self, path: Path) -> SchemaObject:
        """Return the object at path; raises Error if the path cannot be resolved."""
        return self._resolve_path(path)

    def get_schema_obj(self, id: UUID) -> Optional[SchemaObject]:
        """Return the object registered under id, or None if there is none."""
        return self.schema_objs.get(id)

    def add_schema_obj(self, dir_id: UUID, name: str, val: SchemaObject) -> None:
        """Register val under name in the directory with id dir_id (which must already be known)."""
        self.dir_contents[dir_id][name] = val
        self.schema_objs[val._id] = val

    def __setitem__(self, path: Path, val: SchemaObject) -> None:
        """Register val at path; the parent must resolve and the name must not be taken yet."""
        parent_dir = self._resolve_path(path.parent)
        assert path.name not in self.dir_contents[parent_dir._id]
        self.schema_objs[val._id] = val
        self.dir_contents[parent_dir._id][path.name] = val
        if isinstance(val, Dir):
            # a new directory starts out empty
            self.dir_contents[val._id] = {}

    def __delitem__(self, path: Path) -> None:
        """Remove the object at path from both indices."""
        parent_dir = self._resolve_path(path.parent)
        assert path.name in self.dir_contents[parent_dir._id]
        obj = self.dir_contents[parent_dir._id][path.name]
        del self.dir_contents[parent_dir._id][path.name]
        if isinstance(obj, Dir):
            del self.dir_contents[obj._id]
        del self.schema_objs[obj._id]

    def move(self, from_path: Path, to_path: Path) -> None:
        """Re-register the object at from_path under to_path.

        Only the directory index is updated; the object's own name/dir id are not touched here.
        If the destination cannot be resolved or the target name is already taken, the source entry
        is put back before the exception propagates, so a failed move does not lose the object.
        """
        from_dir = self._resolve_path(from_path.parent)
        assert isinstance(from_dir, Dir)
        src_contents = self.dir_contents[from_dir._id]
        assert from_path.name in src_contents
        # the source entry is detached before the destination is resolved, so that a destination
        # lying underneath the moved object fails to resolve
        obj = src_contents.pop(from_path.name)
        try:
            to_dir = self._resolve_path(to_path.parent)
            dst_contents = self.dir_contents[to_dir._id]
            assert to_path.name not in dst_contents
        except Exception:
            # roll back: the object stays where it was
            src_contents[from_path.name] = obj
            raise
        dst_contents[to_path.name] = obj

    def check_is_valid(self, path: Path, expected: Optional[Type[SchemaObject]]) -> None:
        """Check that path is valid and that the object at path has the expected type.

        Args:
            path: path to check
            expected: expected type of object at path or None if object should not exist

        Raises:
            Error if path is invalid or object at path has wrong type
        """
        if expected is not None:
            # the object has to exist and be of the expected type
            schema_obj = self._resolve_path(path)
            if not isinstance(schema_obj, expected):
                raise excs.Error(
                    f'{str(path)} needs to be a {expected.display_name()} but is a {type(schema_obj).display_name()}')
        else:
            # the object must not exist, but its parent has to be an existing directory
            parent_obj = self._resolve_path(path.parent)
            if not isinstance(parent_obj, Dir):
                raise excs.Error(
                    f'{str(path.parent)} is a {type(parent_obj).display_name()}, not a {Dir.display_name()}')
            if path.name in self.dir_contents[parent_obj._id]:
                obj = self.dir_contents[parent_obj._id][path.name]
                raise excs.Error(f"{type(obj).display_name()} '{str(path)}' already exists")

    def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
        """Return the paths of the objects contained in the directory at parent.

        Args:
            parent: path of the directory to list
            child_type: if not None, only objects of this type are returned
            recursive: if True, also list the contents of all subdirectories

        Raises:
            Error if parent cannot be resolved or is not a directory
        """
        parent_dir = self._resolve_path(parent)
        if not isinstance(parent_dir, Dir):
            raise excs.Error(f'{str(parent)} is a {type(parent_dir).display_name()}, not a directory')
        children = self.dir_contents[parent_dir._id].values()
        # Path.append() returns a new Path, so parent can be reused without copying it
        result = [
            parent.append(obj._name) for obj in children if child_type is None or isinstance(obj, child_type)
        ]
        if recursive:
            # subdirectories are always descended into, whether or not they match child_type themselves
            for obj in children:
                if isinstance(obj, Dir):
                    result.extend(self.get_children(parent.append(obj._name), child_type, recursive))
        return result
139
+
@@ -0,0 +1,39 @@
1
+ from abc import abstractmethod
2
+ from typing import Optional
3
+ from uuid import UUID
4
+
5
+
6
class SchemaObject:
    """
    Common base of all addressable objects within a Db.
    Every object carries an id, a name and the id of its parent directory.
    """
    def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
        # underscore-prefixed so they don't collide with column names (id and name are fairly common)
        self._id, self._name, self._dir_id = obj_id, name, dir_id

    def get_id(self) -> UUID:
        """Return the id this object was created with."""
        return self._id

    def get_name(self) -> str:
        """Return the object's current name."""
        return self._name

    # NOTE(review): this class does not derive from abc.ABC, so @abstractmethod presumably does not
    # prevent instantiation here -- confirm whether that is intended
    @classmethod
    @abstractmethod
    def display_name(cls) -> str:
        """
        Return name displayed in error messages.
        """

    @property
    def fqn(self) -> str:
        """Fully-qualified name: the parent directory's fqn, a dot, then this object's name."""
        # NOTE(review): parent_dir() is not defined in this class; presumably provided elsewhere -- confirm
        parent_fqn = self.parent_dir().fqn
        return f'{parent_fqn}.{self._name}'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Subclasses need to override this to make the change persistent"""
        # only the in-memory state is updated here
        self._name, self._dir_id = new_name, new_dir_id
39
+