pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119):
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,234 @@
1
+ from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
2
+ import platform
3
+ import uuid
4
+ import dataclasses
5
+
6
+ import sqlalchemy as sql
7
+ from sqlalchemy import Integer, String, Boolean, BigInteger, LargeBinary
8
+ from sqlalchemy.dialects.postgresql import UUID, JSONB
9
+ from sqlalchemy import ForeignKey, UniqueConstraint, ForeignKeyConstraint
10
+ from sqlalchemy.orm import declarative_base
11
+
12
+ Base = declarative_base()
13
+
14
+ T = TypeVar('T')
15
+
16
def md_from_dict(data_class_type: Type[T], data: Any) -> T:
    """Re-instantiate a dataclass instance that contains nested dataclasses from a dict.

    Args:
        data_class_type: the target type. Dataclasses, Optional[X], List[X]/list[X], Dict[K, V]/dict[K, V]
            and Tuple[...]/tuple[...] annotations are converted recursively; every other type is passed
            through unchanged.
        data: the deserialized value (typically the output of json decoding) to convert.

    Returns:
        `data` converted to `data_class_type`.
    """
    if dataclasses.is_dataclass(data_class_type):
        fieldtypes = get_type_hints(data_class_type)
        return data_class_type(**{f: md_from_dict(fieldtypes[f], val) for f, val in data.items()})

    # bare generics (e.g. typing.List without parameters) may not expose __args__
    origin = getattr(data_class_type, '__origin__', None)
    args = getattr(data_class_type, '__args__', ())

    if origin is Union and type(None) in args:
        # Handling Optional types
        non_none_args = [arg for arg in args if arg is not type(None)]
        if len(non_none_args) == 1:
            return md_from_dict(non_none_args[0], data) if data is not None else None
    elif origin is list and args:
        return [md_from_dict(args[0], elem) for elem in data]
    elif origin is dict and len(args) == 2:
        key_type, val_type = args
        # json object keys are always strings; key_type() restores the declared key type
        return {key_type(key): md_from_dict(val_type, val) for key, val in data.items()}
    elif origin is tuple:
        if not args:
            return tuple(data)
        if len(args) == 2 and args[1] is Ellipsis:
            # variadic tuple: every element has the same type
            return tuple(md_from_dict(args[0], elem) for elem in data)
        return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(args, data))

    # anything we don't know how to convert (plain types, non-Optional unions, other generics) is returned
    # as-is rather than being silently replaced with None
    return data
37
+
38
+
39
+ # structure of the stored metadata:
40
+ # - each schema entity whose size grows roughly in proportion to the data (number of output rows, total insert
41
+ # operations, number of schema changes) gets its own table
42
+ # - each table has an 'md' column that basically contains the payload
43
+ # - exceptions to that are foreign keys without which lookups would be too slow (ex.: TableSchemaVersions.tbl_id)
44
+ # - the md column contains a dataclass serialized to json; this has the advantage of making changes to the metadata
45
+ # schema easier (the goal is not to have to rely on some schema migration framework; if that breaks for some user,
46
+ # it would be very difficult to patch up)
47
+
48
@dataclasses.dataclass
class SystemInfoMd:
    """Payload of the 'md' column of the SystemInfo table."""

    # version number of the metadata schema
    schema_version: int
51
+
52
+
53
class SystemInfo(Base):
    """Single-row table holding the system-wide metadata."""

    __tablename__ = 'systeminfo'

    # placeholder primary key; defaults to 0
    dummy = sql.Column(Integer, nullable=False, default=0, primary_key=True)
    # json-serialized SystemInfoMd
    md = sql.Column(JSONB, nullable=False)
58
+
59
+
60
@dataclasses.dataclass
class DirMd:
    """Metadata of a directory."""

    # directory name
    name: str
63
+
64
+
65
class Dir(Base):
    """Record of the 'dirs' table; parent_id is a nullable self-reference to dirs.id."""

    __tablename__ = 'dirs'

    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    parent_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
    # json payload; presumably a serialized DirMd -- TODO confirm against the code that writes it
    md = sql.Column(JSONB, nullable=False)
71
+
72
+
73
@dataclasses.dataclass
class ColumnMd:
    """
    Non-versioned metadata of a column.

    This covers:
    - the immutable attributes (type, primary key membership, etc.)
    - the schema versions at which the column was added and dropped; these are needed in order to
      garbage-collect unreachable storage columns (a column added after table snapshot n and dropped
      before table snapshot n+1 can be removed from the stored table)
    """
    id: int
    schema_version_add: int
    schema_version_drop: Optional[int]
    col_type: dict

    # True: the column belongs to the primary key
    is_pk: bool

    # only set for computed columns
    value_expr: Optional[dict]

    # True: the stored table contains this column
    stored: Optional[bool]
95
+
96
+
97
@dataclasses.dataclass
class IndexMd:
    """
    Everything required to instantiate an EmbeddingIndex.
    """
    id: int
    name: str
    # the column that is being indexed
    indexed_col_id: int
    # the column that holds the values to be indexed
    index_val_col_id: int
    # the column that holds the index values of deleted rows
    index_val_undo_col_id: int
    schema_version_add: int
    schema_version_drop: Optional[int]
    class_fqn: str
    init_args: dict[str, Any]
111
+
112
+
113
@dataclasses.dataclass
class ViewMd:
    """View-specific metadata (stored in TableMd.view_md)."""

    is_snapshot: bool

    # list of (table id, version) pairs; mutable views have None for every version
    base_versions: List[Tuple[str, Optional[int]]]

    # view-only: the filter predicate that is applied to the base table
    predicate: Optional[dict[str, Any]]

    # component views only: the ComponentIterator subclass
    iterator_class_fqn: Optional[str]

    # component views only: the arguments for the iterator class constructor
    iterator_args: Optional[dict[str, Any]]
128
+
129
+
130
@dataclasses.dataclass
class TableMd:
    """Payload of the 'md' column of the Table table."""

    name: str

    # starts at 0 and increases monotonically within a Table, for data changes as well as schema changes
    current_version: int
    # the schema version that corresponds to current_version (current_version >= current_schema_version)
    current_schema_version: int

    # source of Column.id values
    next_col_id: int
    # source of IndexMd.id values
    next_idx_id: int

    # source of the values of the rowid column in the storage table; each row receives a unique and
    # immutable rowid when it is inserted
    next_row_id: int

    # col_id -> ColumnMd
    column_md: dict[int, ColumnMd]
    # index_id -> IndexMd
    index_md: dict[int, IndexMd]
    view_md: Optional[ViewMd]
149
+
150
+
151
class Table(Base):
    """
    Record for a table or a view.

    A view is essentially a specialization of a table, since it stores materialized columns as well.
    What sets a view apart:
    - it has a base, which is either a (live) table or a snapshot
    - it can have a filter predicate
    """
    __tablename__ = 'tables'

    MAX_VERSION = 2**63 - 1  # 9223372036854775807

    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
    # json-serialized TableMd
    md = sql.Column(JSONB, nullable=False)
166
+
167
+
168
@dataclasses.dataclass
class TableVersionMd:
    """Payload of the 'md' column of the TableVersion table."""

    # a time.time() timestamp
    created_at: float
    version: int
    schema_version: int
173
+
174
+
175
class TableVersion(Base):
    """Record of the 'tableversions' table; the primary key is (tbl_id, version)."""

    __tablename__ = 'tableversions'

    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), nullable=False, primary_key=True)
    version = sql.Column(BigInteger, nullable=False, primary_key=True)
    # json-serialized TableVersionMd
    md = sql.Column(JSONB, nullable=False)
180
+
181
+
182
@dataclasses.dataclass
class SchemaColumn:
    """
    Versioned metadata of a column.
    """
    pos: int
    name: str
189
+
190
+
191
@dataclasses.dataclass
class TableSchemaVersionMd:
    """
    The complete versioned metadata of a table.
    """
    schema_version: int
    preceding_schema_version: Optional[int]
    # col_id -> SchemaColumn
    columns: dict[int, SchemaColumn]
    num_retained_versions: int
    comment: str
201
+
202
+
203
+ # versioning: each table schema change results in a new record
204
class TableSchemaVersion(Base):
    """Record of the 'tableschemaversions' table; the primary key is (tbl_id, schema_version)."""

    __tablename__ = 'tableschemaversions'

    tbl_id = sql.Column(UUID(as_uuid=True), ForeignKey('tables.id'), nullable=False, primary_key=True)
    schema_version = sql.Column(BigInteger, nullable=False, primary_key=True)
    # json-serialized TableSchemaVersionMd
    md = sql.Column(JSONB, nullable=False)
210
+
211
+
212
@dataclasses.dataclass
class FunctionMd:
    """Payload of the 'md' column of the Function table."""

    name: str
    # platform.python_version
    py_version: str
    # the name of the Function subclass
    class_name: str
    # a part of what Function.to_store() returns
    md: dict
218
+
219
+
220
class Function(Base):
    """
    A user-defined function that is not a module function (ie, one that isn't available at runtime as a
    symbol in a known module).

    - A Function without a name is an anonymous function used in the definition of a computed column.
    - A Function with a name is also assigned to a database and directory.
    - The Python version under which the Function was created (and the callable pickled) is recorded so
      that version mismatches can be warned against.
    """
    __tablename__ = 'functions'

    id = sql.Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4, primary_key=True)
    dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
    # json-serialized FunctionMd
    md = sql.Column(JSONB, nullable=False)
    binary_obj = sql.Column(LargeBinary, nullable=True)