pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
pixeltable/client.py
DELETED
|
@@ -1,600 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional, Dict, Type, Any, Union
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import logging
|
|
4
|
-
import dataclasses
|
|
5
|
-
|
|
6
|
-
import sqlalchemy as sql
|
|
7
|
-
import sqlalchemy.orm as orm
|
|
8
|
-
|
|
9
|
-
import pixeltable
|
|
10
|
-
from pixeltable.metadata import schema
|
|
11
|
-
from pixeltable.env import Env
|
|
12
|
-
import pixeltable.func as func
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
|
-
from pixeltable import exceptions as excs
|
|
15
|
-
from pixeltable.exprs import Predicate
|
|
16
|
-
from pixeltable.iterators import ComponentIterator
|
|
17
|
-
|
|
18
|
-
from typing import TYPE_CHECKING
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
import datasets
|
|
21
|
-
|
|
22
|
-
__all__ = [
|
|
23
|
-
'Client',
|
|
24
|
-
]
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
_logger = logging.getLogger('pixeltable')
|
|
28
|
-
|
|
29
|
-
class Client:
|
|
30
|
-
"""
|
|
31
|
-
Client for interacting with a Pixeltable environment.
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
def __init__(self, reload: bool = False) -> None:
|
|
35
|
-
"""Constructs a client.
|
|
36
|
-
"""
|
|
37
|
-
env = Env.get()
|
|
38
|
-
env.set_up()
|
|
39
|
-
env.upgrade_metadata()
|
|
40
|
-
if reload:
|
|
41
|
-
catalog.Catalog.clear()
|
|
42
|
-
self.catalog = catalog.Catalog.get()
|
|
43
|
-
|
|
44
|
-
def logging(
|
|
45
|
-
self, *, to_stdout: Optional[bool] = None, level: Optional[int] = None,
|
|
46
|
-
add: Optional[str] = None, remove: Optional[str] = None
|
|
47
|
-
) -> None:
|
|
48
|
-
"""Configure logging.
|
|
49
|
-
|
|
50
|
-
Args:
|
|
51
|
-
to_stdout: if True, also log to stdout
|
|
52
|
-
level: default log level
|
|
53
|
-
add: comma-separated list of 'module name:log level' pairs; ex.: add='video:10'
|
|
54
|
-
remove: comma-separated list of module names
|
|
55
|
-
"""
|
|
56
|
-
if to_stdout is not None:
|
|
57
|
-
Env.get().log_to_stdout(to_stdout)
|
|
58
|
-
if level is not None:
|
|
59
|
-
Env.get().set_log_level(level)
|
|
60
|
-
if add is not None:
|
|
61
|
-
for module, level in [t.split(':') for t in add.split(',')]:
|
|
62
|
-
Env.get().set_module_log_level(module, int(level))
|
|
63
|
-
if remove is not None:
|
|
64
|
-
for module in remove.split(','):
|
|
65
|
-
Env.get().set_module_log_level(module, None)
|
|
66
|
-
if to_stdout is None and level is None and add is None and remove is None:
|
|
67
|
-
Env.get().print_log_config()
|
|
68
|
-
|
|
69
|
-
def list_functions(self) -> pd.DataFrame:
|
|
70
|
-
"""Returns information about all registered functions.
|
|
71
|
-
|
|
72
|
-
Returns:
|
|
73
|
-
Pandas DataFrame with columns 'Path', 'Name', 'Parameters', 'Return Type', 'Is Agg', 'Library'
|
|
74
|
-
"""
|
|
75
|
-
functions = func.FunctionRegistry.get().list_functions()
|
|
76
|
-
paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
|
|
77
|
-
names = [f.name for f in functions]
|
|
78
|
-
params = [
|
|
79
|
-
', '.join(
|
|
80
|
-
[param_name + ': ' + str(param_type) for param_name, param_type in f.signature.parameters.items()])
|
|
81
|
-
for f in functions
|
|
82
|
-
]
|
|
83
|
-
pd_df = pd.DataFrame({
|
|
84
|
-
'Path': paths,
|
|
85
|
-
'Function Name': names,
|
|
86
|
-
'Parameters': params,
|
|
87
|
-
'Return Type': [str(f.signature.get_return_type()) for f in functions],
|
|
88
|
-
})
|
|
89
|
-
pd_df = pd_df.style.set_properties(**{'text-align': 'left'}) \
|
|
90
|
-
.set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) # center-align headings
|
|
91
|
-
return pd_df.hide(axis='index')
|
|
92
|
-
|
|
93
|
-
def get_path(self, schema_obj: catalog.SchemaObject) -> str:
|
|
94
|
-
"""Returns the path to a SchemaObject.
|
|
95
|
-
|
|
96
|
-
Args:
|
|
97
|
-
schema_obj: SchemaObject to get the path for.
|
|
98
|
-
|
|
99
|
-
Returns:
|
|
100
|
-
Path to the SchemaObject.
|
|
101
|
-
"""
|
|
102
|
-
path_elements: List[str] = []
|
|
103
|
-
dir_id = schema_obj._dir_id
|
|
104
|
-
while dir_id is not None:
|
|
105
|
-
dir = self.catalog.paths.get_schema_obj(dir_id)
|
|
106
|
-
if dir._dir_id is None:
|
|
107
|
-
# this is the root dir with name '', which we don't want to include in the path
|
|
108
|
-
break
|
|
109
|
-
path_elements.insert(0, dir._name)
|
|
110
|
-
dir_id = dir._dir_id
|
|
111
|
-
path_elements.append(schema_obj._name)
|
|
112
|
-
return '.'.join(path_elements)
|
|
113
|
-
|
|
114
|
-
def create_table(
|
|
115
|
-
self, path_str: str, schema: Dict[str, Any], primary_key: Optional[Union[str, List[str]]] = None,
|
|
116
|
-
num_retained_versions: int = 10, comment: str = ''
|
|
117
|
-
) -> catalog.InsertableTable:
|
|
118
|
-
"""Create a new `InsertableTable`.
|
|
119
|
-
|
|
120
|
-
Args:
|
|
121
|
-
path_str: Path to the table.
|
|
122
|
-
schema: dictionary mapping column names to column types, value expressions, or to column specifications.
|
|
123
|
-
num_retained_versions: Number of versions of the table to retain.
|
|
124
|
-
|
|
125
|
-
Returns:
|
|
126
|
-
The newly created table.
|
|
127
|
-
|
|
128
|
-
Raises:
|
|
129
|
-
Error: if the path already exists or is invalid.
|
|
130
|
-
|
|
131
|
-
Examples:
|
|
132
|
-
Create a table with an int and a string column:
|
|
133
|
-
|
|
134
|
-
>>> table = cl.create_table('my_table', schema={'col1': IntType(), 'col2': StringType()})
|
|
135
|
-
"""
|
|
136
|
-
path = catalog.Path(path_str)
|
|
137
|
-
self.catalog.paths.check_is_valid(path, expected=None)
|
|
138
|
-
dir = self.catalog.paths[path.parent]
|
|
139
|
-
|
|
140
|
-
if len(schema) == 0:
|
|
141
|
-
raise excs.Error(f'Table schema is empty: `{path_str}`')
|
|
142
|
-
|
|
143
|
-
if primary_key is None:
|
|
144
|
-
primary_key = []
|
|
145
|
-
elif isinstance(primary_key, str):
|
|
146
|
-
primary_key = [primary_key]
|
|
147
|
-
else:
|
|
148
|
-
if not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
|
|
149
|
-
raise excs.Error('primary_key must be a single column name or a list of column names')
|
|
150
|
-
|
|
151
|
-
tbl = catalog.InsertableTable.create(
|
|
152
|
-
dir._id, path.name, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
|
|
153
|
-
self.catalog.paths[path] = tbl
|
|
154
|
-
_logger.info(f'Created table `{path_str}`.')
|
|
155
|
-
return tbl
|
|
156
|
-
|
|
157
|
-
def import_parquet(
|
|
158
|
-
self,
|
|
159
|
-
table_path: str,
|
|
160
|
-
*,
|
|
161
|
-
parquet_path: str,
|
|
162
|
-
schema_override: Optional[Dict[str, Any]] = None,
|
|
163
|
-
**kwargs,
|
|
164
|
-
) -> catalog.InsertableTable:
|
|
165
|
-
"""Create a new `InsertableTable` from a Parquet file or set of files. Requires pyarrow to be installed.
|
|
166
|
-
Args:
|
|
167
|
-
path_str: Path to the table within pixeltable.
|
|
168
|
-
parquet_path: Path to an individual Parquet file or directory of Parquet files.
|
|
169
|
-
schema_override: Optional dictionary mapping column names to column type to override the default
|
|
170
|
-
schema inferred from the Parquet file. The column type should be a pixeltable ColumnType.
|
|
171
|
-
For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
|
|
172
|
-
Any fields not provided explicitly will map to types with `pixeltable.utils.parquet.parquet_schema_to_pixeltable_schema`
|
|
173
|
-
kwargs: Additional arguments to pass to `Client.create_table`.
|
|
174
|
-
|
|
175
|
-
Returns:
|
|
176
|
-
The newly created table. The table will have loaded the data from the Parquet file(s).
|
|
177
|
-
"""
|
|
178
|
-
from pixeltable.utils import parquet
|
|
179
|
-
|
|
180
|
-
return parquet.import_parquet(
|
|
181
|
-
self,
|
|
182
|
-
table_path=table_path,
|
|
183
|
-
parquet_path=parquet_path,
|
|
184
|
-
schema_override=schema_override,
|
|
185
|
-
**kwargs,
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
def import_huggingface_dataset(
|
|
189
|
-
self,
|
|
190
|
-
table_path: str,
|
|
191
|
-
dataset: Union['datasets.Dataset', 'datasets.DatasetDict'],
|
|
192
|
-
*,
|
|
193
|
-
column_name_for_split: Optional[str] = 'split',
|
|
194
|
-
schema_override: Optional[Dict[str, Any]] = None,
|
|
195
|
-
**kwargs
|
|
196
|
-
) -> catalog.InsertableTable:
|
|
197
|
-
"""Create a new `InsertableTable` from a Huggingface dataset, or dataset dict with multiple splits.
|
|
198
|
-
Requires datasets library to be installed.
|
|
199
|
-
|
|
200
|
-
Args:
|
|
201
|
-
path_str: Path to the table.
|
|
202
|
-
dataset: Huggingface datasts.Dataset or datasts.DatasetDict to insert into the table.
|
|
203
|
-
column_name_for_split: column name to use for split information. If None, no split information will be stored.
|
|
204
|
-
schema_override: Optional dictionary mapping column names to column type to override the corresponding defaults from
|
|
205
|
-
`pixeltable.utils.hf_datasets.huggingface_schema_to_pixeltable_schema`. The column type should be a pixeltable ColumnType.
|
|
206
|
-
For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
|
|
207
|
-
|
|
208
|
-
kwargs: Additional arguments to pass to `create_table`.
|
|
209
|
-
|
|
210
|
-
Returns:
|
|
211
|
-
The newly created table. The table will have loaded the data from the dataset.
|
|
212
|
-
"""
|
|
213
|
-
from pixeltable.utils import hf_datasets
|
|
214
|
-
|
|
215
|
-
return hf_datasets.import_huggingface_dataset(
|
|
216
|
-
self,
|
|
217
|
-
table_path,
|
|
218
|
-
dataset,
|
|
219
|
-
column_name_for_split=column_name_for_split,
|
|
220
|
-
schema_override=schema_override,
|
|
221
|
-
**kwargs,
|
|
222
|
-
)
|
|
223
|
-
|
|
224
|
-
def create_view(
|
|
225
|
-
self, path_str: str, base: catalog.Table, *, schema: Optional[Dict[str, Any]] = None,
|
|
226
|
-
filter: Optional[Predicate] = None,
|
|
227
|
-
is_snapshot: bool = False, iterator_class: Optional[Type[ComponentIterator]] = None,
|
|
228
|
-
iterator_args: Optional[Dict[str, Any]] = None, num_retained_versions: int = 10, comment: str = '',
|
|
229
|
-
ignore_errors: bool = False) -> catalog.View:
|
|
230
|
-
"""Create a new `View`.
|
|
231
|
-
|
|
232
|
-
Args:
|
|
233
|
-
path_str: Path to the view.
|
|
234
|
-
base: Table (ie, table or view or snapshot) to base the view on.
|
|
235
|
-
schema: dictionary mapping column names to column types, value expressions, or to column specifications.
|
|
236
|
-
filter: Predicate to filter rows of the base table.
|
|
237
|
-
is_snapshot: Whether the view is a snapshot.
|
|
238
|
-
iterator_class: Class of the iterator to use for the view.
|
|
239
|
-
iterator_args: Arguments to pass to the iterator class.
|
|
240
|
-
num_retained_versions: Number of versions of the view to retain.
|
|
241
|
-
ignore_errors: if True, fail silently if the path already exists or is invalid.
|
|
242
|
-
|
|
243
|
-
Returns:
|
|
244
|
-
The newly created view.
|
|
245
|
-
|
|
246
|
-
Raises:
|
|
247
|
-
Error: if the path already exists or is invalid.
|
|
248
|
-
|
|
249
|
-
Examples:
|
|
250
|
-
Create a view with an additional int and a string column and a filter:
|
|
251
|
-
|
|
252
|
-
>>> view = cl.create_view(
|
|
253
|
-
'my_view', base, schema={'col3': IntType(), 'col4': StringType()}, filter=base.col1 > 10)
|
|
254
|
-
|
|
255
|
-
Create a table snapshot:
|
|
256
|
-
|
|
257
|
-
>>> snapshot_view = cl.create_view('my_snapshot_view', base, is_snapshot=True)
|
|
258
|
-
|
|
259
|
-
Create an immutable view with additional computed columns and a filter:
|
|
260
|
-
|
|
261
|
-
>>> snapshot_view = cl.create_view(
|
|
262
|
-
'my_snapshot', base, schema={'col3': base.col2 + 1}, filter=base.col1 > 10, is_snapshot=True)
|
|
263
|
-
"""
|
|
264
|
-
assert (iterator_class is None) == (iterator_args is None)
|
|
265
|
-
assert isinstance(base, catalog.Table)
|
|
266
|
-
path = catalog.Path(path_str)
|
|
267
|
-
try:
|
|
268
|
-
self.catalog.paths.check_is_valid(path, expected=None)
|
|
269
|
-
except Exception as e:
|
|
270
|
-
if ignore_errors:
|
|
271
|
-
return
|
|
272
|
-
else:
|
|
273
|
-
raise e
|
|
274
|
-
dir = self.catalog.paths[path.parent]
|
|
275
|
-
|
|
276
|
-
if schema is None:
|
|
277
|
-
schema = {}
|
|
278
|
-
view = catalog.View.create(
|
|
279
|
-
dir._id, path.name, base=base, schema=schema, predicate=filter, is_snapshot=is_snapshot,
|
|
280
|
-
iterator_cls=iterator_class, iterator_args=iterator_args, num_retained_versions=num_retained_versions, comment=comment)
|
|
281
|
-
self.catalog.paths[path] = view
|
|
282
|
-
_logger.info(f'Created view `{path_str}`.')
|
|
283
|
-
return view
|
|
284
|
-
|
|
285
|
-
def get_table(self, path: str) -> catalog.Table:
|
|
286
|
-
"""Get a handle to a table (including views and snapshots).
|
|
287
|
-
|
|
288
|
-
Args:
|
|
289
|
-
path: Path to the table.
|
|
290
|
-
|
|
291
|
-
Returns:
|
|
292
|
-
A `InsertableTable` or `View` object.
|
|
293
|
-
|
|
294
|
-
Raises:
|
|
295
|
-
Error: If the path does not exist or does not designate a table.
|
|
296
|
-
|
|
297
|
-
Examples:
|
|
298
|
-
Get handle for a table in the top-level directory:
|
|
299
|
-
|
|
300
|
-
>>> table = cl.get_table('my_table')
|
|
301
|
-
|
|
302
|
-
For a table in a subdirectory:
|
|
303
|
-
|
|
304
|
-
>>> table = cl.get_table('subdir.my_table')
|
|
305
|
-
|
|
306
|
-
For a snapshot in the top-level directory:
|
|
307
|
-
|
|
308
|
-
>>> table = cl.get_table('my_snapshot')
|
|
309
|
-
"""
|
|
310
|
-
p = catalog.Path(path)
|
|
311
|
-
self.catalog.paths.check_is_valid(p, expected=catalog.Table)
|
|
312
|
-
obj = self.catalog.paths[p]
|
|
313
|
-
return obj
|
|
314
|
-
|
|
315
|
-
def move(self, path: str, new_path: str) -> None:
|
|
316
|
-
"""Move a schema object to a new directory and/or rename a schema object.
|
|
317
|
-
|
|
318
|
-
Args:
|
|
319
|
-
path: absolute path to the existing schema object.
|
|
320
|
-
new_path: absolute new path for the schema object.
|
|
321
|
-
|
|
322
|
-
Raises:
|
|
323
|
-
Error: If path does not exist or new_path already exists.
|
|
324
|
-
|
|
325
|
-
Examples:
|
|
326
|
-
Move a table to a different directory:
|
|
327
|
-
|
|
328
|
-
>>>> cl.move('dir1.my_table', 'dir2.my_table')
|
|
329
|
-
|
|
330
|
-
Rename a table:
|
|
331
|
-
|
|
332
|
-
>>>> cl.move('dir1.my_table', 'dir1.new_name')
|
|
333
|
-
"""
|
|
334
|
-
p = catalog.Path(path)
|
|
335
|
-
self.catalog.paths.check_is_valid(p, expected=catalog.SchemaObject)
|
|
336
|
-
new_p = catalog.Path(new_path)
|
|
337
|
-
self.catalog.paths.check_is_valid(new_p, expected=None)
|
|
338
|
-
obj = self.catalog.paths[p]
|
|
339
|
-
self.catalog.paths.move(p, new_p)
|
|
340
|
-
new_dir = self.catalog.paths[new_p.parent]
|
|
341
|
-
obj.move(new_p.name, new_dir._id)
|
|
342
|
-
|
|
343
|
-
def list_tables(self, dir_path: str = '', recursive: bool = True) -> List[str]:
|
|
344
|
-
"""List the tables in a directory.
|
|
345
|
-
|
|
346
|
-
Args:
|
|
347
|
-
dir_path: Path to the directory. Defaults to the root directory.
|
|
348
|
-
recursive: Whether to list tables in subdirectories as well.
|
|
349
|
-
|
|
350
|
-
Returns:
|
|
351
|
-
A list of table paths.
|
|
352
|
-
|
|
353
|
-
Raises:
|
|
354
|
-
Error: If the path does not exist or does not designate a directory.
|
|
355
|
-
|
|
356
|
-
Examples:
|
|
357
|
-
List tables in top-level directory:
|
|
358
|
-
|
|
359
|
-
>>> cl.list_tables()
|
|
360
|
-
['my_table', ...]
|
|
361
|
-
|
|
362
|
-
List tables in 'dir1':
|
|
363
|
-
|
|
364
|
-
>>> cl.list_tables('dir1')
|
|
365
|
-
[...]
|
|
366
|
-
"""
|
|
367
|
-
assert dir_path is not None
|
|
368
|
-
path = catalog.Path(dir_path, empty_is_valid=True)
|
|
369
|
-
self.catalog.paths.check_is_valid(path, expected=catalog.Dir)
|
|
370
|
-
return [str(p) for p in self.catalog.paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
371
|
-
|
|
372
|
-
def drop_table(self, path: str, force: bool = False, ignore_errors: bool = False) -> None:
|
|
373
|
-
"""Drop a table.
|
|
374
|
-
|
|
375
|
-
Args:
|
|
376
|
-
path: Path to the table.
|
|
377
|
-
force: Whether to drop the table even if it has unsaved changes.
|
|
378
|
-
ignore_errors: Whether to ignore errors if the table does not exist.
|
|
379
|
-
|
|
380
|
-
Raises:
|
|
381
|
-
Error: If the path does not exist or does not designate a table and ignore_errors is False.
|
|
382
|
-
|
|
383
|
-
Examples:
|
|
384
|
-
>>> cl.drop_table('my_table')
|
|
385
|
-
"""
|
|
386
|
-
path_obj = catalog.Path(path)
|
|
387
|
-
try:
|
|
388
|
-
self.catalog.paths.check_is_valid(path_obj, expected=catalog.Table)
|
|
389
|
-
except Exception as e:
|
|
390
|
-
if ignore_errors:
|
|
391
|
-
_logger.info(f'Skipped table `{path}` (does not exist).')
|
|
392
|
-
return
|
|
393
|
-
else:
|
|
394
|
-
raise e
|
|
395
|
-
tbl = self.catalog.paths[path_obj]
|
|
396
|
-
if len(self.catalog.tbl_dependents[tbl._id]) > 0:
|
|
397
|
-
dependent_paths = [self.get_path(dep) for dep in self.catalog.tbl_dependents[tbl._id]]
|
|
398
|
-
raise excs.Error(f'Table {path} has dependents: {", ".join(dependent_paths)}')
|
|
399
|
-
tbl._drop()
|
|
400
|
-
del self.catalog.paths[path_obj]
|
|
401
|
-
_logger.info(f'Dropped table `{path}`.')
|
|
402
|
-
|
|
403
|
-
def create_dir(self, path_str: str, ignore_errors: bool = False) -> None:
|
|
404
|
-
"""Create a directory.
|
|
405
|
-
|
|
406
|
-
Args:
|
|
407
|
-
path_str: Path to the directory.
|
|
408
|
-
ignore_errors: if True, silently returns on error
|
|
409
|
-
|
|
410
|
-
Raises:
|
|
411
|
-
Error: If the path already exists or the parent is not a directory.
|
|
412
|
-
|
|
413
|
-
Examples:
|
|
414
|
-
>>> cl.create_dir('my_dir')
|
|
415
|
-
|
|
416
|
-
Create a subdirectory:
|
|
417
|
-
|
|
418
|
-
>>> cl.create_dir('my_dir.sub_dir')
|
|
419
|
-
"""
|
|
420
|
-
try:
|
|
421
|
-
path = catalog.Path(path_str)
|
|
422
|
-
self.catalog.paths.check_is_valid(path, expected=None)
|
|
423
|
-
parent = self.catalog.paths[path.parent]
|
|
424
|
-
assert parent is not None
|
|
425
|
-
with orm.Session(Env.get().engine, future=True) as session:
|
|
426
|
-
dir_md = schema.DirMd(name=path.name)
|
|
427
|
-
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
428
|
-
session.add(dir_record)
|
|
429
|
-
session.flush()
|
|
430
|
-
assert dir_record.id is not None
|
|
431
|
-
self.catalog.paths[path] = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
432
|
-
session.commit()
|
|
433
|
-
_logger.info(f'Created directory `{path_str}`.')
|
|
434
|
-
print(f'Created directory `{path_str}`.')
|
|
435
|
-
except excs.Error as e:
|
|
436
|
-
if ignore_errors:
|
|
437
|
-
return
|
|
438
|
-
else:
|
|
439
|
-
raise e
|
|
440
|
-
|
|
441
|
-
def rm_dir(self, path_str: str) -> None:
|
|
442
|
-
"""Remove a directory.
|
|
443
|
-
|
|
444
|
-
Args:
|
|
445
|
-
path_str: Path to the directory.
|
|
446
|
-
|
|
447
|
-
Raises:
|
|
448
|
-
Error: If the path does not exist or does not designate a directory or if the directory is not empty.
|
|
449
|
-
|
|
450
|
-
Examples:
|
|
451
|
-
>>> cl.rm_dir('my_dir')
|
|
452
|
-
|
|
453
|
-
Remove a subdirectory:
|
|
454
|
-
|
|
455
|
-
>>> cl.rm_dir('my_dir.sub_dir')
|
|
456
|
-
"""
|
|
457
|
-
path = catalog.Path(path_str)
|
|
458
|
-
self.catalog.paths.check_is_valid(path, expected=catalog.Dir)
|
|
459
|
-
|
|
460
|
-
# make sure it's empty
|
|
461
|
-
if len(self.catalog.paths.get_children(path, child_type=None, recursive=True)) > 0:
|
|
462
|
-
raise excs.Error(f'Directory {path_str} is not empty')
|
|
463
|
-
# TODO: figure out how to make force=True work in the presence of snapshots
|
|
464
|
-
# # delete tables
|
|
465
|
-
# for tbl_path in self.paths.get_children(path, child_type=MutableTable, recursive=True):
|
|
466
|
-
# self.drop_table(str(tbl_path), force=True)
|
|
467
|
-
# # rm subdirs
|
|
468
|
-
# for dir_path in self.paths.get_children(path, child_type=Dir, recursive=False):
|
|
469
|
-
# self.rm_dir(str(dir_path), force=True)
|
|
470
|
-
|
|
471
|
-
with Env.get().engine.begin() as conn:
|
|
472
|
-
dir = self.catalog.paths[path]
|
|
473
|
-
conn.execute(sql.delete(schema.Dir.__table__).where(schema.Dir.id == dir._id))
|
|
474
|
-
del self.catalog.paths[path]
|
|
475
|
-
_logger.info(f'Removed directory {path_str}')
|
|
476
|
-
|
|
477
|
-
def list_dirs(self, path_str: str = '', recursive: bool = True) -> List[str]:
|
|
478
|
-
"""List the directories in a directory.
|
|
479
|
-
|
|
480
|
-
Args:
|
|
481
|
-
path_str: Path to the directory.
|
|
482
|
-
recursive: Whether to list subdirectories recursively.
|
|
483
|
-
|
|
484
|
-
Returns:
|
|
485
|
-
List of directory paths.
|
|
486
|
-
|
|
487
|
-
Raises:
|
|
488
|
-
Error: If the path does not exist or does not designate a directory.
|
|
489
|
-
|
|
490
|
-
Examples:
|
|
491
|
-
>>> cl.list_dirs('my_dir', recursive=True)
|
|
492
|
-
['my_dir', 'my_dir.sub_dir1']
|
|
493
|
-
"""
|
|
494
|
-
path = catalog.Path(path_str, empty_is_valid=True)
|
|
495
|
-
self.catalog.paths.check_is_valid(path, expected=catalog.Dir)
|
|
496
|
-
return [str(p) for p in self.catalog.paths.get_children(path, child_type=catalog.Dir, recursive=recursive)]
|
|
497
|
-
|
|
498
|
-
# TODO: for now, named functions are deprecated, until we understand the use case and requirements better
|
|
499
|
-
# def create_function(self, path_str: str, fn: func.Function) -> None:
|
|
500
|
-
# """Create a stored function.
|
|
501
|
-
#
|
|
502
|
-
# Args:
|
|
503
|
-
# path_str: path where the function gets stored
|
|
504
|
-
# func: previously created Function object
|
|
505
|
-
#
|
|
506
|
-
# Raises:
|
|
507
|
-
# Error: if the path already exists or the parent is not a directory
|
|
508
|
-
#
|
|
509
|
-
# Examples:
|
|
510
|
-
# Create a function ``detect()`` that takes an image and returns a JSON object, and store it in ``my_dir``:
|
|
511
|
-
#
|
|
512
|
-
# >>> @pxt.udf(param_types=[ImageType()], return_type=JsonType())
|
|
513
|
-
# ... def detect(img):
|
|
514
|
-
# ... ...
|
|
515
|
-
# >>> cl.create_function('my_dir.detect', detect)
|
|
516
|
-
# """
|
|
517
|
-
# if fn.is_module_function:
|
|
518
|
-
# raise excs.Error(f'Cannot create a named function for a library function')
|
|
519
|
-
# path = catalog.Path(path_str)
|
|
520
|
-
# self.catalog.paths.check_is_valid(path, expected=None)
|
|
521
|
-
# dir = self.catalog.paths[path.parent]
|
|
522
|
-
#
|
|
523
|
-
# func.FunctionRegistry.get().create_function(fn, dir._id, path.name)
|
|
524
|
-
# self.catalog.paths[path] = catalog.NamedFunction(fn.id, dir._id, path.name)
|
|
525
|
-
# fn.md.fqn = str(path)
|
|
526
|
-
# _logger.info(f'Created function {path_str}')
|
|
527
|
-
#
|
|
528
|
-
# def update_function(self, path_str: str, fn: func.Function) -> None:
|
|
529
|
-
# """Update the implementation of a stored function.
|
|
530
|
-
#
|
|
531
|
-
# Args:
|
|
532
|
-
# path_str: path to the function to be updated
|
|
533
|
-
# func: new function implementation
|
|
534
|
-
#
|
|
535
|
-
# Raises:
|
|
536
|
-
# Error: if the path does not exist or ``func`` has a different signature than the stored function.
|
|
537
|
-
# """
|
|
538
|
-
# if fn.is_module_function:
|
|
539
|
-
# raise excs.Error(f'Cannot update a named function to a library function')
|
|
540
|
-
# path = catalog.Path(path_str)
|
|
541
|
-
# self.catalog.paths.check_is_valid(path, expected=catalog.NamedFunction)
|
|
542
|
-
# named_fn = self.catalog.paths[path]
|
|
543
|
-
# f = func.FunctionRegistry.get().get_function(id=named_fn._id)
|
|
544
|
-
# if f.md.signature != fn.md.signature:
|
|
545
|
-
# raise excs.Error(
|
|
546
|
-
# f'The function signature cannot be changed. The existing signature is {f.md.signature}')
|
|
547
|
-
# if f.is_aggregate != fn.is_aggregate:
|
|
548
|
-
# raise excs.Error(f'Cannot change an aggregate function into a non-aggregate function and vice versa')
|
|
549
|
-
# func.FunctionRegistry.get().update_function(named_fn._id, fn)
|
|
550
|
-
# _logger.info(f'Updated function {path_str}')
|
|
551
|
-
#
|
|
552
|
-
# def get_function(self, path_str: str) -> func.Function:
|
|
553
|
-
# """Get a handle to a stored function.
|
|
554
|
-
#
|
|
555
|
-
# Args:
|
|
556
|
-
# path_str: path to the function
|
|
557
|
-
#
|
|
558
|
-
# Returns:
|
|
559
|
-
# Function object
|
|
560
|
-
#
|
|
561
|
-
# Raises:
|
|
562
|
-
# Error: if the path does not exist or is not a function
|
|
563
|
-
#
|
|
564
|
-
# Examples:
|
|
565
|
-
# >>> detect = cl.get_function('my_dir.detect')
|
|
566
|
-
# """
|
|
567
|
-
# path = catalog.Path(path_str)
|
|
568
|
-
# self.catalog.paths.check_is_valid(path, expected=catalog.NamedFunction)
|
|
569
|
-
# named_fn = self.catalog.paths[path]
|
|
570
|
-
# assert isinstance(named_fn, catalog.NamedFunction)
|
|
571
|
-
# fn = func.FunctionRegistry.get().get_function(id=named_fn._id)
|
|
572
|
-
# fn.md.fqn = str(path)
|
|
573
|
-
# return fn
|
|
574
|
-
#
|
|
575
|
-
# def drop_function(self, path_str: str, ignore_errors: bool = False) -> None:
|
|
576
|
-
# """Deletes stored function.
|
|
577
|
-
#
|
|
578
|
-
# Args:
|
|
579
|
-
# path_str: path to the function
|
|
580
|
-
# ignore_errors: if True, does not raise if the function does not exist
|
|
581
|
-
#
|
|
582
|
-
# Raises:
|
|
583
|
-
# Error: if the path does not exist or is not a function
|
|
584
|
-
#
|
|
585
|
-
# Examples:
|
|
586
|
-
# >>> cl.drop_function('my_dir.detect')
|
|
587
|
-
# """
|
|
588
|
-
# path = catalog.Path(path_str)
|
|
589
|
-
# try:
|
|
590
|
-
# self.catalog.paths.check_is_valid(path, expected=catalog.NamedFunction)
|
|
591
|
-
# except excs.Error as e:
|
|
592
|
-
# if ignore_errors:
|
|
593
|
-
# return
|
|
594
|
-
# else:
|
|
595
|
-
# raise e
|
|
596
|
-
# named_fn = self.catalog.paths[path]
|
|
597
|
-
# func.FunctionRegistry.get().delete_function(named_fn._id)
|
|
598
|
-
# del self.catalog.paths[path]
|
|
599
|
-
# _logger.info(f'Dropped function {path_str}')
|
|
600
|
-
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
-
|
|
4
|
-
import sqlalchemy as sql
|
|
5
|
-
import PIL
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
from .expr import Expr
|
|
9
|
-
from .predicate import Predicate
|
|
10
|
-
from .column_ref import ColumnRef
|
|
11
|
-
from .data_row import DataRow
|
|
12
|
-
from .row_builder import RowBuilder
|
|
13
|
-
import pixeltable.catalog as catalog
|
|
14
|
-
import pixeltable.utils.clip as clip
|
|
15
|
-
|
|
16
|
-
class ImageSimilarityPredicate(Predicate):
|
|
17
|
-
def __init__(self, img_col_ref: ColumnRef, img: Optional[PIL.Image.Image] = None, text: Optional[str] = None):
|
|
18
|
-
assert (img is None) != (text is None)
|
|
19
|
-
super().__init__()
|
|
20
|
-
self.img_col_ref = img_col_ref
|
|
21
|
-
self.components = [img_col_ref]
|
|
22
|
-
self.img = img
|
|
23
|
-
self.text = text
|
|
24
|
-
self.id = self._create_id()
|
|
25
|
-
|
|
26
|
-
def embedding(self) -> np.ndarray:
|
|
27
|
-
if self.text is not None:
|
|
28
|
-
return clip.embed_text(self.text)
|
|
29
|
-
else:
|
|
30
|
-
return clip.embed_image(self.img)
|
|
31
|
-
|
|
32
|
-
def __str__(self) -> str:
|
|
33
|
-
return f'{str(self.img_col_ref)}.nearest({"<img>" if self.img is not None else self.text})'
|
|
34
|
-
|
|
35
|
-
def _equals(self, other: ImageSimilarityPredicate) -> bool:
|
|
36
|
-
return False
|
|
37
|
-
|
|
38
|
-
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
39
|
-
return super()._id_attrs() + [('img', id(self.img)), ('text', self.text)]
|
|
40
|
-
|
|
41
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
42
|
-
return None
|
|
43
|
-
|
|
44
|
-
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
45
|
-
assert False
|
|
46
|
-
|
|
47
|
-
def _as_dict(self) -> Dict:
|
|
48
|
-
assert False, 'not implemented'
|
|
49
|
-
# TODO: convert self.img into a serializable string
|
|
50
|
-
return {'img': self.img, 'text': self.text, **super()._as_dict()}
|
|
51
|
-
|
|
52
|
-
@classmethod
|
|
53
|
-
def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
|
|
54
|
-
assert 'img' in d
|
|
55
|
-
assert 'text' in d
|
|
56
|
-
assert len(components) == 1
|
|
57
|
-
return cls(components[0], d['img'], d['text'])
|
|
58
|
-
|