pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/index/base.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
-
from typing import Any
|
|
5
4
|
|
|
6
5
|
import sqlalchemy as sql
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
import pixeltable.catalog as catalog
|
|
8
|
+
import pixeltable.exprs as exprs
|
|
9
|
+
import pixeltable.type_system as ts
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class IndexBase(abc.ABC):
|
|
@@ -18,44 +19,34 @@ class IndexBase(abc.ABC):
|
|
|
18
19
|
"""
|
|
19
20
|
|
|
20
21
|
@abc.abstractmethod
|
|
21
|
-
def
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def index_value_expr(self) -> exprs.Expr:
|
|
26
|
-
"""Return expression that computes the value that goes into the index"""
|
|
27
|
-
pass
|
|
22
|
+
def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
|
|
23
|
+
"""
|
|
24
|
+
Validates that the index can be created on column c and returns an expression that computes the index value.
|
|
25
|
+
"""
|
|
28
26
|
|
|
29
27
|
@abc.abstractmethod
|
|
30
28
|
def records_value_errors(self) -> bool:
|
|
31
29
|
"""True if index_value_expr() can raise errors"""
|
|
32
|
-
pass
|
|
33
30
|
|
|
34
31
|
@abc.abstractmethod
|
|
35
|
-
def
|
|
32
|
+
def get_index_sa_type(self, value_col_type: ts.ColumnType) -> sql.types.TypeEngine:
|
|
36
33
|
"""Return the sqlalchemy type of the index value column"""
|
|
37
|
-
pass
|
|
38
34
|
|
|
39
35
|
@abc.abstractmethod
|
|
40
|
-
def
|
|
41
|
-
"""
|
|
42
|
-
pass
|
|
36
|
+
def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
|
|
37
|
+
"""Return a sqlalchemy statement for creating the index"""
|
|
43
38
|
|
|
44
39
|
@abc.abstractmethod
|
|
45
40
|
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
46
41
|
"""Drop the index on the index value column"""
|
|
47
|
-
pass
|
|
48
42
|
|
|
49
43
|
@classmethod
|
|
50
44
|
@abc.abstractmethod
|
|
51
|
-
def display_name(cls) -> str:
|
|
52
|
-
pass
|
|
45
|
+
def display_name(cls) -> str: ...
|
|
53
46
|
|
|
54
47
|
@abc.abstractmethod
|
|
55
|
-
def as_dict(self) -> dict:
|
|
56
|
-
pass
|
|
48
|
+
def as_dict(self) -> dict: ...
|
|
57
49
|
|
|
58
50
|
@classmethod
|
|
59
51
|
@abc.abstractmethod
|
|
60
|
-
def from_dict(cls,
|
|
61
|
-
pass
|
|
52
|
+
def from_dict(cls, d: dict) -> IndexBase: ...
|
pixeltable/index/btree.py
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
5
|
# TODO: why does this import result in a circular import, but the one in embedding_index.py doesn't?
|
|
6
6
|
# import pixeltable.catalog as catalog
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import pixeltable.exprs as exprs
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
10
|
from pixeltable.func.udf import udf
|
|
11
11
|
|
|
12
12
|
from .base import IndexBase
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
15
|
-
import pixeltable.
|
|
15
|
+
import pixeltable.catalog as catalog
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class BtreeIndex(IndexBase):
|
|
@@ -22,42 +22,43 @@ class BtreeIndex(IndexBase):
|
|
|
22
22
|
|
|
23
23
|
MAX_STRING_LEN = 256
|
|
24
24
|
|
|
25
|
-
value_expr: 'pixeltable.exprs.Expr'
|
|
26
|
-
|
|
27
25
|
@staticmethod
|
|
28
26
|
@udf
|
|
29
|
-
def str_filter(s:
|
|
27
|
+
def str_filter(s: str | None) -> str | None:
|
|
30
28
|
if s is None:
|
|
31
29
|
return None
|
|
32
30
|
return s[: BtreeIndex.MAX_STRING_LEN]
|
|
33
31
|
|
|
34
|
-
def __init__(self
|
|
32
|
+
def __init__(self) -> None:
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
def create_value_expr(self, c: 'catalog.Column') -> 'exprs.Expr':
|
|
35
36
|
if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
|
|
36
37
|
raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
|
|
38
|
+
value_expr: exprs.Expr
|
|
37
39
|
if c.col_type.is_media_type():
|
|
38
40
|
# an index on a media column is an index on the file url
|
|
39
41
|
# no validation for media columns: we're only interested in the string value
|
|
40
|
-
|
|
42
|
+
value_expr = exprs.ColumnRef(c, perform_validation=False)
|
|
41
43
|
else:
|
|
42
|
-
|
|
44
|
+
value_expr = (
|
|
43
45
|
BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
|
|
44
46
|
)
|
|
45
|
-
|
|
46
|
-
def index_value_expr(self) -> 'exprs.Expr':
|
|
47
|
-
return self.value_expr
|
|
47
|
+
return value_expr
|
|
48
48
|
|
|
49
49
|
def records_value_errors(self) -> bool:
|
|
50
50
|
return False
|
|
51
51
|
|
|
52
|
-
def
|
|
52
|
+
def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
|
|
53
53
|
"""Return the sqlalchemy type of the index value column"""
|
|
54
|
-
return
|
|
54
|
+
return val_col_type.to_sa_type()
|
|
55
|
+
|
|
56
|
+
def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
|
|
57
|
+
"""Return a sqlalchemy statement for creating the index"""
|
|
58
|
+
from sqlalchemy.dialects import postgresql
|
|
55
59
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
idx = sql.Index(index_name, index_value_col.sa_col, postgresql_using='btree')
|
|
59
|
-
conn = Env.get().conn
|
|
60
|
-
idx.create(bind=conn)
|
|
60
|
+
sa_idx = sql.Index(store_index_name, sa_value_col, postgresql_using='btree')
|
|
61
|
+
return sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(dialect=postgresql.dialect())
|
|
61
62
|
|
|
62
63
|
def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
|
|
63
64
|
"""Drop the index on the index value column"""
|
|
@@ -72,5 +73,5 @@ class BtreeIndex(IndexBase):
|
|
|
72
73
|
return {}
|
|
73
74
|
|
|
74
75
|
@classmethod
|
|
75
|
-
def from_dict(cls,
|
|
76
|
-
return cls(
|
|
76
|
+
def from_dict(cls, d: dict) -> 'BtreeIndex':
|
|
77
|
+
return cls()
|
pixeltable/index/embedding_index.py
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import enum
|
|
4
|
-
from typing import Any, ClassVar
|
|
4
|
+
from typing import Any, ClassVar
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
8
8
|
import PIL.Image
|
|
9
9
|
import sqlalchemy as sql
|
|
10
10
|
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
11
12
|
import pixeltable.exceptions as excs
|
|
13
|
+
import pixeltable.exprs as exprs
|
|
14
|
+
import pixeltable.func as func
|
|
12
15
|
import pixeltable.type_system as ts
|
|
13
|
-
from pixeltable import catalog, exprs, func
|
|
14
16
|
from pixeltable.env import Env
|
|
15
17
|
|
|
16
18
|
from .base import IndexBase
|
|
@@ -39,28 +41,23 @@ class EmbeddingIndex(IndexBase):
|
|
|
39
41
|
}
|
|
40
42
|
|
|
41
43
|
metric: Metric
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
image_embed: Optional[func.Function]
|
|
44
|
+
string_embed: func.Function | None
|
|
45
|
+
image_embed: func.Function | None
|
|
45
46
|
string_embed_signature_idx: int
|
|
46
47
|
image_embed_signature_idx: int
|
|
47
|
-
index_col_type: pgvector.sqlalchemy.Vector
|
|
48
48
|
|
|
49
49
|
def __init__(
|
|
50
50
|
self,
|
|
51
|
-
c: catalog.Column,
|
|
52
51
|
metric: str,
|
|
53
|
-
embed:
|
|
54
|
-
string_embed:
|
|
55
|
-
image_embed:
|
|
52
|
+
embed: func.Function | None = None,
|
|
53
|
+
string_embed: func.Function | None = None,
|
|
54
|
+
image_embed: func.Function | None = None,
|
|
56
55
|
):
|
|
57
56
|
if embed is None and string_embed is None and image_embed is None:
|
|
58
57
|
raise excs.Error('At least one of `embed`, `string_embed`, or `image_embed` must be specified')
|
|
59
58
|
metric_names = [m.name.lower() for m in self.Metric]
|
|
60
59
|
if metric.lower() not in metric_names:
|
|
61
60
|
raise excs.Error(f'Invalid metric {metric}, must be one of {metric_names}')
|
|
62
|
-
if not c.col_type.is_string_type() and not c.col_type.is_image_type():
|
|
63
|
-
raise excs.Error('Embedding index requires string or image column')
|
|
64
61
|
|
|
65
62
|
self.string_embed = None
|
|
66
63
|
self.image_embed = None
|
|
@@ -102,51 +99,43 @@ class EmbeddingIndex(IndexBase):
|
|
|
102
99
|
)
|
|
103
100
|
|
|
104
101
|
# Now validate the return types of the embedding functions.
|
|
105
|
-
|
|
106
102
|
if self.string_embed is not None:
|
|
107
103
|
self._validate_embedding_fn(self.string_embed)
|
|
108
|
-
|
|
109
104
|
if self.image_embed is not None:
|
|
110
105
|
self._validate_embedding_fn(self.image_embed)
|
|
111
106
|
|
|
107
|
+
self.metric = self.Metric[metric.upper()]
|
|
108
|
+
|
|
109
|
+
def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
|
|
110
|
+
if not c.col_type.is_string_type() and not c.col_type.is_image_type():
|
|
111
|
+
raise excs.Error(
|
|
112
|
+
f'Embedding index requires string or image column, column {c.name!r} has type {c.col_type}'
|
|
113
|
+
)
|
|
112
114
|
if c.col_type.is_string_type() and self.string_embed is None:
|
|
113
115
|
raise excs.Error(f"Text embedding function is required for column {c.name} (parameter 'string_embed')")
|
|
114
116
|
if c.col_type.is_image_type() and self.image_embed is None:
|
|
115
117
|
raise excs.Error(f"Image embedding function is required for column {c.name} (parameter 'image_embed')")
|
|
116
118
|
|
|
117
|
-
|
|
118
|
-
self.value_expr = (
|
|
119
|
+
return (
|
|
119
120
|
self.string_embed(exprs.ColumnRef(c))
|
|
120
121
|
if c.col_type.is_string_type()
|
|
121
122
|
else self.image_embed(exprs.ColumnRef(c))
|
|
122
123
|
)
|
|
123
|
-
assert isinstance(self.value_expr.col_type, ts.ArrayType)
|
|
124
|
-
vector_size = self.value_expr.col_type.shape[0]
|
|
125
|
-
assert vector_size is not None
|
|
126
|
-
self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
|
|
127
|
-
|
|
128
|
-
def index_value_expr(self) -> exprs.Expr:
|
|
129
|
-
"""Return expression that computes the value that goes into the index"""
|
|
130
|
-
return self.value_expr
|
|
131
124
|
|
|
132
125
|
def records_value_errors(self) -> bool:
|
|
133
126
|
return True
|
|
134
127
|
|
|
135
|
-
def
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
postgresql_with={'m': 16, 'ef_construction': 64},
|
|
146
|
-
postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]},
|
|
128
|
+
def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
|
|
129
|
+
assert isinstance(val_col_type, ts.ArrayType) and val_col_type.shape is not None
|
|
130
|
+
vector_size = val_col_type.shape[0]
|
|
131
|
+
assert vector_size is not None
|
|
132
|
+
return pgvector.sqlalchemy.Vector(vector_size)
|
|
133
|
+
|
|
134
|
+
def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
|
|
135
|
+
"""Return a sqlalchemy statement for creating the index"""
|
|
136
|
+
return Env.get().dbms.create_vector_index_stmt(
|
|
137
|
+
store_index_name, sa_value_col, metric=self.PGVECTOR_OPS[self.metric]
|
|
147
138
|
)
|
|
148
|
-
conn = Env.get().conn
|
|
149
|
-
idx.create(bind=conn)
|
|
150
139
|
|
|
151
140
|
def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
|
|
152
141
|
"""Drop the index on the index value column"""
|
|
@@ -156,6 +145,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
156
145
|
def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
|
|
157
146
|
"""Create a ColumnElement that represents '<val_column> <op> <item>'"""
|
|
158
147
|
assert isinstance(item, (str, PIL.Image.Image))
|
|
148
|
+
embedding: np.ndarray
|
|
159
149
|
if isinstance(item, str):
|
|
160
150
|
assert self.string_embed is not None
|
|
161
151
|
embedding = self.string_embed.exec([item], {})
|
|
@@ -174,7 +164,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
174
164
|
def order_by_clause(self, val_column: catalog.Column, item: Any, is_asc: bool) -> sql.ColumnElement:
|
|
175
165
|
"""Create a ColumnElement that is used in an ORDER BY clause"""
|
|
176
166
|
assert isinstance(item, (str, PIL.Image.Image))
|
|
177
|
-
embedding:
|
|
167
|
+
embedding: np.ndarray | None = None
|
|
178
168
|
if isinstance(item, str):
|
|
179
169
|
assert self.string_embed is not None
|
|
180
170
|
embedding = self.string_embed.exec([item], {})
|
|
@@ -199,9 +189,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
199
189
|
return 'embedding'
|
|
200
190
|
|
|
201
191
|
@classmethod
|
|
202
|
-
def _resolve_embedding_fn(
|
|
203
|
-
cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
|
|
204
|
-
) -> Optional[func.Function]:
|
|
192
|
+
def _resolve_embedding_fn(cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type) -> func.Function | None:
|
|
205
193
|
"""Find an overload resolution for `embed_fn` that matches the given type."""
|
|
206
194
|
assert isinstance(embed_fn, func.Function)
|
|
207
195
|
for resolved_fn in embed_fn._resolved_fns:
|
|
@@ -255,7 +243,7 @@ class EmbeddingIndex(IndexBase):
|
|
|
255
243
|
}
|
|
256
244
|
|
|
257
245
|
@classmethod
|
|
258
|
-
def from_dict(cls,
|
|
246
|
+
def from_dict(cls, d: dict) -> EmbeddingIndex:
|
|
259
247
|
string_embed = func.Function.from_dict(d['string_embed']) if d['string_embed'] is not None else None
|
|
260
248
|
image_embed = func.Function.from_dict(d['image_embed']) if d['image_embed'] is not None else None
|
|
261
|
-
return cls(
|
|
249
|
+
return cls(metric=d['metric'], string_embed=string_embed, image_embed=image_embed)
|
pixeltable/io/__init__.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
|
+
"""Functions for importing and exporting Pixeltable data."""
|
|
1
2
|
# ruff: noqa: F401
|
|
2
3
|
|
|
3
4
|
from .datarows import import_json, import_rows
|
|
4
|
-
from .external_store import ExternalStore
|
|
5
|
+
from .external_store import ExternalStore
|
|
5
6
|
from .globals import create_label_studio_project, export_images_as_fo_dataset
|
|
6
7
|
from .hf_datasets import import_huggingface_dataset
|
|
8
|
+
from .lancedb import export_lancedb
|
|
7
9
|
from .pandas import import_csv, import_excel, import_pandas
|
|
8
10
|
from .parquet import export_parquet, import_parquet
|
|
9
11
|
|
|
10
12
|
__default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
|
|
11
|
-
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows'}
|
|
13
|
+
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows', 'lancedb'}
|
|
12
14
|
__all__ = sorted(__default_dir - __removed_symbols)
|
|
13
15
|
|
|
14
16
|
|
pixeltable/io/datarows.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable
|
|
3
|
+
from typing import Any, Iterable
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
import pixeltable.type_system as ts
|
|
@@ -8,7 +8,7 @@ from pixeltable import exceptions as excs
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _infer_schema_from_rows(
|
|
11
|
-
rows: Iterable[dict[str, Any]], schema_overrides: dict[str,
|
|
11
|
+
rows: Iterable[dict[str, Any]], schema_overrides: dict[str, ts.ColumnType], primary_key: list[str]
|
|
12
12
|
) -> dict[str, ts.ColumnType]:
|
|
13
13
|
schema: dict[str, ts.ColumnType] = {}
|
|
14
14
|
cols_with_nones: set[str] = set()
|
|
@@ -20,6 +20,7 @@ def _infer_schema_from_rows(
|
|
|
20
20
|
# in which the column names are encountered in the input data, even if `schema_overrides`
|
|
21
21
|
# is specified.
|
|
22
22
|
if col_name not in schema:
|
|
23
|
+
assert isinstance(schema_overrides[col_name], ts.ColumnType)
|
|
23
24
|
schema[col_name] = schema_overrides[col_name]
|
|
24
25
|
elif value is not None:
|
|
25
26
|
# If `key` is not in `schema_overrides`, then we infer its type from the data.
|
|
@@ -59,8 +60,8 @@ def import_rows(
|
|
|
59
60
|
tbl_path: str,
|
|
60
61
|
rows: list[dict[str, Any]],
|
|
61
62
|
*,
|
|
62
|
-
schema_overrides:
|
|
63
|
-
primary_key:
|
|
63
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
64
|
+
primary_key: str | list[str] | None = None,
|
|
64
65
|
num_retained_versions: int = 10,
|
|
65
66
|
comment: str = '',
|
|
66
67
|
) -> pxt.Table:
|
|
@@ -103,8 +104,8 @@ def import_json(
|
|
|
103
104
|
tbl_path: str,
|
|
104
105
|
filepath_or_url: str,
|
|
105
106
|
*,
|
|
106
|
-
schema_overrides:
|
|
107
|
-
primary_key:
|
|
107
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
108
|
+
primary_key: str | list[str] | None = None,
|
|
108
109
|
num_retained_versions: int = 10,
|
|
109
110
|
comment: str = '',
|
|
110
111
|
**kwargs: Any,
|