pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/index/base.py CHANGED
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
- from typing import Any
5
4
 
6
5
  import sqlalchemy as sql
7
6
 
8
- from pixeltable import catalog, exprs
7
+ import pixeltable.catalog as catalog
8
+ import pixeltable.exprs as exprs
9
+ import pixeltable.type_system as ts
9
10
 
10
11
 
11
12
  class IndexBase(abc.ABC):
@@ -18,44 +19,34 @@ class IndexBase(abc.ABC):
18
19
  """
19
20
 
20
21
  @abc.abstractmethod
21
- def __init__(self, c: catalog.Column, **kwargs: Any):
22
- pass
23
-
24
- @abc.abstractmethod
25
- def index_value_expr(self) -> exprs.Expr:
26
- """Return expression that computes the value that goes into the index"""
27
- pass
22
+ def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
23
+ """
24
+ Validates that the index can be created on column c and returns an expression that computes the index value.
25
+ """
28
26
 
29
27
  @abc.abstractmethod
30
28
  def records_value_errors(self) -> bool:
31
29
  """True if index_value_expr() can raise errors"""
32
- pass
33
30
 
34
31
  @abc.abstractmethod
35
- def index_sa_type(self) -> sql.types.TypeEngine:
32
+ def get_index_sa_type(self, value_col_type: ts.ColumnType) -> sql.types.TypeEngine:
36
33
  """Return the sqlalchemy type of the index value column"""
37
- pass
38
34
 
39
35
  @abc.abstractmethod
40
- def create_index(self, index_name: str, index_value_col: catalog.Column) -> None:
41
- """Create the index on the index value column"""
42
- pass
36
+ def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
37
+ """Return a sqlalchemy statement for creating the index"""
43
38
 
44
39
  @abc.abstractmethod
45
40
  def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
46
41
  """Drop the index on the index value column"""
47
- pass
48
42
 
49
43
  @classmethod
50
44
  @abc.abstractmethod
51
- def display_name(cls) -> str:
52
- pass
45
+ def display_name(cls) -> str: ...
53
46
 
54
47
  @abc.abstractmethod
55
- def as_dict(self) -> dict:
56
- pass
48
+ def as_dict(self) -> dict: ...
57
49
 
58
50
  @classmethod
59
51
  @abc.abstractmethod
60
- def from_dict(cls, c: catalog.Column, d: dict) -> IndexBase:
61
- pass
52
+ def from_dict(cls, d: dict) -> IndexBase: ...
pixeltable/index/btree.py CHANGED
@@ -1,18 +1,18 @@
1
- from typing import TYPE_CHECKING, Optional
1
+ from typing import TYPE_CHECKING
2
2
 
3
3
  import sqlalchemy as sql
4
4
 
5
5
  # TODO: why does this import result in a circular import, but the one im embedding_index.py doesn't?
6
6
  # import pixeltable.catalog as catalog
7
7
  import pixeltable.exceptions as excs
8
- from pixeltable import catalog, exprs
9
- from pixeltable.env import Env
8
+ import pixeltable.exprs as exprs
9
+ import pixeltable.type_system as ts
10
10
  from pixeltable.func.udf import udf
11
11
 
12
12
  from .base import IndexBase
13
13
 
14
14
  if TYPE_CHECKING:
15
- import pixeltable.exprs
15
+ import pixeltable.catalog as catalog
16
16
 
17
17
 
18
18
  class BtreeIndex(IndexBase):
@@ -22,42 +22,43 @@ class BtreeIndex(IndexBase):
22
22
 
23
23
  MAX_STRING_LEN = 256
24
24
 
25
- value_expr: 'pixeltable.exprs.Expr'
26
-
27
25
  @staticmethod
28
26
  @udf
29
- def str_filter(s: Optional[str]) -> Optional[str]:
27
+ def str_filter(s: str | None) -> str | None:
30
28
  if s is None:
31
29
  return None
32
30
  return s[: BtreeIndex.MAX_STRING_LEN]
33
31
 
34
- def __init__(self, c: 'catalog.Column'):
32
+ def __init__(self) -> None:
33
+ pass
34
+
35
+ def create_value_expr(self, c: 'catalog.Column') -> 'exprs.Expr':
35
36
  if not c.col_type.is_scalar_type() and not c.col_type.is_media_type():
36
37
  raise excs.Error(f'Index on column {c.name}: B-tree index requires scalar or media type, got {c.col_type}')
38
+ value_expr: exprs.Expr
37
39
  if c.col_type.is_media_type():
38
40
  # an index on a media column is an index on the file url
39
41
  # no validation for media columns: we're only interested in the string value
40
- self.value_expr = exprs.ColumnRef(c, perform_validation=False)
42
+ value_expr = exprs.ColumnRef(c, perform_validation=False)
41
43
  else:
42
- self.value_expr = (
44
+ value_expr = (
43
45
  BtreeIndex.str_filter(exprs.ColumnRef(c)) if c.col_type.is_string_type() else exprs.ColumnRef(c)
44
46
  )
45
-
46
- def index_value_expr(self) -> 'exprs.Expr':
47
- return self.value_expr
47
+ return value_expr
48
48
 
49
49
  def records_value_errors(self) -> bool:
50
50
  return False
51
51
 
52
- def index_sa_type(self) -> sql.types.TypeEngine:
52
+ def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
53
53
  """Return the sqlalchemy type of the index value column"""
54
- return self.value_expr.col_type.to_sa_type()
54
+ return val_col_type.to_sa_type()
55
+
56
+ def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
57
+ """Return a sqlalchemy statement for creating the index"""
58
+ from sqlalchemy.dialects import postgresql
55
59
 
56
- def create_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
57
- """Create the index on the index value column"""
58
- idx = sql.Index(index_name, index_value_col.sa_col, postgresql_using='btree')
59
- conn = Env.get().conn
60
- idx.create(bind=conn)
60
+ sa_idx = sql.Index(store_index_name, sa_value_col, postgresql_using='btree')
61
+ return sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(dialect=postgresql.dialect())
61
62
 
62
63
  def drop_index(self, index_name: str, index_value_col: 'catalog.Column') -> None:
63
64
  """Drop the index on the index value column"""
@@ -72,5 +73,5 @@ class BtreeIndex(IndexBase):
72
73
  return {}
73
74
 
74
75
  @classmethod
75
- def from_dict(cls, c: 'catalog.Column', d: dict) -> 'BtreeIndex':
76
- return cls(c)
76
+ def from_dict(cls, d: dict) -> 'BtreeIndex':
77
+ return cls()
pixeltable/index/embedding_index.py CHANGED
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from typing import Any, ClassVar, Optional
4
+ from typing import Any, ClassVar
5
5
 
6
6
  import numpy as np
7
7
  import pgvector.sqlalchemy # type: ignore[import-untyped]
8
8
  import PIL.Image
9
9
  import sqlalchemy as sql
10
10
 
11
+ import pixeltable.catalog as catalog
11
12
  import pixeltable.exceptions as excs
13
+ import pixeltable.exprs as exprs
14
+ import pixeltable.func as func
12
15
  import pixeltable.type_system as ts
13
- from pixeltable import catalog, exprs, func
14
16
  from pixeltable.env import Env
15
17
 
16
18
  from .base import IndexBase
@@ -39,28 +41,23 @@ class EmbeddingIndex(IndexBase):
39
41
  }
40
42
 
41
43
  metric: Metric
42
- value_expr: exprs.FunctionCall
43
- string_embed: Optional[func.Function]
44
- image_embed: Optional[func.Function]
44
+ string_embed: func.Function | None
45
+ image_embed: func.Function | None
45
46
  string_embed_signature_idx: int
46
47
  image_embed_signature_idx: int
47
- index_col_type: pgvector.sqlalchemy.Vector
48
48
 
49
49
  def __init__(
50
50
  self,
51
- c: catalog.Column,
52
51
  metric: str,
53
- embed: Optional[func.Function] = None,
54
- string_embed: Optional[func.Function] = None,
55
- image_embed: Optional[func.Function] = None,
52
+ embed: func.Function | None = None,
53
+ string_embed: func.Function | None = None,
54
+ image_embed: func.Function | None = None,
56
55
  ):
57
56
  if embed is None and string_embed is None and image_embed is None:
58
57
  raise excs.Error('At least one of `embed`, `string_embed`, or `image_embed` must be specified')
59
58
  metric_names = [m.name.lower() for m in self.Metric]
60
59
  if metric.lower() not in metric_names:
61
60
  raise excs.Error(f'Invalid metric {metric}, must be one of {metric_names}')
62
- if not c.col_type.is_string_type() and not c.col_type.is_image_type():
63
- raise excs.Error('Embedding index requires string or image column')
64
61
 
65
62
  self.string_embed = None
66
63
  self.image_embed = None
@@ -102,51 +99,43 @@ class EmbeddingIndex(IndexBase):
102
99
  )
103
100
 
104
101
  # Now validate the return types of the embedding functions.
105
-
106
102
  if self.string_embed is not None:
107
103
  self._validate_embedding_fn(self.string_embed)
108
-
109
104
  if self.image_embed is not None:
110
105
  self._validate_embedding_fn(self.image_embed)
111
106
 
107
+ self.metric = self.Metric[metric.upper()]
108
+
109
+ def create_value_expr(self, c: catalog.Column) -> exprs.Expr:
110
+ if not c.col_type.is_string_type() and not c.col_type.is_image_type():
111
+ raise excs.Error(
112
+ f'Embedding index requires string or image column, column {c.name!r} has type {c.col_type}'
113
+ )
112
114
  if c.col_type.is_string_type() and self.string_embed is None:
113
115
  raise excs.Error(f"Text embedding function is required for column {c.name} (parameter 'string_embed')")
114
116
  if c.col_type.is_image_type() and self.image_embed is None:
115
117
  raise excs.Error(f"Image embedding function is required for column {c.name} (parameter 'image_embed')")
116
118
 
117
- self.metric = self.Metric[metric.upper()]
118
- self.value_expr = (
119
+ return (
119
120
  self.string_embed(exprs.ColumnRef(c))
120
121
  if c.col_type.is_string_type()
121
122
  else self.image_embed(exprs.ColumnRef(c))
122
123
  )
123
- assert isinstance(self.value_expr.col_type, ts.ArrayType)
124
- vector_size = self.value_expr.col_type.shape[0]
125
- assert vector_size is not None
126
- self.index_col_type = pgvector.sqlalchemy.Vector(vector_size)
127
-
128
- def index_value_expr(self) -> exprs.Expr:
129
- """Return expression that computes the value that goes into the index"""
130
- return self.value_expr
131
124
 
132
125
  def records_value_errors(self) -> bool:
133
126
  return True
134
127
 
135
- def index_sa_type(self) -> sql.types.TypeEngine:
136
- """Return the sqlalchemy type of the index value column"""
137
- return self.index_col_type
138
-
139
- def create_index(self, index_name: str, index_value_col: catalog.Column) -> None:
140
- """Create the index on the index value column"""
141
- idx = sql.Index(
142
- index_name,
143
- index_value_col.sa_col,
144
- postgresql_using='hnsw',
145
- postgresql_with={'m': 16, 'ef_construction': 64},
146
- postgresql_ops={index_value_col.sa_col.name: self.PGVECTOR_OPS[self.metric]},
128
+ def get_index_sa_type(self, val_col_type: ts.ColumnType) -> sql.types.TypeEngine:
129
+ assert isinstance(val_col_type, ts.ArrayType) and val_col_type.shape is not None
130
+ vector_size = val_col_type.shape[0]
131
+ assert vector_size is not None
132
+ return pgvector.sqlalchemy.Vector(vector_size)
133
+
134
+ def sa_create_stmt(self, store_index_name: str, sa_value_col: sql.Column) -> sql.Compiled:
135
+ """Return a sqlalchemy statement for creating the index"""
136
+ return Env.get().dbms.create_vector_index_stmt(
137
+ store_index_name, sa_value_col, metric=self.PGVECTOR_OPS[self.metric]
147
138
  )
148
- conn = Env.get().conn
149
- idx.create(bind=conn)
150
139
 
151
140
  def drop_index(self, index_name: str, index_value_col: catalog.Column) -> None:
152
141
  """Drop the index on the index value column"""
@@ -156,6 +145,7 @@ class EmbeddingIndex(IndexBase):
156
145
  def similarity_clause(self, val_column: catalog.Column, item: Any) -> sql.ColumnElement:
157
146
  """Create a ColumnElement that represents '<val_column> <op> <item>'"""
158
147
  assert isinstance(item, (str, PIL.Image.Image))
148
+ embedding: np.ndarray
159
149
  if isinstance(item, str):
160
150
  assert self.string_embed is not None
161
151
  embedding = self.string_embed.exec([item], {})
@@ -174,7 +164,7 @@ class EmbeddingIndex(IndexBase):
174
164
  def order_by_clause(self, val_column: catalog.Column, item: Any, is_asc: bool) -> sql.ColumnElement:
175
165
  """Create a ColumnElement that is used in an ORDER BY clause"""
176
166
  assert isinstance(item, (str, PIL.Image.Image))
177
- embedding: Optional[np.ndarray] = None
167
+ embedding: np.ndarray | None = None
178
168
  if isinstance(item, str):
179
169
  assert self.string_embed is not None
180
170
  embedding = self.string_embed.exec([item], {})
@@ -199,9 +189,7 @@ class EmbeddingIndex(IndexBase):
199
189
  return 'embedding'
200
190
 
201
191
  @classmethod
202
- def _resolve_embedding_fn(
203
- cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type
204
- ) -> Optional[func.Function]:
192
+ def _resolve_embedding_fn(cls, embed_fn: func.Function, expected_type: ts.ColumnType.Type) -> func.Function | None:
205
193
  """Find an overload resolution for `embed_fn` that matches the given type."""
206
194
  assert isinstance(embed_fn, func.Function)
207
195
  for resolved_fn in embed_fn._resolved_fns:
@@ -255,7 +243,7 @@ class EmbeddingIndex(IndexBase):
255
243
  }
256
244
 
257
245
  @classmethod
258
- def from_dict(cls, c: catalog.Column, d: dict) -> EmbeddingIndex:
246
+ def from_dict(cls, d: dict) -> EmbeddingIndex:
259
247
  string_embed = func.Function.from_dict(d['string_embed']) if d['string_embed'] is not None else None
260
248
  image_embed = func.Function.from_dict(d['image_embed']) if d['image_embed'] is not None else None
261
- return cls(c, metric=d['metric'], string_embed=string_embed, image_embed=image_embed)
249
+ return cls(metric=d['metric'], string_embed=string_embed, image_embed=image_embed)
pixeltable/io/__init__.py CHANGED
@@ -1,14 +1,16 @@
1
+ """Functions for importing and exporting Pixeltable data."""
1
2
  # ruff: noqa: F401
2
3
 
3
4
  from .datarows import import_json, import_rows
4
- from .external_store import ExternalStore, SyncStatus
5
+ from .external_store import ExternalStore
5
6
  from .globals import create_label_studio_project, export_images_as_fo_dataset
6
7
  from .hf_datasets import import_huggingface_dataset
8
+ from .lancedb import export_lancedb
7
9
  from .pandas import import_csv, import_excel, import_pandas
8
10
  from .parquet import export_parquet, import_parquet
9
11
 
10
12
  __default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
11
- __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows'}
13
+ __removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows', 'lancedb'}
12
14
  __all__ = sorted(__default_dir - __removed_symbols)
13
15
 
14
16
 
pixeltable/io/datarows.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Iterable, Optional, Union
3
+ from typing import Any, Iterable
4
4
 
5
5
  import pixeltable as pxt
6
6
  import pixeltable.type_system as ts
@@ -8,7 +8,7 @@ from pixeltable import exceptions as excs
8
8
 
9
9
 
10
10
  def _infer_schema_from_rows(
11
- rows: Iterable[dict[str, Any]], schema_overrides: dict[str, Any], primary_key: list[str]
11
+ rows: Iterable[dict[str, Any]], schema_overrides: dict[str, ts.ColumnType], primary_key: list[str]
12
12
  ) -> dict[str, ts.ColumnType]:
13
13
  schema: dict[str, ts.ColumnType] = {}
14
14
  cols_with_nones: set[str] = set()
@@ -20,6 +20,7 @@ def _infer_schema_from_rows(
20
20
  # in which the column names are encountered in the input data, even if `schema_overrides`
21
21
  # is specified.
22
22
  if col_name not in schema:
23
+ assert isinstance(schema_overrides[col_name], ts.ColumnType)
23
24
  schema[col_name] = schema_overrides[col_name]
24
25
  elif value is not None:
25
26
  # If `key` is not in `schema_overrides`, then we infer its type from the data.
@@ -59,8 +60,8 @@ def import_rows(
59
60
  tbl_path: str,
60
61
  rows: list[dict[str, Any]],
61
62
  *,
62
- schema_overrides: Optional[dict[str, Any]] = None,
63
- primary_key: Optional[Union[str, list[str]]] = None,
63
+ schema_overrides: dict[str, Any] | None = None,
64
+ primary_key: str | list[str] | None = None,
64
65
  num_retained_versions: int = 10,
65
66
  comment: str = '',
66
67
  ) -> pxt.Table:
@@ -103,8 +104,8 @@ def import_json(
103
104
  tbl_path: str,
104
105
  filepath_or_url: str,
105
106
  *,
106
- schema_overrides: Optional[dict[str, Any]] = None,
107
- primary_key: Optional[Union[str, list[str]]] = None,
107
+ schema_overrides: dict[str, Any] | None = None,
108
+ primary_key: str | list[str] | None = None,
108
109
  num_retained_versions: int = 10,
109
110
  comment: str = '',
110
111
  **kwargs: Any,