pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (87)
  1. pixeltable/__init__.py +18 -9
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/column.py +9 -5
  4. pixeltable/catalog/insertable_table.py +0 -2
  5. pixeltable/catalog/table.py +16 -8
  6. pixeltable/catalog/table_version.py +3 -2
  7. pixeltable/dataframe.py +184 -110
  8. pixeltable/env.py +69 -18
  9. pixeltable/exec/__init__.py +2 -1
  10. pixeltable/exec/data_row_batch.py +6 -7
  11. pixeltable/exec/expr_eval_node.py +28 -28
  12. pixeltable/exec/sql_scan_node.py +7 -6
  13. pixeltable/exprs/__init__.py +4 -3
  14. pixeltable/exprs/column_ref.py +9 -0
  15. pixeltable/exprs/expr.py +15 -7
  16. pixeltable/exprs/function_call.py +17 -15
  17. pixeltable/exprs/image_member_access.py +9 -28
  18. pixeltable/exprs/in_predicate.py +96 -0
  19. pixeltable/exprs/inline_array.py +13 -11
  20. pixeltable/exprs/inline_dict.py +15 -13
  21. pixeltable/exprs/row_builder.py +7 -1
  22. pixeltable/exprs/similarity_expr.py +65 -0
  23. pixeltable/func/__init__.py +0 -2
  24. pixeltable/func/aggregate_function.py +3 -0
  25. pixeltable/func/callable_function.py +57 -13
  26. pixeltable/func/expr_template_function.py +11 -2
  27. pixeltable/func/function.py +35 -4
  28. pixeltable/func/signature.py +5 -15
  29. pixeltable/func/udf.py +6 -10
  30. pixeltable/functions/huggingface.py +23 -4
  31. pixeltable/functions/openai.py +34 -1
  32. pixeltable/functions/pil/image.py +61 -64
  33. pixeltable/functions/together.py +21 -0
  34. pixeltable/globals.py +425 -0
  35. pixeltable/index/base.py +3 -1
  36. pixeltable/index/embedding_index.py +87 -14
  37. pixeltable/io/__init__.py +3 -0
  38. pixeltable/{utils → io}/hf_datasets.py +48 -17
  39. pixeltable/io/pandas.py +148 -0
  40. pixeltable/{utils → io}/parquet.py +58 -33
  41. pixeltable/iterators/__init__.py +1 -1
  42. pixeltable/iterators/base.py +4 -0
  43. pixeltable/iterators/document.py +218 -97
  44. pixeltable/iterators/video.py +8 -9
  45. pixeltable/metadata/__init__.py +7 -3
  46. pixeltable/metadata/converters/convert_12.py +3 -0
  47. pixeltable/metadata/converters/convert_13.py +41 -0
  48. pixeltable/plan.py +2 -19
  49. pixeltable/store.py +2 -2
  50. pixeltable/tool/create_test_db_dump.py +32 -13
  51. pixeltable/type_system.py +13 -54
  52. pixeltable/utils/documents.py +42 -12
  53. pixeltable/utils/http_server.py +70 -0
  54. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/METADATA +10 -7
  55. pixeltable-0.2.6.dist-info/RECORD +119 -0
  56. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
  57. pixeltable/client.py +0 -600
  58. pixeltable/exprs/image_similarity_predicate.py +0 -58
  59. pixeltable/func/batched_function.py +0 -53
  60. pixeltable/tests/conftest.py +0 -171
  61. pixeltable/tests/ext/test_yolox.py +0 -21
  62. pixeltable/tests/functions/test_fireworks.py +0 -43
  63. pixeltable/tests/functions/test_functions.py +0 -60
  64. pixeltable/tests/functions/test_huggingface.py +0 -158
  65. pixeltable/tests/functions/test_openai.py +0 -162
  66. pixeltable/tests/functions/test_together.py +0 -112
  67. pixeltable/tests/test_audio.py +0 -65
  68. pixeltable/tests/test_catalog.py +0 -27
  69. pixeltable/tests/test_client.py +0 -21
  70. pixeltable/tests/test_component_view.py +0 -379
  71. pixeltable/tests/test_dataframe.py +0 -440
  72. pixeltable/tests/test_dirs.py +0 -107
  73. pixeltable/tests/test_document.py +0 -120
  74. pixeltable/tests/test_exprs.py +0 -802
  75. pixeltable/tests/test_function.py +0 -332
  76. pixeltable/tests/test_index.py +0 -138
  77. pixeltable/tests/test_migration.py +0 -44
  78. pixeltable/tests/test_nos.py +0 -54
  79. pixeltable/tests/test_snapshot.py +0 -231
  80. pixeltable/tests/test_table.py +0 -1343
  81. pixeltable/tests/test_transactional_directory.py +0 -42
  82. pixeltable/tests/test_types.py +0 -52
  83. pixeltable/tests/test_video.py +0 -159
  84. pixeltable/tests/test_view.py +0 -535
  85. pixeltable/tests/utils.py +0 -442
  86. pixeltable-0.2.5.dist-info/RECORD +0 -139
  87. {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -1,18 +1,30 @@
1
1
  from .catalog import Column, Table, InsertableTable, View
2
- from .client import Client
3
2
  from .dataframe import DataFrame
4
3
  from .exceptions import Error, Error
5
4
  from .exprs import RELATIVE_PATH_ROOT
6
5
  from .func import Function, udf, uda, Aggregator, expr_udf
7
- from .type_system import \
8
- ColumnType, StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ArrayType, ImageType, VideoType, \
9
- AudioType, DocumentType
6
+ from .globals import *
7
+ from .type_system import (
8
+ ColumnType,
9
+ StringType,
10
+ IntType,
11
+ FloatType,
12
+ BoolType,
13
+ TimestampType,
14
+ JsonType,
15
+ ArrayType,
16
+ ImageType,
17
+ VideoType,
18
+ AudioType,
19
+ DocumentType,
20
+ )
10
21
  from .utils.help import help
22
+
11
23
  # noinspection PyUnresolvedReferences
12
- from . import functions
24
+ from . import functions, io
25
+ from .__version__ import __version__, __version_tuple__
13
26
 
14
27
  __all__ = [
15
- 'Client',
16
28
  'DataFrame',
17
29
  'Column',
18
30
  'Table',
@@ -39,6 +51,3 @@ __all__ = [
39
51
  'uda',
40
52
  'expr_udf',
41
53
  ]
42
-
43
-
44
-
@@ -0,0 +1,3 @@
1
+ # These version placeholders will be replaced during build.
2
+ __version__ = "0.2.6"
3
+ __version_tuple__ = (0, 2, 6)
@@ -5,8 +5,8 @@ from typing import Optional, Union, Callable, Set
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- from pixeltable import exceptions as excs
9
- from pixeltable.type_system import ColumnType, StringType
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.type_system as ts
10
10
  from .globals import is_valid_identifier
11
11
 
12
12
  _logger = logging.getLogger('pixeltable')
@@ -18,7 +18,7 @@ class Column:
18
18
  table/view.
19
19
  """
20
20
  def __init__(
21
- self, name: Optional[str], col_type: Optional[ColumnType] = None,
21
+ self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
22
22
  computed_with: Optional[Union['Expr', Callable]] = None,
23
23
  is_pk: bool = False, stored: Optional[bool] = None,
24
24
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
@@ -114,6 +114,10 @@ class Column:
114
114
  l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
115
115
  return len(l) > 0
116
116
 
117
+ def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
118
+ assert self.tbl is not None
119
+ return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
120
+
117
121
  @property
118
122
  def is_computed(self) -> bool:
119
123
  return self.compute_func is not None or self.value_expr is not None
@@ -148,8 +152,8 @@ class Column:
148
152
  self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
149
153
  nullable=True)
150
154
  if self.is_computed or self.col_type.is_media_type():
151
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
152
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
155
+ self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
156
+ self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
153
157
 
154
158
  def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
155
159
  return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
@@ -185,8 +185,6 @@ class InsertableTable(Table):
185
185
  if not isinstance(where, Predicate):
186
186
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
187
187
  analysis_info = Planner.analyze(self.tbl_version_path, where)
188
- if analysis_info.similarity_clause is not None:
189
- raise excs.Error('nearest() cannot be used with delete()')
190
188
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
191
189
  if analysis_info.filter is not None:
192
190
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
@@ -97,6 +97,11 @@ class Table(SchemaObject):
97
97
  from pixeltable.dataframe import DataFrame
98
98
  return DataFrame(self.tbl_version_path).order_by(*items, asc=asc)
99
99
 
100
+ def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
101
+ """Return a DataFrame for this table."""
102
+ from pixeltable.dataframe import DataFrame
103
+ return DataFrame(self.tbl_version_path).group_by(*items)
104
+
100
105
  def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
101
106
  """Return rows from this table.
102
107
  """
@@ -470,13 +475,16 @@ class Table(SchemaObject):
470
475
 
471
476
  def add_embedding_index(
472
477
  self, col_name: str, *, idx_name: Optional[str] = None,
473
- text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None
478
+ text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
479
+ metric: str = 'cosine'
474
480
  ) -> None:
475
481
  """Add an index to the table.
476
482
  Args:
477
483
  col_name: name of column to index
478
484
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
479
- idx_type: type of index (one of 'embedding')
485
+ text_embed: function to embed text; required if the column is a text column
486
+ img_embed: function to embed images; required if the column is an image column
487
+ metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
480
488
 
481
489
  Raises:
482
490
  Error: If an index with that name already exists for the table or if the column does not exist.
@@ -484,11 +492,13 @@ class Table(SchemaObject):
484
492
  Examples:
485
493
  Add an index to the ``img`` column:
486
494
 
487
- >>> tbl.add_embedding_index('img', text_embed=...)
495
+ >>> tbl.add_embedding_index('img', img_embed=...)
488
496
 
489
- Add another index to the ``img`` column, with a specific name:
497
+ Add another index to the ``img`` column, using the inner product as the distance metric,
498
+ and with a specific name; ``text_embed`` is also specified in order to search with text:
490
499
 
491
- >>> tbl.add_embedding_index('img', idx_name='clip_idx', text_embed=...)
500
+ >>> tbl.add_embedding_index(
501
+ 'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
492
502
  """
493
503
  if self.tbl_version_path.is_snapshot():
494
504
  raise excs.Error('Cannot add an index to a snapshot')
@@ -500,7 +510,7 @@ class Table(SchemaObject):
500
510
  raise excs.Error(f'Duplicate index name: {idx_name}')
501
511
  from pixeltable.index import EmbeddingIndex
502
512
  # create the EmbeddingIndex instance to verify args
503
- idx = EmbeddingIndex(col, text_embed=text_embed, img_embed=img_embed)
513
+ idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
504
514
  status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
505
515
  # TODO: how to deal with exceptions here? drop the index and raise?
506
516
 
@@ -582,8 +592,6 @@ class Table(SchemaObject):
582
592
  if not isinstance(where, exprs.Predicate):
583
593
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
584
594
  analysis_info = Planner.analyze(self.tbl_version_path, where)
585
- if analysis_info.similarity_clause is not None:
586
- raise excs.Error('nearest() cannot be used with update()')
587
595
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
588
596
  if analysis_info.filter is not None:
589
597
  raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
@@ -43,6 +43,7 @@ class TableVersion:
43
43
  @dataclasses.dataclass
44
44
  class IndexInfo:
45
45
  id: int
46
+ name: str
46
47
  idx: index.IndexBase
47
48
  col: Column
48
49
  val_col: Column
@@ -272,7 +273,7 @@ class TableVersion:
272
273
  val_col.sa_col_type = idx.index_sa_type()
273
274
  undo_col = self.cols_by_id[md.index_val_undo_col_id]
274
275
  undo_col.sa_col_type = idx.index_sa_type()
275
- idx_info = self.IndexInfo(id=md.id, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
276
+ idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
276
277
  self.idxs_by_name[md.name] = idx_info
277
278
 
278
279
  def _init_sa_schema(self) -> None:
@@ -353,7 +354,7 @@ class TableVersion:
353
354
  indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
354
355
  schema_version_add=self.schema_version, schema_version_drop=None,
355
356
  class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
356
- idx_info = self.IndexInfo(id=idx_id, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
357
+ idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
357
358
  self.idx_md[idx_id] = idx_md
358
359
  self.idxs_by_name[idx_name] = idx_info
359
360