pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (87)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +8 -7
  4. pixeltable/catalog/column.py +11 -8
  5. pixeltable/catalog/insertable_table.py +1 -1
  6. pixeltable/catalog/path_dict.py +8 -6
  7. pixeltable/catalog/table.py +20 -14
  8. pixeltable/catalog/table_version.py +92 -55
  9. pixeltable/catalog/table_version_path.py +7 -9
  10. pixeltable/catalog/view.py +3 -2
  11. pixeltable/dataframe.py +2 -2
  12. pixeltable/env.py +205 -86
  13. pixeltable/exceptions.py +5 -1
  14. pixeltable/exec/aggregation_node.py +2 -1
  15. pixeltable/exec/component_iteration_node.py +2 -2
  16. pixeltable/exec/sql_node.py +11 -8
  17. pixeltable/exprs/__init__.py +2 -2
  18. pixeltable/exprs/arithmetic_expr.py +4 -4
  19. pixeltable/exprs/array_slice.py +2 -1
  20. pixeltable/exprs/column_property_ref.py +9 -7
  21. pixeltable/exprs/column_ref.py +2 -1
  22. pixeltable/exprs/comparison.py +10 -7
  23. pixeltable/exprs/compound_predicate.py +3 -2
  24. pixeltable/exprs/data_row.py +19 -4
  25. pixeltable/exprs/expr.py +51 -41
  26. pixeltable/exprs/expr_set.py +32 -9
  27. pixeltable/exprs/function_call.py +62 -40
  28. pixeltable/exprs/in_predicate.py +3 -2
  29. pixeltable/exprs/inline_expr.py +200 -0
  30. pixeltable/exprs/is_null.py +3 -2
  31. pixeltable/exprs/json_mapper.py +5 -4
  32. pixeltable/exprs/json_path.py +7 -1
  33. pixeltable/exprs/literal.py +34 -7
  34. pixeltable/exprs/method_ref.py +3 -3
  35. pixeltable/exprs/object_ref.py +6 -5
  36. pixeltable/exprs/row_builder.py +25 -17
  37. pixeltable/exprs/rowid_ref.py +2 -1
  38. pixeltable/exprs/similarity_expr.py +2 -1
  39. pixeltable/exprs/sql_element_cache.py +30 -0
  40. pixeltable/exprs/type_cast.py +3 -3
  41. pixeltable/exprs/variable.py +2 -1
  42. pixeltable/ext/functions/whisperx.py +6 -4
  43. pixeltable/ext/functions/yolox.py +11 -9
  44. pixeltable/func/aggregate_function.py +1 -0
  45. pixeltable/func/function.py +28 -4
  46. pixeltable/functions/__init__.py +4 -2
  47. pixeltable/functions/anthropic.py +15 -5
  48. pixeltable/functions/fireworks.py +1 -1
  49. pixeltable/functions/globals.py +6 -1
  50. pixeltable/functions/huggingface.py +91 -14
  51. pixeltable/functions/image.py +20 -5
  52. pixeltable/functions/json.py +5 -5
  53. pixeltable/functions/mistralai.py +188 -0
  54. pixeltable/functions/openai.py +6 -10
  55. pixeltable/functions/string.py +3 -2
  56. pixeltable/functions/timestamp.py +95 -7
  57. pixeltable/functions/together.py +18 -11
  58. pixeltable/functions/video.py +2 -2
  59. pixeltable/functions/vision.py +69 -37
  60. pixeltable/functions/whisper.py +4 -1
  61. pixeltable/globals.py +5 -1
  62. pixeltable/io/hf_datasets.py +17 -15
  63. pixeltable/io/pandas.py +0 -2
  64. pixeltable/io/parquet.py +15 -14
  65. pixeltable/iterators/document.py +16 -15
  66. pixeltable/metadata/__init__.py +1 -1
  67. pixeltable/metadata/converters/convert_18.py +1 -1
  68. pixeltable/metadata/converters/convert_19.py +46 -0
  69. pixeltable/metadata/converters/convert_20.py +56 -0
  70. pixeltable/metadata/converters/util.py +29 -4
  71. pixeltable/metadata/notes.py +2 -0
  72. pixeltable/metadata/schema.py +5 -4
  73. pixeltable/plan.py +100 -78
  74. pixeltable/store.py +5 -1
  75. pixeltable/tool/create_test_db_dump.py +18 -6
  76. pixeltable/type_system.py +15 -15
  77. pixeltable/utils/documents.py +45 -42
  78. pixeltable/utils/formatter.py +2 -2
  79. pixeltable-0.2.19.dist-info/LICENSE +201 -0
  80. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
  81. pixeltable-0.2.19.dist-info/RECORD +147 -0
  82. pixeltable/exprs/inline_array.py +0 -116
  83. pixeltable/exprs/inline_dict.py +0 -103
  84. pixeltable-0.2.17.dist-info/LICENSE +0 -18
  85. pixeltable-0.2.17.dist-info/RECORD +0 -144
  86. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
  87. {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py CHANGED
@@ -4,7 +4,7 @@ from .exceptions import Error
4
4
  from .exprs import RELATIVE_PATH_ROOT
5
5
  from .func import Function, udf, Aggregator, uda, expr_udf
6
6
  from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
7
- list_dirs, list_functions, configure_logging
7
+ list_dirs, list_functions, configure_logging, array
8
8
  from .type_system import (
9
9
  ColumnType,
10
10
  StringType,
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.17"
3
- __version_tuple__ = (0, 2, 17)
2
+ __version__ = "0.2.19"
3
+ __version_tuple__ = (0, 2, 19)
pixeltable/catalog/catalog.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
3
- from uuid import UUID
2
+
4
3
  import dataclasses
5
4
  import logging
5
+ from typing import Optional
6
+ from uuid import UUID
6
7
 
7
8
  import sqlalchemy as sql
8
9
  import sqlalchemy.orm as orm
@@ -10,8 +11,8 @@ import sqlalchemy.orm as orm
10
11
  from .table_version import TableVersion
11
12
  from .table_version_path import TableVersionPath
12
13
  from .table import Table
13
- from .named_function import NamedFunction
14
14
  from .path_dict import PathDict
15
+
15
16
  import pixeltable.env as env
16
17
  import pixeltable.metadata.schema as schema
17
18
 
@@ -39,10 +40,10 @@ class Catalog:
39
40
  # key: [id, version]
40
41
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
41
42
  # - snapshot versions: records the version of the snapshot
42
- self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}
43
+ self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
43
44
 
44
- self.tbls: Dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
45
- self.tbl_dependents: Dict[UUID, List[Table]] = {}
45
+ self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
46
+ self.tbl_dependents: dict[UUID, list[Table]] = {}
46
47
 
47
48
  self._init_store()
48
49
  self.paths = PathDict() # do this after _init_catalog()
@@ -133,7 +134,7 @@ class Catalog:
133
134
  base_path=base_path if not is_snapshot else None)
134
135
  view_path = TableVersionPath(tbl_version, base=base_path)
135
136
 
136
- tbl = View(
137
+ tbl: Table = View(
137
138
  tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
138
139
  snapshot_only=snapshot_only)
139
140
  self.tbl_dependents[base_tbl_id].append(tbl)
pixeltable/catalog/column.py CHANGED
@@ -1,15 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Callable, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
+ from pixeltable import exprs
10
11
 
11
12
  from .globals import is_valid_identifier
12
13
 
14
+ if TYPE_CHECKING:
15
+ from .table_version import TableVersion
16
+
13
17
  _logger = logging.getLogger('pixeltable')
14
18
 
15
19
  class Column:
@@ -20,7 +24,7 @@ class Column:
20
24
  """
21
25
  def __init__(
22
26
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
23
- computed_with: Optional[Union['Expr', Callable]] = None,
27
+ computed_with: Optional[Union[exprs.Expr, Callable]] = None,
24
28
  is_pk: bool = False, stored: bool = True,
25
29
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
26
30
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
@@ -57,15 +61,14 @@ class Column:
57
61
  if col_type is None and computed_with is None:
58
62
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
59
63
 
60
- self._value_expr: Optional['Expr'] = None
64
+ self._value_expr: Optional[exprs.Expr] = None
61
65
  self.compute_func: Optional[Callable] = None
62
66
  self.value_expr_dict = value_expr_dict
63
- from pixeltable import exprs
64
67
  if computed_with is not None:
65
68
  value_expr = exprs.Expr.from_object(computed_with)
66
69
  if value_expr is None:
67
70
  # computed_with needs to be a Callable
68
- if not isinstance(computed_with, Callable):
71
+ if not callable(computed_with):
69
72
  raise excs.Error(
70
73
  f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
71
74
  f'but it is a {type(computed_with)}')
@@ -103,7 +106,7 @@ class Column:
103
106
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
104
107
 
105
108
  @property
106
- def value_expr(self) -> Optional['Expr']:
109
+ def value_expr(self) -> Optional[exprs.Expr]:
107
110
  """Instantiate value_expr on-demand"""
108
111
  # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
109
112
  # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
@@ -112,7 +115,7 @@ class Column:
112
115
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
113
116
  return self._value_expr
114
117
 
115
- def set_value_expr(self, value_expr: 'Expr') -> None:
118
+ def set_value_expr(self, value_expr: exprs.Expr) -> None:
116
119
  self._value_expr = value_expr
117
120
  self.value_expr_dict = None
118
121
 
@@ -130,7 +133,7 @@ class Column:
130
133
  l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
131
134
  return len(l) > 0
132
135
 
133
- def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
136
+ def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
134
137
  assert self.tbl is not None
135
138
  return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
136
139
 
pixeltable/catalog/insertable_table.py CHANGED
@@ -82,7 +82,7 @@ class InsertableTable(Table):
82
82
  @overload
83
83
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
84
84
 
85
- def insert(
85
+ def insert( # type: ignore[misc]
86
86
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
87
87
  fail_on_exception: bool = True, **kwargs: Any
88
88
  ) -> UpdateStatus:
pixeltable/catalog/path_dict.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import copy
4
4
  import logging
5
- from typing import Optional, List, Dict, Type
5
+ from typing import Optional
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy.orm as orm
@@ -10,6 +10,7 @@ import sqlalchemy.orm as orm
10
10
  from pixeltable import exceptions as excs
11
11
  from pixeltable.env import Env
12
12
  from pixeltable.metadata import schema
13
+
13
14
  from .dir import Dir
14
15
  from .path import Path
15
16
  from .schema_object import SchemaObject
@@ -19,8 +20,8 @@ _logger = logging.getLogger('pixeltable')
19
20
  class PathDict:
20
21
  """Keep track of all paths in a Db instance"""
21
22
  def __init__(self):
22
- self.dir_contents: Dict[UUID, Dict[str, SchemaObject]] = {}
23
- self.schema_objs: Dict[UUID, SchemaObject] = {}
23
+ self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
24
+ self.schema_objs: dict[UUID, SchemaObject] = {}
24
25
 
25
26
  # load dirs
26
27
  with orm.Session(Env.get().engine, future=True) as session:
@@ -36,7 +37,8 @@ class PathDict:
36
37
  self.root_dir = root_dirs[0]
37
38
 
38
39
  # build dir_contents
39
- def record_dir(dir: Dir) -> None:
40
+ def record_dir(dir: SchemaObject) -> None:
41
+ assert isinstance(dir, Dir)
40
42
  if dir._id in self.dir_contents:
41
43
  return
42
44
  else:
@@ -99,7 +101,7 @@ class PathDict:
99
101
  assert to_path.name not in self.dir_contents[to_dir._id]
100
102
  self.dir_contents[to_dir._id][to_path.name] = obj
101
103
 
102
- def check_is_valid(self, path: Path, expected: Optional[Type[SchemaObject]]) -> None:
104
+ def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
103
105
  """Check that path is valid and that the object at path has the expected type.
104
106
 
105
107
  Args:
@@ -124,7 +126,7 @@ class PathDict:
124
126
  obj = self.dir_contents[parent_obj._id][path.name]
125
127
  raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
126
128
 
127
- def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
129
+ def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
128
130
  dir = self._resolve_path(parent)
129
131
  if not isinstance(dir, Dir):
130
132
  raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
pixeltable/catalog/table.py CHANGED
@@ -1,14 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
- import itertools
4
+ import builtins
5
5
  import json
6
6
  import logging
7
7
  from pathlib import Path
8
- from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
8
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
9
9
  from uuid import UUID
10
10
 
11
11
  import pandas as pd
12
+ import pandas.io.formats.style
12
13
  import sqlalchemy as sql
13
14
 
14
15
  import pixeltable
@@ -26,6 +27,9 @@ from .schema_object import SchemaObject
26
27
  from .table_version import TableVersion
27
28
  from .table_version_path import TableVersionPath
28
29
 
30
+ if TYPE_CHECKING:
31
+ import torch.utils.data
32
+
29
33
  _logger = logging.getLogger('pixeltable')
30
34
 
31
35
  class Table(SchemaObject):
@@ -211,23 +215,24 @@ class Table(SchemaObject):
211
215
  })
212
216
  return df
213
217
 
214
- def _description_html(self) -> pd.DataFrame:
218
+ def _description_html(self) -> pandas.io.formats.style.Styler:
215
219
  pd_df = self._description()
216
220
  # white-space: pre-wrap: print \n as newline
217
221
  # th: center-align headings
218
- return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
219
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
222
+ return (
223
+ pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
224
+ .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
220
225
  .hide(axis='index')
226
+ )
221
227
 
222
228
  def describe(self) -> None:
223
229
  """
224
230
  Print the table schema.
225
231
  """
226
- try:
227
- __IPYTHON__
232
+ if getattr(builtins, '__IPYTHON__', False):
228
233
  from IPython.display import display
229
234
  display(self._description_html())
230
- except NameError:
235
+ else:
231
236
  print(self.__repr__())
232
237
 
233
238
  # TODO: Display comments in _repr_html()
@@ -240,7 +245,7 @@ class Table(SchemaObject):
240
245
  return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
241
246
 
242
247
  def _repr_html_(self) -> str:
243
- return self._description_html()._repr_html_()
248
+ return self._description_html()._repr_html_() # type: ignore[attr-defined]
244
249
 
245
250
  def _drop(self) -> None:
246
251
  self._check_is_dropped()
@@ -282,7 +287,7 @@ class Table(SchemaObject):
282
287
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
283
288
  if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
284
289
  raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
285
- self.add_column(**{col_name: spec})
290
+ self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
286
291
 
287
292
  def add_column(
288
293
  self,
@@ -368,7 +373,7 @@ class Table(SchemaObject):
368
373
  col_schema['stored'] = stored
369
374
 
370
375
  new_col = self._create_columns({col_name: col_schema})[0]
371
- self._verify_column(new_col, set(self._schema.keys()), self._query_names)
376
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
372
377
  return self._tbl_version.add_column(new_col, print_stats=print_stats)
373
378
 
374
379
  @classmethod
@@ -395,7 +400,7 @@ class Table(SchemaObject):
395
400
  value_expr = exprs.Expr.from_object(value_spec)
396
401
  if value_expr is None:
397
402
  # needs to be a Callable
398
- if not isinstance(value_spec, Callable):
403
+ if not callable(value_spec):
399
404
  raise excs.Error(
400
405
  f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
401
406
  f'but it is a {type(value_spec)}')
@@ -427,7 +432,7 @@ class Table(SchemaObject):
427
432
  elif isinstance(spec, exprs.Expr):
428
433
  # create copy so we can modify it
429
434
  value_expr = spec.copy()
430
- elif isinstance(spec, Callable):
435
+ elif callable(spec):
431
436
  raise excs.Error((
432
437
  f'Column {name} computed with a Callable: specify using a dictionary with '
433
438
  f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
@@ -546,6 +551,7 @@ class Table(SchemaObject):
546
551
  metric: str = 'cosine'
547
552
  ) -> None:
548
553
  """Add an index to the table.
554
+
549
555
  Args:
550
556
  col_name: name of column to index
551
557
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
@@ -656,7 +662,7 @@ class Table(SchemaObject):
656
662
  @overload
657
663
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
658
664
 
659
- @abc.abstractmethod
665
+ @abc.abstractmethod # type: ignore[misc]
660
666
  def insert(
661
667
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
662
668
  fail_on_exception: bool = True, **kwargs: Any