pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (79)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +8 -7
  3. pixeltable/catalog/column.py +11 -8
  4. pixeltable/catalog/insertable_table.py +1 -1
  5. pixeltable/catalog/path_dict.py +8 -6
  6. pixeltable/catalog/table.py +20 -13
  7. pixeltable/catalog/table_version.py +91 -54
  8. pixeltable/catalog/table_version_path.py +7 -9
  9. pixeltable/catalog/view.py +2 -1
  10. pixeltable/dataframe.py +1 -1
  11. pixeltable/env.py +173 -83
  12. pixeltable/exec/aggregation_node.py +2 -1
  13. pixeltable/exec/component_iteration_node.py +1 -1
  14. pixeltable/exec/sql_node.py +11 -8
  15. pixeltable/exprs/__init__.py +1 -0
  16. pixeltable/exprs/arithmetic_expr.py +4 -4
  17. pixeltable/exprs/array_slice.py +2 -1
  18. pixeltable/exprs/column_property_ref.py +9 -7
  19. pixeltable/exprs/column_ref.py +2 -1
  20. pixeltable/exprs/comparison.py +10 -7
  21. pixeltable/exprs/compound_predicate.py +3 -2
  22. pixeltable/exprs/data_row.py +19 -4
  23. pixeltable/exprs/expr.py +46 -35
  24. pixeltable/exprs/expr_set.py +32 -9
  25. pixeltable/exprs/function_call.py +56 -32
  26. pixeltable/exprs/in_predicate.py +3 -2
  27. pixeltable/exprs/inline_array.py +2 -1
  28. pixeltable/exprs/inline_dict.py +2 -1
  29. pixeltable/exprs/is_null.py +3 -2
  30. pixeltable/exprs/json_mapper.py +5 -4
  31. pixeltable/exprs/json_path.py +7 -1
  32. pixeltable/exprs/literal.py +34 -7
  33. pixeltable/exprs/method_ref.py +3 -3
  34. pixeltable/exprs/object_ref.py +6 -5
  35. pixeltable/exprs/row_builder.py +25 -17
  36. pixeltable/exprs/rowid_ref.py +2 -1
  37. pixeltable/exprs/similarity_expr.py +2 -1
  38. pixeltable/exprs/sql_element_cache.py +30 -0
  39. pixeltable/exprs/type_cast.py +3 -3
  40. pixeltable/exprs/variable.py +2 -1
  41. pixeltable/ext/functions/whisperx.py +4 -4
  42. pixeltable/ext/functions/yolox.py +6 -6
  43. pixeltable/func/aggregate_function.py +1 -0
  44. pixeltable/func/function.py +28 -4
  45. pixeltable/functions/__init__.py +4 -2
  46. pixeltable/functions/anthropic.py +15 -5
  47. pixeltable/functions/fireworks.py +1 -1
  48. pixeltable/functions/globals.py +6 -1
  49. pixeltable/functions/huggingface.py +2 -2
  50. pixeltable/functions/image.py +17 -2
  51. pixeltable/functions/json.py +5 -5
  52. pixeltable/functions/mistralai.py +188 -0
  53. pixeltable/functions/openai.py +6 -10
  54. pixeltable/functions/string.py +3 -2
  55. pixeltable/functions/timestamp.py +95 -7
  56. pixeltable/functions/together.py +4 -4
  57. pixeltable/functions/video.py +2 -2
  58. pixeltable/functions/vision.py +27 -17
  59. pixeltable/functions/whisper.py +1 -1
  60. pixeltable/io/hf_datasets.py +17 -15
  61. pixeltable/io/pandas.py +0 -2
  62. pixeltable/io/parquet.py +15 -14
  63. pixeltable/iterators/document.py +16 -15
  64. pixeltable/metadata/__init__.py +1 -1
  65. pixeltable/metadata/converters/convert_19.py +46 -0
  66. pixeltable/metadata/notes.py +1 -0
  67. pixeltable/metadata/schema.py +5 -4
  68. pixeltable/plan.py +100 -78
  69. pixeltable/store.py +5 -1
  70. pixeltable/tool/create_test_db_dump.py +4 -3
  71. pixeltable/type_system.py +12 -14
  72. pixeltable/utils/documents.py +45 -42
  73. pixeltable/utils/formatter.py +2 -2
  74. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
  75. pixeltable-0.2.18.dist-info/RECORD +147 -0
  76. pixeltable-0.2.17.dist-info/RECORD +0 -144
  77. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
  78. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
  79. {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.17"
3
- __version_tuple__ = (0, 2, 17)
2
+ __version__ = "0.2.18"
3
+ __version_tuple__ = (0, 2, 18)
pixeltable/catalog/catalog.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
3
- from uuid import UUID
2
+
4
3
  import dataclasses
5
4
  import logging
5
+ from typing import Optional
6
+ from uuid import UUID
6
7
 
7
8
  import sqlalchemy as sql
8
9
  import sqlalchemy.orm as orm
@@ -10,8 +11,8 @@ import sqlalchemy.orm as orm
10
11
  from .table_version import TableVersion
11
12
  from .table_version_path import TableVersionPath
12
13
  from .table import Table
13
- from .named_function import NamedFunction
14
14
  from .path_dict import PathDict
15
+
15
16
  import pixeltable.env as env
16
17
  import pixeltable.metadata.schema as schema
17
18
 
@@ -39,10 +40,10 @@ class Catalog:
39
40
  # key: [id, version]
40
41
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
41
42
  # - snapshot versions: records the version of the snapshot
42
- self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}
43
+ self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
43
44
 
44
- self.tbls: Dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
45
- self.tbl_dependents: Dict[UUID, List[Table]] = {}
45
+ self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
46
+ self.tbl_dependents: dict[UUID, list[Table]] = {}
46
47
 
47
48
  self._init_store()
48
49
  self.paths = PathDict() # do this after _init_catalog()
@@ -133,7 +134,7 @@ class Catalog:
133
134
  base_path=base_path if not is_snapshot else None)
134
135
  view_path = TableVersionPath(tbl_version, base=base_path)
135
136
 
136
- tbl = View(
137
+ tbl: Table = View(
137
138
  tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
138
139
  snapshot_only=snapshot_only)
139
140
  self.tbl_dependents[base_tbl_id].append(tbl)
pixeltable/catalog/column.py CHANGED
@@ -1,15 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Callable, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
+ from pixeltable import exprs
10
11
 
11
12
  from .globals import is_valid_identifier
12
13
 
14
+ if TYPE_CHECKING:
15
+ from .table_version import TableVersion
16
+
13
17
  _logger = logging.getLogger('pixeltable')
14
18
 
15
19
  class Column:
@@ -20,7 +24,7 @@ class Column:
20
24
  """
21
25
  def __init__(
22
26
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
23
- computed_with: Optional[Union['Expr', Callable]] = None,
27
+ computed_with: Optional[Union[exprs.Expr, Callable]] = None,
24
28
  is_pk: bool = False, stored: bool = True,
25
29
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
26
30
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
@@ -57,15 +61,14 @@ class Column:
57
61
  if col_type is None and computed_with is None:
58
62
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
59
63
 
60
- self._value_expr: Optional['Expr'] = None
64
+ self._value_expr: Optional[exprs.Expr] = None
61
65
  self.compute_func: Optional[Callable] = None
62
66
  self.value_expr_dict = value_expr_dict
63
- from pixeltable import exprs
64
67
  if computed_with is not None:
65
68
  value_expr = exprs.Expr.from_object(computed_with)
66
69
  if value_expr is None:
67
70
  # computed_with needs to be a Callable
68
- if not isinstance(computed_with, Callable):
71
+ if not callable(computed_with):
69
72
  raise excs.Error(
70
73
  f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
71
74
  f'but it is a {type(computed_with)}')
@@ -103,7 +106,7 @@ class Column:
103
106
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
104
107
 
105
108
  @property
106
- def value_expr(self) -> Optional['Expr']:
109
+ def value_expr(self) -> Optional[exprs.Expr]:
107
110
  """Instantiate value_expr on-demand"""
108
111
  # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
109
112
  # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
@@ -112,7 +115,7 @@ class Column:
112
115
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
113
116
  return self._value_expr
114
117
 
115
- def set_value_expr(self, value_expr: 'Expr') -> None:
118
+ def set_value_expr(self, value_expr: exprs.Expr) -> None:
116
119
  self._value_expr = value_expr
117
120
  self.value_expr_dict = None
118
121
 
@@ -130,7 +133,7 @@ class Column:
130
133
  l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
131
134
  return len(l) > 0
132
135
 
133
- def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
136
+ def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
134
137
  assert self.tbl is not None
135
138
  return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
136
139
 
pixeltable/catalog/insertable_table.py CHANGED
@@ -82,7 +82,7 @@ class InsertableTable(Table):
82
82
  @overload
83
83
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
84
84
 
85
- def insert(
85
+ def insert( # type: ignore[misc]
86
86
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
87
87
  fail_on_exception: bool = True, **kwargs: Any
88
88
  ) -> UpdateStatus:
pixeltable/catalog/path_dict.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import copy
4
4
  import logging
5
- from typing import Optional, List, Dict, Type
5
+ from typing import Optional
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy.orm as orm
@@ -10,6 +10,7 @@ import sqlalchemy.orm as orm
10
10
  from pixeltable import exceptions as excs
11
11
  from pixeltable.env import Env
12
12
  from pixeltable.metadata import schema
13
+
13
14
  from .dir import Dir
14
15
  from .path import Path
15
16
  from .schema_object import SchemaObject
@@ -19,8 +20,8 @@ _logger = logging.getLogger('pixeltable')
19
20
  class PathDict:
20
21
  """Keep track of all paths in a Db instance"""
21
22
  def __init__(self):
22
- self.dir_contents: Dict[UUID, Dict[str, SchemaObject]] = {}
23
- self.schema_objs: Dict[UUID, SchemaObject] = {}
23
+ self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
24
+ self.schema_objs: dict[UUID, SchemaObject] = {}
24
25
 
25
26
  # load dirs
26
27
  with orm.Session(Env.get().engine, future=True) as session:
@@ -36,7 +37,8 @@ class PathDict:
36
37
  self.root_dir = root_dirs[0]
37
38
 
38
39
  # build dir_contents
39
- def record_dir(dir: Dir) -> None:
40
+ def record_dir(dir: SchemaObject) -> None:
41
+ assert isinstance(dir, Dir)
40
42
  if dir._id in self.dir_contents:
41
43
  return
42
44
  else:
@@ -99,7 +101,7 @@ class PathDict:
99
101
  assert to_path.name not in self.dir_contents[to_dir._id]
100
102
  self.dir_contents[to_dir._id][to_path.name] = obj
101
103
 
102
- def check_is_valid(self, path: Path, expected: Optional[Type[SchemaObject]]) -> None:
104
+ def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
103
105
  """Check that path is valid and that the object at path has the expected type.
104
106
 
105
107
  Args:
@@ -124,7 +126,7 @@ class PathDict:
124
126
  obj = self.dir_contents[parent_obj._id][path.name]
125
127
  raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
126
128
 
127
- def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
129
+ def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
128
130
  dir = self._resolve_path(parent)
129
131
  if not isinstance(dir, Dir):
130
132
  raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
pixeltable/catalog/table.py CHANGED
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import builtins
4
5
  import itertools
5
6
  import json
6
7
  import logging
7
8
  from pathlib import Path
8
- from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
9
10
  from uuid import UUID
10
11
 
11
12
  import pandas as pd
13
+ import pandas.io.formats.style
12
14
  import sqlalchemy as sql
13
15
 
14
16
  import pixeltable
@@ -26,6 +28,9 @@ from .schema_object import SchemaObject
26
28
  from .table_version import TableVersion
27
29
  from .table_version_path import TableVersionPath
28
30
 
31
+ if TYPE_CHECKING:
32
+ import torch.utils.data
33
+
29
34
  _logger = logging.getLogger('pixeltable')
30
35
 
31
36
  class Table(SchemaObject):
@@ -211,23 +216,24 @@ class Table(SchemaObject):
211
216
  })
212
217
  return df
213
218
 
214
- def _description_html(self) -> pd.DataFrame:
219
+ def _description_html(self) -> pandas.io.formats.style.Styler:
215
220
  pd_df = self._description()
216
221
  # white-space: pre-wrap: print \n as newline
217
222
  # th: center-align headings
218
- return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
219
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
223
+ return (
224
+ pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
225
+ .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
220
226
  .hide(axis='index')
227
+ )
221
228
 
222
229
  def describe(self) -> None:
223
230
  """
224
231
  Print the table schema.
225
232
  """
226
- try:
227
- __IPYTHON__
233
+ if getattr(builtins, '__IPYTHON__', False):
228
234
  from IPython.display import display
229
235
  display(self._description_html())
230
- except NameError:
236
+ else:
231
237
  print(self.__repr__())
232
238
 
233
239
  # TODO: Display comments in _repr_html()
@@ -240,7 +246,7 @@ class Table(SchemaObject):
240
246
  return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
241
247
 
242
248
  def _repr_html_(self) -> str:
243
- return self._description_html()._repr_html_()
249
+ return self._description_html()._repr_html_() # type: ignore[attr-defined]
244
250
 
245
251
  def _drop(self) -> None:
246
252
  self._check_is_dropped()
@@ -282,7 +288,7 @@ class Table(SchemaObject):
282
288
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
283
289
  if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
284
290
  raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
285
- self.add_column(**{col_name: spec})
291
+ self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
286
292
 
287
293
  def add_column(
288
294
  self,
@@ -368,7 +374,7 @@ class Table(SchemaObject):
368
374
  col_schema['stored'] = stored
369
375
 
370
376
  new_col = self._create_columns({col_name: col_schema})[0]
371
- self._verify_column(new_col, set(self._schema.keys()), self._query_names)
377
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
372
378
  return self._tbl_version.add_column(new_col, print_stats=print_stats)
373
379
 
374
380
  @classmethod
@@ -395,7 +401,7 @@ class Table(SchemaObject):
395
401
  value_expr = exprs.Expr.from_object(value_spec)
396
402
  if value_expr is None:
397
403
  # needs to be a Callable
398
- if not isinstance(value_spec, Callable):
404
+ if not callable(value_spec):
399
405
  raise excs.Error(
400
406
  f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
401
407
  f'but it is a {type(value_spec)}')
@@ -427,7 +433,7 @@ class Table(SchemaObject):
427
433
  elif isinstance(spec, exprs.Expr):
428
434
  # create copy so we can modify it
429
435
  value_expr = spec.copy()
430
- elif isinstance(spec, Callable):
436
+ elif callable(spec):
431
437
  raise excs.Error((
432
438
  f'Column {name} computed with a Callable: specify using a dictionary with '
433
439
  f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
@@ -546,6 +552,7 @@ class Table(SchemaObject):
546
552
  metric: str = 'cosine'
547
553
  ) -> None:
548
554
  """Add an index to the table.
555
+
549
556
  Args:
550
557
  col_name: name of column to index
551
558
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
@@ -656,7 +663,7 @@ class Table(SchemaObject):
656
663
  @overload
657
664
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
658
665
 
659
- @abc.abstractmethod
666
+ @abc.abstractmethod # type: ignore[misc]
660
667
  def insert(
661
668
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
662
669
  fail_on_exception: bool = True, **kwargs: Any