pixeltable 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (55)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/column.py +3 -0
  3. pixeltable/catalog/dir.py +1 -1
  4. pixeltable/catalog/globals.py +15 -6
  5. pixeltable/catalog/insertable_table.py +23 -8
  6. pixeltable/catalog/named_function.py +1 -1
  7. pixeltable/catalog/path_dict.py +4 -4
  8. pixeltable/catalog/schema_object.py +30 -18
  9. pixeltable/catalog/table.py +84 -99
  10. pixeltable/catalog/table_version.py +35 -24
  11. pixeltable/catalog/table_version_path.py +2 -2
  12. pixeltable/catalog/view.py +15 -8
  13. pixeltable/dataframe.py +56 -56
  14. pixeltable/env.py +7 -5
  15. pixeltable/exec/__init__.py +3 -3
  16. pixeltable/exec/aggregation_node.py +3 -3
  17. pixeltable/exec/expr_eval_node.py +3 -3
  18. pixeltable/exec/in_memory_data_node.py +4 -4
  19. pixeltable/exec/sql_node.py +4 -1
  20. pixeltable/exprs/array_slice.py +3 -4
  21. pixeltable/exprs/column_ref.py +20 -4
  22. pixeltable/exprs/comparison.py +11 -6
  23. pixeltable/exprs/data_row.py +3 -0
  24. pixeltable/exprs/expr.py +51 -23
  25. pixeltable/exprs/function_call.py +8 -1
  26. pixeltable/exprs/inline_array.py +2 -2
  27. pixeltable/exprs/json_path.py +36 -20
  28. pixeltable/exprs/row_builder.py +4 -4
  29. pixeltable/exprs/rowid_ref.py +1 -1
  30. pixeltable/functions/__init__.py +1 -2
  31. pixeltable/functions/anthropic.py +97 -0
  32. pixeltable/functions/audio.py +32 -0
  33. pixeltable/functions/fireworks.py +1 -1
  34. pixeltable/functions/huggingface.py +4 -4
  35. pixeltable/functions/image.py +1 -1
  36. pixeltable/functions/together.py +1 -1
  37. pixeltable/functions/video.py +5 -1
  38. pixeltable/functions/vision.py +2 -6
  39. pixeltable/globals.py +57 -28
  40. pixeltable/io/external_store.py +4 -4
  41. pixeltable/io/globals.py +12 -13
  42. pixeltable/io/label_studio.py +6 -6
  43. pixeltable/io/pandas.py +27 -12
  44. pixeltable/io/parquet.py +14 -14
  45. pixeltable/iterators/document.py +7 -7
  46. pixeltable/plan.py +58 -29
  47. pixeltable/store.py +32 -31
  48. pixeltable/tool/create_test_db_dump.py +12 -6
  49. pixeltable/type_system.py +89 -97
  50. pixeltable/utils/pytorch.py +12 -10
  51. {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/METADATA +10 -10
  52. {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/RECORD +55 -53
  53. {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/LICENSE +0 -0
  54. {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/WHEEL +0 -0
  55. {pixeltable-0.2.15.dist-info → pixeltable-0.2.17.dist-info}/entry_points.txt +0 -0
pixeltable/dataframe.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ import builtins
  import copy
  import hashlib
  import json
@@ -7,7 +8,7 @@ import logging
  import mimetypes
  import traceback
  from pathlib import Path
- from typing import List, Optional, Any, Dict, Iterator, Tuple, Set, Callable
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Hashable, Iterator, List, Optional, Set, Tuple
 
  import pandas as pd
  import pandas.io.formats.style
@@ -16,6 +17,7 @@ import sqlalchemy as sql
  import pixeltable.catalog as catalog
  import pixeltable.exceptions as excs
  import pixeltable.exprs as exprs
+ from pixeltable import exec
  from pixeltable.catalog import is_valid_identifier
  from pixeltable.catalog.globals import UpdateStatus
  from pixeltable.env import Env
@@ -24,6 +26,9 @@ from pixeltable.type_system import ColumnType
  from pixeltable.utils.formatter import Formatter
  from pixeltable.utils.http_server import get_file_uri
 
+ if TYPE_CHECKING:
+     import torch
+
  __all__ = ['DataFrame']
 
  _logger = logging.getLogger('pixeltable')
@@ -38,27 +43,25 @@ def _create_source_tag(file_path: str) -> str:
 
 
  class DataFrameResultSet:
-     def __init__(self, rows: List[List[Any]], col_names: List[str], col_types: List[ColumnType]):
+     def __init__(self, rows: list[list[Any]], schema: dict[str, ColumnType]):
          self._rows = rows
-         self._col_names = col_names
-         self._col_types = col_types
+         self._col_names = list(schema.keys())
+         self.__schema = schema
          self.__formatter = Formatter(len(self._rows), len(self._col_names), Env.get().http_address)
 
+     @property
+     def schema(self) -> dict[str, ColumnType]:
+         return self.__schema
+
      def __len__(self) -> int:
          return len(self._rows)
 
-     def column_names(self) -> List[str]:
-         return self._col_names
-
-     def column_types(self) -> List[ColumnType]:
-         return self._col_types
-
      def __repr__(self) -> str:
          return self.to_pandas().__repr__()
 
      def _repr_html_(self) -> str:
-         formatters: dict[str, Callable] = {}
-         for col_name, col_type in zip(self._col_names, self._col_types):
+         formatters: dict[Hashable, Callable[[object], str]] = {}
+         for col_name, col_type in self.schema.items():
              formatter = self.__formatter.get_pandas_formatter(col_type)
              if formatter is not None:
                  formatters[col_name] = formatter
@@ -169,8 +172,9 @@ class DataFrame:
          DataFrame._select_list_check_rep(list(zip(select_list_exprs, column_names)))
          # check select list after expansion to catch early
          # the following two lists are always non empty, even if select list is None.
+         assert len(column_names) == len(select_list_exprs)
          self._select_list_exprs = select_list_exprs
-         self._column_names = column_names
+         self._schema = {column_names[i]: select_list_exprs[i].col_type for i in range(len(column_names))}
          self.select_list = select_list
 
          self.where_clause = copy.deepcopy(where_clause)
@@ -202,22 +206,20 @@ class DataFrame:
      def _normalize_select_list(
          cls,
          tbl: catalog.TableVersionPath,
-         select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]],
-     ) -> Tuple[List[exprs.Expr], List[str]]:
+         select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
+     ) -> tuple[list[exprs.Expr], list[str]]:
          """
          Expand select list information with all columns and their names
          Returns:
              a pair composed of the list of expressions and the list of corresponding names
          """
          if select_list is None:
-             expanded_list = [(exprs.ColumnRef(col), None) for col in tbl.columns()]
-         else:
-             expanded_list = select_list
+             select_list = [(exprs.ColumnRef(col), None) for col in tbl.columns()]
 
-         out_exprs: List[exprs.Expr] = []
-         out_names: List[str] = [] # keep track of order
+         out_exprs: list[exprs.Expr] = []
+         out_names: list[str] = [] # keep track of order
          seen_out_names: set[str] = set() # use to check for duplicates in loop, avoid square complexity
-         for i, (expr, name) in enumerate(expanded_list):
+         for i, (expr, name) in enumerate(select_list):
              if name is None:
                  # use default, add suffix if needed so default adds no duplicates
                  default_name = expr.default_column_name()
@@ -275,6 +277,24 @@ class DataFrame:
          """Run the query and return rows as a generator.
          This function must not modify the state of the DataFrame, otherwise it breaks dataset caching.
          """
+         plan = self._create_query_plan()
+
+         def exec_plan(conn: sql.engine.Connection) -> Iterator[exec.DataRowBatch]:
+             plan.ctx.set_conn(conn)
+             plan.open()
+             try:
+                 for row_batch in plan:
+                     yield from row_batch
+             finally:
+                 plan.close()
+
+         if conn is None:
+             with Env.get().engine.begin() as conn:
+                 yield from exec_plan(conn)
+         else:
+             yield from exec_plan(conn)
+
+     def _create_query_plan(self) -> exec.ExecNode:
          # construct a group-by clause if we're grouping by a table
          group_by_clause: List[exprs.Expr] = []
          if self.grouping_tbl is not None:
@@ -289,7 +309,7 @@ class DataFrame:
          for item in self._select_list_exprs:
              item.bind_rel_paths(None)
 
-         plan = Planner.create_query_plan(
+         return Planner.create_query_plan(
              self.tbl,
              self._select_list_exprs,
              where_clause=self.where_clause,
@@ -298,21 +318,6 @@ class DataFrame:
              limit=self.limit_val if self.limit_val is not None else 0,
          ) # limit_val == 0: no limit_val
 
-         def exec_plan(conn: sql.engine.Connection) -> Iterator[exprs.DataRow]:
-             plan.ctx.set_conn(conn)
-             plan.open()
-             try:
-                 for row_batch in plan:
-                     for data_row in row_batch:
-                         yield data_row
-             finally:
-                 plan.close()
-
-         if conn is None:
-             with Env.get().engine.begin() as conn:
-                 yield from exec_plan(conn)
-         else:
-             yield from exec_plan(conn)
 
      def show(self, n: int = 20) -> DataFrameResultSet:
          assert n is not None
@@ -334,11 +339,9 @@ class DataFrame:
          result._reverse()
          return result
 
-     def get_column_names(self) -> List[str]:
-         return self._column_names
-
-     def get_column_types(self) -> List[ColumnType]:
-         return [expr.col_type for expr in self._select_list_exprs]
+     @property
+     def schema(self) -> dict[str, ColumnType]:
+         return self._schema
 
      def bind(self, args: dict[str, Any]) -> DataFrame:
          """Bind arguments to parameters and return a new DataFrame."""
@@ -369,7 +372,7 @@ class DataFrame:
          if order_by_exprs is not None:
              exprs.Expr.list_substitute(order_by_exprs, var_exprs)
 
-         select_list = list(zip(select_list_exprs, self._column_names))
+         select_list = list(zip(select_list_exprs, self.schema.keys()))
          order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None
          if order_by_exprs is not None:
              order_by_clause = [
@@ -409,8 +412,7 @@ class DataFrame:
          except sql.exc.DBAPIError as e:
              raise excs.Error(f'Error during SQL execution:\n{e}')
 
-         col_types = self.get_column_types()
-         return DataFrameResultSet(result_rows, self._column_names, col_types)
+         return DataFrameResultSet(result_rows, self.schema)
 
      def count(self) -> int:
          from pixeltable.plan import Planner
@@ -429,7 +431,7 @@ class DataFrame:
              assert len(self.select_list) > 0
              heading_vals.append('Select')
              heading_vals.extend([''] * (len(self.select_list) - 1))
-             info_vals.extend(self.get_column_names())
+             info_vals.extend(self.schema.keys())
          if self.where_clause is not None:
              heading_vals.append('Where')
              info_vals.append(self.where_clause.display_str(inline=False))
@@ -457,7 +459,7 @@ class DataFrame:
          # white-space: pre-wrap: print \n as newline
          # th: center-align headings
          return (
-             pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'})
+             pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
              .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
              .hide(axis='index')
              .hide(axis='columns')
@@ -469,19 +471,17 @@ class DataFrame:
          The description has two columns, heading and info, which list the contents of each 'component'
          (select list, where clause, ...) vertically.
          """
-         try:
-             __IPYTHON__
+         if getattr(builtins, '__IPYTHON__', False):
              from IPython.display import display
-
              display(self._description_html())
-         except NameError:
+         else:
              print(self.__repr__())
 
      def __repr__(self) -> str:
          return self._description().to_string(header=False, index=False)
 
      def _repr_html_(self) -> str:
-         return self._description_html()._repr_html_()
+         return self._description_html()._repr_html_() # type: ignore[attr-defined]
 
      def select(self, *items: Any, **named_items: Any) -> DataFrame:
          if self.select_list is not None:
@@ -562,7 +562,7 @@ class DataFrame:
                  # we need to make sure that the grouping table is a base of self.tbl
                  base = self.tbl.find_tbl_version(item._tbl_version_path.tbl_id())
                  if base is None or base.id == self.tbl.tbl_id():
-                     raise excs.Error(f'group_by(): {item.name} is not a base table of {self.tbl.tbl_name()}')
+                     raise excs.Error(f'group_by(): {item._name} is not a base table of {self.tbl.tbl_name()}')
                  grouping_tbl = item._tbl_version_path.tbl_version
                  break
              if not isinstance(item, exprs.Expr):
@@ -756,12 +756,12 @@ class DataFrame:
          Env.get().require_package('torch')
          Env.get().require_package('torchvision')
 
-         from pixeltable.io.parquet import save_parquet # pylint: disable=import-outside-toplevel
-         from pixeltable.utils.pytorch import PixeltablePytorchDataset # pylint: disable=import-outside-toplevel
+         from pixeltable.io.parquet import save_parquet
+         from pixeltable.utils.pytorch import PixeltablePytorchDataset
 
          cache_key = self._hash_result_set()
 
-         dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet') # pylint: disable = protected-access
+         dest_path = (Env.get().dataset_cache_dir / f'df_{cache_key}').with_suffix('.parquet')
          if dest_path.exists(): # fast path: use cache
              assert dest_path.is_dir()
          else:
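
Taken together, the dataframe.py changes replace DataFrameResultSet.column_names()/column_types() and DataFrame.get_column_names()/get_column_types() with a single schema property that maps column names to ColumnTypes, and they split plan construction out of the row iterator into _create_query_plan(). A minimal migration sketch for downstream code (the table and column names below are hypothetical, not taken from this diff):

import pixeltable as pxt

t = pxt.get_table('my_table')                    # assumes this table already exists
result = t.select(t.col_a, t.col_b).collect()

# 0.2.15: names and types came back as two parallel lists
# names = result.column_names()
# types = result.column_types()

# 0.2.17: a single ordered mapping of column name -> ColumnType
for name, col_type in result.schema.items():
    print(name, col_type)
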
pixeltable/env.py CHANGED
@@ -268,7 +268,7 @@
 
          # in pixeltable_pgserver.get_server(): cleanup_mode=None will leave db on for debugging purposes
          self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
-         self._db_url = self._db_server.get_uri(database=self._db_name)
+         self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')
 
          if reinit_db:
              if self._store_db_exists():
@@ -297,7 +297,7 @@
      def _store_db_exists(self) -> bool:
          assert self._db_name is not None
          # don't try to connect to self.db_name, it may not exist
-         db_url = self._db_server.get_uri(database='postgres')
+         db_url = self._db_server.get_uri(database='postgres', driver='psycopg')
          engine = sql.create_engine(db_url, future=True)
          try:
              with engine.begin() as conn:
@@ -312,7 +312,7 @@
      def _create_store_db(self) -> None:
          assert self._db_name is not None
          # create the db
-         pg_db_url = self._db_server.get_uri(database='postgres')
+         pg_db_url = self._db_server.get_uri(database='postgres', driver='psycopg')
          engine = sql.create_engine(pg_db_url, future=True, isolation_level='AUTOCOMMIT')
          preparer = engine.dialect.identifier_preparer
          try:
@@ -327,7 +327,7 @@
          engine.dispose()
 
          # enable pgvector
-         store_db_url = self._db_server.get_uri(database=self._db_name)
+         store_db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')
          engine = sql.create_engine(store_db_url, future=True, isolation_level='AUTOCOMMIT')
          try:
              with engine.begin() as conn:
@@ -337,7 +337,7 @@
 
      def _drop_store_db(self) -> None:
          assert self._db_name is not None
-         db_url = self._db_server.get_uri(database='postgres')
+         db_url = self._db_server.get_uri(database='postgres', driver='psycopg')
          engine = sql.create_engine(db_url, future=True, isolation_level='AUTOCOMMIT')
          preparer = engine.dialect.identifier_preparer
          try:
@@ -425,6 +425,7 @@
              else:
                  self._installed_packages[package] = None
 
+         check('toml')
          check('datasets')
          check('torch')
          check('torchvision')
@@ -443,6 +444,7 @@
              self._spacy_nlp = spacy.load('en_core_web_sm')
          check('tiktoken')
          check('openai')
+         check('anthropic')
          check('together')
          check('fireworks')
          check('label_studio_sdk')
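
Every pixeltable_pgserver.get_uri() call above now passes driver='psycopg', which pins the psycopg 3 dialect in the SQLAlchemy URL instead of relying on SQLAlchemy's default postgresql driver (psycopg2). A standalone sketch of what such a connection looks like to SQLAlchemy; the URL below is a placeholder, since in pixeltable the real URI comes from the embedded pgserver:

import sqlalchemy as sql

# placeholder URL using the psycopg (v3) dialect; pixeltable obtains the real one from pixeltable_pgserver
db_url = 'postgresql+psycopg://postgres@localhost:5432/pixeltable'

engine = sql.create_engine(db_url, future=True)
with engine.begin() as conn:
    # trivial connectivity check
    print(conn.execute(sql.text('select version()')).scalar())
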
pixeltable/exec/__init__.py CHANGED
@@ -1,11 +1,11 @@
  from .aggregation_node import AggregationNode
  from .cache_prefetch_node import CachePrefetchNode
  from .component_iteration_node import ComponentIterationNode
+ from .data_row_batch import DataRowBatch
  from .exec_context import ExecContext
  from .exec_node import ExecNode
  from .expr_eval_node import ExprEvalNode
  from .in_memory_data_node import InMemoryDataNode
- from .sql_node import SqlScanNode, SqlLookupNode
- from .row_update_node import RowUpdateNode
  from .media_validation_node import MediaValidationNode
- from .data_row_batch import DataRowBatch
+ from .row_update_node import RowUpdateNode
+ from .sql_node import SqlLookupNode, SqlScanNode
pixeltable/exec/aggregation_node.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
  import logging
  import sys
- from typing import List, Optional, Any
+ from typing import Iterable, List, Optional, Any
 
  import pixeltable.catalog as catalog
  import pixeltable.exceptions as excs
@@ -15,12 +15,12 @@ _logger = logging.getLogger('pixeltable')
  class AggregationNode(ExecNode):
      def __init__(
          self, tbl: catalog.TableVersion, row_builder: exprs.RowBuilder, group_by: List[exprs.Expr],
-         agg_fn_calls: List[exprs.FunctionCall], input_exprs: List[exprs.Expr], input: ExecNode
+         agg_fn_calls: List[exprs.FunctionCall], input_exprs: Iterable[exprs.Expr], input: ExecNode
      ):
          super().__init__(row_builder, group_by + agg_fn_calls, input_exprs, input)
          self.input = input
          self.group_by = group_by
-         self.input_exprs = input_exprs
+         self.input_exprs = list(input_exprs)
          self.agg_fn_calls = agg_fn_calls
          self.agg_fn_eval_ctx = row_builder.create_eval_ctx(agg_fn_calls, exclude=input_exprs)
          self.output_batch = DataRowBatch(tbl, row_builder, 0)
pixeltable/exec/expr_eval_node.py CHANGED
@@ -3,7 +3,7 @@ import sys
  import time
  import warnings
  from dataclasses import dataclass
- from typing import List, Optional
+ from typing import Iterable, List, Optional
 
  from tqdm import tqdm, TqdmWarning
 
@@ -23,12 +23,12 @@ class ExprEvalNode(ExecNode):
          """List of exprs that form an evaluation context and contain calls to at most one external function"""
          exprs: List[exprs.Expr]
          batched_fn: Optional[CallableFunction]
-         segment_ctxs: List[exprs.RowBuilder.EvalCtx]
+         segment_ctxs: List['exprs.RowBuilder.EvalCtx']
          target_slot_idxs: List[int]
          batch_size: int = 8
 
      def __init__(
-         self, row_builder: exprs.RowBuilder, output_exprs: List[exprs.Expr], input_exprs: List[exprs.Expr],
+         self, row_builder: exprs.RowBuilder, output_exprs: Iterable[exprs.Expr], input_exprs: Iterable[exprs.Expr],
          input: ExecNode
      ):
          super().__init__(row_builder, output_exprs, input_exprs, input)
pixeltable/exec/in_memory_data_node.py CHANGED
@@ -1,9 +1,10 @@
  import logging
- from typing import List, Dict, Any, Optional
+ from typing import Any, Optional
 
  import pixeltable.catalog as catalog
  import pixeltable.exprs as exprs
  from pixeltable.utils.media_store import MediaStore
+
  from .data_row_batch import DataRowBatch
  from .exec_node import ExecNode
 
@@ -18,8 +19,8 @@ class InMemoryDataNode(ExecNode):
      - if an input row doesn't provide a value, sets the slot to the column default
      """
      def __init__(
-         self, tbl: catalog.TableVersionPath, rows: List[Dict[str, Any]],
-         row_builder: exprs.RowBuilder, start_row_id: int,
+         self, tbl: catalog.TableVersion, rows: list[dict[str, Any]],
+         row_builder: exprs.RowBuilder, start_row_id: int,
      ):
          # we materialize all output slots
          output_exprs = [e for e in row_builder.get_output_exprs() if isinstance(e, exprs.ColumnRef)]
@@ -75,4 +76,3 @@ class InMemoryDataNode(ExecNode):
          self.has_returned_data = True
          _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_rows)} output_rows')
          return self.output_rows
-
pixeltable/exec/sql_node.py CHANGED
@@ -258,6 +258,10 @@ class SqlLookupNode(SqlNode):
      """
      Materializes data from the store via a Select stmt with a WHERE clause that matches a list of key values
      """
+
+     stmt: sql.Select
+     where_clause: sql.ColumnElement[bool]
+
      def __init__(
          self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
          select_list: Iterable[exprs.Expr], sa_key_cols: list[sql.Column], key_vals: list[tuple],
@@ -287,4 +291,3 @@ class SqlLookupNode(SqlNode):
              _logger.debug(f'SqlLookupNode stmt:\n{stmt_str}')
          except Exception as e:
              pass
-
pixeltable/exprs/array_slice.py CHANGED
@@ -1,14 +1,13 @@
  from __future__ import annotations
- from typing import Optional, List, Any, Dict, Tuple
- import copy
+
+ from typing import Any, Dict, List, Optional, Tuple
 
  import sqlalchemy as sql
 
+ from .data_row import DataRow
  from .expr import Expr
  from .globals import print_slice
- from .data_row import DataRow
  from .row_builder import RowBuilder
- import pixeltable.catalog as catalog
 
 
  class ArraySlice(Expr):
pixeltable/exprs/column_ref.py CHANGED
@@ -19,19 +19,29 @@ class ColumnRef(Expr):
      For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
      unique in the context of a particular table).
      """
+
+     col: catalog.Column
+     is_unstored_iter_col: bool
+     iter_arg_ctx: Optional[RowBuilder.EvalCtx]
+     base_rowid_len: int
+     base_rowid: list[Optional[Any]]
+     iterator: Optional[iters.ComponentIterator]
+     pos_idx: Optional[int]
+     id: int
+
      def __init__(self, col: catalog.Column):
          super().__init__(col.col_type)
          assert col.tbl is not None
          self.col = col
          self.is_unstored_iter_col = \
              col.tbl.is_component_view() and col.tbl.is_iterator_column(col) and not col.is_stored
-         self.iter_arg_ctx: Optional[RowBuilder.EvalCtx] = None
+         self.iter_arg_ctx = None
          # number of rowid columns in the base table
          self.base_rowid_len = col.tbl.base.num_rowid_columns() if self.is_unstored_iter_col else 0
          self.base_rowid = [None] * self.base_rowid_len
-         self.iterator: Optional[iters.ComponentIterator] = None
+         self.iterator = None
          # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
-         self.pos_idx: Optional[int] = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
+         self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
          self.id = self._create_id()
 
      def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
@@ -74,7 +84,13 @@ class ColumnRef(Expr):
          return self.col == other.col
 
      def __str__(self) -> str:
-         return self.col.name
+         if self.col.name is None:
+             return f'<unnamed column {self.col.id}>'
+         else:
+             return self.col.name
+
+     def __repr__(self) -> str:
+         return f'ColumnRef({self.col!r})'
 
      def sql_expr(self) -> Optional[sql.ClauseElement]:
          return self.col.sa_col
pixeltable/exprs/comparison.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ from datetime import datetime
  from typing import Optional, List, Any, Dict
 
  import sqlalchemy as sql
@@ -78,6 +79,7 @@ class Comparison(Expr):
          right = self._op2.sql_expr()
          if left is None or right is None:
              return None
+
          if self.operator == ComparisonOperator.LT:
              return left < right
          if self.operator == ComparisonOperator.LE:
@@ -92,18 +94,21 @@ class Comparison(Expr):
              return left >= right
 
      def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
+         left = data_row[self._op1.slot_idx]
+         right = data_row[self._op2.slot_idx]
+
          if self.operator == ComparisonOperator.LT:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] < data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left < right
          elif self.operator == ComparisonOperator.LE:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] <= data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left <= right
          elif self.operator == ComparisonOperator.EQ:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] == data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left == right
          elif self.operator == ComparisonOperator.NE:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] != data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left != right
          elif self.operator == ComparisonOperator.GT:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] > data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left > right
          elif self.operator == ComparisonOperator.GE:
-             data_row[self.slot_idx] = data_row[self._op1.slot_idx] >= data_row[self._op2.slot_idx]
+             data_row[self.slot_idx] = left >= right
 
      def _as_dict(self) -> Dict:
          return {'operator': self.operator.value, **super()._as_dict()}
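
For context, Comparison exprs like the ones touched above are what a predicate such as the one below builds: sql_expr() is used when the comparison can be pushed into the store query, and eval() is the in-memory fallback. The table and column names here are hypothetical:

import pixeltable as pxt

t = pxt.get_table('my_table')        # assumes a table with an int column 'x'
small = t.where(t.x < 5).collect()   # t.x < 5 constructs a Comparison expr
print(len(small), small.schema)
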
pixeltable/exprs/data_row.py CHANGED
@@ -96,6 +96,9 @@ class DataRow:
          self.file_paths[slot_idx] = None
          self.file_urls[slot_idx] = None
 
+     def __len__(self) -> int:
+         return len(self.vals)
+
      def __getitem__(self, index: object) -> Any:
          """Returns in-memory value, ie, what is needed for expr evaluation"""
          if not self.has_val[index]: