pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (52) hide show
  1. pixeltable/__init__.py +1 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +509 -103
  4. pixeltable/catalog/column.py +5 -0
  5. pixeltable/catalog/dir.py +15 -6
  6. pixeltable/catalog/globals.py +16 -0
  7. pixeltable/catalog/insertable_table.py +82 -41
  8. pixeltable/catalog/path.py +15 -0
  9. pixeltable/catalog/schema_object.py +7 -12
  10. pixeltable/catalog/table.py +81 -67
  11. pixeltable/catalog/table_version.py +23 -7
  12. pixeltable/catalog/view.py +9 -6
  13. pixeltable/env.py +15 -9
  14. pixeltable/exec/exec_node.py +1 -1
  15. pixeltable/exprs/__init__.py +2 -1
  16. pixeltable/exprs/arithmetic_expr.py +2 -0
  17. pixeltable/exprs/column_ref.py +38 -2
  18. pixeltable/exprs/expr.py +61 -12
  19. pixeltable/exprs/function_call.py +1 -4
  20. pixeltable/exprs/globals.py +12 -0
  21. pixeltable/exprs/json_mapper.py +4 -4
  22. pixeltable/exprs/json_path.py +10 -11
  23. pixeltable/exprs/similarity_expr.py +5 -20
  24. pixeltable/exprs/string_op.py +107 -0
  25. pixeltable/ext/functions/yolox.py +21 -64
  26. pixeltable/func/callable_function.py +5 -2
  27. pixeltable/func/query_template_function.py +6 -18
  28. pixeltable/func/tools.py +2 -2
  29. pixeltable/functions/__init__.py +1 -1
  30. pixeltable/functions/globals.py +16 -5
  31. pixeltable/globals.py +172 -262
  32. pixeltable/io/__init__.py +3 -2
  33. pixeltable/io/datarows.py +138 -0
  34. pixeltable/io/external_store.py +8 -5
  35. pixeltable/io/globals.py +7 -160
  36. pixeltable/io/hf_datasets.py +21 -98
  37. pixeltable/io/pandas.py +29 -43
  38. pixeltable/io/parquet.py +17 -42
  39. pixeltable/io/table_data_conduit.py +569 -0
  40. pixeltable/io/utils.py +6 -21
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_30.py +50 -0
  43. pixeltable/metadata/converters/util.py +26 -1
  44. pixeltable/metadata/notes.py +1 -0
  45. pixeltable/metadata/schema.py +3 -0
  46. pixeltable/utils/arrow.py +32 -7
  47. pixeltable/utils/coroutine.py +41 -0
  48. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
  49. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
  50. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
  51. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
  52. {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Optional, Union
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable.exceptions as excs
8
+ import pixeltable.type_system as ts
9
+
10
+ from .data_row import DataRow
11
+ from .expr import Expr
12
+ from .globals import StringOperator
13
+ from .row_builder import RowBuilder
14
+ from .sql_element_cache import SqlElementCache
15
+
16
+
17
+ class StringOp(Expr):
18
+ """
19
+ Allows operations on strings (concatenation and repetition)
20
+ """
21
+
22
+ operator: StringOperator
23
+
24
+ def __init__(self, operator: StringOperator, op1: Expr, op2: Expr):
25
+ super().__init__(ts.StringType(nullable=op1.col_type.nullable))
26
+ self.operator = operator
27
+ self.components = [op1, op2]
28
+ assert op1.col_type.is_string_type()
29
+ if operator in {StringOperator.CONCAT, StringOperator.REPEAT}:
30
+ if operator == StringOperator.CONCAT and not op2.col_type.is_string_type():
31
+ raise excs.Error(
32
+ f'{self}: {operator} on strings requires string type, but {op2} has type {op2.col_type}'
33
+ )
34
+ if operator == StringOperator.REPEAT and not op2.col_type.is_int_type():
35
+ raise excs.Error(f'{self}: {operator} on strings requires int type, but {op2} has type {op2.col_type}')
36
+ else:
37
+ raise excs.Error(
38
+ f'{self}: invalid operation {operator} on strings; '
39
+ f'only operators {StringOperator.CONCAT} and {StringOperator.REPEAT} are supported'
40
+ )
41
+ self.id = self._create_id()
42
+
43
+ @property
44
+ def _op1(self) -> Expr:
45
+ return self.components[0]
46
+
47
+ @property
48
+ def _op2(self) -> Expr:
49
+ return self.components[1]
50
+
51
+ def __repr__(self) -> str:
52
+ # add parentheses around operands that are themselves StringOp expressions to express precedence
53
+ op1_str = f'({self._op1})' if isinstance(self._op1, StringOp) else str(self._op1)
54
+ op2_str = f'({self._op2})' if isinstance(self._op2, StringOp) else str(self._op2)
55
+ return f'{op1_str} {self.operator} {op2_str}'
56
+
57
+ def _equals(self, other: StringOp) -> bool:
58
+ return self.operator == other.operator
59
+
60
+ def _id_attrs(self) -> list[tuple[str, Any]]:
61
+ return [*super()._id_attrs(), ('operator', self.operator.value)]
62
+
63
+ def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
64
+ left = sql_elements.get(self._op1)
65
+ right = sql_elements.get(self._op2)
66
+ if left is None or right is None:
67
+ return None
68
+ if self.operator == StringOperator.CONCAT:
69
+ return left.concat(right)
70
+ if self.operator == StringOperator.REPEAT:
71
+ return sql.func.repeat(sql.cast(left, sql.String), sql.cast(right, sql.Integer))
72
+ return None
73
+
74
+ def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
75
+ op1_val = data_row[self._op1.slot_idx]
76
+ op2_val = data_row[self._op2.slot_idx]
77
+ data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
78
+
79
+ def eval_nullable(self, op1_val: Union[str, None], op2_val: Union[int, str, None]) -> Union[str, None]:
80
+ """
81
+ Return the result of evaluating the expression on a nullable string operand and a nullable string/int operand,
82
+ None is interpreted as SQL NULL
83
+ """
84
+ if op1_val is None or op2_val is None:
85
+ return None
86
+ return self.eval_non_null(op1_val, op2_val)
87
+
88
+ def eval_non_null(self, op1_val: str, op2_val: Union[int, str]) -> str:
89
+ """
90
+ Return the result of evaluating the expression on a string operand and a string/int operand
91
+ """
92
+ assert self.operator in {StringOperator.CONCAT, StringOperator.REPEAT}
93
+ if self.operator == StringOperator.CONCAT:
94
+ assert isinstance(op2_val, str)
95
+ return op1_val + op2_val
96
+ else:
97
+ assert isinstance(op2_val, int)
98
+ return op1_val * op2_val
99
+
100
+ def _as_dict(self) -> dict:
101
+ return {'operator': self.operator.value, **super()._as_dict()}
102
+
103
+ @classmethod
104
+ def _from_dict(cls, d: dict, components: list[Expr]) -> StringOp:
105
+ assert 'operator' in d
106
+ assert len(components) == 2
107
+ return cls(StringOperator(d['operator']), components[0], components[1])
@@ -1,21 +1,15 @@
1
1
  import logging
2
- from pathlib import Path
3
- from typing import TYPE_CHECKING, Iterable, Iterator
4
- from urllib.request import urlretrieve
2
+ from typing import TYPE_CHECKING
5
3
 
6
- import numpy as np
7
4
  import PIL.Image
8
5
 
9
6
  import pixeltable as pxt
10
- from pixeltable import env
11
7
  from pixeltable.func import Batch
12
8
  from pixeltable.functions.util import normalize_image_mode
13
9
  from pixeltable.utils.code import local_public_names
14
10
 
15
11
  if TYPE_CHECKING:
16
- import torch
17
- from yolox.exp import Exp # type: ignore[import-untyped]
18
- from yolox.models import YOLOX # type: ignore[import-untyped]
12
+ from yolox.models import Yolox, YoloxProcessor # type: ignore[import-untyped]
19
13
 
20
14
  _logger = logging.getLogger('pixeltable')
21
15
 
@@ -30,7 +24,7 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
30
24
 
31
25
  __Requirements__:
32
26
 
33
- - `pip install git+https://github.com/Megvii-BaseDetection/YOLOX`
27
+ - `pip install pixeltable-yolox`
34
28
 
35
29
  Args:
36
30
  model_id: one of: `yolox_nano`, `yolox_tiny`, `yolox_s`, `yolox_m`, `yolox_l`, `yolox_x`
@@ -46,31 +40,14 @@ def yolox(images: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0
46
40
  >>> tbl.add_computed_column(detections=yolox(tbl.image, model_id='yolox_m', threshold=0.8))
47
41
  """
48
42
  import torch
49
- from yolox.utils import postprocess # type: ignore[import-untyped]
50
-
51
- model, exp = _lookup_model(model_id, 'cpu')
52
- image_tensors = list(_images_to_tensors(images, exp))
53
- batch_tensor = torch.stack(image_tensors)
54
43
 
44
+ model = _lookup_model(model_id, 'cpu')
45
+ processor = _lookup_processor(model_id)
46
+ normalized_images = [normalize_image_mode(image) for image in images]
55
47
  with torch.no_grad():
56
- output_tensor = model(batch_tensor)
57
-
58
- outputs = postprocess(output_tensor, 80, threshold, exp.nmsthre, class_agnostic=False)
59
-
60
- results: list[dict] = []
61
- for image in images:
62
- ratio = min(exp.test_size[0] / image.height, exp.test_size[1] / image.width)
63
- if outputs[0] is None:
64
- results.append({'bboxes': [], 'scores': [], 'labels': []})
65
- else:
66
- results.append(
67
- {
68
- 'bboxes': [(output[:4] / ratio).tolist() for output in outputs[0]],
69
- 'scores': [output[4].item() * output[5].item() for output in outputs[0]],
70
- 'labels': [int(output[6]) for output in outputs[0]],
71
- }
72
- )
73
- return results
48
+ tensor = processor(normalized_images)
49
+ output = model(tensor)
50
+ return processor.postprocess(normalized_images, output, threshold=threshold)
74
51
 
75
52
 
76
53
  @pxt.udf
@@ -107,47 +84,27 @@ def yolo_to_coco(detections: dict) -> list:
107
84
  return result
108
85
 
109
86
 
110
- def _images_to_tensors(images: Iterable[PIL.Image.Image], exp: 'Exp') -> Iterator['torch.Tensor']:
111
- import torch
112
- from yolox.data import ValTransform # type: ignore[import-untyped]
113
-
114
- val_transform = ValTransform(legacy=False)
115
- for image in images:
116
- normalized_image = normalize_image_mode(image)
117
- image_transform, _ = val_transform(np.array(normalized_image), None, exp.test_size)
118
- yield torch.from_numpy(image_transform)
119
-
120
-
121
- def _lookup_model(model_id: str, device: str) -> tuple['YOLOX', 'Exp']:
122
- import torch
123
- from yolox.exp import get_exp
87
+ def _lookup_model(model_id: str, device: str) -> 'Yolox':
88
+ from yolox.models import Yolox
124
89
 
125
90
  key = (model_id, device)
126
- if key in _model_cache:
127
- return _model_cache[key]
91
+ if key not in _model_cache:
92
+ _model_cache[key] = Yolox.from_pretrained(model_id, device=device)
128
93
 
129
- weights_url = f'https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/{model_id}.pth'
130
- weights_file = Path(f'{env.Env.get().tmp_dir}/{model_id}.pth')
131
- if not weights_file.exists():
132
- _logger.info(f'Downloading weights for YOLOX model {model_id}: from {weights_url} -> {weights_file}')
133
- urlretrieve(weights_url, weights_file)
94
+ return _model_cache[key]
134
95
 
135
- exp = get_exp(exp_name=model_id)
136
- model = exp.get_model().to(device)
137
96
 
138
- model.eval()
139
- model.head.training = False
140
- model.training = False
97
+ def _lookup_processor(model_id: str) -> 'YoloxProcessor':
98
+ from yolox.models import YoloxProcessor
141
99
 
142
- # Load in the weights from training
143
- weights = torch.load(weights_file, map_location=torch.device(device))
144
- model.load_state_dict(weights['model'])
100
+ if model_id not in _processor_cache:
101
+ _processor_cache[model_id] = YoloxProcessor(model_id)
145
102
 
146
- _model_cache[key] = (model, exp)
147
- return model, exp
103
+ return _processor_cache[model_id]
148
104
 
149
105
 
150
- _model_cache: dict[tuple[str, str], tuple['YOLOX', 'Exp']] = {}
106
+ _model_cache: dict[tuple[str, str], 'Yolox'] = {}
107
+ _processor_cache: dict[str, 'YoloxProcessor'] = {}
151
108
 
152
109
 
153
110
  __all__ = local_public_names(__name__)
@@ -8,6 +8,7 @@ from uuid import UUID
8
8
  import cloudpickle # type: ignore[import-untyped]
9
9
 
10
10
  import pixeltable.exceptions as excs
11
+ from pixeltable.utils.coroutine import run_coroutine_synchronously
11
12
 
12
13
  from .function import Function
13
14
  from .signature import Signature
@@ -93,13 +94,15 @@ class CallableFunction(Function):
93
94
  batched_kwargs = {k: [v] for k, v in kwargs.items() if k not in constant_param_names}
94
95
  result: list[Any]
95
96
  if inspect.iscoroutinefunction(self.py_fn):
96
- result = asyncio.run(self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs))
97
+ # TODO: This is temporary (see note in utils/coroutine.py)
98
+ result = run_coroutine_synchronously(self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs))
97
99
  else:
98
100
  result = self.py_fn(*batched_args, **constant_kwargs, **batched_kwargs)
99
101
  assert len(result) == 1
100
102
  return result[0]
101
103
  elif inspect.iscoroutinefunction(self.py_fn):
102
- return asyncio.run(self.py_fn(*args, **kwargs))
104
+ # TODO: This is temporary (see note in utils/coroutine.py)
105
+ return run_coroutine_synchronously(self.py_fn(*args, **kwargs))
103
106
  else:
104
107
  return self.py_fn(*args, **kwargs)
105
108
 
@@ -17,8 +17,6 @@ class QueryTemplateFunction(Function):
17
17
 
18
18
  template_df: Optional['DataFrame']
19
19
  self_name: Optional[str]
20
- # conn: Optional[sql.engine.Connection]
21
- defaults: dict[str, exprs.Literal]
22
20
 
23
21
  @classmethod
24
22
  def create(
@@ -46,20 +44,6 @@ class QueryTemplateFunction(Function):
46
44
  self.self_name = name
47
45
  self.template_df = template_df
48
46
 
49
- # if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
50
- # we end up with a deadlock
51
- # TODO: figure out a more general way to make execution state available
52
- # self.conn = None
53
-
54
- # convert defaults to Literals
55
- self.defaults = {} # key: param name, value: default value converted to a Literal
56
- param_types = self.template_df.parameters()
57
- for param in [p for p in sig.parameters.values() if p.has_default()]:
58
- assert param.name in param_types
59
- param_type = param_types[param.name]
60
- literal_default = exprs.Literal(param.default, col_type=param_type)
61
- self.defaults[param.name] = literal_default
62
-
63
47
  def _update_as_overload_resolution(self, signature_idx: int) -> None:
64
48
  pass # only one signature supported for QueryTemplateFunction
65
49
 
@@ -72,7 +56,11 @@ class QueryTemplateFunction(Function):
72
56
  bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
73
57
  # apply defaults, otherwise we might have Parameters left over
74
58
  bound_args.update(
75
- {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args}
59
+ {
60
+ param.name: param.default
61
+ for param in self.signature.parameters.values()
62
+ if param.has_default() and param.name not in bound_args
63
+ }
76
64
  )
77
65
  bound_df = self.template_df.bind(bound_args)
78
66
  result = await bound_df._acollect()
@@ -87,7 +75,7 @@ class QueryTemplateFunction(Function):
87
75
  return self.self_name
88
76
 
89
77
  def _as_dict(self) -> dict:
90
- return {'name': self.name, 'signature': self.signatures[0].as_dict(), 'df': self.template_df.as_dict()}
78
+ return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
91
79
 
92
80
  @classmethod
93
81
  def _from_dict(cls, d: dict) -> Function:
pixeltable/func/tools.py CHANGED
@@ -51,10 +51,10 @@ class Tool(pydantic.BaseModel):
51
51
  # The output of `tool_calls` must be a dict in standardized tool invocation format:
52
52
  # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
53
53
  def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
54
- from pixeltable import exprs
54
+ import pixeltable.functions as pxtf
55
55
 
56
56
  func_name = self.name or self.fn.name
57
- return exprs.JsonMapper(tool_calls[func_name]['*'], self.__invoke_kwargs(exprs.RELATIVE_PATH_ROOT.args))
57
+ return pxtf.map(tool_calls[func_name]['*'], lambda x: self.__invoke_kwargs(x.args))
58
58
 
59
59
  def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
60
60
  kwargs = {param.name: self.__extract_tool_arg(param, kwargs) for param in self.parameters.values()}
@@ -24,7 +24,7 @@ from . import (
24
24
  vision,
25
25
  whisper,
26
26
  )
27
- from .globals import count, max, mean, min, sum
27
+ from .globals import count, map, max, mean, min, sum
28
28
 
29
29
  __all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
30
30
 
@@ -1,15 +1,14 @@
1
1
  import builtins
2
2
  import typing
3
-
4
- from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
5
- from typing import Optional, Union
3
+ from typing import Any, Callable, Optional, Union
6
4
 
7
5
  import sqlalchemy as sql
8
6
 
9
- import pixeltable.type_system as ts
10
- from pixeltable import exprs, func
7
+ from pixeltable import exceptions as excs, exprs, func, type_system as ts
11
8
  from pixeltable.utils.code import local_public_names
12
9
 
10
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
+
13
12
 
14
13
  # TODO: remove and replace calls with astype()
15
14
  def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
@@ -168,6 +167,18 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
168
167
  return sql.sql.func.avg(val)
169
168
 
170
169
 
170
+ def map(expr: exprs.Expr, fn: Callable[[exprs.Expr], Any]) -> exprs.Expr:
171
+ target_expr: exprs.Expr
172
+ try:
173
+ target_expr = exprs.Expr.from_object(fn(exprs.json_path.RELATIVE_PATH_ROOT))
174
+ except Exception as e:
175
+ raise excs.Error(
176
+ 'Failed to evaluate map function. '
177
+ '(The `fn` argument to `map()` must produce a valid Pixeltable expression.)'
178
+ ) from e
179
+ return exprs.JsonMapper(expr, target_expr)
180
+
181
+
171
182
  __all__ = local_public_names(__name__)
172
183
 
173
184