pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (63)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +9 -2
  4. pixeltable/catalog/column.py +1 -1
  5. pixeltable/catalog/dir.py +1 -1
  6. pixeltable/catalog/table.py +3 -1
  7. pixeltable/catalog/table_version.py +12 -2
  8. pixeltable/catalog/table_version_path.py +2 -2
  9. pixeltable/catalog/view.py +64 -20
  10. pixeltable/dataframe.py +11 -6
  11. pixeltable/env.py +12 -0
  12. pixeltable/exec/expr_eval/evaluators.py +4 -2
  13. pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
  14. pixeltable/exprs/comparison.py +8 -4
  15. pixeltable/exprs/data_row.py +9 -7
  16. pixeltable/exprs/expr.py +2 -2
  17. pixeltable/exprs/function_call.py +155 -313
  18. pixeltable/exprs/json_mapper.py +25 -8
  19. pixeltable/exprs/json_path.py +6 -5
  20. pixeltable/exprs/object_ref.py +16 -5
  21. pixeltable/exprs/row_builder.py +10 -3
  22. pixeltable/func/aggregate_function.py +29 -15
  23. pixeltable/func/callable_function.py +11 -8
  24. pixeltable/func/expr_template_function.py +3 -9
  25. pixeltable/func/function.py +148 -74
  26. pixeltable/func/signature.py +65 -30
  27. pixeltable/func/tools.py +26 -26
  28. pixeltable/func/udf.py +1 -1
  29. pixeltable/functions/__init__.py +1 -0
  30. pixeltable/functions/anthropic.py +9 -3
  31. pixeltable/functions/deepseek.py +121 -0
  32. pixeltable/functions/image.py +7 -7
  33. pixeltable/functions/openai.py +30 -13
  34. pixeltable/functions/video.py +14 -7
  35. pixeltable/globals.py +14 -3
  36. pixeltable/index/embedding_index.py +4 -13
  37. pixeltable/io/globals.py +88 -77
  38. pixeltable/io/hf_datasets.py +34 -34
  39. pixeltable/io/pandas.py +75 -76
  40. pixeltable/io/parquet.py +19 -27
  41. pixeltable/io/utils.py +115 -0
  42. pixeltable/iterators/audio.py +2 -1
  43. pixeltable/iterators/video.py +1 -1
  44. pixeltable/metadata/__init__.py +2 -1
  45. pixeltable/metadata/converters/convert_15.py +18 -8
  46. pixeltable/metadata/converters/convert_27.py +31 -0
  47. pixeltable/metadata/converters/convert_28.py +15 -0
  48. pixeltable/metadata/converters/convert_29.py +111 -0
  49. pixeltable/metadata/converters/util.py +12 -1
  50. pixeltable/metadata/notes.py +3 -0
  51. pixeltable/metadata/schema.py +8 -0
  52. pixeltable/share/__init__.py +1 -0
  53. pixeltable/share/packager.py +41 -13
  54. pixeltable/share/publish.py +97 -0
  55. pixeltable/type_system.py +40 -14
  56. pixeltable/utils/__init__.py +41 -0
  57. pixeltable/utils/arrow.py +40 -7
  58. pixeltable/utils/formatter.py +1 -1
  59. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
  60. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
  61. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
  62. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
  63. {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
@@ -1,16 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
- import enum
5
4
  import inspect
6
5
  import json
7
6
  import logging
8
7
  import typing
9
- from typing import Any, Callable, Optional, Union
8
+ from typing import TYPE_CHECKING, Any, Callable, Optional
10
9
 
11
10
  import pixeltable.exceptions as excs
12
11
  import pixeltable.type_system as ts
13
12
 
13
+ if TYPE_CHECKING:
14
+ from pixeltable import exprs
15
+
14
16
  _logger = logging.getLogger('pixeltable')
15
17
 
16
18
 
@@ -21,25 +23,24 @@ class Parameter:
21
23
  kind: inspect._ParameterKind
22
24
  # for some reason, this needs to precede is_batched in the dataclass definition,
23
25
  # otherwise Python complains that an argument with a default is followed by an argument without a default
24
- default: Any = inspect.Parameter.empty # default value for the parameter
26
+ default: Optional['exprs.Literal'] = None # default value for the parameter
25
27
  is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
26
28
 
27
29
  def __post_init__(self) -> None:
28
- # make sure that default is json-serializable and of the correct type
29
- if self.default is inspect.Parameter.empty or self.default is None:
30
- return
31
- try:
32
- _ = json.dumps(self.default)
33
- except TypeError:
34
- raise excs.Error(f'Default value for parameter {self.name} is not JSON-serializable: {str(self.default)}')
35
- if self.col_type is not None:
36
- try:
37
- self.col_type.validate_literal(self.default)
38
- except TypeError as e:
39
- raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
30
+ from pixeltable import exprs
31
+
32
+ if self.default is not None:
33
+ if self.col_type is None:
34
+ raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
35
+ if not isinstance(self.default, exprs.Literal):
36
+ raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
37
+ if not self.col_type.is_supertype_of(self.default.col_type):
38
+ raise excs.Error(
39
+ f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
40
+ )
40
41
 
41
42
  def has_default(self) -> bool:
42
- return self.default is not inspect.Parameter.empty
43
+ return self.default is not None
43
44
 
44
45
  def as_dict(self) -> dict[str, Any]:
45
46
  return {
@@ -47,17 +48,15 @@ class Parameter:
47
48
  'col_type': self.col_type.as_dict() if self.col_type is not None else None,
48
49
  'kind': self.kind.name,
49
50
  'is_batched': self.is_batched,
50
- 'has_default': self.has_default(),
51
- 'default': self.default if self.has_default() else None,
51
+ 'default': None if self.default is None else self.default.as_dict(),
52
52
  }
53
53
 
54
54
  @classmethod
55
55
  def from_dict(cls, d: dict[str, Any]) -> Parameter:
56
- has_default = d['has_default']
57
- if has_default:
58
- default = d['default']
59
- else:
60
- default = inspect.Parameter.empty
56
+ from pixeltable import exprs
57
+
58
+ assert d['default'] is None or isinstance(d['default'], dict), d
59
+ default = None if d['default'] is None else exprs.Literal.from_dict(d['default'])
61
60
  return cls(
62
61
  name=d['name'],
63
62
  col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
@@ -67,7 +66,8 @@ class Parameter:
67
66
  )
68
67
 
69
68
  def to_py_param(self) -> inspect.Parameter:
70
- return inspect.Parameter(self.name, self.kind, default=self.default)
69
+ py_default = self.default.val if self.default is not None else inspect.Parameter.empty
70
+ return inspect.Parameter(self.name, self.kind, default=py_default)
71
71
 
72
72
 
73
73
  T = typing.TypeVar('T')
@@ -147,6 +147,37 @@ class Signature:
147
147
 
148
148
  return True
149
149
 
150
+ def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
151
+ if context != '':
152
+ context = f' ({context})'
153
+
154
+ for param_name, arg in bound_args.items():
155
+ assert param_name in self.parameters
156
+ param = self.parameters[param_name]
157
+ is_var_param = param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
158
+ if is_var_param:
159
+ continue
160
+ assert param.col_type is not None
161
+
162
+ if arg is None:
163
+ raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')
164
+
165
+ # Check that the argument is consistent with the expected parameter type, with the allowance that
166
+ # non-nullable parameters can still accept nullable arguments (since in that event, FunctionCall.eval()
167
+ # detects the Nones and skips evaluation).
168
+ if not (
169
+ param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True)
170
+ # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
171
+ # types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
172
+ # (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
173
+ # We need to think through the right way to handle this scenario.
174
+ or (arg.col_type.is_json_type() and param.col_type.is_scalar_type())
175
+ ):
176
+ raise excs.Error(
177
+ f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
178
+ f' match parameter type {param.col_type}'
179
+ )
180
+
150
181
  def __eq__(self, other: object) -> bool:
151
182
  if not isinstance(other, Signature):
152
183
  return False
@@ -199,6 +230,8 @@ class Signature:
199
230
  type_substitutions: Optional[dict] = None,
200
231
  is_cls_method: bool = False,
201
232
  ) -> list[Parameter]:
233
+ from pixeltable import exprs
234
+
202
235
  assert (py_fn is None) != (py_params is None)
203
236
  if py_fn is not None:
204
237
  sig = inspect.signature(py_fn)
@@ -212,7 +245,7 @@ class Signature:
212
245
  if is_cls_method and idx == 0:
213
246
  continue # skip 'self' or 'cls' parameter
214
247
  if param.name in cls.SPECIAL_PARAM_NAMES:
215
- raise excs.Error(f"'{param.name}' is a reserved parameter name")
248
+ raise excs.Error(f'{param.name!r} is a reserved parameter name')
216
249
  if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
217
250
  parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
218
251
  continue
@@ -220,7 +253,7 @@ class Signature:
220
253
  # check non-var parameters for name collisions and default value compatibility
221
254
  if param_types is not None:
222
255
  if idx >= len(param_types):
223
- raise excs.Error(f'Missing type for parameter {param.name}')
256
+ raise excs.Error(f'Missing type for parameter {param.name!r}')
224
257
  param_type = param_types[idx]
225
258
  is_batched = False
226
259
  else:
@@ -231,12 +264,14 @@ class Signature:
231
264
  py_type = param.annotation
232
265
  param_type, is_batched = cls._infer_type(py_type)
233
266
  if param_type is None:
234
- raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
267
+ raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
268
+
269
+ default = None if param.default is inspect.Parameter.empty else exprs.Expr.from_object(param.default)
270
+ if not (default is None or isinstance(default, exprs.Literal)):
271
+ raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')
235
272
 
236
273
  parameters.append(
237
- Parameter(
238
- param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default
239
- )
274
+ Parameter(param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
240
275
  )
241
276
 
242
277
  return parameters
pixeltable/func/tools.py CHANGED
@@ -48,22 +48,27 @@ class Tool(pydantic.BaseModel):
48
48
  'additionalProperties': False, # TODO Handle kwargs?
49
49
  }
50
50
 
51
- # `tool_calls` must be in standardized tool invocation format:
52
- # {tool_name: {'args': {name1: value1, name2: value2, ...}}, ...}
53
- def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.FunctionCall':
54
- kwargs = {param.name: self.__extract_tool_arg(param, tool_calls) for param in self.parameters.values()}
55
- return self.fn(**kwargs)
51
+ # The output of `tool_calls` must be a dict in standardized tool invocation format:
52
+ # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
53
+ def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
54
+ from pixeltable import exprs
56
55
 
57
- def __extract_tool_arg(self, param: Parameter, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
58
56
  func_name = self.name or self.fn.name
57
+ return exprs.JsonMapper(tool_calls[func_name]['*'], self.__invoke_kwargs(exprs.RELATIVE_PATH_ROOT.args))
58
+
59
+ def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
60
+ kwargs = {param.name: self.__extract_tool_arg(param, kwargs) for param in self.parameters.values()}
61
+ return self.fn(**kwargs)
62
+
63
+ def __extract_tool_arg(self, param: Parameter, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
59
64
  if param.col_type.is_string_type():
60
- return _extract_str_tool_arg(tool_calls, func_name=func_name, param_name=param.name)
65
+ return _extract_str_tool_arg(kwargs, param_name=param.name)
61
66
  if param.col_type.is_int_type():
62
- return _extract_int_tool_arg(tool_calls, func_name=func_name, param_name=param.name)
67
+ return _extract_int_tool_arg(kwargs, param_name=param.name)
63
68
  if param.col_type.is_float_type():
64
- return _extract_float_tool_arg(tool_calls, func_name=func_name, param_name=param.name)
69
+ return _extract_float_tool_arg(kwargs, param_name=param.name)
65
70
  if param.col_type.is_bool_type():
66
- return _extract_bool_tool_arg(tool_calls, func_name=func_name, param_name=param.name)
71
+ return _extract_bool_tool_arg(kwargs, param_name=param.name)
67
72
  assert False
68
73
 
69
74
 
@@ -113,34 +118,29 @@ class Tools(pydantic.BaseModel):
113
118
 
114
119
 
115
120
  @udf
116
- def _extract_str_tool_arg(tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[str]:
117
- return _extract_arg(str, tool_calls, func_name, param_name)
121
+ def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[str]:
122
+ return _extract_arg(str, kwargs, param_name)
118
123
 
119
124
 
120
125
  @udf
121
- def _extract_int_tool_arg(tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[int]:
122
- return _extract_arg(int, tool_calls, func_name, param_name)
126
+ def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[int]:
127
+ return _extract_arg(int, kwargs, param_name)
123
128
 
124
129
 
125
130
  @udf
126
- def _extract_float_tool_arg(tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[float]:
127
- return _extract_arg(float, tool_calls, func_name, param_name)
131
+ def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[float]:
132
+ return _extract_arg(float, kwargs, param_name)
128
133
 
129
134
 
130
135
  @udf
131
- def _extract_bool_tool_arg(tool_calls: dict[str, Any], func_name: str, param_name: str) -> Optional[bool]:
132
- return _extract_arg(bool, tool_calls, func_name, param_name)
136
+ def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[bool]:
137
+ return _extract_arg(bool, kwargs, param_name)
133
138
 
134
139
 
135
140
  T = TypeVar('T')
136
141
 
137
142
 
138
- def _extract_arg(
139
- eval_fn: Callable[[Any], T], tool_calls: dict[str, Any], func_name: str, param_name: str
140
- ) -> Optional[T]:
141
- if func_name in tool_calls:
142
- arguments = tool_calls[func_name]['args']
143
- if param_name in arguments:
144
- return eval_fn(arguments[param_name])
145
- return None
143
+ def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> Optional[T]:
144
+ if param_name in kwargs:
145
+ return eval_fn(kwargs[param_name])
146
146
  return None
pixeltable/func/udf.py CHANGED
@@ -283,7 +283,7 @@ def from_table(
283
283
  result_dict[name] = var
284
284
  # Since this is a data column, it becomes a UDF parameter.
285
285
  # If the column is nullable, then the parameter will have a default value of None.
286
- default_value = None if col.col_type.nullable else inspect.Parameter.empty
286
+ default_value = exprs.Literal(None) if col.col_type.nullable else None
287
287
  param = Parameter(name, col.col_type, inspect._ParameterKind.POSITIONAL_OR_KEYWORD, default_value)
288
288
  params.append(param)
289
289
 
@@ -3,6 +3,7 @@ from pixeltable.utils.code import local_public_names
3
3
  from . import (
4
4
  anthropic,
5
5
  audio,
6
+ deepseek,
6
7
  fireworks,
7
8
  gemini,
8
9
  huggingface,
@@ -213,9 +213,15 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
213
213
  @pxt.udf
214
214
  def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
215
215
  anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
216
- if len(anthropic_tool_calls) > 0:
217
- return {tool_call['name']: {'args': tool_call['input']} for tool_call in anthropic_tool_calls}
218
- return None
216
+ if len(anthropic_tool_calls) == 0:
217
+ return None
218
+ pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
219
+ for tool_call in anthropic_tool_calls:
220
+ tool_name = tool_call['name']
221
+ if tool_name not in pxt_tool_calls:
222
+ pxt_tool_calls[tool_name] = []
223
+ pxt_tool_calls[tool_name].append({'args': tool_call['input']})
224
+ return pxt_tool_calls
219
225
 
220
226
 
221
227
  _T = TypeVar('_T')
@@ -0,0 +1,121 @@
1
+ import json
2
+ from typing import TYPE_CHECKING, Any, Optional, Union, cast
3
+
4
+ import httpx
5
+
6
+ import pixeltable as pxt
7
+ from pixeltable import env
8
+ from pixeltable.utils.code import local_public_names
9
+
10
+ from .openai import _opt
11
+
12
+ if TYPE_CHECKING:
13
+ import openai
14
+
15
+
16
+ @env.register_client('deepseek')
17
+ def _(api_key: str) -> 'openai.AsyncOpenAI':
18
+ import openai
19
+
20
+ return openai.AsyncOpenAI(
21
+ api_key=api_key,
22
+ base_url='https://api.deepseek.com',
23
+ http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
24
+ )
25
+
26
+
27
+ def _deepseek_client() -> 'openai.AsyncOpenAI':
28
+ return env.Env.get().get_client('deepseek')
29
+
30
+
31
+ @pxt.udf
32
+ async def chat_completions(
33
+ messages: list,
34
+ *,
35
+ model: str,
36
+ frequency_penalty: Optional[float] = None,
37
+ logprobs: Optional[bool] = None,
38
+ top_logprobs: Optional[int] = None,
39
+ max_tokens: Optional[int] = None,
40
+ presence_penalty: Optional[float] = None,
41
+ response_format: Optional[dict] = None,
42
+ stop: Optional[list[str]] = None,
43
+ temperature: Optional[float] = None,
44
+ tools: Optional[list[dict]] = None,
45
+ tool_choice: Optional[dict] = None,
46
+ top_p: Optional[float] = None,
47
+ ) -> dict:
48
+ """
49
+ Creates a model response for the given chat conversation.
50
+
51
+ Equivalent to the Deepseek `chat/completions` API endpoint.
52
+ For additional details, see: <https://api-docs.deepseek.com/api/create-chat-completion>
53
+
54
+ Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
55
+
56
+ __Requirements:__
57
+
58
+ - `pip install openai`
59
+
60
+ Args:
61
+ messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
62
+ model: The model to use for chat completion.
63
+
64
+ For details on the other parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
65
+
66
+ Returns:
67
+ A dictionary containing the response and other metadata.
68
+
69
+ Examples:
70
+ Add a computed column that applies the model `deepseek-chat` to an existing Pixeltable column `tbl.prompt`
71
+ of the table `tbl`:
72
+
73
+ >>> messages = [
74
+ {'role': 'system', 'content': 'You are a helpful assistant.'},
75
+ {'role': 'user', 'content': tbl.prompt}
76
+ ]
77
+ tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
78
+ """
79
+ if tools is not None:
80
+ tools = [{'type': 'function', 'function': tool} for tool in tools]
81
+
82
+ tool_choice_: Union[str, dict, None] = None
83
+ if tool_choice is not None:
84
+ if tool_choice['auto']:
85
+ tool_choice_ = 'auto'
86
+ elif tool_choice['required']:
87
+ tool_choice_ = 'required'
88
+ else:
89
+ assert tool_choice['tool'] is not None
90
+ tool_choice_ = {'type': 'function', 'function': {'name': tool_choice['tool']}}
91
+
92
+ extra_body: Optional[dict[str, Any]] = None
93
+ if tool_choice is not None and not tool_choice['parallel_tool_calls']:
94
+ extra_body = {'parallel_tool_calls': False}
95
+
96
+ # cast(Any, ...): avoid mypy errors
97
+ result = await _deepseek_client().chat.completions.with_raw_response.create(
98
+ messages=messages,
99
+ model=model,
100
+ frequency_penalty=_opt(frequency_penalty),
101
+ logprobs=_opt(logprobs),
102
+ top_logprobs=_opt(top_logprobs),
103
+ max_tokens=_opt(max_tokens),
104
+ presence_penalty=_opt(presence_penalty),
105
+ response_format=_opt(cast(Any, response_format)),
106
+ stop=_opt(stop),
107
+ temperature=_opt(temperature),
108
+ tools=_opt(cast(Any, tools)),
109
+ tool_choice=_opt(cast(Any, tool_choice_)),
110
+ top_p=_opt(top_p),
111
+ extra_body=extra_body,
112
+ )
113
+
114
+ return json.loads(result.text)
115
+
116
+
117
+ __all__ = local_public_names(__name__)
118
+
119
+
120
+ def __dir__():
121
+ return __all__
@@ -131,6 +131,13 @@ def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
131
131
  pass
132
132
 
133
133
 
134
+ @getchannel.conditional_return_type
135
+ def _(self: Expr) -> pxt.ColumnType:
136
+ input_type = self.col_type
137
+ assert isinstance(input_type, pxt.ImageType)
138
+ return pxt.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
139
+
140
+
134
141
  @pxt.udf(is_method=True)
135
142
  def get_metadata(self: PIL.Image.Image) -> dict:
136
143
  """
@@ -146,13 +153,6 @@ def get_metadata(self: PIL.Image.Image) -> dict:
146
153
  }
147
154
 
148
155
 
149
- @getchannel.conditional_return_type
150
- def _(self: Expr) -> pxt.ColumnType:
151
- input_type = self.col_type
152
- assert isinstance(input_type, pxt.ImageType)
153
- return pxt.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
154
-
155
-
156
156
  # Image.point()
157
157
  @pxt.udf(is_method=True)
158
158
  def point(self: PIL.Image.Image, lut: list[int], mode: Optional[str] = None) -> PIL.Image.Image:
@@ -325,10 +325,30 @@ async def translations(
325
325
 
326
326
 
327
327
  def _default_max_tokens(model: str) -> int:
328
- if model in ('o1', 'o3-mini'):
328
+ if (
329
+ _is_model_family(model, 'gpt-4o-realtime')
330
+ or _is_model_family(model, 'gpt-4o-mini-realtime')
331
+ or _is_model_family(model, 'gpt-4-turbo')
332
+ or _is_model_family(model, 'gpt-3.5-turbo')
333
+ ):
334
+ return 4096
335
+ if _is_model_family(model, 'gpt-4'):
336
+ return 8192 # All other gpt-4 models (will not match on gpt-4o models)
337
+ if _is_model_family(model, 'gpt-4o') or _is_model_family(model, 'gpt-4.5-preview'):
338
+ return 16384 # All other gpt-4o / gpt-4.5 models
339
+ if _is_model_family(model, 'o1-preview'):
340
+ return 32768
341
+ if _is_model_family(model, 'o1-mini'):
329
342
  return 65536
330
- else:
331
- return 1024
343
+ if _is_model_family(model, 'o1') or _is_model_family(model, 'o3'):
344
+ return 100000 # All other o1 / o3 models
345
+ return 100000 # global default
346
+
347
+
348
+ def _is_model_family(model: str, family: str) -> bool:
349
+ # `model.startswith(family)` would be a simpler match, but increases the risk of false positives.
350
+ # We use a slightly more complicated criterion to make things a little less error prone.
351
+ return model == family or model.startswith(f'{family}-')
332
352
 
333
353
 
334
354
  def _chat_completions_get_request_resources(
@@ -427,9 +447,6 @@ async def chat_completions(
427
447
  resource_pool, lambda: OpenAIRateLimitsInfo(_chat_completions_get_request_resources)
428
448
  )
429
449
 
430
- if max_completion_tokens is None and max_tokens is None:
431
- max_completion_tokens = _default_max_tokens(model)
432
-
433
450
  # cast(Any, ...): avoid mypy errors
434
451
  result = await _openai_client().chat.completions.with_raw_response.create(
435
452
  messages=messages,
@@ -555,9 +572,6 @@ async def vision(
555
572
  resource_pool, lambda: OpenAIRateLimitsInfo(_vision_get_request_resources)
556
573
  )
557
574
 
558
- if max_completion_tokens is None and max_tokens is None:
559
- max_completion_tokens = _default_max_tokens(model)
560
-
561
575
  result = await _openai_client().chat.completions.with_raw_response.create(
562
576
  messages=messages, # type: ignore
563
577
  model=model,
@@ -802,10 +816,13 @@ def _openai_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
802
816
  if 'tool_calls' not in response['choices'][0]['message'] or response['choices'][0]['message']['tool_calls'] is None:
803
817
  return None
804
818
  openai_tool_calls = response['choices'][0]['message']['tool_calls']
805
- return {
806
- tool_call['function']['name']: {'args': json.loads(tool_call['function']['arguments'])}
807
- for tool_call in openai_tool_calls
808
- }
819
+ pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
820
+ for tool_call in openai_tool_calls:
821
+ tool_name = tool_call['function']['name']
822
+ if tool_name not in pxt_tool_calls:
823
+ pxt_tool_calls[tool_name] = []
824
+ pxt_tool_calls[tool_name].append({'args': json.loads(tool_call['function']['arguments'])})
825
+ return pxt_tool_calls
809
826
 
810
827
 
811
828
  _T = TypeVar('_T')
@@ -14,9 +14,9 @@ t.select(pxt_video.extract_audio(t.video_col)).collect()
14
14
  import tempfile
15
15
  import uuid
16
16
  from pathlib import Path
17
- from typing import Optional
17
+ from typing import Any, Optional
18
18
 
19
- import av # type: ignore[import-untyped]
19
+ import av
20
20
  import numpy as np
21
21
  import PIL.Image
22
22
 
@@ -53,10 +53,14 @@ class make_video(pxt.Aggregator):
53
53
  Aggregator that creates a video from a sequence of images.
54
54
  """
55
55
 
56
+ container: Optional[av.container.OutputContainer]
57
+ stream: Optional[av.video.stream.VideoStream]
58
+ fps: int
59
+
56
60
  def __init__(self, fps: int = 25):
57
61
  """follows https://pyav.org/docs/develop/cookbook/numpy.html#generating-video"""
58
- self.container: Optional[av.container.OutputContainer] = None
59
- self.stream: Optional[av.stream.Stream] = None
62
+ self.container = None
63
+ self.stream = None
60
64
  self.fps = fps
61
65
 
62
66
  def update(self, frame: PIL.Image.Image) -> None:
@@ -107,9 +111,10 @@ def extract_audio(
107
111
 
108
112
  with av.open(output_filename, 'w', format=format) as output_container:
109
113
  output_stream = output_container.add_stream(codec or default_codec)
114
+ assert isinstance(output_stream, av.audio.stream.AudioStream)
110
115
  for packet in container.demux(audio_stream):
111
116
  for frame in packet.decode():
112
- output_container.mux(output_stream.encode(frame))
117
+ output_container.mux(output_stream.encode(frame)) # type: ignore[arg-type]
113
118
 
114
119
  return output_filename
115
120
 
@@ -141,7 +146,7 @@ def __get_stream_metadata(stream: av.stream.Stream) -> dict:
141
146
  return {'type': stream.type} # Currently unsupported
142
147
 
143
148
  codec_context = stream.codec_context
144
- codec_context_md = {
149
+ codec_context_md: dict[str, Any] = {
145
150
  'name': codec_context.name,
146
151
  'codec_tag': codec_context.codec_tag.encode('unicode-escape').decode('utf-8'),
147
152
  'profile': codec_context.profile,
@@ -160,9 +165,11 @@ def __get_stream_metadata(stream: av.stream.Stream) -> dict:
160
165
 
161
166
  if stream.type == 'audio':
162
167
  # Additional metadata for audio
163
- codec_context_md['channels'] = int(codec_context.channels) if codec_context.channels is not None else None
168
+ channels = getattr(stream.codec_context, 'channels', None)
169
+ codec_context_md['channels'] = int(channels) if channels is not None else None
164
170
  else:
165
171
  assert stream.type == 'video'
172
+ assert isinstance(stream, av.video.stream.VideoStream)
166
173
  # Additional metadata for video
167
174
  codec_context_md['pix_fmt'] = getattr(stream.codec_context, 'pix_fmt', None)
168
175
  metadata.update(
pixeltable/globals.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import dataclasses
2
2
  import logging
3
+ import urllib.parse
3
4
  from typing import Any, Iterable, Literal, Optional, Union
4
5
  from uuid import UUID
5
6
 
@@ -10,7 +11,7 @@ from sqlalchemy.util.preloaded import orm
10
11
 
11
12
  import pixeltable.exceptions as excs
12
13
  import pixeltable.exprs as exprs
13
- from pixeltable import DataFrame, catalog, func
14
+ from pixeltable import DataFrame, catalog, func, share
14
15
  from pixeltable.catalog import Catalog
15
16
  from pixeltable.dataframe import DataFrameResultSet
16
17
  from pixeltable.env import Env
@@ -279,14 +280,16 @@ def create_view(
279
280
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
280
281
  """
281
282
  where: Optional[exprs.Expr] = None
283
+ select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
282
284
  if isinstance(base, catalog.Table):
283
285
  tbl_version_path = base._tbl_version_path
284
286
  elif isinstance(base, DataFrame):
285
- base._validate_mutable('create_view')
287
+ base._validate_mutable('create_view', allow_select=True)
286
288
  if len(base._from_clause.tbls) > 1:
287
289
  raise excs.Error('Cannot create a view of a join')
288
290
  tbl_version_path = base._from_clause.tbls[0]
289
291
  where = base.where_clause
292
+ select_list = base.select_list
290
293
  else:
291
294
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
292
295
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
@@ -322,6 +325,7 @@ def create_view(
322
325
  dir._id,
323
326
  path.name,
324
327
  base=tbl_version_path,
328
+ select_list=select_list,
325
329
  additional_columns=additional_columns,
326
330
  predicate=where,
327
331
  is_snapshot=is_snapshot,
@@ -630,7 +634,7 @@ def create_dir(
630
634
  parent = cat.paths[path.parent]
631
635
  assert parent is not None
632
636
  with orm.Session(Env.get().engine, future=True) as session:
633
- dir_md = schema.DirMd(name=path.name)
637
+ dir_md = schema.DirMd(name=path.name, user=None, additional_md={})
634
638
  dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
635
639
  session.add(dir_record)
636
640
  session.flush()
@@ -723,6 +727,13 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
723
727
  _logger.info(f'Removed directory `{path_str}`.')
724
728
 
725
729
 
730
+ def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
731
+ parsed_uri = urllib.parse.urlparse(dest_uri)
732
+ if parsed_uri.scheme != 'pxt':
733
+ raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
734
+ share.publish_snapshot(dest_uri, table)
735
+
736
+
726
737
  def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
727
738
  """List the directories in a directory.
728
739