pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (101)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +531 -251
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +8 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +17 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +227 -23
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +65 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/mistralai.py +0 -2
  58. pixeltable/functions/ollama.py +8 -8
  59. pixeltable/functions/openai.py +51 -4
  60. pixeltable/functions/timestamp.py +1 -1
  61. pixeltable/functions/video.py +3 -9
  62. pixeltable/functions/vision.py +1 -1
  63. pixeltable/globals.py +374 -89
  64. pixeltable/index/embedding_index.py +106 -29
  65. pixeltable/io/__init__.py +1 -1
  66. pixeltable/io/label_studio.py +1 -1
  67. pixeltable/io/parquet.py +39 -19
  68. pixeltable/iterators/__init__.py +1 -0
  69. pixeltable/iterators/document.py +12 -0
  70. pixeltable/iterators/image.py +100 -0
  71. pixeltable/iterators/video.py +7 -8
  72. pixeltable/metadata/__init__.py +1 -1
  73. pixeltable/metadata/converters/convert_16.py +2 -1
  74. pixeltable/metadata/converters/convert_17.py +2 -1
  75. pixeltable/metadata/converters/convert_22.py +17 -0
  76. pixeltable/metadata/converters/convert_23.py +35 -0
  77. pixeltable/metadata/converters/convert_24.py +56 -0
  78. pixeltable/metadata/converters/convert_25.py +19 -0
  79. pixeltable/metadata/converters/util.py +4 -2
  80. pixeltable/metadata/notes.py +4 -0
  81. pixeltable/metadata/schema.py +1 -0
  82. pixeltable/plan.py +129 -51
  83. pixeltable/store.py +1 -1
  84. pixeltable/type_system.py +196 -54
  85. pixeltable/utils/arrow.py +8 -3
  86. pixeltable/utils/description_helper.py +89 -0
  87. pixeltable/utils/documents.py +14 -0
  88. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/METADATA +32 -22
  89. pixeltable-0.3.0.dist-info/RECORD +155 -0
  90. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  91. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  92. pixeltable/tool/create_test_db_dump.py +0 -308
  93. pixeltable/tool/create_test_video.py +0 -81
  94. pixeltable/tool/doc_plugins/griffe.py +0 -50
  95. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  96. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  97. pixeltable/tool/embed_udf.py +0 -9
  98. pixeltable/tool/mypy_plugin.py +0 -55
  99. pixeltable-0.2.24.dist-info/RECORD +0 -153
  100. pixeltable-0.2.24.dist-info/entry_points.txt +0 -3
  101. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
@@ -144,9 +144,9 @@ def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> li
144
144
 
145
145
 
146
146
  @pxt.udf(batch_size=32)
147
- def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
147
+ def clip(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
148
148
  """
149
- Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
149
+ Computes a CLIP embedding for the specified text or image. `model_id` should be a reference to a pretrained
150
150
  [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
151
151
 
152
152
  __Requirements:__
@@ -164,7 +164,11 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), px
164
164
  Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
165
165
  Pixeltable column `tbl.text` of the table `tbl`:
166
166
 
167
- >>> tbl['result'] = clip_text(tbl.text, model_id='openai/clip-vit-base-patch32')
167
+ >>> tbl.add_computed_column(
168
+ ... result=clip(tbl.text, model_id='openai/clip-vit-base-patch32')
169
+ ... )
170
+
171
+ The same would work with an image column `tbl.image` in place of `tbl.text`.
168
172
  """
169
173
  env.Env.get().require_package('transformers')
170
174
  device = resolve_torch_device('auto')
@@ -181,29 +185,8 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), px
181
185
  return [embeddings[i] for i in range(embeddings.shape[0])]
182
186
 
183
187
 
184
- @pxt.udf(batch_size=32)
185
- def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
186
- """
187
- Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
188
- [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
189
-
190
- __Requirements:__
191
-
192
- - `pip install torch transformers`
193
-
194
- Args:
195
- image: The image to embed.
196
- model_id: The pretrained model to use for the embedding.
197
-
198
- Returns:
199
- An array containing the output of the embedding model.
200
-
201
- Examples:
202
- Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
203
- Pixeltable column `image` of the table `tbl`:
204
-
205
- >>> tbl['result'] = clip_image(tbl.image, model_id='openai/clip-vit-base-patch32')
206
- """
188
+ @clip.overload
189
+ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
207
190
  env.Env.get().require_package('transformers')
208
191
  device = resolve_torch_device('auto')
209
192
  import torch
@@ -219,8 +202,7 @@ def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Arr
219
202
  return [embeddings[i] for i in range(embeddings.shape[0])]
220
203
 
221
204
 
222
- @clip_text.conditional_return_type
223
- @clip_image.conditional_return_type
205
+ @clip.conditional_return_type
224
206
  def _(model_id: str) -> pxt.ArrayType:
225
207
  try:
226
208
  from transformers import CLIPModel
@@ -16,20 +16,15 @@ import pixeltable as pxt
16
16
  from pixeltable.utils.code import local_public_names
17
17
 
18
18
 
19
- @pxt.uda(
20
- update_types=[pxt.JsonType(nullable=True)],
21
- value_type=pxt.JsonType(),
22
- requires_order_by=False,
23
- allows_window=False,
24
- )
19
+ @pxt.uda
25
20
  class make_list(pxt.Aggregator):
26
21
  """
27
22
  Collects arguments into a list.
28
23
  """
29
- def __init__(self):
24
+ def __init__(self) -> None:
30
25
  self.output: list[Any] = []
31
26
 
32
- def update(self, obj: Any) -> None:
27
+ def update(self, obj: pxt.Json) -> None:
33
28
  if obj is None:
34
29
  return
35
30
  self.output.append(obj)
@@ -0,0 +1,67 @@
1
+ import builtins
2
+ import math
3
+ from typing import Optional
4
+
5
+ import sqlalchemy as sql
6
+
7
+ import pixeltable as pxt
8
+ from pixeltable.utils.code import local_public_names
9
+
10
+
11
+ @pxt.udf(is_method=True)
12
+ def abs(self: float) -> float:
13
+ return builtins.abs(self)
14
+
15
+
16
+ @abs.to_sql
17
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
18
+ return sql.func.abs(self)
19
+
20
+
21
+ @pxt.udf(is_method=True)
22
+ def ceil(self: float) -> float:
23
+ # This ensures the same behavior as SQL
24
+ if math.isfinite(self):
25
+ return float(math.ceil(self))
26
+ else:
27
+ return self
28
+
29
+
30
+ @ceil.to_sql
31
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
32
+ return sql.func.ceiling(self)
33
+
34
+
35
+ @pxt.udf(is_method=True)
36
+ def floor(self: float) -> float:
37
+ # This ensures the same behavior as SQL
38
+ if math.isfinite(self):
39
+ return float(math.floor(self))
40
+ else:
41
+ return self
42
+
43
+
44
+ @floor.to_sql
45
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
46
+ return sql.func.floor(self)
47
+
48
+
49
+ @pxt.udf(is_method=True)
50
+ def round(self: float, digits: Optional[int] = None) -> float:
51
+ # Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
52
+ return builtins.round(self, digits or 0)
53
+
54
+
55
+ @round.to_sql
56
+ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
57
+ if digits is None:
58
+ return sql.func.round(self)
59
+ else:
60
+ return sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer))
61
+
62
+
63
+ __all__ = local_public_names(__name__)
64
+
65
+
66
+ def __dir__():
67
+ return __all__
@@ -36,7 +36,6 @@ def chat_completions(
36
36
  temperature: Optional[float] = 0.7,
37
37
  top_p: Optional[float] = 1.0,
38
38
  max_tokens: Optional[int] = None,
39
- min_tokens: Optional[int] = None,
40
39
  stop: Optional[list[str]] = None,
41
40
  random_seed: Optional[int] = None,
42
41
  response_format: Optional[dict] = None,
@@ -75,7 +74,6 @@ def chat_completions(
75
74
  temperature=temperature,
76
75
  top_p=top_p,
77
76
  max_tokens=_opt(max_tokens),
78
- min_tokens=_opt(min_tokens),
79
77
  stop=stop,
80
78
  random_seed=_opt(random_seed),
81
79
  response_format=response_format, # type: ignore[arg-type]
@@ -34,7 +34,7 @@ def generate(
34
34
  template: str = '',
35
35
  context: Optional[list[int]] = None,
36
36
  raw: bool = False,
37
- format: str = '',
37
+ format: Optional[str] = None,
38
38
  options: Optional[dict] = None,
39
39
  ) -> dict:
40
40
  """
@@ -44,7 +44,7 @@ def generate(
44
44
  prompt: The prompt to generate a response for.
45
45
  model: The model name.
46
46
  suffix: The text after the model response.
47
- format: The format of the response; must be one of `'json'` or `''` (the empty string).
47
+ format: The format of the response; must be one of `'json'` or `None`.
48
48
  system: System message.
49
49
  template: Prompt template to use.
50
50
  context: The context parameter returned from a previous call to `generate()`.
@@ -68,7 +68,7 @@ def generate(
68
68
  raw=raw,
69
69
  format=format,
70
70
  options=options,
71
- ) # type: ignore[call-overload]
71
+ ).dict() # type: ignore[call-overload]
72
72
 
73
73
 
74
74
  @pxt.udf
@@ -77,7 +77,7 @@ def chat(
77
77
  *,
78
78
  model: str,
79
79
  tools: Optional[list[dict]] = None,
80
- format: str = '',
80
+ format: Optional[str] = None,
81
81
  options: Optional[dict] = None,
82
82
  ) -> dict:
83
83
  """
@@ -87,7 +87,7 @@ def chat(
87
87
  messages: The messages of the chat.
88
88
  model: The model name.
89
89
  tools: Tools for the model to use.
90
- format: The format of the response; must be one of `'json'` or `''` (the empty string).
90
+ format: The format of the response; must be one of `'json'` or `None`.
91
91
  options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and `top_k`.
92
92
  For details, see the
93
93
  [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
@@ -103,7 +103,7 @@ def chat(
103
103
  tools=tools,
104
104
  format=format,
105
105
  options=options,
106
- ) # type: ignore[call-overload]
106
+ ).dict() # type: ignore[call-overload]
107
107
 
108
108
 
109
109
  @pxt.udf(batch_size=16)
@@ -135,8 +135,8 @@ def embed(
135
135
  model=model,
136
136
  input=input,
137
137
  truncate=truncate,
138
- options=options, # type: ignore[arg-type]
139
- )
138
+ options=options,
139
+ ).dict()
140
140
  return [np.array(data, dtype=np.float64) for data in results['embeddings']]
141
141
 
142
142
 
@@ -7,17 +7,18 @@ the [Working with OpenAI](https://pixeltable.readme.io/docs/working-with-openai)
7
7
 
8
8
  import base64
9
9
  import io
10
+ import json
10
11
  import pathlib
11
12
  import uuid
12
- from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
13
+ from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
13
14
 
14
15
  import numpy as np
15
16
  import PIL.Image
16
17
  import tenacity
17
18
 
18
19
  import pixeltable as pxt
19
- from pixeltable import env
20
- from pixeltable.func import Batch
20
+ from pixeltable import env, exprs
21
+ from pixeltable.func import Batch, Tools
21
22
  from pixeltable.utils.code import local_public_names
22
23
 
23
24
  if TYPE_CHECKING:
@@ -225,6 +226,33 @@ def chat_completions(
225
226
  ]
226
227
  tbl['response'] = chat_completions(messages, model='gpt-4o-mini')
227
228
  """
229
+
230
+ if tools is not None:
231
+ tools = [
232
+ {
233
+ 'type': 'function',
234
+ 'function': tool
235
+ }
236
+ for tool in tools
237
+ ]
238
+
239
+ tool_choice_: Union[str, dict, None] = None
240
+ if tool_choice is not None:
241
+ if tool_choice['auto']:
242
+ tool_choice_ = 'auto'
243
+ elif tool_choice['required']:
244
+ tool_choice_ = 'required'
245
+ else:
246
+ assert tool_choice['tool'] is not None
247
+ tool_choice_ = {
248
+ 'type': 'function',
249
+ 'function': {'name': tool_choice['tool']}
250
+ }
251
+
252
+ extra_body: Optional[dict[str, Any]] = None
253
+ if tool_choice is not None and not tool_choice['parallel_tool_calls']:
254
+ extra_body = {'parallel_tool_calls': False}
255
+
228
256
  result = _retry(_openai_client().chat.completions.create)(
229
257
  messages=messages,
230
258
  model=model,
@@ -241,8 +269,9 @@ def chat_completions(
241
269
  temperature=_opt(temperature),
242
270
  top_p=_opt(top_p),
243
271
  tools=_opt(tools),
244
- tool_choice=_opt(tool_choice),
272
+ tool_choice=_opt(tool_choice_),
245
273
  user=_opt(user),
274
+ extra_body=extra_body,
246
275
  )
247
276
  return result.dict()
248
277
 
@@ -453,6 +482,24 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
453
482
  return result.dict()
454
483
 
455
484
 
485
+ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
486
+ """Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
487
+ return tools._invoke(_openai_response_to_pxt_tool_calls(response))
488
+
489
+
490
+ @pxt.udf
491
+ def _openai_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
492
+ openai_tool_calls = response['choices'][0]['message']['tool_calls']
493
+ if openai_tool_calls is not None:
494
+ return {
495
+ tool_call['function']['name']: {
496
+ 'args': json.loads(tool_call['function']['arguments'])
497
+ }
498
+ for tool_call in openai_tool_calls
499
+ }
500
+ return None
501
+
502
+
456
503
  _T = TypeVar('_T')
457
504
 
458
505
 
@@ -232,7 +232,7 @@ def _(
232
232
  sql.cast(day, sql.Integer),
233
233
  sql.cast(hour, sql.Integer),
234
234
  sql.cast(minute, sql.Integer),
235
- sql.cast(second + microsecond / 1000000.0, sql.Double))
235
+ sql.cast(second + microsecond / 1000000.0, sql.Float))
236
236
 
237
237
  # @pxt.udf
238
238
  # def date(self: datetime) -> datetime:
@@ -47,13 +47,7 @@ _format_defaults = { # format -> (codec, ext)
47
47
  # output_container.mux(packet)
48
48
 
49
49
 
50
- @pxt.uda(
51
- init_types=[pxt.IntType()],
52
- update_types=[pxt.ImageType()],
53
- value_type=pxt.VideoType(),
54
- requires_order_by=True,
55
- allows_window=False,
56
- )
50
+ @pxt.uda(requires_order_by=True)
57
51
  class make_video(pxt.Aggregator):
58
52
  """
59
53
  Aggregator that creates a video from a sequence of images.
@@ -80,7 +74,7 @@ class make_video(pxt.Aggregator):
80
74
  for packet in self.stream.encode(av_frame):
81
75
  self.container.mux(packet)
82
76
 
83
- def value(self) -> str:
77
+ def value(self) -> pxt.Video:
84
78
  for packet in self.stream.encode():
85
79
  self.container.mux(packet)
86
80
  self.container.close()
@@ -132,7 +126,7 @@ def _get_metadata(path: str) -> dict:
132
126
  assert isinstance(container, av.container.InputContainer)
133
127
  streams_info = [__get_stream_metadata(stream) for stream in container.streams]
134
128
  result = {
135
- 'bit_exact': container.bit_exact,
129
+ 'bit_exact': getattr(container, 'bit_exact', False),
136
130
  'bit_rate': container.bit_rate,
137
131
  'size': container.size,
138
132
  'metadata': container.metadata,
@@ -220,7 +220,7 @@ def eval_detections(
220
220
  return result
221
221
 
222
222
 
223
- @pxt.uda(update_types=[pxt.JsonType()], value_type=pxt.JsonType(), allows_std_agg=True, allows_window=False)
223
+ @pxt.uda
224
224
  class mean_ap(pxt.Aggregator):
225
225
  """
226
226
  Calculates the mean average precision (mAP) over