pixeltable 0.3.12__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -27
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -7
- pixeltable/catalog/column.py +6 -2
- pixeltable/catalog/dir.py +2 -1
- pixeltable/catalog/insertable_table.py +11 -0
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +27 -38
- pixeltable/catalog/table_version.py +19 -0
- pixeltable/catalog/table_version_path.py +7 -0
- pixeltable/catalog/view.py +31 -0
- pixeltable/dataframe.py +50 -7
- pixeltable/env.py +1 -1
- pixeltable/exceptions.py +20 -2
- pixeltable/exec/aggregation_node.py +14 -0
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +0 -4
- pixeltable/exec/expr_eval/expr_eval_node.py +1 -2
- pixeltable/exec/sql_node.py +3 -2
- pixeltable/exprs/column_ref.py +42 -17
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/globals.py +1 -1
- pixeltable/exprs/literal.py +11 -1
- pixeltable/exprs/rowid_ref.py +4 -1
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/func/function.py +1 -1
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +1 -1
- pixeltable/functions/bedrock.py +130 -0
- pixeltable/functions/date.py +185 -0
- pixeltable/functions/gemini.py +22 -20
- pixeltable/functions/globals.py +1 -16
- pixeltable/functions/huggingface.py +7 -6
- pixeltable/functions/image.py +15 -16
- pixeltable/functions/json.py +2 -1
- pixeltable/functions/math.py +40 -0
- pixeltable/functions/mistralai.py +3 -2
- pixeltable/functions/openai.py +9 -8
- pixeltable/functions/string.py +1 -2
- pixeltable/functions/together.py +4 -3
- pixeltable/functions/video.py +2 -2
- pixeltable/globals.py +26 -9
- pixeltable/io/datarows.py +4 -3
- pixeltable/io/hf_datasets.py +2 -2
- pixeltable/io/label_studio.py +17 -17
- pixeltable/io/pandas.py +29 -16
- pixeltable/io/parquet.py +2 -0
- pixeltable/io/table_data_conduit.py +8 -2
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +12 -5
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +219 -119
- pixeltable/share/publish.py +61 -16
- pixeltable/store.py +45 -20
- pixeltable/type_system.py +46 -2
- pixeltable/utils/arrow.py +8 -2
- pixeltable/utils/pytorch.py +4 -0
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/RECORD +66 -63
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.12.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0
pixeltable/functions/gemini.py
CHANGED
|
@@ -1,32 +1,35 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
3
|
that wrap various endpoints from the Google Gemini API. In order to use them, you must
|
|
4
|
-
first `pip install google-
|
|
4
|
+
first `pip install google-genai` and configure your Gemini credentials, as described in
|
|
5
5
|
the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
11
|
from pixeltable import env
|
|
12
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from google import genai
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
@env.register_client('gemini')
|
|
15
|
-
def _(api_key: str) ->
|
|
16
|
-
|
|
18
|
+
def _(api_key: str) -> 'genai.client.Client':
|
|
19
|
+
from google import genai
|
|
17
20
|
|
|
18
|
-
genai.
|
|
21
|
+
return genai.client.Client(api_key=api_key)
|
|
19
22
|
|
|
20
23
|
|
|
21
|
-
def
|
|
22
|
-
env.Env.get().get_client('gemini')
|
|
24
|
+
def _genai_client() -> 'genai.client.Client':
|
|
25
|
+
return env.Env.get().get_client('gemini')
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
@pxt.udf(resource_pool='request-rate:gemini')
|
|
26
29
|
async def generate_content(
|
|
27
30
|
contents: str,
|
|
28
31
|
*,
|
|
29
|
-
|
|
32
|
+
model: str,
|
|
30
33
|
candidate_count: Optional[int] = None,
|
|
31
34
|
stop_sequences: Optional[list[str]] = None,
|
|
32
35
|
max_output_tokens: Optional[int] = None,
|
|
@@ -48,11 +51,11 @@ async def generate_content(
|
|
|
48
51
|
|
|
49
52
|
__Requirements:__
|
|
50
53
|
|
|
51
|
-
- `pip install google-
|
|
54
|
+
- `pip install google-genai`
|
|
52
55
|
|
|
53
56
|
Args:
|
|
54
57
|
contents: The input content to generate from.
|
|
55
|
-
|
|
58
|
+
model: The name of the model to use.
|
|
56
59
|
|
|
57
60
|
For details on the other parameters, see: <https://ai.google.dev/gemini-api/docs>
|
|
58
61
|
|
|
@@ -63,14 +66,12 @@ async def generate_content(
|
|
|
63
66
|
Add a computed column that applies the model `gemini-1.5-flash`
|
|
64
67
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
65
68
|
|
|
66
|
-
>>> tbl.add_computed_column(response=generate_content(tbl.prompt,
|
|
69
|
+
>>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-1.5-flash'))
|
|
67
70
|
"""
|
|
68
|
-
env.Env.get().require_package('google.
|
|
69
|
-
|
|
70
|
-
import google.generativeai as genai
|
|
71
|
+
env.Env.get().require_package('google.genai')
|
|
72
|
+
from google.genai import types
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
gc = genai.GenerationConfig(
|
|
74
|
+
config = types.GenerateContentConfig(
|
|
74
75
|
candidate_count=candidate_count,
|
|
75
76
|
stop_sequences=stop_sequences,
|
|
76
77
|
max_output_tokens=max_output_tokens,
|
|
@@ -82,10 +83,11 @@ async def generate_content(
|
|
|
82
83
|
presence_penalty=presence_penalty,
|
|
83
84
|
frequency_penalty=frequency_penalty,
|
|
84
85
|
)
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
|
|
87
|
+
response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config)
|
|
88
|
+
return response.model_dump()
|
|
87
89
|
|
|
88
90
|
|
|
89
91
|
@generate_content.resource_pool
|
|
90
|
-
def _(
|
|
91
|
-
return f'request-rate:gemini:{
|
|
92
|
+
def _(model: str) -> str:
|
|
93
|
+
return f'request-rate:gemini:{model}'
|
pixeltable/functions/globals.py
CHANGED
|
@@ -49,22 +49,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
49
49
|
allows_window=True,
|
|
50
50
|
# Allow counting non-null values of any type
|
|
51
51
|
# TODO: should we have an "Any" type that can be used here?
|
|
52
|
-
type_substitutions=tuple(
|
|
53
|
-
{T: Optional[t]} # type: ignore[misc]
|
|
54
|
-
for t in (
|
|
55
|
-
ts.String,
|
|
56
|
-
ts.Int,
|
|
57
|
-
ts.Float,
|
|
58
|
-
ts.Bool,
|
|
59
|
-
ts.Timestamp,
|
|
60
|
-
ts.Array,
|
|
61
|
-
ts.Json,
|
|
62
|
-
ts.Image,
|
|
63
|
-
ts.Video,
|
|
64
|
-
ts.Audio,
|
|
65
|
-
ts.Document,
|
|
66
|
-
)
|
|
67
|
-
),
|
|
52
|
+
type_substitutions=tuple({T: Optional[t]} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
|
|
68
53
|
)
|
|
69
54
|
class count(func.Aggregator, typing.Generic[T]):
|
|
70
55
|
def __init__(self) -> None:
|
|
@@ -13,6 +13,7 @@ import PIL.Image
|
|
|
13
13
|
|
|
14
14
|
import pixeltable as pxt
|
|
15
15
|
import pixeltable.exceptions as excs
|
|
16
|
+
import pixeltable.type_system as ts
|
|
16
17
|
from pixeltable import env
|
|
17
18
|
from pixeltable.func import Batch
|
|
18
19
|
from pixeltable.functions.util import normalize_image_mode, resolve_torch_device
|
|
@@ -61,14 +62,14 @@ def sentence_transformer(
|
|
|
61
62
|
|
|
62
63
|
|
|
63
64
|
@sentence_transformer.conditional_return_type
|
|
64
|
-
def _(model_id: str) ->
|
|
65
|
+
def _(model_id: str) -> ts.ArrayType:
|
|
65
66
|
try:
|
|
66
67
|
from sentence_transformers import SentenceTransformer
|
|
67
68
|
|
|
68
69
|
model = _lookup_model(model_id, SentenceTransformer)
|
|
69
|
-
return
|
|
70
|
+
return ts.ArrayType((model.get_sentence_embedding_dimension(),), dtype=ts.FloatType(), nullable=False)
|
|
70
71
|
except ImportError:
|
|
71
|
-
return
|
|
72
|
+
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
@pxt.udf
|
|
@@ -199,14 +200,14 @@ def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,
|
|
|
199
200
|
|
|
200
201
|
|
|
201
202
|
@clip.conditional_return_type
|
|
202
|
-
def _(model_id: str) ->
|
|
203
|
+
def _(model_id: str) -> ts.ArrayType:
|
|
203
204
|
try:
|
|
204
205
|
from transformers import CLIPModel
|
|
205
206
|
|
|
206
207
|
model = _lookup_model(model_id, CLIPModel.from_pretrained)
|
|
207
|
-
return
|
|
208
|
+
return ts.ArrayType((model.config.projection_dim,), dtype=ts.FloatType(), nullable=False)
|
|
208
209
|
except ImportError:
|
|
209
|
-
return
|
|
210
|
+
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
210
211
|
|
|
211
212
|
|
|
212
213
|
@pxt.udf(batch_size=4)
|
pixeltable/functions/image.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import Optional
|
|
|
16
16
|
import PIL.Image
|
|
17
17
|
|
|
18
18
|
import pixeltable as pxt
|
|
19
|
+
import pixeltable.type_system as ts
|
|
19
20
|
from pixeltable.exprs import Expr
|
|
20
21
|
from pixeltable.utils.code import local_public_names
|
|
21
22
|
|
|
@@ -88,10 +89,10 @@ def convert(self: PIL.Image.Image, mode: str) -> PIL.Image.Image:
|
|
|
88
89
|
|
|
89
90
|
|
|
90
91
|
@convert.conditional_return_type
|
|
91
|
-
def _(self: Expr, mode: str) ->
|
|
92
|
+
def _(self: Expr, mode: str) -> ts.ColumnType:
|
|
92
93
|
input_type = self.col_type
|
|
93
|
-
assert isinstance(input_type,
|
|
94
|
-
return
|
|
94
|
+
assert isinstance(input_type, ts.ImageType)
|
|
95
|
+
return ts.ImageType(size=input_type.size, mode=mode, nullable=input_type.nullable)
|
|
95
96
|
|
|
96
97
|
|
|
97
98
|
# Image.crop()
|
|
@@ -108,14 +109,12 @@ def crop(self: PIL.Image.Image, box: tuple[int, int, int, int]) -> PIL.Image.Ima
|
|
|
108
109
|
|
|
109
110
|
|
|
110
111
|
@crop.conditional_return_type
|
|
111
|
-
def _(self: Expr, box: tuple[int, int, int, int]) ->
|
|
112
|
+
def _(self: Expr, box: tuple[int, int, int, int]) -> ts.ColumnType:
|
|
112
113
|
input_type = self.col_type
|
|
113
|
-
assert isinstance(input_type,
|
|
114
|
+
assert isinstance(input_type, ts.ImageType)
|
|
114
115
|
if (isinstance(box, (list, tuple))) and len(box) == 4 and all(isinstance(x, int) for x in box):
|
|
115
|
-
return
|
|
116
|
-
|
|
117
|
-
)
|
|
118
|
-
return pxt.ImageType(mode=input_type.mode, nullable=input_type.nullable) # we can't compute the size statically
|
|
116
|
+
return ts.ImageType(size=(box[2] - box[0], box[3] - box[1]), mode=input_type.mode, nullable=input_type.nullable)
|
|
117
|
+
return ts.ImageType(mode=input_type.mode, nullable=input_type.nullable) # we can't compute the size statically
|
|
119
118
|
|
|
120
119
|
|
|
121
120
|
# Image.getchannel()
|
|
@@ -134,10 +133,10 @@ def getchannel(self: PIL.Image.Image, channel: int) -> PIL.Image.Image:
|
|
|
134
133
|
|
|
135
134
|
|
|
136
135
|
@getchannel.conditional_return_type
|
|
137
|
-
def _(self: Expr) ->
|
|
136
|
+
def _(self: Expr) -> ts.ColumnType:
|
|
138
137
|
input_type = self.col_type
|
|
139
|
-
assert isinstance(input_type,
|
|
140
|
-
return
|
|
138
|
+
assert isinstance(input_type, ts.ImageType)
|
|
139
|
+
return ts.ImageType(size=input_type.size, mode='L', nullable=input_type.nullable)
|
|
141
140
|
|
|
142
141
|
|
|
143
142
|
@pxt.udf(is_method=True)
|
|
@@ -183,10 +182,10 @@ def resize(self: PIL.Image.Image, size: tuple[int, int]) -> PIL.Image.Image:
|
|
|
183
182
|
|
|
184
183
|
|
|
185
184
|
@resize.conditional_return_type
|
|
186
|
-
def _(self: Expr, size: tuple[int, int]) ->
|
|
185
|
+
def _(self: Expr, size: tuple[int, int]) -> ts.ColumnType:
|
|
187
186
|
input_type = self.col_type
|
|
188
|
-
assert isinstance(input_type,
|
|
189
|
-
return
|
|
187
|
+
assert isinstance(input_type, ts.ImageType)
|
|
188
|
+
return ts.ImageType(size=size, mode=input_type.mode, nullable=input_type.nullable)
|
|
190
189
|
|
|
191
190
|
|
|
192
191
|
# Image.rotate()
|
|
@@ -237,7 +236,7 @@ def transpose(self: PIL.Image.Image, method: int) -> PIL.Image.Image:
|
|
|
237
236
|
@rotate.conditional_return_type
|
|
238
237
|
@effect_spread.conditional_return_type
|
|
239
238
|
@transpose.conditional_return_type
|
|
240
|
-
def _(self: Expr) ->
|
|
239
|
+
def _(self: Expr) -> ts.ColumnType:
|
|
241
240
|
return self.col_type
|
|
242
241
|
|
|
243
242
|
|
pixeltable/functions/json.py
CHANGED
|
@@ -4,9 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
|
4
4
|
Example:
|
|
5
5
|
```python
|
|
6
6
|
import pixeltable as pxt
|
|
7
|
+
import pixeltable.functions as pxtf
|
|
7
8
|
|
|
8
9
|
t = pxt.get_table(...)
|
|
9
|
-
t.select(
|
|
10
|
+
t.select(pxtf.json.make_list(t.json_col)).collect()
|
|
10
11
|
```
|
|
11
12
|
"""
|
|
12
13
|
|
pixeltable/functions/math.py
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for mathematical operations.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.float_col.floor()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
1
13
|
import builtins
|
|
2
14
|
import math
|
|
3
15
|
from typing import Optional
|
|
@@ -10,6 +22,11 @@ from pixeltable.utils.code import local_public_names
|
|
|
10
22
|
|
|
11
23
|
@pxt.udf(is_method=True)
|
|
12
24
|
def abs(self: float) -> float:
|
|
25
|
+
"""
|
|
26
|
+
Return the absolute value of the given number.
|
|
27
|
+
|
|
28
|
+
Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
|
|
29
|
+
"""
|
|
13
30
|
return builtins.abs(self)
|
|
14
31
|
|
|
15
32
|
|
|
@@ -20,6 +37,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
20
37
|
|
|
21
38
|
@pxt.udf(is_method=True)
|
|
22
39
|
def ceil(self: float) -> float:
|
|
40
|
+
"""
|
|
41
|
+
Return the ceiling of the given number.
|
|
42
|
+
|
|
43
|
+
Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
|
|
44
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
45
|
+
`math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
|
|
46
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
47
|
+
"""
|
|
23
48
|
# This ensures the same behavior as SQL
|
|
24
49
|
if math.isfinite(self):
|
|
25
50
|
return float(math.ceil(self))
|
|
@@ -34,6 +59,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
34
59
|
|
|
35
60
|
@pxt.udf(is_method=True)
|
|
36
61
|
def floor(self: float) -> float:
|
|
62
|
+
"""
|
|
63
|
+
Return the floor of the given number.
|
|
64
|
+
|
|
65
|
+
Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
|
|
66
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
67
|
+
`math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
|
|
68
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
69
|
+
"""
|
|
37
70
|
# This ensures the same behavior as SQL
|
|
38
71
|
if math.isfinite(self):
|
|
39
72
|
return float(math.floor(self))
|
|
@@ -48,6 +81,13 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
48
81
|
|
|
49
82
|
@pxt.udf(is_method=True)
|
|
50
83
|
def round(self: float, digits: Optional[int] = None) -> float:
|
|
84
|
+
"""
|
|
85
|
+
Round a number to a given precision in decimal digits.
|
|
86
|
+
|
|
87
|
+
Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
|
|
88
|
+
Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
|
|
89
|
+
`builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
|
|
90
|
+
"""
|
|
51
91
|
# Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
|
|
52
92
|
return builtins.round(self, digits or 0)
|
|
53
93
|
|
|
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Optional, TypeVar, Union
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
|
|
12
12
|
import pixeltable as pxt
|
|
13
|
+
import pixeltable.type_system as ts
|
|
13
14
|
from pixeltable.env import Env, register_client
|
|
14
15
|
from pixeltable.func.signature import Batch
|
|
15
16
|
from pixeltable.utils.code import local_public_names
|
|
@@ -176,9 +177,9 @@ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,
|
|
|
176
177
|
|
|
177
178
|
|
|
178
179
|
@embeddings.conditional_return_type
|
|
179
|
-
def _(model: str) ->
|
|
180
|
+
def _(model: str) -> ts.ArrayType:
|
|
180
181
|
dimensions = _embedding_dimensions_cache.get(model) # `None` if unknown model
|
|
181
|
-
return
|
|
182
|
+
return ts.ArrayType((dimensions,), dtype=ts.FloatType())
|
|
182
183
|
|
|
183
184
|
|
|
184
185
|
_T = TypeVar('_T')
|
pixeltable/functions/openai.py
CHANGED
|
@@ -21,6 +21,7 @@ import numpy as np
|
|
|
21
21
|
import PIL
|
|
22
22
|
|
|
23
23
|
import pixeltable as pxt
|
|
24
|
+
import pixeltable.type_system as ts
|
|
24
25
|
from pixeltable import env, exprs
|
|
25
26
|
from pixeltable.func import Batch, Tools
|
|
26
27
|
from pixeltable.utils.code import local_public_names
|
|
@@ -666,13 +667,13 @@ async def embeddings(
|
|
|
666
667
|
|
|
667
668
|
|
|
668
669
|
@embeddings.conditional_return_type
|
|
669
|
-
def _(model: str, dimensions: Optional[int] = None) ->
|
|
670
|
+
def _(model: str, dimensions: Optional[int] = None) -> ts.ArrayType:
|
|
670
671
|
if dimensions is None:
|
|
671
672
|
if model not in _embedding_dimensions_cache:
|
|
672
673
|
# TODO: find some other way to retrieve a sample
|
|
673
|
-
return
|
|
674
|
+
return ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False)
|
|
674
675
|
dimensions = _embedding_dimensions_cache.get(model)
|
|
675
|
-
return
|
|
676
|
+
return ts.ArrayType((dimensions,), dtype=ts.FloatType(), nullable=False)
|
|
676
677
|
|
|
677
678
|
|
|
678
679
|
#####################################
|
|
@@ -738,17 +739,17 @@ async def image_generations(
|
|
|
738
739
|
|
|
739
740
|
|
|
740
741
|
@image_generations.conditional_return_type
|
|
741
|
-
def _(size: Optional[str] = None) ->
|
|
742
|
+
def _(size: Optional[str] = None) -> ts.ImageType:
|
|
742
743
|
if size is None:
|
|
743
|
-
return
|
|
744
|
+
return ts.ImageType(size=(1024, 1024))
|
|
744
745
|
x_pos = size.find('x')
|
|
745
746
|
if x_pos == -1:
|
|
746
|
-
return
|
|
747
|
+
return ts.ImageType()
|
|
747
748
|
try:
|
|
748
749
|
width, height = int(size[:x_pos]), int(size[x_pos + 1 :])
|
|
749
750
|
except ValueError:
|
|
750
|
-
return
|
|
751
|
-
return
|
|
751
|
+
return ts.ImageType()
|
|
752
|
+
return ts.ImageType(size=(width, height))
|
|
752
753
|
|
|
753
754
|
|
|
754
755
|
#####################################
|
pixeltable/functions/string.py
CHANGED
|
@@ -5,10 +5,9 @@ It closely follows the Pandas `pandas.Series.str` API.
|
|
|
5
5
|
Example:
|
|
6
6
|
```python
|
|
7
7
|
import pixeltable as pxt
|
|
8
|
-
from pixeltable.functions import string as pxt_str
|
|
9
8
|
|
|
10
9
|
t = pxt.get_table(...)
|
|
11
|
-
t.select(
|
|
10
|
+
t.select(t.str_col.capitalize()).collect()
|
|
12
11
|
```
|
|
13
12
|
"""
|
|
14
13
|
|
pixeltable/functions/together.py
CHANGED
|
@@ -16,6 +16,7 @@ import tenacity
|
|
|
16
16
|
|
|
17
17
|
import pixeltable as pxt
|
|
18
18
|
import pixeltable.exceptions as excs
|
|
19
|
+
import pixeltable.type_system as ts
|
|
19
20
|
from pixeltable import env
|
|
20
21
|
from pixeltable.func import Batch
|
|
21
22
|
from pixeltable.utils.code import local_public_names
|
|
@@ -225,12 +226,12 @@ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,
|
|
|
225
226
|
|
|
226
227
|
|
|
227
228
|
@embeddings.conditional_return_type
|
|
228
|
-
def _(model: str) ->
|
|
229
|
+
def _(model: str) -> ts.ArrayType:
|
|
229
230
|
if model not in _embedding_dimensions_cache:
|
|
230
231
|
# TODO: find some other way to retrieve a sample
|
|
231
|
-
return
|
|
232
|
+
return ts.ArrayType((None,), dtype=ts.FloatType())
|
|
232
233
|
dimensions = _embedding_dimensions_cache[model]
|
|
233
|
-
return
|
|
234
|
+
return ts.ArrayType((dimensions,), dtype=ts.FloatType())
|
|
234
235
|
|
|
235
236
|
|
|
236
237
|
@pxt.udf(resource_pool='request-rate:together:images')
|
pixeltable/functions/video.py
CHANGED
|
@@ -4,10 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
|
4
4
|
Example:
|
|
5
5
|
```python
|
|
6
6
|
import pixeltable as pxt
|
|
7
|
-
|
|
7
|
+
import pixeltable.functions as pxtf
|
|
8
8
|
|
|
9
9
|
t = pxt.get_table(...)
|
|
10
|
-
t.select(
|
|
10
|
+
t.select(pxtf.video.extract_audio(t.video_col)).collect()
|
|
11
11
|
```
|
|
12
12
|
"""
|
|
13
13
|
|
pixeltable/globals.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
import urllib.parse
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
|
|
8
7
|
|
|
@@ -372,6 +371,31 @@ def create_snapshot(
|
|
|
372
371
|
)
|
|
373
372
|
|
|
374
373
|
|
|
374
|
+
def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
|
|
375
|
+
"""
|
|
376
|
+
Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
|
|
377
|
+
replica of a remote table. A given table can have at most one replica per Pixeltable instance.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
|
|
381
|
+
a remote URI such as `'pxt://username/mydir.my_table'`.
|
|
382
|
+
source: Path to the source table, or (if the source table is a local table) a handle to the source table.
|
|
383
|
+
"""
|
|
384
|
+
remote_dest = destination.startswith('pxt://')
|
|
385
|
+
remote_source = isinstance(source, str) and source.startswith('pxt://')
|
|
386
|
+
if remote_dest == remote_source:
|
|
387
|
+
raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
|
|
388
|
+
|
|
389
|
+
if remote_dest:
|
|
390
|
+
if isinstance(source, str):
|
|
391
|
+
source = get_table(source)
|
|
392
|
+
share.push_replica(destination, source)
|
|
393
|
+
return None
|
|
394
|
+
else:
|
|
395
|
+
assert isinstance(source, str)
|
|
396
|
+
return share.pull_replica(destination, source)
|
|
397
|
+
|
|
398
|
+
|
|
375
399
|
def get_table(path: str) -> catalog.Table:
|
|
376
400
|
"""Get a handle to an existing table, view, or snapshot.
|
|
377
401
|
|
|
@@ -470,7 +494,7 @@ def drop_table(
|
|
|
470
494
|
# if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
|
|
471
495
|
# the Table record, and then get X locks in the correct order (first containing directory, then table)
|
|
472
496
|
with Env.get().begin_xact():
|
|
473
|
-
tbl_path = table._path
|
|
497
|
+
tbl_path = table._path
|
|
474
498
|
else:
|
|
475
499
|
assert isinstance(table, str)
|
|
476
500
|
tbl_path = table
|
|
@@ -627,13 +651,6 @@ def _extract_paths(
|
|
|
627
651
|
return result
|
|
628
652
|
|
|
629
653
|
|
|
630
|
-
def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
|
|
631
|
-
parsed_uri = urllib.parse.urlparse(dest_uri)
|
|
632
|
-
if parsed_uri.scheme != 'pxt':
|
|
633
|
-
raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
|
|
634
|
-
share.publish_snapshot(dest_uri, table)
|
|
635
|
-
|
|
636
|
-
|
|
637
654
|
def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
|
|
638
655
|
"""List the directories in a directory.
|
|
639
656
|
|
pixeltable/io/datarows.py
CHANGED
|
@@ -3,13 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
from typing import Any, Iterable, Optional, Union
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
|
+
import pixeltable.type_system as ts
|
|
6
7
|
from pixeltable import exceptions as excs
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def _infer_schema_from_rows(
|
|
10
11
|
rows: Iterable[dict[str, Any]], schema_overrides: dict[str, Any], primary_key: list[str]
|
|
11
|
-
) -> dict[str,
|
|
12
|
-
schema: dict[str,
|
|
12
|
+
) -> dict[str, ts.ColumnType]:
|
|
13
|
+
schema: dict[str, ts.ColumnType] = {}
|
|
13
14
|
cols_with_nones: set[str] = set()
|
|
14
15
|
|
|
15
16
|
for n, row in enumerate(rows):
|
|
@@ -23,7 +24,7 @@ def _infer_schema_from_rows(
|
|
|
23
24
|
elif value is not None:
|
|
24
25
|
# If `key` is not in `schema_overrides`, then we infer its type from the data.
|
|
25
26
|
# The column type will always be nullable by default.
|
|
26
|
-
col_type =
|
|
27
|
+
col_type = ts.ColumnType.infer_literal_type(value, nullable=col_name not in primary_key)
|
|
27
28
|
if col_type is None:
|
|
28
29
|
raise excs.Error(
|
|
29
30
|
f'Could not infer type for column `{col_name}`; the value in row {n} '
|
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -31,8 +31,8 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
|
|
|
31
31
|
'timestamp[s]': ts.TimestampType(nullable=True),
|
|
32
32
|
'timestamp[ms]': ts.TimestampType(nullable=True), # HF dataset iterator converts timestamps to datetime.datetime
|
|
33
33
|
'timestamp[us]': ts.TimestampType(nullable=True),
|
|
34
|
-
'date32': ts.
|
|
35
|
-
'date64': ts.
|
|
34
|
+
'date32': ts.DateType(nullable=True),
|
|
35
|
+
'date64': ts.DateType(nullable=True),
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -11,7 +11,7 @@ import label_studio_sdk # type: ignore[import-untyped]
|
|
|
11
11
|
import PIL.Image
|
|
12
12
|
from requests.exceptions import HTTPError
|
|
13
13
|
|
|
14
|
-
import pixeltable as
|
|
14
|
+
import pixeltable.type_system as ts
|
|
15
15
|
from pixeltable import Column, Table, env, exceptions as excs
|
|
16
16
|
from pixeltable.config import Config
|
|
17
17
|
from pixeltable.exprs import ColumnRef, DataRow, Expr
|
|
@@ -89,21 +89,21 @@ class LabelStudioProject(Project):
|
|
|
89
89
|
def __project_config(self) -> '_LabelStudioConfig':
|
|
90
90
|
return self.__parse_project_config(self.project_params['label_config'])
|
|
91
91
|
|
|
92
|
-
def get_export_columns(self) -> dict[str,
|
|
92
|
+
def get_export_columns(self) -> dict[str, ts.ColumnType]:
|
|
93
93
|
"""
|
|
94
94
|
The data keys and preannotation fields specified in this Label Studio project.
|
|
95
95
|
"""
|
|
96
96
|
return self.__project_config.export_columns
|
|
97
97
|
|
|
98
|
-
def get_import_columns(self) -> dict[str,
|
|
98
|
+
def get_import_columns(self) -> dict[str, ts.ColumnType]:
|
|
99
99
|
"""
|
|
100
100
|
Always contains a single entry:
|
|
101
101
|
|
|
102
102
|
```
|
|
103
|
-
{"annotations":
|
|
103
|
+
{"annotations": ts.JsonType(nullable=True)}
|
|
104
104
|
```
|
|
105
105
|
"""
|
|
106
|
-
return {ANNOTATIONS_COLUMN:
|
|
106
|
+
return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
|
|
107
107
|
|
|
108
108
|
def sync(self, t: Table, export_data: bool, import_data: bool) -> SyncStatus:
|
|
109
109
|
_logger.info(
|
|
@@ -412,8 +412,8 @@ class LabelStudioProject(Project):
|
|
|
412
412
|
# TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
|
|
413
413
|
ancestor = t
|
|
414
414
|
while local_annotations_col not in ancestor._tbl_version.get().cols:
|
|
415
|
-
assert ancestor.
|
|
416
|
-
ancestor = ancestor.
|
|
415
|
+
assert ancestor._base_table is not None
|
|
416
|
+
ancestor = ancestor._base_table
|
|
417
417
|
update_status = ancestor.batch_update(updates)
|
|
418
418
|
env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
|
|
419
419
|
return SyncStatus(pxt_rows_updated=update_status.num_rows, num_excs=update_status.num_excs)
|
|
@@ -577,10 +577,10 @@ class LabelStudioProject(Project):
|
|
|
577
577
|
else:
|
|
578
578
|
local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
579
579
|
if local_annotations_column not in t._schema:
|
|
580
|
-
t.add_columns({local_annotations_column:
|
|
580
|
+
t.add_columns({local_annotations_column: ts.JsonType(nullable=True)})
|
|
581
581
|
|
|
582
582
|
resolved_col_mapping = cls.validate_columns(
|
|
583
|
-
t, config.export_columns, {ANNOTATIONS_COLUMN:
|
|
583
|
+
t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
|
|
584
584
|
)
|
|
585
585
|
|
|
586
586
|
# Perform some additional validation
|
|
@@ -649,7 +649,7 @@ class LabelStudioProject(Project):
|
|
|
649
649
|
@dataclass(frozen=True)
|
|
650
650
|
class _DataKey:
|
|
651
651
|
name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
|
|
652
|
-
column_type:
|
|
652
|
+
column_type: ts.ColumnType
|
|
653
653
|
|
|
654
654
|
|
|
655
655
|
@dataclass(frozen=True)
|
|
@@ -673,18 +673,18 @@ class _LabelStudioConfig:
|
|
|
673
673
|
)
|
|
674
674
|
|
|
675
675
|
@property
|
|
676
|
-
def export_columns(self) -> dict[str,
|
|
676
|
+
def export_columns(self) -> dict[str, ts.ColumnType]:
|
|
677
677
|
data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
|
|
678
|
-
rl_cols = {name:
|
|
678
|
+
rl_cols = {name: ts.JsonType() for name in self.rectangle_labels}
|
|
679
679
|
return {**data_key_cols, **rl_cols}
|
|
680
680
|
|
|
681
681
|
|
|
682
682
|
ANNOTATIONS_COLUMN = 'annotations'
|
|
683
683
|
_PAGE_SIZE = 100 # This is the default used in the LS SDK
|
|
684
684
|
_LS_TAG_MAP = {
|
|
685
|
-
'header':
|
|
686
|
-
'text':
|
|
687
|
-
'image':
|
|
688
|
-
'video':
|
|
689
|
-
'audio':
|
|
685
|
+
'header': ts.StringType(),
|
|
686
|
+
'text': ts.StringType(),
|
|
687
|
+
'image': ts.ImageType(),
|
|
688
|
+
'video': ts.VideoType(),
|
|
689
|
+
'audio': ts.AudioType(),
|
|
690
690
|
}
|