pixeltable 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -7
- pixeltable/catalog/column.py +6 -2
- pixeltable/catalog/dir.py +2 -1
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/schema_object.py +2 -1
- pixeltable/catalog/table.py +12 -8
- pixeltable/catalog/table_version.py +19 -0
- pixeltable/catalog/table_version_path.py +7 -0
- pixeltable/catalog/view.py +3 -3
- pixeltable/dataframe.py +48 -5
- pixeltable/env.py +1 -1
- pixeltable/exec/aggregation_node.py +14 -0
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +1 -1
- pixeltable/exprs/column_ref.py +42 -17
- pixeltable/exprs/data_row.py +3 -0
- pixeltable/exprs/globals.py +1 -1
- pixeltable/exprs/literal.py +11 -1
- pixeltable/exprs/rowid_ref.py +4 -1
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/func/function.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/date.py +185 -0
- pixeltable/functions/gemini.py +22 -20
- pixeltable/functions/globals.py +1 -16
- pixeltable/functions/json.py +2 -1
- pixeltable/functions/math.py +40 -0
- pixeltable/functions/string.py +1 -2
- pixeltable/functions/video.py +2 -2
- pixeltable/globals.py +26 -9
- pixeltable/io/hf_datasets.py +2 -2
- pixeltable/io/pandas.py +16 -4
- pixeltable/io/parquet.py +2 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +12 -5
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +219 -119
- pixeltable/share/publish.py +61 -16
- pixeltable/store.py +45 -20
- pixeltable/type_system.py +46 -2
- pixeltable/utils/arrow.py +8 -2
- pixeltable/utils/pytorch.py +4 -0
- {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/METADATA +2 -4
- {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/RECORD +51 -49
- {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.13.dist-info → pixeltable-0.3.14.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/literal.py
CHANGED
|
@@ -50,6 +50,9 @@ class Literal(Expr):
|
|
|
50
50
|
assert isinstance(self.val, datetime.datetime)
|
|
51
51
|
default_tz = Env.get().default_time_zone
|
|
52
52
|
return f"'{self.val.astimezone(default_tz).isoformat()}'"
|
|
53
|
+
if self.col_type.is_date_type():
|
|
54
|
+
assert isinstance(self.val, datetime.date)
|
|
55
|
+
return f"'{self.val.isoformat()}'"
|
|
53
56
|
if self.col_type.is_array_type():
|
|
54
57
|
assert isinstance(self.val, np.ndarray)
|
|
55
58
|
return str(self.val.tolist())
|
|
@@ -82,6 +85,10 @@ class Literal(Expr):
|
|
|
82
85
|
# stored as UTC in the database)
|
|
83
86
|
encoded_val = self.val.isoformat()
|
|
84
87
|
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
88
|
+
elif self.col_type.is_date_type():
|
|
89
|
+
assert isinstance(self.val, datetime.date)
|
|
90
|
+
encoded_val = self.val.isoformat()
|
|
91
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
85
92
|
elif self.col_type.is_array_type():
|
|
86
93
|
assert isinstance(self.val, np.ndarray)
|
|
87
94
|
return {'val': self.val.tolist(), 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
@@ -96,7 +103,10 @@ class Literal(Expr):
|
|
|
96
103
|
assert 'val' in d
|
|
97
104
|
if 'val_t' in d:
|
|
98
105
|
val_t = d['val_t']
|
|
99
|
-
if val_t == ts.ColumnType.Type.
|
|
106
|
+
if val_t == ts.ColumnType.Type.DATE.name:
|
|
107
|
+
dt = datetime.date.fromisoformat(d['val'])
|
|
108
|
+
return cls(dt)
|
|
109
|
+
elif val_t == ts.ColumnType.Type.TIMESTAMP.name:
|
|
100
110
|
dt = datetime.datetime.fromisoformat(d['val'])
|
|
101
111
|
assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
|
|
102
112
|
return cls(dt)
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -30,7 +30,7 @@ class RowidRef(Expr):
|
|
|
30
30
|
|
|
31
31
|
def __init__(
|
|
32
32
|
self,
|
|
33
|
-
tbl: catalog.TableVersionHandle,
|
|
33
|
+
tbl: Optional[catalog.TableVersionHandle],
|
|
34
34
|
idx: int,
|
|
35
35
|
tbl_id: Optional[UUID] = None,
|
|
36
36
|
normalized_base_id: Optional[UUID] = None,
|
|
@@ -98,6 +98,9 @@ class RowidRef(Expr):
|
|
|
98
98
|
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
99
99
|
tbl = self.tbl.get() if self.tbl is not None else catalog.Catalog.get().get_tbl_version(self.tbl_id, None)
|
|
100
100
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
101
|
+
assert self.rowid_component_idx <= len(rowid_cols), (
|
|
102
|
+
f'{self.rowid_component_idx} not consistent with {rowid_cols}'
|
|
103
|
+
)
|
|
101
104
|
return rowid_cols[self.rowid_component_idx]
|
|
102
105
|
|
|
103
106
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -26,7 +26,7 @@ class SimilarityExpr(Expr):
|
|
|
26
26
|
from pixeltable import index
|
|
27
27
|
|
|
28
28
|
# determine index to use
|
|
29
|
-
idx_dict =
|
|
29
|
+
idx_dict = col_ref.find_embedding_index(idx_name, 'similarity')
|
|
30
30
|
assert len(idx_dict) == 1
|
|
31
31
|
self.idx_info = next(iter(idx_dict.values()))
|
|
32
32
|
idx = self.idx_info.idx
|
pixeltable/func/function.py
CHANGED
|
@@ -514,7 +514,7 @@ class InvalidFunction(Function):
|
|
|
514
514
|
def _as_dict(self) -> dict:
|
|
515
515
|
"""
|
|
516
516
|
Here we write out (verbatim) the original metadata that failed to load (and that resulted in the
|
|
517
|
-
InvalidFunction). Note that the InvalidFunction itself is never
|
|
517
|
+
InvalidFunction). Note that the InvalidFunction itself is never serialized, so there is no corresponding
|
|
518
518
|
from_dict() method.
|
|
519
519
|
"""
|
|
520
520
|
return self.fn_dict
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `DateType`.
|
|
3
|
+
|
|
4
|
+
Usage example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.date_col.year, t.date_col.weekday()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from datetime import date, timedelta
|
|
14
|
+
|
|
15
|
+
import sqlalchemy as sql
|
|
16
|
+
|
|
17
|
+
import pixeltable as pxt
|
|
18
|
+
from pixeltable.utils.code import local_public_names
|
|
19
|
+
|
|
20
|
+
_SQL_ZERO = sql.literal(0)
|
|
21
|
+
|
|
22
|
+
# NOT YET SUPPORTED date +/- integer
|
|
23
|
+
# NOT YET SUPPORTED date1 - date2 -> integer
|
|
24
|
+
# NOT YET SUPPORTED timestamp(date)
|
|
25
|
+
# NOT YET SUPPORTED date(timestamp)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pxt.udf(is_property=True)
|
|
29
|
+
def year(self: date) -> int:
|
|
30
|
+
"""
|
|
31
|
+
Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
|
|
32
|
+
[`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
|
|
33
|
+
|
|
34
|
+
Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
|
|
35
|
+
"""
|
|
36
|
+
return self.year
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@year.to_sql
|
|
40
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
41
|
+
return sql.extract('year', self)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pxt.udf(is_property=True)
|
|
45
|
+
def month(self: date) -> int:
|
|
46
|
+
"""
|
|
47
|
+
Between 1 and 12 inclusive.
|
|
48
|
+
|
|
49
|
+
Equivalent to [`date.month`](https://docs.python.org/3/library/datetime.html#datetime.date.month).
|
|
50
|
+
"""
|
|
51
|
+
return self.month
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@month.to_sql
|
|
55
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
56
|
+
return sql.extract('month', self)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@pxt.udf(is_property=True)
|
|
60
|
+
def day(self: date) -> int:
|
|
61
|
+
"""
|
|
62
|
+
Between 1 and the number of days in the given month of the given year.
|
|
63
|
+
|
|
64
|
+
Equivalent to [`date.day`](https://docs.python.org/3/library/datetime.html#datetime.date.day).
|
|
65
|
+
"""
|
|
66
|
+
return self.day
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@day.to_sql
|
|
70
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
71
|
+
return sql.extract('day', self)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@pxt.udf(is_method=True)
|
|
75
|
+
def make_date(year: int, month: int, day: int) -> date:
|
|
76
|
+
"""
|
|
77
|
+
Create a date.
|
|
78
|
+
|
|
79
|
+
Equivalent to [`date()`](https://docs.python.org/3/library/datetime.html#datetime.date).
|
|
80
|
+
"""
|
|
81
|
+
return date(year, month, day)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@make_date.to_sql
|
|
85
|
+
def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
|
|
86
|
+
return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@pxt.udf(is_method=True)
|
|
90
|
+
def weekday(self: date) -> int:
|
|
91
|
+
"""
|
|
92
|
+
Between 0 (Monday) and 6 (Sunday) inclusive.
|
|
93
|
+
|
|
94
|
+
Equivalent to [`date.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.weekday).
|
|
95
|
+
"""
|
|
96
|
+
return self.weekday()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@weekday.to_sql
|
|
100
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
101
|
+
return sql.extract('isodow', self) - 1
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@pxt.udf(is_method=True)
|
|
105
|
+
def isoweekday(self: date) -> int:
|
|
106
|
+
"""
|
|
107
|
+
Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
|
|
108
|
+
|
|
109
|
+
Equivalent to [`date.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoweekday).
|
|
110
|
+
"""
|
|
111
|
+
return self.isoweekday()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@isoweekday.to_sql
|
|
115
|
+
def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
116
|
+
return sql.extract('isodow', self)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@pxt.udf(is_method=True)
|
|
120
|
+
def isocalendar(self: date) -> dict:
|
|
121
|
+
"""
|
|
122
|
+
Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
|
|
123
|
+
|
|
124
|
+
Equivalent to
|
|
125
|
+
[`date.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar).
|
|
126
|
+
"""
|
|
127
|
+
iso_year, iso_week, iso_weekday = self.isocalendar()
|
|
128
|
+
return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@pxt.udf(is_method=True)
|
|
132
|
+
def isoformat(self: date, sep: str = 'T', timespec: str = 'auto') -> str:
|
|
133
|
+
"""
|
|
134
|
+
Return a string representing the date in ISO 8601 format.
|
|
135
|
+
|
|
136
|
+
Equivalent to [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat).
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
sep: Separator between date and time.
|
|
140
|
+
timespec: The number of additional terms in the output. See the
|
|
141
|
+
[`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat)
|
|
142
|
+
documentation for more details.
|
|
143
|
+
"""
|
|
144
|
+
return self.isoformat()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@pxt.udf(is_method=True)
|
|
148
|
+
def toordinal(self: date) -> int:
|
|
149
|
+
"""
|
|
150
|
+
Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
|
|
151
|
+
|
|
152
|
+
Equivalent to [`date.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.date.toordinal).
|
|
153
|
+
"""
|
|
154
|
+
return self.toordinal()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@pxt.udf(is_method=True)
|
|
158
|
+
def strftime(self: date, format: str) -> str:
|
|
159
|
+
"""
|
|
160
|
+
Return a string representing the date, controlled by an explicit format string.
|
|
161
|
+
|
|
162
|
+
Equivalent to [`date.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.date.strftime).
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
format: The format string to control the output. For a complete list of formatting directives, see
|
|
166
|
+
[`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
|
|
167
|
+
"""
|
|
168
|
+
return self.strftime(format)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@pxt.udf(is_method=True)
|
|
172
|
+
def add_days(self: date, n: int) -> date:
|
|
173
|
+
"""
|
|
174
|
+
Add `n` days to the date.
|
|
175
|
+
|
|
176
|
+
Equivalent to [`date + timedelta(days=n)`](https://docs.python.org/3/library/datetime.html#datetime.timedelta).
|
|
177
|
+
"""
|
|
178
|
+
return self + timedelta(days=n)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
__all__ = local_public_names(__name__)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def __dir__() -> list[str]:
|
|
185
|
+
return __all__
|
pixeltable/functions/gemini.py
CHANGED
|
@@ -1,32 +1,35 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
3
|
that wrap various endpoints from the Google Gemini API. In order to use them, you must
|
|
4
|
-
first `pip install google-
|
|
4
|
+
first `pip install google-genai` and configure your Gemini credentials, as described in
|
|
5
5
|
the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Optional
|
|
8
|
+
from typing import TYPE_CHECKING, Optional
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
11
|
from pixeltable import env
|
|
12
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from google import genai
|
|
15
|
+
|
|
13
16
|
|
|
14
17
|
@env.register_client('gemini')
|
|
15
|
-
def _(api_key: str) ->
|
|
16
|
-
|
|
18
|
+
def _(api_key: str) -> 'genai.client.Client':
|
|
19
|
+
from google import genai
|
|
17
20
|
|
|
18
|
-
genai.
|
|
21
|
+
return genai.client.Client(api_key=api_key)
|
|
19
22
|
|
|
20
23
|
|
|
21
|
-
def
|
|
22
|
-
env.Env.get().get_client('gemini')
|
|
24
|
+
def _genai_client() -> 'genai.client.Client':
|
|
25
|
+
return env.Env.get().get_client('gemini')
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
@pxt.udf(resource_pool='request-rate:gemini')
|
|
26
29
|
async def generate_content(
|
|
27
30
|
contents: str,
|
|
28
31
|
*,
|
|
29
|
-
|
|
32
|
+
model: str,
|
|
30
33
|
candidate_count: Optional[int] = None,
|
|
31
34
|
stop_sequences: Optional[list[str]] = None,
|
|
32
35
|
max_output_tokens: Optional[int] = None,
|
|
@@ -48,11 +51,11 @@ async def generate_content(
|
|
|
48
51
|
|
|
49
52
|
__Requirements:__
|
|
50
53
|
|
|
51
|
-
- `pip install google-
|
|
54
|
+
- `pip install google-genai`
|
|
52
55
|
|
|
53
56
|
Args:
|
|
54
57
|
contents: The input content to generate from.
|
|
55
|
-
|
|
58
|
+
model: The name of the model to use.
|
|
56
59
|
|
|
57
60
|
For details on the other parameters, see: <https://ai.google.dev/gemini-api/docs>
|
|
58
61
|
|
|
@@ -63,14 +66,12 @@ async def generate_content(
|
|
|
63
66
|
Add a computed column that applies the model `gemini-1.5-flash`
|
|
64
67
|
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
65
68
|
|
|
66
|
-
>>> tbl.add_computed_column(response=generate_content(tbl.prompt,
|
|
69
|
+
>>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-1.5-flash'))
|
|
67
70
|
"""
|
|
68
|
-
env.Env.get().require_package('google.
|
|
69
|
-
|
|
70
|
-
import google.generativeai as genai
|
|
71
|
+
env.Env.get().require_package('google.genai')
|
|
72
|
+
from google.genai import types
|
|
71
73
|
|
|
72
|
-
|
|
73
|
-
gc = genai.GenerationConfig(
|
|
74
|
+
config = types.GenerateContentConfig(
|
|
74
75
|
candidate_count=candidate_count,
|
|
75
76
|
stop_sequences=stop_sequences,
|
|
76
77
|
max_output_tokens=max_output_tokens,
|
|
@@ -82,10 +83,11 @@ async def generate_content(
|
|
|
82
83
|
presence_penalty=presence_penalty,
|
|
83
84
|
frequency_penalty=frequency_penalty,
|
|
84
85
|
)
|
|
85
|
-
|
|
86
|
-
|
|
86
|
+
|
|
87
|
+
response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config)
|
|
88
|
+
return response.model_dump()
|
|
87
89
|
|
|
88
90
|
|
|
89
91
|
@generate_content.resource_pool
|
|
90
|
-
def _(
|
|
91
|
-
return f'request-rate:gemini:{
|
|
92
|
+
def _(model: str) -> str:
|
|
93
|
+
return f'request-rate:gemini:{model}'
|
pixeltable/functions/globals.py
CHANGED
|
@@ -49,22 +49,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
49
49
|
allows_window=True,
|
|
50
50
|
# Allow counting non-null values of any type
|
|
51
51
|
# TODO: should we have an "Any" type that can be used here?
|
|
52
|
-
type_substitutions=tuple(
|
|
53
|
-
{T: Optional[t]} # type: ignore[misc]
|
|
54
|
-
for t in (
|
|
55
|
-
ts.String,
|
|
56
|
-
ts.Int,
|
|
57
|
-
ts.Float,
|
|
58
|
-
ts.Bool,
|
|
59
|
-
ts.Timestamp,
|
|
60
|
-
ts.Array,
|
|
61
|
-
ts.Json,
|
|
62
|
-
ts.Image,
|
|
63
|
-
ts.Video,
|
|
64
|
-
ts.Audio,
|
|
65
|
-
ts.Document,
|
|
66
|
-
)
|
|
67
|
-
),
|
|
52
|
+
type_substitutions=tuple({T: Optional[t]} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
|
|
68
53
|
)
|
|
69
54
|
class count(func.Aggregator, typing.Generic[T]):
|
|
70
55
|
def __init__(self) -> None:
|
pixeltable/functions/json.py
CHANGED
|
@@ -4,9 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
|
4
4
|
Example:
|
|
5
5
|
```python
|
|
6
6
|
import pixeltable as pxt
|
|
7
|
+
import pixeltable.functions as pxtf
|
|
7
8
|
|
|
8
9
|
t = pxt.get_table(...)
|
|
9
|
-
t.select(
|
|
10
|
+
t.select(pxtf.json.make_list(t.json_col)).collect()
|
|
10
11
|
```
|
|
11
12
|
"""
|
|
12
13
|
|
pixeltable/functions/math.py
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for mathematical operations.
|
|
3
|
+
|
|
4
|
+
Example:
|
|
5
|
+
```python
|
|
6
|
+
import pixeltable as pxt
|
|
7
|
+
|
|
8
|
+
t = pxt.get_table(...)
|
|
9
|
+
t.select(t.float_col.floor()).collect()
|
|
10
|
+
```
|
|
11
|
+
"""
|
|
12
|
+
|
|
1
13
|
import builtins
|
|
2
14
|
import math
|
|
3
15
|
from typing import Optional
|
|
@@ -10,6 +22,11 @@ from pixeltable.utils.code import local_public_names
|
|
|
10
22
|
|
|
11
23
|
@pxt.udf(is_method=True)
|
|
12
24
|
def abs(self: float) -> float:
|
|
25
|
+
"""
|
|
26
|
+
Return the absolute value of the given number.
|
|
27
|
+
|
|
28
|
+
Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
|
|
29
|
+
"""
|
|
13
30
|
return builtins.abs(self)
|
|
14
31
|
|
|
15
32
|
|
|
@@ -20,6 +37,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
20
37
|
|
|
21
38
|
@pxt.udf(is_method=True)
|
|
22
39
|
def ceil(self: float) -> float:
|
|
40
|
+
"""
|
|
41
|
+
Return the ceiling of the given number.
|
|
42
|
+
|
|
43
|
+
Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
|
|
44
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
45
|
+
`math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
|
|
46
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
47
|
+
"""
|
|
23
48
|
# This ensures the same behavior as SQL
|
|
24
49
|
if math.isfinite(self):
|
|
25
50
|
return float(math.ceil(self))
|
|
@@ -34,6 +59,14 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
34
59
|
|
|
35
60
|
@pxt.udf(is_method=True)
|
|
36
61
|
def floor(self: float) -> float:
|
|
62
|
+
"""
|
|
63
|
+
Return the floor of the given number.
|
|
64
|
+
|
|
65
|
+
Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
|
|
66
|
+
is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
|
|
67
|
+
`math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
|
|
68
|
+
Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
|
|
69
|
+
"""
|
|
37
70
|
# This ensures the same behavior as SQL
|
|
38
71
|
if math.isfinite(self):
|
|
39
72
|
return float(math.floor(self))
|
|
@@ -48,6 +81,13 @@ def _(self: sql.ColumnElement) -> sql.ColumnElement:
|
|
|
48
81
|
|
|
49
82
|
@pxt.udf(is_method=True)
|
|
50
83
|
def round(self: float, digits: Optional[int] = None) -> float:
|
|
84
|
+
"""
|
|
85
|
+
Round a number to a given precision in decimal digits.
|
|
86
|
+
|
|
87
|
+
Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
|
|
88
|
+
Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
|
|
89
|
+
`builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
|
|
90
|
+
"""
|
|
51
91
|
# Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
|
|
52
92
|
return builtins.round(self, digits or 0)
|
|
53
93
|
|
pixeltable/functions/string.py
CHANGED
|
@@ -5,10 +5,9 @@ It closely follows the Pandas `pandas.Series.str` API.
|
|
|
5
5
|
Example:
|
|
6
6
|
```python
|
|
7
7
|
import pixeltable as pxt
|
|
8
|
-
from pixeltable.functions import string as pxt_str
|
|
9
8
|
|
|
10
9
|
t = pxt.get_table(...)
|
|
11
|
-
t.select(
|
|
10
|
+
t.select(t.str_col.capitalize()).collect()
|
|
12
11
|
```
|
|
13
12
|
"""
|
|
14
13
|
|
pixeltable/functions/video.py
CHANGED
|
@@ -4,10 +4,10 @@ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
|
4
4
|
Example:
|
|
5
5
|
```python
|
|
6
6
|
import pixeltable as pxt
|
|
7
|
-
|
|
7
|
+
import pixeltable.functions as pxtf
|
|
8
8
|
|
|
9
9
|
t = pxt.get_table(...)
|
|
10
|
-
t.select(
|
|
10
|
+
t.select(pxtf.video.extract_audio(t.video_col)).collect()
|
|
11
11
|
```
|
|
12
12
|
"""
|
|
13
13
|
|
pixeltable/globals.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
import urllib.parse
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
|
|
8
7
|
|
|
@@ -372,6 +371,31 @@ def create_snapshot(
|
|
|
372
371
|
)
|
|
373
372
|
|
|
374
373
|
|
|
374
|
+
def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
|
|
375
|
+
"""
|
|
376
|
+
Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
|
|
377
|
+
replica of a remote table. A given table can have at most one replica per Pixeltable instance.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
|
|
381
|
+
a remote URI such as `'pxt://username/mydir.my_table'`.
|
|
382
|
+
source: Path to the source table, or (if the source table is a local table) a handle to the source table.
|
|
383
|
+
"""
|
|
384
|
+
remote_dest = destination.startswith('pxt://')
|
|
385
|
+
remote_source = isinstance(source, str) and source.startswith('pxt://')
|
|
386
|
+
if remote_dest == remote_source:
|
|
387
|
+
raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
|
|
388
|
+
|
|
389
|
+
if remote_dest:
|
|
390
|
+
if isinstance(source, str):
|
|
391
|
+
source = get_table(source)
|
|
392
|
+
share.push_replica(destination, source)
|
|
393
|
+
return None
|
|
394
|
+
else:
|
|
395
|
+
assert isinstance(source, str)
|
|
396
|
+
return share.pull_replica(destination, source)
|
|
397
|
+
|
|
398
|
+
|
|
375
399
|
def get_table(path: str) -> catalog.Table:
|
|
376
400
|
"""Get a handle to an existing table, view, or snapshot.
|
|
377
401
|
|
|
@@ -470,7 +494,7 @@ def drop_table(
|
|
|
470
494
|
# if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
|
|
471
495
|
# the Table record, and then get X locks in the correct order (first containing directory, then table)
|
|
472
496
|
with Env.get().begin_xact():
|
|
473
|
-
tbl_path = table._path
|
|
497
|
+
tbl_path = table._path
|
|
474
498
|
else:
|
|
475
499
|
assert isinstance(table, str)
|
|
476
500
|
tbl_path = table
|
|
@@ -627,13 +651,6 @@ def _extract_paths(
|
|
|
627
651
|
return result
|
|
628
652
|
|
|
629
653
|
|
|
630
|
-
def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
|
|
631
|
-
parsed_uri = urllib.parse.urlparse(dest_uri)
|
|
632
|
-
if parsed_uri.scheme != 'pxt':
|
|
633
|
-
raise excs.Error(f'Invalid Pixeltable URI (does not start with pxt://): {dest_uri}')
|
|
634
|
-
share.publish_snapshot(dest_uri, table)
|
|
635
|
-
|
|
636
|
-
|
|
637
654
|
def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
|
|
638
655
|
"""List the directories in a directory.
|
|
639
656
|
|
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -31,8 +31,8 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
|
|
|
31
31
|
'timestamp[s]': ts.TimestampType(nullable=True),
|
|
32
32
|
'timestamp[ms]': ts.TimestampType(nullable=True), # HF dataset iterator converts timestamps to datetime.datetime
|
|
33
33
|
'timestamp[us]': ts.TimestampType(nullable=True),
|
|
34
|
-
'date32': ts.
|
|
35
|
-
'date64': ts.
|
|
34
|
+
'date32': ts.DateType(nullable=True),
|
|
35
|
+
'date64': ts.DateType(nullable=True),
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
pixeltable/io/pandas.py
CHANGED
|
@@ -9,6 +9,7 @@ from pandas.api.types import is_datetime64_any_dtype, is_extension_array_dtype
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
|
+
from pixeltable.env import Env
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def import_pandas(
|
|
@@ -209,14 +210,25 @@ def _df_row_to_pxt_row(
|
|
|
209
210
|
nval = bool(val)
|
|
210
211
|
elif pxt_type.is_string_type():
|
|
211
212
|
nval = str(val)
|
|
213
|
+
elif pxt_type.is_date_type():
|
|
214
|
+
if pd.isnull(val):
|
|
215
|
+
# pandas has the bespoke 'NaT' value for a missing timestamp
|
|
216
|
+
# This is not supported by postgres, and must be converted to None
|
|
217
|
+
nval = None
|
|
218
|
+
else:
|
|
219
|
+
nval = pd.Timestamp(val).date()
|
|
212
220
|
elif pxt_type.is_timestamp_type():
|
|
213
221
|
if pd.isnull(val):
|
|
214
|
-
# pandas has the bespoke 'NaT'
|
|
215
|
-
#
|
|
216
|
-
# table contents as a pandas DataFrame, it will correctly restore the 'NaT'!)
|
|
222
|
+
# pandas has the bespoke 'NaT' value for a missing timestamp
|
|
223
|
+
# This is not supported by postgres, and must be converted to None
|
|
217
224
|
nval = None
|
|
218
225
|
else:
|
|
219
|
-
|
|
226
|
+
tval = pd.Timestamp(val)
|
|
227
|
+
# pandas supports tz-aware and naive timestamps.
|
|
228
|
+
if tval.tz is None:
|
|
229
|
+
nval = pd.Timestamp(tval).tz_localize(tz=Env.get().default_time_zone)
|
|
230
|
+
else:
|
|
231
|
+
nval = tval.astimezone(Env.get().default_time_zone)
|
|
220
232
|
else:
|
|
221
233
|
nval = val
|
|
222
234
|
pxt_row[pxt_name] = nval
|
pixeltable/io/parquet.py
CHANGED
pixeltable/metadata/__init__.py
CHANGED
|
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
19
|
-
VERSION =
|
|
19
|
+
VERSION = 35
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=34)
|
|
10
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
11
|
+
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
15
|
+
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
16
|
+
# Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
|
|
17
|
+
assert 'reference_tbl' not in v
|
|
18
|
+
v['reference_tbl'] = None
|
|
19
|
+
return k, v
|
|
20
|
+
|
|
21
|
+
return None
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
35: 'Track reference_tbl in ColumnRef',
|
|
5
6
|
34: 'Set default value for is_pk field in column metadata to False',
|
|
6
7
|
33: 'Add is_replica field to table metadata',
|
|
7
8
|
32: 'Add the lock_dummy BIGINT column to the dirs table',
|