pixeltable 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +619 -255
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +9 -9
- pixeltable/catalog/path.py +59 -20
- pixeltable/catalog/schema_object.py +10 -4
- pixeltable/catalog/table.py +51 -53
- pixeltable/catalog/table_version.py +216 -156
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/tbl_ops.py +44 -0
- pixeltable/catalog/view.py +63 -65
- pixeltable/config.py +12 -4
- pixeltable/dataframe.py +75 -6
- pixeltable/env.py +46 -17
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +2 -6
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +10 -51
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/in_memory_data_node.py +17 -16
- pixeltable/exec/sql_node.py +6 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/data_row.py +13 -13
- pixeltable/exprs/row_builder.py +16 -4
- pixeltable/exprs/string_op.py +1 -1
- pixeltable/func/expr_template_function.py +1 -4
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/gemini.py +4 -4
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/openai.py +9 -6
- pixeltable/functions/timestamp.py +6 -6
- pixeltable/functions/video.py +2 -6
- pixeltable/globals.py +62 -33
- pixeltable/io/datarows.py +2 -1
- pixeltable/io/pandas.py +1 -0
- pixeltable/io/table_data_conduit.py +12 -13
- pixeltable/iterators/audio.py +17 -8
- pixeltable/iterators/image.py +5 -2
- pixeltable/metadata/schema.py +39 -2
- pixeltable/plan.py +5 -14
- pixeltable/share/packager.py +13 -13
- pixeltable/store.py +31 -7
- pixeltable/type_system.py +2 -1
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/media_store.py +90 -34
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/RECORD +52 -51
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0
pixeltable/exec/data_row_batch.py
CHANGED

@@ -3,8 +3,7 @@ from __future__ import annotations
 import logging
 from typing import Iterator, Optional
 
-from pixeltable import catalog, exprs
-from pixeltable.utils.media_store import MediaStore
+from pixeltable import exprs
 
 _logger = logging.getLogger('pixeltable')
 
@@ -15,51 +14,19 @@ class DataRowBatch:
     Contains the metadata needed to initialize DataRows.
     """
 
-    tbl: Optional[catalog.TableVersionHandle]
     row_builder: exprs.RowBuilder
-    img_slot_idxs: list[int]
-    media_slot_idxs: list[int]  # non-image media slots
-    array_slot_idxs: list[int]
     rows: list[exprs.DataRow]
 
-    def __init__(
-        self,
-        tbl: Optional[catalog.TableVersionHandle],
-        row_builder: exprs.RowBuilder,
-        num_rows: Optional[int] = None,
-        rows: Optional[list[exprs.DataRow]] = None,
-    ):
+    def __init__(self, row_builder: exprs.RowBuilder, rows: Optional[list[exprs.DataRow]] = None):
         """
         Requires either num_rows or rows to be specified, but not both.
         """
-        assert num_rows is None or rows is None
-        self.tbl = tbl
         self.row_builder = row_builder
-        self.img_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_image_type()]
-        # non-image media slots
-        self.media_slot_idxs = [
-            e.slot_idx
-            for e in row_builder.unique_exprs
-            if e.col_type.is_media_type() and not e.col_type.is_image_type()
-        ]
-        self.array_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_array_type()]
-        if rows is not None:
-            self.rows = rows
-        else:
-            if num_rows is None:
-                num_rows = 0
-            self.rows = [
-                exprs.DataRow(
-                    row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-                )
-                for _ in range(num_rows)
-            ]
+        self.rows = [] if rows is None else rows
 
-    def add_row(self, row: Optional[exprs.DataRow]
+    def add_row(self, row: Optional[exprs.DataRow]) -> exprs.DataRow:
         if row is None:
-            row = exprs.DataRow(
-                self.row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-            )
+            row = self.row_builder.make_row()
         self.rows.append(row)
         return row
 
@@ -73,26 +40,18 @@ class DataRowBatch:
         return self.rows[index]
 
     def flush_imgs(
-        self,
-        idx_range: Optional[slice] = None,
-        stored_img_info: Optional[list[exprs.ColumnSlotIdx]] = None,
-        flushed_slot_idxs: Optional[list[int]] = None,
+        self, idx_range: Optional[slice], stored_img_info: list[exprs.ColumnSlotIdx], flushed_img_slots: list[int]
     ) -> None:
         """Flushes images in the given range of rows."""
-
-        if stored_img_info is None:
-            stored_img_info = []
-        if flushed_slot_idxs is None:
-            flushed_slot_idxs = []
-        if len(stored_img_info) == 0 and len(flushed_slot_idxs) == 0:
+        if len(stored_img_info) == 0 and len(flushed_img_slots) == 0:
            return
+
         if idx_range is None:
             idx_range = slice(0, len(self.rows))
         for row in self.rows[idx_range]:
             for info in stored_img_info:
-
-
-            for slot_idx in flushed_slot_idxs:
+                row.flush_img(info.slot_idx, info.col)
+            for slot_idx in flushed_img_slots:
                 row.flush_img(slot_idx)
 
     def __iter__(self) -> Iterator[exprs.DataRow]:
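Note: the shape of this refactor, as a standalone sketch (plain Python, illustrative names only; not pixeltable code): the batch no longer carries the table handle or per-type slot bookkeeping, and row construction is delegated to the builder.

    from typing import Optional

    class Builder:
        """Stands in for exprs.RowBuilder: it owns the row configuration."""

        def __init__(self, num_slots: int) -> None:
            self.num_slots = num_slots

        def make_row(self) -> list:
            # the builder knows how to size and configure a row; callers no longer do
            return [None] * self.num_slots

    class Batch:
        """Stands in for the slimmed-down DataRowBatch."""

        def __init__(self, builder: Builder, rows: Optional[list] = None) -> None:
            self.builder = builder
            self.rows = [] if rows is None else rows

        def add_row(self, row: Optional[list]) -> list:
            if row is None:
                row = self.builder.make_row()  # mirrors row_builder.make_row()
            self.rows.append(row)
            return row

    batch = Batch(Builder(num_slots=4))
    batch.add_row(None)
    assert len(batch.rows) == 1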
pixeltable/exec/expr_eval/expr_eval_node.py
CHANGED

@@ -240,7 +240,7 @@ class ExprEvalNode(ExecNode):
                 # make sure we top up our in-flight rows before yielding
                 self._dispatch_input_rows()
                 self._log_state(f'yielding {len(batch_rows)} rows')
-                yield DataRowBatch(
+                yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
                 # at this point, we may have more completed rows
 
         assert self.completed_rows.empty()  # all completed rows should be sitting in output_buffer
@@ -254,7 +254,7 @@ class ExprEvalNode(ExecNode):
             batch_rows = self.output_buffer.get_rows(self.output_buffer.num_ready)
             self.num_output_rows += len(batch_rows)
             self._log_state(f'yielding {len(batch_rows)} rows')
-            yield DataRowBatch(
+            yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
 
         assert self.output_buffer.num_rows == 0
         return
pixeltable/exec/in_memory_data_node.py
CHANGED

@@ -23,7 +23,7 @@ class InMemoryDataNode(ExecNode):
 
     input_rows: list[dict[str, Any]]
     start_row_id: int
-
+    output_batch: Optional[DataRowBatch]
 
     # output_exprs is declared in the superclass, but we redeclare it here with a more specific type
     output_exprs: list[exprs.ColumnRef]
@@ -42,7 +42,7 @@ class InMemoryDataNode(ExecNode):
         self.tbl = tbl
         self.input_rows = rows
         self.start_row_id = start_row_id
-        self.output_rows = None
+        self.output_batch = None
 
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
@@ -56,22 +56,22 @@ class InMemoryDataNode(ExecNode):
         }
         output_slot_idxs = {e.slot_idx for e in self.output_exprs}
 
-        self.
-        for row_idx, input_row in enumerate(self.input_rows):
+        self.output_batch = DataRowBatch(self.row_builder)
+        for input_row in self.input_rows:
+            output_row = self.row_builder.make_row()
             # populate the output row with the values provided in the input row
             input_slot_idxs: set[int] = set()
             for col_name, val in input_row.items():
                 col_info = user_cols_by_name.get(col_name)
                 assert col_info is not None
-
-                if
-                    # this is a literal
-
-
-
-                    self.output_rows[row_idx][col_info.slot_idx] = path
+                col = col_info.col
+                if col.col_type.is_image_type() and isinstance(val, bytes):
+                    # this is a literal media file, ie, a sequence of bytes; save it as a binary file and store the path
+                    assert col.tbl.id == self.tbl.id
+                    filepath, _ = MediaStore.save_media_object(val, col, format=None)
+                    output_row[col_info.slot_idx] = str(filepath)
                 else:
-
+                    output_row[col_info.slot_idx] = val
 
                 input_slot_idxs.add(col_info.slot_idx)
 
@@ -80,10 +80,11 @@ class InMemoryDataNode(ExecNode):
            for slot_idx in missing_slot_idxs:
                col_info = output_cols_by_idx.get(slot_idx)
                assert col_info is not None
-                self.output_rows[row_idx][col_info.slot_idx] = None
+                output_row[col_info.slot_idx] = None
+            self.output_batch.add_row(output_row)
 
-        self.ctx.num_rows = len(self.output_rows)
+        self.ctx.num_rows = len(self.output_batch)
 
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
-        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_rows)} rows')
-        yield self.output_rows
+        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_batch)} rows')
+        yield self.output_batch
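Note: the new branch persists literal image bytes via MediaStore.save_media_object and stores the resulting path in the row slot. A standalone sketch of that pattern, with tempfile standing in for the internal media store:

    import tempfile
    from pathlib import Path
    from typing import Any

    def store_cell_value(val: Any) -> Any:
        if isinstance(val, bytes):
            # literal media content: persist it and record the file path instead of the raw bytes
            with tempfile.NamedTemporaryFile(suffix='.bin', delete=False) as f:
                f.write(val)
            return str(Path(f.name))
        return val  # non-media literals are stored as-is

    print(store_cell_value(b'\x89PNG'))  # -> path to a temp file
    print(store_cell_value(42))          # -> 42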
pixeltable/exec/sql_node.py
CHANGED

@@ -316,8 +316,7 @@ class SqlNode(ExecNode):
             for _ in w:
                 pass
 
-
-        output_batch = DataRowBatch(tbl_version, self.row_builder)
+        output_batch = DataRowBatch(self.row_builder)
         output_row: Optional[exprs.DataRow] = None
         num_rows_returned = 0
 
@@ -359,7 +358,7 @@ class SqlNode(ExecNode):
             if self.ctx.batch_size > 0 and len(output_batch) == self.ctx.batch_size:
                 _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
                 yield output_batch
-                output_batch = DataRowBatch(
+                output_batch = DataRowBatch(self.row_builder)
 
         if len(output_batch) > 0:
             _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
@@ -569,10 +568,10 @@ class SqlSampleNode(SqlNode):
         General SQL form is:
         - MD5(<seed::text> [ + '___' + <rowid_col_val>::text]+
         """
-        sql_expr: sql.ColumnElement =
+        sql_expr: sql.ColumnElement = seed.cast(sql.String)
         for e in sql_cols:
             # Quotes are required below to guarantee that the string is properly presented in SQL
-            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) +
+            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) + e.cast(sql.String)
         sql_expr = sql.func.md5(sql_expr)
         return sql_expr
 
@@ -591,9 +590,9 @@ class SqlSampleNode(SqlNode):
             s_key = self._create_key_sql(self.input_cte)
 
             # Construct a suitable where clause
-
+            fraction_md5 = SampleClause.fraction_to_md5_hex(self.sample_clause.fraction)
             order_by = self._create_key_sql(self.input_cte)
-            return sql.select(*self.input_cte.c).where(s_key <
+            return sql.select(*self.input_cte.c).where(s_key < fraction_md5).order_by(order_by)
 
             return self._create_stmt_stratified_fraction(self.sample_clause.fraction)
         else:
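Note: SqlSampleNode's key is MD5(seed || '___' || rowid ...), compared against a hex threshold derived from the sampling fraction. A standalone sketch of the same test in plain Python (the semantics of SampleClause.fraction_to_md5_hex are assumed here: the fraction scaled into the 128-bit MD5 space):

    import hashlib

    def md5_key(seed: int, *rowid: int) -> str:
        # mirrors MD5(seed::text + '___' + rowid_col_val::text ...)
        return hashlib.md5('___'.join(str(v) for v in (seed, *rowid)).encode()).hexdigest()

    def fraction_to_md5_hex(fraction: float) -> str:
        # assumed semantics: scale the fraction into the 32-hex-digit MD5 space
        return format(int(fraction * (1 << 128)), '032x')

    threshold = fraction_to_md5_hex(0.1)
    sampled = [r for r in range(1000) if md5_key(42, r) < threshold]
    print(len(sampled))  # roughly 100 of 1000; deterministic for a fixed seed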
pixeltable/exprs/column_ref.py
CHANGED

@@ -325,7 +325,8 @@ class ColumnRef(Expr):
     @classmethod
     def get_column(cls, d: dict) -> catalog.Column:
         tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
-        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
+        # validate_initialized=False: this gets called as part of TableVersion.init()
+        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version, validate_initialized=False)
         # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
         col = next(col for col in tbl_version.cols if col.id == col_id)
         return col
pixeltable/exprs/data_row.py
CHANGED

@@ -13,7 +13,8 @@ import PIL
 import PIL.Image
 import sqlalchemy as sql
 
-from pixeltable import env
+from pixeltable import catalog, env
+from pixeltable.utils.media_store import MediaStore
 
 
 class DataRow:
@@ -256,23 +257,22 @@ class DataRow:
         self.vals[idx] = val
         self.has_val[idx] = True
 
-    def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
-        """
+    def flush_img(self, index: int, col: Optional[catalog.Column] = None) -> None:
+        """Save or discard the in-memory value (required to be a PIL.Image.Image)"""
         if self.vals[index] is None:
             return
         assert self.excs[index] is None
         if self.file_paths[index] is None:
-            if filepath is not None:
+            if col is not None:
                 image = self.vals[index]
-
-
-
-
-
-
-                self.file_paths[index] = filepath
-                self.file_urls[index] =
-                image.save(filepath, format=format)
+                format = None
+                if isinstance(image, PIL.Image.Image):
+                    # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
+                    # In that case, use WebP instead.
+                    format = 'webp' if image.has_transparency_data else 'jpeg'
+                filepath, url = MediaStore.save_media_object(image, col, format=format)
+                self.file_paths[index] = str(filepath)
+                self.file_urls[index] = url
         else:
             # we discard the content of this cell
             self.has_val[index] = False
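Note: the format choice relies on Pillow's has_transparency_data property (available in Pillow >= 10.1). A runnable sketch of just that decision:

    import PIL.Image

    def pick_format(image: PIL.Image.Image) -> str:
        # JPEG cannot represent an alpha channel, so fall back to WebP for transparent images
        return 'webp' if image.has_transparency_data else 'jpeg'

    print(pick_format(PIL.Image.new('RGB', (8, 8))))   # jpeg
    print(pick_format(PIL.Image.new('RGBA', (8, 8))))  # webp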
pixeltable/exprs/row_builder.py
CHANGED

@@ -8,9 +8,8 @@ from uuid import UUID
 
 import numpy as np
 
-from pixeltable import catalog, exceptions as excs, utils
+from pixeltable import catalog, exceptions as excs, exprs, utils
 from pixeltable.env import Env
-from pixeltable.utils.media_store import MediaStore
 
 from .data_row import DataRow
 from .expr import Expr, ExprScope
@@ -85,6 +84,10 @@ class RowBuilder:
     # (a subexpr can be shared across multiple output exprs)
     output_expr_ids: list[set[int]]
 
+    img_slot_idxs: list[int]  # Indices of image slots
+    media_slot_idxs: list[int]  # Indices of non-image media slots
+    array_slot_idxs: list[int]  # Indices of array slots
+
     @dataclass
     class EvalCtx:
         """Context for evaluating a set of target exprs"""
@@ -235,6 +238,12 @@ class RowBuilder:
         for e in self.output_exprs:
             self._record_output_expr_id(e, e.slot_idx)
 
+        self.img_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_image_type()]
+        self.media_slot_idxs = [
+            e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
+        ]
+        self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
+
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
         """Record a column that is part of the table row"""
         assert self.tbl is not None
@@ -462,8 +471,7 @@ class RowBuilder:
             else:
                 if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
                     # we have yet to store this image
-
-                    data_row.flush_img(slot_idx, filepath)
+                    data_row.flush_img(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -489,3 +497,7 @@ class RowBuilder:
             store_col_names.append(col.col.cellmd_store_name())
 
         return store_col_names, media_cols
+
+    def make_row(self) -> exprs.DataRow:
+        """Creates a new DataRow with the current row_builder's configuration."""
+        return exprs.DataRow(self.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
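Note: the net effect of these changes is that the slot partitioning formerly recomputed by every DataRowBatch now happens once per RowBuilder, and make_row() becomes the single construction point for DataRows. A standalone sketch of the partition-once pattern (illustrative names only):

    from dataclasses import dataclass

    @dataclass
    class Slot:
        slot_idx: int
        kind: str  # 'image' | 'audio' | 'array' | 'scalar'

    # computed once, analogous to the end of RowBuilder.__init__
    slots = [Slot(0, 'image'), Slot(1, 'audio'), Slot(2, 'array'), Slot(3, 'scalar')]
    img_slot_idxs = [s.slot_idx for s in slots if s.kind == 'image']
    media_slot_idxs = [s.slot_idx for s in slots if s.kind == 'audio']  # non-image media
    array_slot_idxs = [s.slot_idx for s in slots if s.kind == 'array']

    def make_row() -> list:
        # every row starts from the same precomputed configuration
        return [None] * len(slots)

    print(img_slot_idxs, media_slot_idxs, array_slot_idxs)  # [0] [1] [2]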
pixeltable/exprs/string_op.py
CHANGED

@@ -68,7 +68,7 @@ class StringOp(Expr):
         if self.operator == StringOperator.CONCAT:
             return left.concat(right)
         if self.operator == StringOperator.REPEAT:
-            return sql.func.repeat(
+            return sql.func.repeat(left.cast(sql.String), right.cast(sql.Integer))
         return None
 
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
pixeltable/func/expr_template_function.py
CHANGED

@@ -101,13 +101,10 @@ class ExprTemplateFunction(Function):
         return None
 
     def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
-        from pixeltable import exec
-
         assert not self.is_polymorphic
         expr = self.instantiate(args, kwargs)
         row_builder = exprs.RowBuilder(output_exprs=[expr], columns=[], input_exprs=[])
-
-        row = row_batch[0]
+        row = row_builder.make_row()
         row_builder.eval(row, ctx=row_builder.default_eval_ctx)
         return row[row_builder.get_output_exprs()[0].slot_idx]
pixeltable/functions/date.py
CHANGED

@@ -83,7 +83,7 @@ def make_date(year: int, month: int, day: int) -> date:
 
 @make_date.to_sql
 def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
-    return sql.func.make_date(
+    return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
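Note: the explicit casts matter because Postgres resolves make_date() by argument type, and bound parameters would otherwise arrive untyped. A small SQLAlchemy sketch of the expression being built (assumes a Postgres target):

    import sqlalchemy as sql

    year, month, day = sql.literal(2024), sql.literal(7), sql.literal(4)
    expr = sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
    print(expr)  # roughly: make_date(CAST(:param_1 AS INTEGER), CAST(:param_2 AS INTEGER), CAST(:param_3 AS INTEGER))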
pixeltable/functions/gemini.py
CHANGED

@@ -7,7 +7,6 @@ the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini)
 
 import asyncio
 import io
-import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
@@ -215,9 +214,10 @@ async def generate_videos(
     video_bytes = await _genai_client().aio.files.download(file=video.video)  # type: ignore[arg-type]
     assert video_bytes is not None
 
-
-
-
+    # Create a temporary file to store the video bytes
+    output_path = env.Env.get().create_tmp_path('.mp4')
+    Path(output_path).write_bytes(video_bytes)
+    return str(output_path)
 
 
 @generate_videos.resource_pool
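Note: the replacement writes the downloaded bytes to an env-managed temp path so the file is cleaned up along with the query. A standalone equivalent of the persistence step, with tempfile standing in for env.Env.get().create_tmp_path:

    import tempfile
    from pathlib import Path

    video_bytes = b'\x00\x00\x00\x18ftypmp42'  # placeholder for the downloaded bytes
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as f:
        output_path = Path(f.name)
    output_path.write_bytes(video_bytes)
    print(output_path)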
pixeltable/functions/math.py
CHANGED

@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
     if digits is None:
         return sql.func.round(self)
     else:
-        return sql.func.round(
+        return sql.func.round(self.cast(sql.Numeric), digits.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
pixeltable/functions/openai.py
CHANGED

@@ -13,7 +13,6 @@ import logging
 import math
 import pathlib
 import re
-import uuid
 from typing import TYPE_CHECKING, Any, Callable, Optional, Type
 
 import httpx
@@ -32,11 +31,15 @@ _logger = logging.getLogger('pixeltable')
 
 
 @env.register_client('openai')
-def _(api_key: str) -> 'openai.AsyncOpenAI':
+def _(api_key: str, base_url: Optional[str] = None, api_version: Optional[str] = None) -> 'openai.AsyncOpenAI':
     import openai
 
+    default_query = None if api_version is None else {'api-version': api_version}
+
     return openai.AsyncOpenAI(
         api_key=api_key,
+        base_url=base_url,
+        default_query=default_query,
         # recommended to increase limits for async client to avoid connection errors
         http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
     )
@@ -125,7 +128,7 @@ _header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d
 def _parse_header_duration(duration_str: str) -> datetime.timedelta:
     match = _header_duration_pattern.match(duration_str)
     if not match:
-        raise ValueError('Invalid duration format')
+        raise ValueError(f'Invalid duration format: {duration_str}')
 
     days = int(match.group(1) or 0)
     hours = int(match.group(2) or 0)
@@ -148,7 +151,7 @@ def _get_header_info(
     requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
     requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
     requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = headers.get('x-ratelimit-reset-requests')
+    requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
     requests_reset_ts = now + _parse_header_duration(requests_reset_str)
     requests_info = (requests_limit, requests_remaining, requests_reset_ts)
 
@@ -158,7 +161,7 @@ def _get_header_info(
     tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
     tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
     tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
-    tokens_reset_str = headers.get('x-ratelimit-reset-tokens')
+    tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
     tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
     tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
 
@@ -207,7 +210,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
 
     content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
     ext = model_kwargs.get('response_format', 'mp3')
-    output_filename = str(env.Env.get().
+    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
     content.write_to_file(output_filename)
     return output_filename
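Note: with base_url and api_version, the registered client can target OpenAI-compatible or versioned endpoints; api_version travels as a default 'api-version' query parameter. A sketch of the resulting construction (placeholder values; no request is made at construction time):

    import httpx
    import openai

    api_key, base_url, api_version = 'sk-test', 'https://example.com/v1', '2024-06-01'
    default_query = None if api_version is None else {'api-version': api_version}
    client = openai.AsyncOpenAI(
        api_key=api_key,
        base_url=base_url,
        default_query=default_query,
        http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
    )
    print(client.base_url)  # https://example.com/v1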
pixeltable/functions/timestamp.py
CHANGED

@@ -237,12 +237,12 @@ def _(
     microsecond: sql.ColumnElement = _SQL_ZERO,
 ) -> sql.ColumnElement:
     return sql.func.make_timestamptz(
-
-
-
-
-
-
+        year.cast(sql.Integer),
+        month.cast(sql.Integer),
+        day.cast(sql.Integer),
+        hour.cast(sql.Integer),
+        minute.cast(sql.Integer),
+        (second + microsecond / 1000000.0).cast(sql.Float),
     )
pixeltable/functions/video.py
CHANGED

@@ -2,9 +2,6 @@
 Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
 """
 
-import tempfile
-import uuid
-from pathlib import Path
 from typing import Any, Optional
 
 import av
@@ -59,8 +56,7 @@ class make_video(pxt.Aggregator):
         if frame is None:
             return
         if self.container is None:
-
-            self.out_file = Path(output_filename)
+            self.out_file = env.Env.get().create_tmp_path('.mp4')
             self.container = av.open(str(self.out_file), mode='w')
             self.stream = self.container.add_stream('h264', rate=self.fps)
             self.stream.pix_fmt = 'yuv420p'
@@ -109,7 +105,7 @@ def extract_audio(
         return None
     audio_stream = container.streams.audio[stream_idx]
     # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
-    output_filename = str(env.Env.get().
+    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
 
     with av.open(output_filename, 'w', format=format) as output_container:
         output_stream = output_container.add_stream(codec or default_codec)