pixeltable 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (52)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/__init__.py +1 -1
  3. pixeltable/catalog/catalog.py +619 -255
  4. pixeltable/catalog/dir.py +1 -2
  5. pixeltable/catalog/insertable_table.py +9 -9
  6. pixeltable/catalog/path.py +59 -20
  7. pixeltable/catalog/schema_object.py +10 -4
  8. pixeltable/catalog/table.py +51 -53
  9. pixeltable/catalog/table_version.py +216 -156
  10. pixeltable/catalog/table_version_path.py +1 -1
  11. pixeltable/catalog/tbl_ops.py +44 -0
  12. pixeltable/catalog/view.py +63 -65
  13. pixeltable/config.py +12 -4
  14. pixeltable/dataframe.py +75 -6
  15. pixeltable/env.py +46 -17
  16. pixeltable/exec/aggregation_node.py +1 -1
  17. pixeltable/exec/cache_prefetch_node.py +2 -6
  18. pixeltable/exec/component_iteration_node.py +4 -3
  19. pixeltable/exec/data_row_batch.py +10 -51
  20. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  21. pixeltable/exec/in_memory_data_node.py +17 -16
  22. pixeltable/exec/sql_node.py +6 -7
  23. pixeltable/exprs/column_ref.py +2 -1
  24. pixeltable/exprs/data_row.py +13 -13
  25. pixeltable/exprs/row_builder.py +16 -4
  26. pixeltable/exprs/string_op.py +1 -1
  27. pixeltable/func/expr_template_function.py +1 -4
  28. pixeltable/functions/date.py +1 -1
  29. pixeltable/functions/gemini.py +4 -4
  30. pixeltable/functions/math.py +1 -1
  31. pixeltable/functions/openai.py +9 -6
  32. pixeltable/functions/timestamp.py +6 -6
  33. pixeltable/functions/video.py +2 -6
  34. pixeltable/globals.py +62 -33
  35. pixeltable/io/datarows.py +2 -1
  36. pixeltable/io/pandas.py +1 -0
  37. pixeltable/io/table_data_conduit.py +12 -13
  38. pixeltable/iterators/audio.py +17 -8
  39. pixeltable/iterators/image.py +5 -2
  40. pixeltable/metadata/schema.py +39 -2
  41. pixeltable/plan.py +5 -14
  42. pixeltable/share/packager.py +13 -13
  43. pixeltable/store.py +31 -7
  44. pixeltable/type_system.py +2 -1
  45. pixeltable/utils/filecache.py +1 -1
  46. pixeltable/utils/http_server.py +2 -3
  47. pixeltable/utils/media_store.py +90 -34
  48. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/METADATA +1 -1
  49. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/RECORD +52 -51
  50. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/LICENSE +0 -0
  51. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/WHEEL +0 -0
  52. {pixeltable-0.4.3.dist-info → pixeltable-0.4.5.dist-info}/entry_points.txt +0 -0
@@ -3,8 +3,7 @@ from __future__ import annotations
 import logging
 from typing import Iterator, Optional
 
-from pixeltable import catalog, exprs
-from pixeltable.utils.media_store import MediaStore
+from pixeltable import exprs
 
 _logger = logging.getLogger('pixeltable')
 
@@ -15,51 +14,19 @@ class DataRowBatch:
     Contains the metadata needed to initialize DataRows.
     """
 
-    tbl: Optional[catalog.TableVersionHandle]
     row_builder: exprs.RowBuilder
-    img_slot_idxs: list[int]
-    media_slot_idxs: list[int]  # non-image media slots
-    array_slot_idxs: list[int]
     rows: list[exprs.DataRow]
 
-    def __init__(
-        self,
-        tbl: Optional[catalog.TableVersionHandle],
-        row_builder: exprs.RowBuilder,
-        num_rows: Optional[int] = None,
-        rows: Optional[list[exprs.DataRow]] = None,
-    ):
+    def __init__(self, row_builder: exprs.RowBuilder, rows: Optional[list[exprs.DataRow]] = None):
         """
         Requires either num_rows or rows to be specified, but not both.
         """
-        assert num_rows is None or rows is None
-        self.tbl = tbl
         self.row_builder = row_builder
-        self.img_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_image_type()]
-        # non-image media slots
-        self.media_slot_idxs = [
-            e.slot_idx
-            for e in row_builder.unique_exprs
-            if e.col_type.is_media_type() and not e.col_type.is_image_type()
-        ]
-        self.array_slot_idxs = [e.slot_idx for e in row_builder.unique_exprs if e.col_type.is_array_type()]
-        if rows is not None:
-            self.rows = rows
-        else:
-            if num_rows is None:
-                num_rows = 0
-            self.rows = [
-                exprs.DataRow(
-                    row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-                )
-                for _ in range(num_rows)
-            ]
+        self.rows = [] if rows is None else rows
 
-    def add_row(self, row: Optional[exprs.DataRow] = None) -> exprs.DataRow:
+    def add_row(self, row: Optional[exprs.DataRow]) -> exprs.DataRow:
         if row is None:
-            row = exprs.DataRow(
-                self.row_builder.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs
-            )
+            row = self.row_builder.make_row()
         self.rows.append(row)
         return row
 
@@ -73,26 +40,18 @@ class DataRowBatch:
         return self.rows[index]
 
     def flush_imgs(
-        self,
-        idx_range: Optional[slice] = None,
-        stored_img_info: Optional[list[exprs.ColumnSlotIdx]] = None,
-        flushed_slot_idxs: Optional[list[int]] = None,
+        self, idx_range: Optional[slice], stored_img_info: list[exprs.ColumnSlotIdx], flushed_img_slots: list[int]
     ) -> None:
         """Flushes images in the given range of rows."""
-        assert self.tbl is not None
-        if stored_img_info is None:
-            stored_img_info = []
-        if flushed_slot_idxs is None:
-            flushed_slot_idxs = []
-        if len(stored_img_info) == 0 and len(flushed_slot_idxs) == 0:
+        if len(stored_img_info) == 0 and len(flushed_img_slots) == 0:
             return
+
         if idx_range is None:
             idx_range = slice(0, len(self.rows))
         for row in self.rows[idx_range]:
             for info in stored_img_info:
-                filepath = str(MediaStore.prepare_media_path(self.tbl.id, info.col.id, self.tbl.get().version))
-                row.flush_img(info.slot_idx, filepath)
-            for slot_idx in flushed_slot_idxs:
+                row.flush_img(info.slot_idx, info.col)
+            for slot_idx in flushed_img_slots:
                 row.flush_img(slot_idx)
 
     def __iter__(self) -> Iterator[exprs.DataRow]:
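
Taken together, the three data_row_batch.py hunks above strip DataRowBatch down to a list of rows plus a RowBuilder reference: the batch no longer holds a table handle, no longer computes per-type slot indices, and no longer constructs DataRows itself. That bookkeeping moves into RowBuilder (see the row_builder.py hunks below, which add img_slot_idxs/media_slot_idxs/array_slot_idxs and make_row()). A minimal, self-contained sketch of the pattern, using hypothetical stand-in classes rather than Pixeltable's actual ones:

from dataclasses import dataclass, field
from typing import Any, Optional

@dataclass
class Builder:
    # the builder is the single owner of row-layout knowledge
    num_slots: int
    img_slot_idxs: list[int] = field(default_factory=list)

    def make_row(self) -> list[Any]:
        return [None] * self.num_slots

@dataclass
class Batch:
    # the batch is reduced to a builder reference plus a list of rows
    builder: Builder
    rows: list[list[Any]] = field(default_factory=list)

    def add_row(self, row: Optional[list[Any]]) -> list[Any]:
        if row is None:
            row = self.builder.make_row()
        self.rows.append(row)
        return row

builder = Builder(num_slots=4, img_slot_idxs=[0])
batch = Batch(builder)
batch.add_row(None)  # row construction is delegated to the builder
assert len(batch.rows) == 1
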
@@ -240,7 +240,7 @@ class ExprEvalNode(ExecNode):
                 # make sure we top up our in-flight rows before yielding
                 self._dispatch_input_rows()
                 self._log_state(f'yielding {len(batch_rows)} rows')
-                yield DataRowBatch(tbl=None, row_builder=self.row_builder, rows=batch_rows)
+                yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
                 # at this point, we may have more completed rows
 
         assert self.completed_rows.empty()  # all completed rows should be sitting in output_buffer
@@ -254,7 +254,7 @@ class ExprEvalNode(ExecNode):
             batch_rows = self.output_buffer.get_rows(self.output_buffer.num_ready)
             self.num_output_rows += len(batch_rows)
             self._log_state(f'yielding {len(batch_rows)} rows')
-            yield DataRowBatch(tbl=None, row_builder=self.row_builder, rows=batch_rows)
+            yield DataRowBatch(row_builder=self.row_builder, rows=batch_rows)
 
         assert self.output_buffer.num_rows == 0
         return
@@ -23,7 +23,7 @@ class InMemoryDataNode(ExecNode):
 
     input_rows: list[dict[str, Any]]
     start_row_id: int
-    output_rows: Optional[DataRowBatch]
+    output_batch: Optional[DataRowBatch]
 
     # output_exprs is declared in the superclass, but we redeclare it here with a more specific type
     output_exprs: list[exprs.ColumnRef]
@@ -42,7 +42,7 @@ class InMemoryDataNode(ExecNode):
         self.tbl = tbl
         self.input_rows = rows
         self.start_row_id = start_row_id
-        self.output_rows = None
+        self.output_batch = None
 
     def _open(self) -> None:
         """Create row batch and populate with self.input_rows"""
@@ -56,22 +56,22 @@ class InMemoryDataNode(ExecNode):
         }
         output_slot_idxs = {e.slot_idx for e in self.output_exprs}
 
-        self.output_rows = DataRowBatch(self.tbl, self.row_builder, len(self.input_rows))
-        for row_idx, input_row in enumerate(self.input_rows):
+        self.output_batch = DataRowBatch(self.row_builder)
+        for input_row in self.input_rows:
+            output_row = self.row_builder.make_row()
             # populate the output row with the values provided in the input row
             input_slot_idxs: set[int] = set()
             for col_name, val in input_row.items():
                 col_info = user_cols_by_name.get(col_name)
                 assert col_info is not None
-
-                if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
-                    # this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
-                    path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
-                    with open(path, 'wb') as fp:
-                        fp.write(val)
-                    self.output_rows[row_idx][col_info.slot_idx] = path
+                col = col_info.col
+                if col.col_type.is_image_type() and isinstance(val, bytes):
+                    # this is a literal media file, ie, a sequence of bytes; save it as a binary file and store the path
+                    assert col.tbl.id == self.tbl.id
+                    filepath, _ = MediaStore.save_media_object(val, col, format=None)
+                    output_row[col_info.slot_idx] = str(filepath)
                 else:
-                    self.output_rows[row_idx][col_info.slot_idx] = val
+                    output_row[col_info.slot_idx] = val
 
                 input_slot_idxs.add(col_info.slot_idx)
 
@@ -80,10 +80,11 @@ class InMemoryDataNode(ExecNode):
             for slot_idx in missing_slot_idxs:
                 col_info = output_cols_by_idx.get(slot_idx)
                 assert col_info is not None
-                self.output_rows[row_idx][col_info.slot_idx] = None
+                output_row[col_info.slot_idx] = None
+            self.output_batch.add_row(output_row)
 
-        self.ctx.num_rows = len(self.output_rows)
+        self.ctx.num_rows = len(self.output_batch)
 
     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
-        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_rows)} output_rows')
-        yield self.output_rows
+        _logger.debug(f'InMemoryDataNode: created row batch with {len(self.output_batch)} rows')
+        yield self.output_batch
@@ -316,8 +316,7 @@ class SqlNode(ExecNode):
             for _ in w:
                 pass
 
-        tbl_version = self.tbl.tbl_version if self.tbl is not None else None
-        output_batch = DataRowBatch(tbl_version, self.row_builder)
+        output_batch = DataRowBatch(self.row_builder)
         output_row: Optional[exprs.DataRow] = None
         num_rows_returned = 0
 
@@ -359,7 +358,7 @@ class SqlNode(ExecNode):
             if self.ctx.batch_size > 0 and len(output_batch) == self.ctx.batch_size:
                 _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
                 yield output_batch
-                output_batch = DataRowBatch(tbl_version, self.row_builder)
+                output_batch = DataRowBatch(self.row_builder)
 
         if len(output_batch) > 0:
             _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
@@ -569,10 +568,10 @@ class SqlSampleNode(SqlNode):
         General SQL form is:
         -  MD5(<seed::text> [ + '___' + <rowid_col_val>::text]+
         """
-        sql_expr: sql.ColumnElement = sql.cast(seed, sql.Text)
+        sql_expr: sql.ColumnElement = seed.cast(sql.String)
         for e in sql_cols:
             # Quotes are required below to guarantee that the string is properly presented in SQL
-            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) + sql.cast(e, sql.Text)
+            sql_expr = sql_expr + sql.literal_column("'___'", sql.Text) + e.cast(sql.String)
         sql_expr = sql.func.md5(sql_expr)
         return sql_expr
 
@@ -591,9 +590,9 @@ class SqlSampleNode(SqlNode):
             s_key = self._create_key_sql(self.input_cte)
 
             # Construct a suitable where clause
-            fraction_sql = sql.cast(SampleClause.fraction_to_md5_hex(float(self.sample_clause.fraction)), sql.Text)
+            fraction_md5 = SampleClause.fraction_to_md5_hex(self.sample_clause.fraction)
             order_by = self._create_key_sql(self.input_cte)
-            return sql.select(*self.input_cte.c).where(s_key < fraction_sql).order_by(order_by)
+            return sql.select(*self.input_cte.c).where(s_key < fraction_md5).order_by(order_by)
 
             return self._create_stmt_stratified_fraction(self.sample_clause.fraction)
         else:
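
These two SqlSampleNode hunks, like several hunks below (string_op.py, date.py, math.py, timestamp.py), swap the free function sql.cast(expr, T) for the equivalent ColumnElement method expr.cast(T); both compile to the same SQL CAST. A standalone illustration of the key-building idiom (requires SQLAlchemy; the table and column names are made up for the example):

import sqlalchemy as sql

t = sql.table('t', sql.column('seed'), sql.column('part'))

# method form, as used after this change
key = t.c.seed.cast(sql.String)
key = key + sql.literal_column("'___'", sql.Text) + t.c.part.cast(sql.String)
expr = sql.func.md5(key)

# prints roughly: md5(CAST(t.seed AS VARCHAR) || '___' || CAST(t.part AS VARCHAR))
print(expr)
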
@@ -325,7 +325,8 @@ class ColumnRef(Expr):
     @classmethod
     def get_column(cls, d: dict) -> catalog.Column:
         tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
-        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
+        # validate_initialized=False: this gets called as part of TableVersion.init()
+        tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version, validate_initialized=False)
         # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
         col = next(col for col in tbl_version.cols if col.id == col_id)
         return col
@@ -13,7 +13,8 @@ import PIL
 import PIL.Image
 import sqlalchemy as sql
 
-from pixeltable import env
+from pixeltable import catalog, env
+from pixeltable.utils.media_store import MediaStore
 
 
 class DataRow:
@@ -256,23 +257,22 @@ class DataRow:
         self.vals[idx] = val
         self.has_val[idx] = True
 
-    def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
-        """Discard the in-memory value and save it to a local file, if filepath is not None"""
+    def flush_img(self, index: int, col: Optional[catalog.Column] = None) -> None:
+        """Save or discard the in-memory value (required to be a PIL.Image.Image)"""
         if self.vals[index] is None:
             return
         assert self.excs[index] is None
         if self.file_paths[index] is None:
-            if filepath is not None:
+            if col is not None:
                 image = self.vals[index]
-                assert isinstance(image, PIL.Image.Image)
-                # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
-                # In that case, use WebP instead.
-                format = 'webp' if image.has_transparency_data else 'jpeg'
-                if not filepath.endswith(f'.{format}'):
-                    filepath += f'.{format}'
-                self.file_paths[index] = filepath
-                self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
-                image.save(filepath, format=format)
+                format = None
+                if isinstance(image, PIL.Image.Image):
+                    # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
+                    # In that case, use WebP instead.
+                    format = 'webp' if image.has_transparency_data else 'jpeg'
+                filepath, url = MediaStore.save_media_object(image, col, format=format)
+                self.file_paths[index] = str(filepath)
+                self.file_urls[index] = url
             else:
                 # we discard the content of this cell
                 self.has_val[index] = False
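
flush_img() now takes the catalog.Column instead of a pre-computed file path and delegates persistence to MediaStore.save_media_object(), which (per the call sites in this diff) returns a (filepath, url) pair. The implementation of save_media_object() lives in media_store.py and isn't shown here; the sketch below is only a guess at the shape of that contract, mirroring the urljoin logic the old inline code used:

import urllib.parse
import urllib.request
import uuid
from pathlib import Path

def save_media_object_sketch(data: bytes, dest_dir: Path, ext: str) -> tuple[Path, str]:
    """Hypothetical stand-in for MediaStore.save_media_object()."""
    dest_dir.mkdir(parents=True, exist_ok=True)
    filepath = dest_dir / f'{uuid.uuid4()}{ext}'  # the real naming scheme is an assumption
    filepath.write_bytes(data)
    url = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(filepath)))
    return filepath, url
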
@@ -8,9 +8,8 @@ from uuid import UUID
 
 import numpy as np
 
-from pixeltable import catalog, exceptions as excs, utils
+from pixeltable import catalog, exceptions as excs, exprs, utils
 from pixeltable.env import Env
-from pixeltable.utils.media_store import MediaStore
 
 from .data_row import DataRow
 from .expr import Expr, ExprScope
@@ -85,6 +84,10 @@ class RowBuilder:
     # (a subexpr can be shared across multiple output exprs)
     output_expr_ids: list[set[int]]
 
+    img_slot_idxs: list[int]  # Indices of image slots
+    media_slot_idxs: list[int]  # Indices of non-image media slots
+    array_slot_idxs: list[int]  # Indices of array slots
+
     @dataclass
     class EvalCtx:
         """Context for evaluating a set of target exprs"""
@@ -235,6 +238,12 @@ class RowBuilder:
         for e in self.output_exprs:
             self._record_output_expr_id(e, e.slot_idx)
 
+        self.img_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_image_type()]
+        self.media_slot_idxs = [
+            e.slot_idx for e in self.unique_exprs if e.col_type.is_media_type() and not e.col_type.is_image_type()
+        ]
+        self.array_slot_idxs = [e.slot_idx for e in self.unique_exprs if e.col_type.is_array_type()]
+
     def add_table_column(self, col: catalog.Column, slot_idx: int) -> None:
         """Record a column that is part of the table row"""
         assert self.tbl is not None
@@ -462,8 +471,7 @@ class RowBuilder:
             else:
                 if col.col_type.is_image_type() and data_row.file_urls[slot_idx] is None:
                     # we have yet to store this image
-                    filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
-                    data_row.flush_img(slot_idx, filepath)
+                    data_row.flush_img(slot_idx, col)
                 val = data_row.get_stored_val(slot_idx, col.get_sa_col_type())
                 table_row.append(val)
                 if col.stores_cellmd:
@@ -489,3 +497,7 @@ class RowBuilder:
             store_col_names.append(col.col.cellmd_store_name())
 
         return store_col_names, media_cols
+
+    def make_row(self) -> exprs.DataRow:
+        """Creates a new DataRow with the current row_builder's configuration."""
+        return exprs.DataRow(self.num_materialized, self.img_slot_idxs, self.media_slot_idxs, self.array_slot_idxs)
@@ -68,7 +68,7 @@ class StringOp(Expr):
         if self.operator == StringOperator.CONCAT:
             return left.concat(right)
         if self.operator == StringOperator.REPEAT:
-            return sql.func.repeat(sql.cast(left, sql.String), sql.cast(right, sql.Integer))
+            return sql.func.repeat(left.cast(sql.String), right.cast(sql.Integer))
         return None
 
     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -101,13 +101,10 @@ class ExprTemplateFunction(Function):
         return None
 
     def exec(self, args: Sequence[Any], kwargs: dict[str, Any]) -> Any:
-        from pixeltable import exec
-
         assert not self.is_polymorphic
         expr = self.instantiate(args, kwargs)
         row_builder = exprs.RowBuilder(output_exprs=[expr], columns=[], input_exprs=[])
-        row_batch = exec.DataRowBatch(tbl=None, row_builder=row_builder, num_rows=1)
-        row = row_batch[0]
+        row = row_builder.make_row()
         row_builder.eval(row, ctx=row_builder.default_eval_ctx)
         return row[row_builder.get_output_exprs()[0].slot_idx]
 
@@ -83,7 +83,7 @@ def make_date(year: int, month: int, day: int) -> date:
 
 @make_date.to_sql
 def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
-    return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
+    return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
@@ -7,7 +7,6 @@ the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini)
 
 import asyncio
 import io
-import tempfile
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
@@ -215,9 +214,10 @@ async def generate_videos(
     video_bytes = await _genai_client().aio.files.download(file=video.video)  # type: ignore[arg-type]
     assert video_bytes is not None
 
-    _, output_filename = tempfile.mkstemp(suffix='.mp4', dir=str(env.Env.get().tmp_dir))
-    Path(output_filename).write_bytes(video_bytes)
-    return output_filename
+    # Create a temporary file to store the video bytes
+    output_path = env.Env.get().create_tmp_path('.mp4')
+    Path(output_path).write_bytes(video_bytes)
+    return str(output_path)
 
 
 @generate_videos.resource_pool
@@ -97,7 +97,7 @@ def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sq
     if digits is None:
         return sql.func.round(self)
     else:
-        return sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer))
+        return sql.func.round(self.cast(sql.Numeric), digits.cast(sql.Integer))
 
 
 @pxt.udf(is_method=True)
@@ -13,7 +13,6 @@ import logging
 import math
 import pathlib
 import re
-import uuid
 from typing import TYPE_CHECKING, Any, Callable, Optional, Type
 
 import httpx
@@ -32,11 +31,15 @@ _logger = logging.getLogger('pixeltable')
 
 
 @env.register_client('openai')
-def _(api_key: str) -> 'openai.AsyncOpenAI':
+def _(api_key: str, base_url: Optional[str] = None, api_version: Optional[str] = None) -> 'openai.AsyncOpenAI':
     import openai
 
+    default_query = None if api_version is None else {'api-version': api_version}
+
     return openai.AsyncOpenAI(
         api_key=api_key,
+        base_url=base_url,
+        default_query=default_query,
         # recommended to increase limits for async client to avoid connection errors
         http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
     )
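
With these extra factory parameters, the registered client can target OpenAI-compatible endpoints: base_url overrides the default API host, and api_version, when given, is sent as an 'api-version' query parameter on every request (the convention Azure OpenAI uses). The direct equivalent of what the factory now builds, with placeholder values:

import openai

client = openai.AsyncOpenAI(
    api_key='sk-...',  # placeholder key
    base_url='https://my-endpoint.example.com/v1',  # hypothetical host
    default_query={'api-version': '2024-06-01'},  # hypothetical version string
)
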
@@ -125,7 +128,7 @@ _header_duration_pattern = re.compile(r'(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)ms)|(?:(\d
 def _parse_header_duration(duration_str: str) -> datetime.timedelta:
     match = _header_duration_pattern.match(duration_str)
     if not match:
-        raise ValueError('Invalid duration format')
+        raise ValueError(f'Invalid duration format: {duration_str}')
 
     days = int(match.group(1) or 0)
     hours = int(match.group(2) or 0)
@@ -148,7 +151,7 @@ def _get_header_info(
     requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
     requests_remaining_str = headers.get('x-ratelimit-remaining-requests')
     requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
-    requests_reset_str = headers.get('x-ratelimit-reset-requests')
+    requests_reset_str = headers.get('x-ratelimit-reset-requests', '5s')  # Default to 5 seconds
     requests_reset_ts = now + _parse_header_duration(requests_reset_str)
     requests_info = (requests_limit, requests_remaining, requests_reset_ts)
 
@@ -158,7 +161,7 @@ def _get_header_info(
     tokens_limit = int(tokens_limit_str) if tokens_limit_str is not None else None
     tokens_remaining_str = headers.get('x-ratelimit-remaining-tokens')
     tokens_remaining = int(tokens_remaining_str) if tokens_remaining_str is not None else None
-    tokens_reset_str = headers.get('x-ratelimit-reset-tokens')
+    tokens_reset_str = headers.get('x-ratelimit-reset-tokens', '5s')  # Default to 5 seconds
     tokens_reset_ts = now + _parse_header_duration(tokens_reset_str)
     tokens_info = (tokens_limit, tokens_remaining, tokens_reset_ts)
 
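Both hunks default the reset headers to '5s', so a missing x-ratelimit-reset-* header no longer sends None into _parse_header_duration(). A self-contained illustration of the defaulting behavior; the duration grammar below is a simplified stand-in, since the real _header_duration_pattern is truncated in this diff:

import datetime
import re

# simplified grammar: optional minutes, optional (fractional) seconds
_simple_pattern = re.compile(r'(?:(\d+)m)?(?:(\d+(?:\.\d+)?)s)?$')

def parse_reset(headers: dict[str, str]) -> datetime.timedelta:
    s = headers.get('x-ratelimit-reset-requests', '5s')  # default to 5 seconds
    m = _simple_pattern.match(s)
    if not m:
        raise ValueError(f'Invalid duration format: {s}')
    minutes = int(m.group(1) or 0)
    seconds = float(m.group(2) or 0)
    return datetime.timedelta(minutes=minutes, seconds=seconds)

assert parse_reset({}) == datetime.timedelta(seconds=5)
assert parse_reset({'x-ratelimit-reset-requests': '1m30s'}) == datetime.timedelta(seconds=90)
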
@@ -207,7 +210,7 @@ async def speech(input: str, *, model: str, voice: str, model_kwargs: Optional[d
 
     content = await _openai_client().audio.speech.create(input=input, model=model, voice=voice, **model_kwargs)
     ext = model_kwargs.get('response_format', 'mp3')
-    output_filename = str(env.Env.get().tmp_dir / f'{uuid.uuid4()}.{ext}')
+    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
     content.write_to_file(output_filename)
     return output_filename
 
@@ -237,12 +237,12 @@ def _(
     microsecond: sql.ColumnElement = _SQL_ZERO,
 ) -> sql.ColumnElement:
     return sql.func.make_timestamptz(
-        sql.cast(year, sql.Integer),
-        sql.cast(month, sql.Integer),
-        sql.cast(day, sql.Integer),
-        sql.cast(hour, sql.Integer),
-        sql.cast(minute, sql.Integer),
-        sql.cast(second + microsecond / 1000000.0, sql.Float),
+        year.cast(sql.Integer),
+        month.cast(sql.Integer),
+        day.cast(sql.Integer),
+        hour.cast(sql.Integer),
+        minute.cast(sql.Integer),
+        (second + microsecond / 1000000.0).cast(sql.Float),
     )
 
 
@@ -2,9 +2,6 @@
 Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `VideoType`.
 """
 
-import tempfile
-import uuid
-from pathlib import Path
 from typing import Any, Optional
 
 import av
@@ -59,8 +56,7 @@ class make_video(pxt.Aggregator):
         if frame is None:
             return
         if self.container is None:
-            (_, output_filename) = tempfile.mkstemp(suffix='.mp4', dir=str(env.Env.get().tmp_dir))
-            self.out_file = Path(output_filename)
+            self.out_file = env.Env.get().create_tmp_path('.mp4')
             self.container = av.open(str(self.out_file), mode='w')
             self.stream = self.container.add_stream('h264', rate=self.fps)
             self.stream.pix_fmt = 'yuv420p'
@@ -109,7 +105,7 @@ def extract_audio(
         return None
     audio_stream = container.streams.audio[stream_idx]
     # create this in our tmp directory, so it'll get cleaned up if it's being generated as part of a query
-    output_filename = str(env.Env.get().tmp_dir / f'{uuid.uuid4()}.{ext}')
+    output_filename = str(env.Env.get().create_tmp_path(f'.{ext}'))
 
     with av.open(output_filename, 'w', format=format) as output_container:
         output_stream = output_container.add_stream(codec or default_codec)
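
A recurring change in this release (gemini.py, openai.py, and video.py above) replaces ad-hoc tempfile.mkstemp and uuid-based naming with env.Env.get().create_tmp_path(). Its implementation lives in pixeltable/env.py and isn't shown in this diff; judging by the call sites it replaces, it behaves roughly like this sketch:

import uuid
from pathlib import Path

def create_tmp_path_sketch(tmp_dir: Path, extension: str = '') -> Path:
    """Hypothetical equivalent: a unique, not-yet-created path under the env's tmp dir."""
    return tmp_dir / f'{uuid.uuid4()}{extension}'

# e.g.: output_path = create_tmp_path_sketch(Path('/tmp/pixeltable_tmp'), '.mp4')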