pixeltable 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic.

Files changed (51)
  1. pixeltable/__init__.py +1 -0
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +3 -10
  4. pixeltable/catalog/catalog.py +139 -59
  5. pixeltable/catalog/column.py +32 -23
  6. pixeltable/catalog/globals.py +2 -45
  7. pixeltable/catalog/insertable_table.py +5 -2
  8. pixeltable/catalog/path.py +6 -0
  9. pixeltable/catalog/table.py +173 -23
  10. pixeltable/catalog/table_version.py +156 -92
  11. pixeltable/catalog/table_version_handle.py +26 -1
  12. pixeltable/catalog/update_status.py +179 -0
  13. pixeltable/catalog/view.py +12 -3
  14. pixeltable/config.py +76 -12
  15. pixeltable/dataframe.py +1 -1
  16. pixeltable/env.py +29 -0
  17. pixeltable/exec/exec_node.py +7 -24
  18. pixeltable/exec/expr_eval/schedulers.py +134 -7
  19. pixeltable/exprs/column_property_ref.py +23 -20
  20. pixeltable/exprs/column_ref.py +24 -18
  21. pixeltable/exprs/data_row.py +9 -0
  22. pixeltable/exprs/function_call.py +2 -2
  23. pixeltable/exprs/row_builder.py +46 -14
  24. pixeltable/exprs/rowid_ref.py +0 -4
  25. pixeltable/func/function.py +3 -3
  26. pixeltable/functions/audio.py +36 -9
  27. pixeltable/functions/video.py +57 -10
  28. pixeltable/globals.py +61 -1
  29. pixeltable/io/__init__.py +1 -1
  30. pixeltable/io/external_store.py +39 -64
  31. pixeltable/io/globals.py +4 -4
  32. pixeltable/io/hf_datasets.py +10 -2
  33. pixeltable/io/label_studio.py +52 -48
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_38.py +39 -0
  36. pixeltable/metadata/converters/convert_39.py +125 -0
  37. pixeltable/metadata/converters/util.py +3 -0
  38. pixeltable/metadata/notes.py +2 -0
  39. pixeltable/metadata/schema.py +14 -2
  40. pixeltable/metadata/utils.py +78 -0
  41. pixeltable/plan.py +26 -18
  42. pixeltable/share/packager.py +20 -38
  43. pixeltable/store.py +121 -142
  44. pixeltable/type_system.py +2 -2
  45. pixeltable/utils/coroutine.py +6 -23
  46. pixeltable/utils/media_store.py +39 -0
  47. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA +1 -1
  48. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/RECORD +51 -47
  49. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/entry_points.txt +0 -0
pixeltable/store.py CHANGED
@@ -2,18 +2,16 @@ from __future__ import annotations
 
 import abc
 import logging
-import os
 import sys
-import urllib.parse
-import urllib.request
 import warnings
-from typing import Any, Iterable, Iterator, Literal, Optional, Union
+from typing import Any, Iterable, Iterator, Optional, Union
 
 import more_itertools
 import sqlalchemy as sql
 from tqdm import TqdmWarning, tqdm
 
-from pixeltable import catalog, exceptions as excs, exprs
+from pixeltable import catalog, exceptions as excs
+from pixeltable.catalog.update_status import RowCountStats
 from pixeltable.env import Env
 from pixeltable.exec import ExecNode
 from pixeltable.metadata import schema
@@ -41,7 +39,10 @@ class StoreBase:
     v_max_col: sql.Column
     base: Optional[StoreBase]
 
-    __INSERT_BATCH_SIZE = 1000
+    # In my cursory experiments this was the optimal batch size: it was an improvement over 5_000 and there was no real
+    # benefit to going higher.
+    # TODO: Perform more rigorous experiments with different table structures and OS environments to refine this.
+    __INSERT_BATCH_SIZE = 10_000
 
     def __init__(self, tbl_version: catalog.TableVersion):
         self.tbl_version = catalog.TableVersionHandle(
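The larger batch size feeds the accumulate-and-flush pattern that the rewritten insert paths below rely on: rows are buffered across execution batches and only written once the buffer reaches __INSERT_BATCH_SIZE. A minimal sketch of that pattern, detached from pixeltable's own classes (buffered_insert and flush are illustrative names, not package APIs):

from typing import Any, Callable, Iterable

INSERT_BATCH_SIZE = 10_000  # mirrors StoreBase.__INSERT_BATCH_SIZE in 0.4.3

def buffered_insert(
    batches: Iterable[list[tuple[Any, ...]]],
    flush: Callable[[list[tuple[Any, ...]]], None],
) -> int:
    """Accumulate rows across incoming batches; flush whenever the buffer fills up."""
    buffer: list[tuple[Any, ...]] = []
    num_rows = 0
    for batch in batches:        # each batch comes from the execution plan
        num_rows += len(batch)
        buffer.extend(batch)
        if len(buffer) >= INSERT_BATCH_SIZE:
            flush(buffer)        # e.g. one executemany-style INSERT
            buffer.clear()
    if buffer:                   # write whatever is left over
        flush(buffer)
    return num_rows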
@@ -89,9 +90,8 @@ class StoreBase:
             # to the last sql.Table version we created and cannot be reused
             col.create_sa_cols()
             all_cols.append(col.sa_col)
-            if col.records_errors:
-                all_cols.append(col.sa_errormsg_col)
-                all_cols.append(col.sa_errortype_col)
+            if col.stores_cellmd:
+                all_cols.append(col.sa_cellmd_col)
 
         if self.sa_tbl is not None:
             # if we're called in response to a schema change, we need to remove the old table first
@@ -123,49 +123,14 @@ class StoreBase:
         """Return the name of the data store table"""
 
     def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column, v_min: int) -> str:
-        """Move tmp media file with given url to Env.media_dir and return new url, or given url if not a tmp_dir file"""
-        pxt_tmp_dir = str(Env.get().tmp_dir)
-        if file_url is None:
-            return None
-        parsed = urllib.parse.urlparse(file_url)
-        # We should never be passed a local file path here. The "len > 1" ensures that Windows
-        # file paths aren't mistaken for URLs with a single-character scheme.
-        assert len(parsed.scheme) > 1
-        if parsed.scheme != 'file':
-            # remote url
-            return file_url
-        file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
-        if not file_path.startswith(pxt_tmp_dir):
-            # not a tmp file
-            return file_url
-        _, ext = os.path.splitext(file_path)
-        new_path = str(MediaStore.prepare_media_path(self.tbl_version.id, col.id, v_min, ext=ext))
-        os.rename(file_path, new_path)
-        new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(new_path))
-        return new_file_url
+        return MediaStore.move_tmp_media_file(file_url, self.tbl_version.id, col.id, v_min)
 
     def _move_tmp_media_files(
-        self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
+        self, table_row: list[Any], media_cols_by_sql_idx: dict[int, catalog.Column], v_min: int
     ) -> None:
         """Move tmp media files that we generated to a permanent location"""
-        for c in media_cols:
-            for table_row in table_rows:
-                file_url = table_row[c.store_name()]
-                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
-
-    def _create_table_row(
-        self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
-    ) -> tuple[dict[str, Any], int]:
-        """Return Tuple[complete table row, # of exceptions] for insert()
-        Creates a row that includes the PK columns, with the values from input_row.pk.
-        Returns:
-            Tuple[complete table row, # of exceptions]
-        """
-        table_row, num_excs = row_builder.create_table_row(input_row, exc_col_ids)
-        assert len(pk) == len(self._pk_cols)
-        for pk_col, pk_val in zip(self._pk_cols, pk):
-            table_row[pk_col.name] = pk_val
-        return table_row, num_excs
+        for n, col in media_cols_by_sql_idx.items():
+            table_row[n] = self._move_tmp_media_file(table_row[n], col, v_min)
 
     def count(self) -> int:
         """Return the number of rows visible in self.tbl_version"""
@@ -200,11 +165,10 @@ class StoreBase:
         col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
         s_txt = f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL'
         added_storage_cols = [col.store_name()]
-        if col.records_errors:
-            # we also need to create the errormsg and errortype storage cols
-            s_txt += f' , ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
-            s_txt += f' , ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
-            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        if col.stores_cellmd:
+            cellmd_type_str = col.sa_cellmd_type().compile(dialect=conn.dialect)
+            s_txt += f' , ADD COLUMN {col.cellmd_store_name()} {cellmd_type_str} DEFAULT NULL'
+            added_storage_cols.append(col.cellmd_store_name())
 
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
@@ -215,9 +179,8 @@ class StoreBase:
     def drop_column(self, col: catalog.Column) -> None:
         """Execute Alter Table Drop Column statement"""
         s_txt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
-        if col.records_errors:
-            s_txt += f' , DROP COLUMN {col.errormsg_store_name()}'
-            s_txt += f' , DROP COLUMN {col.errortype_store_name()}'
+        if col.stores_cellmd:
+            s_txt += f' , DROP COLUMN {col.cellmd_store_name()}'
         stmt = sql.text(s_txt)
         log_stmt(_logger, stmt)
         Env.get().conn.execute(stmt)
@@ -231,9 +194,7 @@ class StoreBase:
             if col.store_name() not in existing_cols:
                 self.add_column(col)
 
-    def load_column(
-        self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, on_error: Literal['abort', 'ignore']
-    ) -> int:
+    def load_column(self, col: catalog.Column, exec_plan: ExecNode, abort_on_exc: bool) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
         Returns:
@@ -247,72 +208,61 @@ class StoreBase:
         num_rows = 0
         # create temp table to store output of exec_plan, with the same primary key as the store table
         tmp_name = f'temp_{self._storage_name()}'
-        tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
-        tmp_cols = tmp_pk_cols.copy()
+        tmp_pk_cols = tuple(sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns())
+        tmp_val_col_sql_idx = len(tmp_pk_cols)
         tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
-        tmp_cols.append(tmp_val_col)
+        tmp_cols = [*tmp_pk_cols, tmp_val_col]
         # add error columns if the store column records errors
-        if col.records_errors:
-            tmp_errortype_col = sql.Column(col.sa_errortype_col.name, col.sa_errortype_col.type)
-            tmp_cols.append(tmp_errortype_col)
-            tmp_errormsg_col = sql.Column(col.sa_errormsg_col.name, col.sa_errormsg_col.type)
-            tmp_cols.append(tmp_errormsg_col)
+        if col.stores_cellmd:
+            tmp_cellmd_col = sql.Column(col.sa_cellmd_col.name, col.sa_cellmd_col.type)
+            tmp_cols.append(tmp_cellmd_col)
+        tmp_col_names = [col.name for col in tmp_cols]
+
         tmp_tbl = sql.Table(tmp_name, self.sa_md, *tmp_cols, prefixes=['TEMPORARY'])
         conn = Env.get().conn
         tmp_tbl.create(bind=conn)
 
+        row_builder = exec_plan.row_builder
+
         try:
+            table_rows: list[tuple[Any]] = []
+
             # insert rows from exec_plan into temp table
-            # TODO: unify the table row construction logic with RowBuilder.create_table_row()
             for row_batch in exec_plan:
                 num_rows += len(row_batch)
-                tbl_rows: list[dict[str, Any]] = []
-                for result_row in row_batch:
-                    tbl_row: dict[str, Any] = {}
-                    for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
-                        tbl_row[pk_col.name] = pk_val
-
-                    if col.is_computed:
-                        if result_row.has_exc(value_expr_slot_idx):
-                            num_excs += 1
-                            value_exc = result_row.get_exc(value_expr_slot_idx)
-                            if on_error == 'abort':
-                                raise excs.Error(
-                                    f'Error while evaluating computed column `{col.name}`:\n{value_exc}'
-                                ) from value_exc
-                            # we store a NULL value and record the exception/exc type
-                            error_type = type(value_exc).__name__
-                            error_msg = str(value_exc)
-                            tbl_row[col.sa_col.name] = None
-                            tbl_row[col.sa_errortype_col.name] = error_type
-                            tbl_row[col.sa_errormsg_col.name] = error_msg
-                        else:
-                            if col.col_type.is_image_type() and result_row.file_urls[value_expr_slot_idx] is None:
-                                # we have yet to store this image
-                                filepath = str(MediaStore.prepare_media_path(col.tbl.id, col.id, col.tbl.version))
-                                result_row.flush_img(value_expr_slot_idx, filepath)
-                            val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
-                            if col.col_type.is_media_type():
-                                val = self._move_tmp_media_file(val, col, result_row.pk[-1])
-                            tbl_row[col.sa_col.name] = val
-                        if col.records_errors:
-                            tbl_row[col.sa_errortype_col.name] = None
-                            tbl_row[col.sa_errormsg_col.name] = None
-
-                    tbl_rows.append(tbl_row)
-                conn.execute(sql.insert(tmp_tbl), tbl_rows)
+                batch_table_rows: list[tuple[Any]] = []
+
+                for row in row_batch:
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
+                    table_row, num_row_exc = row_builder.create_table_row(row, None, row.pk)
+                    if col.col_type.is_media_type():
+                        table_row[tmp_val_col_sql_idx] = self._move_tmp_media_file(
+                            table_row[tmp_val_col_sql_idx], col, row.pk[-1]
+                        )
+                    num_excs += num_row_exc
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
+                    table_rows.clear()
+
+            if len(table_rows) > 0:
+                self.sql_insert(tmp_tbl, tmp_col_names, table_rows)
 
             # update store table with values from temp table
             update_stmt = sql.update(self.sa_tbl)
             for pk_col, tmp_pk_col in zip(self.pk_columns(), tmp_pk_cols):
                 update_stmt = update_stmt.where(pk_col == tmp_pk_col)
             update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
-            if col.records_errors:
-                update_stmt = update_stmt.values(
-                    {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
-                )
+            if col.stores_cellmd:
+                update_stmt = update_stmt.values({col.sa_cellmd_col: tmp_cellmd_col})
             log_explain(_logger, update_stmt, conn)
             conn.execute(update_stmt)
+
         finally:
 
             def remove_tmp_tbl() -> None:
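load_column() keeps its previous overall strategy: stage the newly computed values in a TEMPORARY table keyed by the store table's primary key, then fold them into the store table with a single correlated UPDATE. A rough standalone sketch of that staging pattern, assuming SQLAlchemy 2.x and a single-column integer key (table and column names are invented for illustration):

import sqlalchemy as sql

md = sql.MetaData()
store_tbl = sql.Table(
    'store_tbl', md,
    sql.Column('rowid', sql.BigInteger, primary_key=True),
    sql.Column('computed_val', sql.String),
)
tmp_tbl = sql.Table(
    'temp_store_tbl', md,
    sql.Column('rowid', sql.BigInteger, primary_key=True),
    sql.Column('computed_val', sql.String),
    prefixes=['TEMPORARY'],
)

def backfill(conn, staged_rows: list[dict]) -> None:
    """Stage computed values in the temp table, then UPDATE the store table from it."""
    tmp_tbl.create(bind=conn)
    try:
        conn.execute(sql.insert(tmp_tbl), staged_rows)  # executemany-style staging insert
        stmt = (
            sql.update(store_tbl)
            .where(store_tbl.c.rowid == tmp_tbl.c.rowid)  # join on the primary key
            .values({store_tbl.c.computed_val: tmp_tbl.c.computed_val})
        )
        conn.execute(stmt)  # emits UPDATE ... FROM on PostgreSQL
    finally:
        tmp_tbl.drop(bind=conn)

Here staged_rows would look like [{'rowid': 1, 'computed_val': 'x'}, ...]; the real method also carries the optional cellmd column and batches the staging inserts as shown above.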
@@ -320,6 +270,7 @@ class StoreBase:
                 tmp_tbl.drop(bind=conn)
 
             run_cleanup(remove_tmp_tbl, raise_error=True)
+
         return num_excs
 
     def insert_rows(
@@ -329,7 +280,7 @@
         show_progress: bool = True,
         rowids: Optional[Iterator[int]] = None,
         abort_on_exc: bool = False,
-    ) -> tuple[int, int, set[int]]:
+    ) -> tuple[set[int], RowCountStats]:
         """Insert rows into the store table and update the catalog table's md
         Returns:
             number of inserted rows, number of exceptions, set of column ids that have exceptions
@@ -341,50 +292,78 @@ class StoreBase:
         cols_with_excs: set[int] = set()
         progress_bar: Optional[tqdm] = None  # create this only after we started executing
         row_builder = exec_plan.row_builder
-        media_cols = [info.col for info in row_builder.table_columns if info.col.col_type.is_media_type()]
-        conn = Env.get().conn
+
+        store_col_names, media_cols_by_idx = row_builder.store_column_names()
 
         try:
+            table_rows: list[tuple[Any]] = []
             exec_plan.open()
+
            for row_batch in exec_plan:
                 num_rows += len(row_batch)
-                for batch_start_idx in range(0, len(row_batch), self.__INSERT_BATCH_SIZE):
-                    # compute batch of rows and convert them into table rows
-                    table_rows: list[dict[str, Any]] = []
-                    batch_stop_idx = min(batch_start_idx + self.__INSERT_BATCH_SIZE, len(row_batch))
-                    for row_idx in range(batch_start_idx, batch_stop_idx):
-                        row = row_batch[row_idx]
-                        # if abort_on_exc == True, we need to check for media validation exceptions
-                        if abort_on_exc and row.has_exc():
-                            exc = row.get_first_exc()
-                            raise exc
-
-                        rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
-                        pk = (*rowid, v_min)
-                        table_row, num_row_exc = self._create_table_row(row, row_builder, cols_with_excs, pk=pk)
-                        num_excs += num_row_exc
-                        table_rows.append(table_row)
-
-                        if show_progress:
-                            if progress_bar is None:
-                                warnings.simplefilter('ignore', category=TqdmWarning)
-                                progress_bar = tqdm(
-                                    desc=f'Inserting rows into `{self.tbl_version.get().name}`',
-                                    unit=' rows',
-                                    ncols=100,
-                                    file=sys.stdout,
-                                )
-                            progress_bar.update(1)
-
-                    # insert batch of rows
-                    self._move_tmp_media_files(table_rows, media_cols, v_min)
-                    conn.execute(sql.insert(self.sa_tbl), table_rows)
+                batch_table_rows: list[tuple[Any]] = []
+
+                # compute batch of rows and convert them into table rows
+                for row in row_batch:
+                    # if abort_on_exc == True, we need to check for media validation exceptions
+                    if abort_on_exc and row.has_exc():
+                        exc = row.get_first_exc()
+                        raise exc
+
+                    rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
+                    pk = (*rowid, v_min)
+                    assert len(pk) == len(self._pk_cols)
+                    table_row, num_row_exc = row_builder.create_table_row(row, cols_with_excs, pk)
+                    num_excs += num_row_exc
+
+                    if show_progress:
+                        if progress_bar is None:
+                            warnings.simplefilter('ignore', category=TqdmWarning)
+                            progress_bar = tqdm(
+                                desc=f'Inserting rows into `{self.tbl_version.get().name}`',
+                                unit=' rows',
+                                ncols=100,
+                                file=sys.stdout,
+                            )
+                        progress_bar.update(1)
+
+                    self._move_tmp_media_files(table_row, media_cols_by_idx, v_min)
+                    batch_table_rows.append(tuple(table_row))
+
+                table_rows.extend(batch_table_rows)
+
+                # if a batch is ready for insertion into the database, insert it
+                if len(table_rows) >= self.__INSERT_BATCH_SIZE:
+                    self.sql_insert(self.sa_tbl, store_col_names, table_rows)
+                    table_rows.clear()
+
+            # insert any remaining rows
+            if len(table_rows) > 0:
+                self.sql_insert(self.sa_tbl, store_col_names, table_rows)
 
             if progress_bar is not None:
                 progress_bar.close()
-            return num_rows, num_excs, cols_with_excs
+            computed_values = exec_plan.ctx.num_computed_exprs * num_rows
+            row_counts = RowCountStats(ins_rows=num_rows, num_excs=num_excs, computed_values=computed_values)
+
+            return cols_with_excs, row_counts
         finally:
             exec_plan.close()
 
+    @classmethod
+    def sql_insert(cls, sa_tbl: sql.Table, store_col_names: list[str], table_rows: list[tuple[Any]]) -> None:
+        assert len(table_rows) > 0
+        conn = Env.get().conn
+        conn.execute(sql.insert(sa_tbl), [dict(zip(store_col_names, table_row)) for table_row in table_rows])
+
+        # TODO: Inserting directly via psycopg delivers a small performance benefit, but is somewhat fraught due to
+        # differences in the data representation that SQLAlchemy/psycopg expect. The below code will do the
+        # insertion in psycopg and can be used if/when we decide to pursue that optimization.
+        # col_names_str = ", ".join(store_col_names)
+        # placeholders_str = ", ".join('%s' for _ in store_col_names)
+        # stmt_text = f'INSERT INTO {self.sa_tbl.name} ({col_names_str}) VALUES ({placeholders_str})'
+        # conn.exec_driver_sql(stmt_text, table_rows)
+
     def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
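insert_rows() now returns (cols_with_excs, RowCountStats) rather than the old (num_rows, num_excs, cols_with_excs) triple. As a rough stand-in for callers, the dataclass below models only the three keyword arguments visible in this diff; the real RowCountStats lives in pixeltable/catalog/update_status.py (added in this release) and is considerably larger:

from dataclasses import dataclass

@dataclass
class RowCountStats:  # stand-in only; see pixeltable/catalog/update_status.py for the real class
    ins_rows: int = 0
    num_excs: int = 0
    computed_values: int = 0

# Old call sites unpacked three values:
#     num_rows, num_excs, cols_with_excs = store.insert_rows(...)
# New call sites unpack the exception columns plus the stats object:
#     cols_with_excs, row_counts = store.insert_rows(...)
#     print(row_counts.ins_rows, row_counts.num_excs, row_counts.computed_values)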
pixeltable/type_system.py CHANGED
@@ -1153,8 +1153,8 @@ class ImageType(ColumnType):
                 img.load()
                 return img
             except Exception as exc:
-                errormsg_val = val if len(val) < 50 else val[:50] + '...'
-                raise excs.Error(f'data URL could not be decoded into a valid image: {errormsg_val}') from exc
+                error_msg_val = val if len(val) < 50 else val[:50] + '...'
+                raise excs.Error(f'data URL could not be decoded into a valid image: {error_msg_val}') from exc
         return val
 
     def _validate_literal(self, val: Any) -> None:
pixeltable/utils/coroutine.py CHANGED
@@ -1,10 +1,10 @@
 import asyncio
 import threading
-from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Coroutine, TypeVar
 
-T = TypeVar('T')
+from pixeltable.env import Env
 
+T = TypeVar('T')
 
 # TODO This is a temporary hack to be able to run async UDFs in contexts that are not properly handled by the existing
 # scheduler logic (e.g., as an embedding function as part of a similarity lookup). Once the scheduler is fully
@@ -15,27 +15,10 @@ def run_coroutine_synchronously(coroutine: Coroutine[Any, Any, T], timeout: floa
     """
     Runs the given coroutine synchronously, even if called in the context of a running event loop.
    """
-
-    def run_in_new_loop() -> T:
-        new_loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(new_loop)
-        try:
-            return new_loop.run_until_complete(coroutine)
-        finally:
-            new_loop.close()
-
-    try:
-        loop = asyncio.get_running_loop()
-    except RuntimeError:
-        # No event loop; just call `asyncio.run()`
-        return asyncio.run(coroutine)
+    loop = Env.get().event_loop
 
     if threading.current_thread() is threading.main_thread():
-        if not loop.is_running():
-            return loop.run_until_complete(coroutine)
-        else:
-            with ThreadPoolExecutor() as pool:
-                future = pool.submit(run_in_new_loop)
-                return future.result(timeout=timeout)
+        return loop.run_until_complete(coroutine)
     else:
-        return asyncio.run_coroutine_threadsafe(coroutine, loop).result()
+        # Not in main thread, use run_coroutine_threadsafe
+        return asyncio.run_coroutine_threadsafe(coroutine, loop).result(timeout)
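run_coroutine_synchronously() now leans on a single long-lived event loop owned by Env (pixeltable/env.py gains event-loop handling in this release) instead of creating throwaway loops: the main thread drives that loop directly, while any other thread hands the coroutine over with run_coroutine_threadsafe. A self-contained sketch of the same dispatch, with a locally created loop standing in for Env.get().event_loop:

import asyncio
import threading
from typing import Any, Coroutine, TypeVar

T = TypeVar('T')
_loop = asyncio.new_event_loop()  # stand-in for the loop Env owns inside pixeltable

def run_sync(coro: Coroutine[Any, Any, T], timeout: float = 30.0) -> T:
    if threading.current_thread() is threading.main_thread():
        # main thread: drive the shared loop directly
        return _loop.run_until_complete(coro)
    # any other thread: schedule onto the shared loop and block for the result
    # (this branch assumes the loop is being run elsewhere, as Env presumably arranges)
    return asyncio.run_coroutine_threadsafe(coro, _loop).result(timeout)

async def add(a: int, b: int) -> int:
    await asyncio.sleep(0)
    return a + b

print(run_sync(add(2, 3)))  # prints 5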
pixeltable/utils/media_store.py CHANGED
@@ -2,6 +2,7 @@ import glob
 import os
 import re
 import shutil
+import urllib
 import uuid
 from collections import defaultdict
 from pathlib import Path
@@ -34,6 +35,44 @@ class MediaStore:
         parent.mkdir(parents=True, exist_ok=True)
         return parent / f'{tbl_id.hex}_{col_id}_{version}_{id_hex}{ext or ""}'
 
+    @classmethod
+    def move_tmp_media_file(cls, file_url: Optional[str], tbl_id: UUID, col_id: int, v_min: int) -> Optional[str]:
+        """Move a tmp media file with given url into the MediaStore, and return new url
+        If it is not a tmp file in the tmp_dir, return the original url.
+
+        Args:
+            file_url: URL of the tmp media file to move
+            tbl_id: Table ID to associate with the media file
+            col_id: Column ID to associate with the media file
+            v_min: Version number to associate with the media file
+
+        Returns:
+            URL of the media final location of the file
+        """
+        if file_url is None:
+            return None
+        assert isinstance(file_url, str), type(file_url)
+        pxt_tmp_dir = str(Env.get().tmp_dir)
+        parsed = urllib.parse.urlparse(file_url)
+        # We should never be passed a local file path here. The "len > 1" ensures that Windows
+        # file paths aren't mistaken for URLs with a single-character scheme.
+        assert len(parsed.scheme) > 1, file_url
+        if parsed.scheme != 'file':
+            # remote url
+            return file_url
+        file_path = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
+        if not file_path.startswith(pxt_tmp_dir):
+            # not a tmp file
+            return file_url
+        new_file_url = cls.relocate_local_media_file(Path(file_path), tbl_id, col_id, v_min)
+        return new_file_url
+
+    @classmethod
+    def relocate_local_media_file(cls, src_path: Path, tbl_id: UUID, col_id: int, tbl_version: int) -> str:
+        dest_path = MediaStore.prepare_media_path(tbl_id, col_id, tbl_version, ext=src_path.suffix)
+        src_path.rename(dest_path)
+        return urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
+
     @classmethod
     def delete(cls, tbl_id: UUID, version: Optional[int] = None) -> None:
         """Delete all files belonging to tbl_id. If version is not None, delete
{pixeltable-0.4.1.dist-info → pixeltable-0.4.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pixeltable
-Version: 0.4.1
+Version: 0.4.3
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 License: Apache-2.0
 Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai