pixeltable 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +9 -7
- pixeltable/catalog/table.py +18 -4
- pixeltable/env.py +106 -35
- pixeltable/exceptions.py +7 -4
- pixeltable/exec/component_iteration_node.py +62 -41
- pixeltable/exprs/json_path.py +3 -6
- pixeltable/functions/together.py +24 -10
- pixeltable/globals.py +2 -0
- pixeltable/io/globals.py +1 -1
- pixeltable/io/hf_datasets.py +3 -3
- pixeltable/iterators/document.py +1 -1
- pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable/utils/filecache.py +126 -79
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/METADATA +11 -3
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/RECORD +19 -19
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
@@ -1,3 +1,3 @@
  # These version placeholders will be replaced during build.
- __version__ = "0.2.19"
- __version_tuple__ = (0, 2, 19)
+ __version__ = "0.2.20"
+ __version_tuple__ = (0, 2, 20)

pixeltable/catalog/insertable_table.py
CHANGED

@@ -10,6 +10,7 @@ import pixeltable as pxt
  import pixeltable.type_system as ts
  from pixeltable import exceptions as excs
  from pixeltable.env import Env
+ from pixeltable.utils.filecache import FileCache

  from .catalog import Catalog
  from .globals import UpdateStatus

@@ -101,21 +102,22 @@ class InsertableTable(Table):
  if not isinstance(row, dict):
  raise excs.Error('rows must be a list of dictionaries')
  self._validate_input_rows(rows)
-
+ status = self._tbl_version.insert(rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception)

- if
+ if status.num_excs == 0:
  cols_with_excs_str = ''
  else:
  cols_with_excs_str = \
- f' across {len(
- cols_with_excs_str += f' ({", ".join(
+ f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
+ cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
  msg = (
- f'Inserted {
- f'with {
+ f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
+ f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
  )
  print(msg)
  _logger.info(f'InsertableTable {self._name}: {msg}')
-
+ FileCache.get().emit_eviction_warnings()
+ return status

  def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
  """Verify that the input rows match the table schema"""
pixeltable/catalog/table.py
CHANGED
@@ -20,6 +20,7 @@ import pixeltable.exprs as exprs
  import pixeltable.index as index
  import pixeltable.metadata.schema as schema
  import pixeltable.type_system as ts
+ from pixeltable.utils.filecache import FileCache

  from .column import Column
  from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier

@@ -33,7 +34,12 @@ if TYPE_CHECKING:
  _logger = logging.getLogger('pixeltable')

  class Table(SchemaObject):
- """
+ """
+ Base class for table objects (base tables, views, snapshots).
+
+ Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
+ FileCache.emit_eviction_warnings() at the end of the operation.
+ """

  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
  super().__init__(id, name, dir_id)

@@ -374,7 +380,10 @@

  new_col = self._create_columns({col_name: col_schema})[0]
  self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
-
+ status = self._tbl_version.add_column(new_col, print_stats=print_stats)
+ FileCache.get().emit_eviction_warnings()
+ return status
+

  @classmethod
  def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:

@@ -587,6 +596,7 @@
  idx = EmbeddingIndex(col, metric=metric, string_embed=string_embed, image_embed=image_embed)
  status = self._tbl_version.add_index(col, idx_name=idx_name, idx=idx)
  # TODO: how to deal with exceptions here? drop the index and raise?
+ FileCache.get().emit_eviction_warnings()

  def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
  """Drop an embedding index from the table.

@@ -732,7 +742,9 @@
  >>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
  """
  self._check_is_dropped()
-
+ status = self._tbl_version.update(value_spec, where, cascade)
+ FileCache.get().emit_eviction_warnings()
+ return status

  def batch_update(
  self, rows: Iterable[dict[str, Any]], cascade: bool = True,

@@ -789,9 +801,11 @@
  missing_cols = pk_col_names - set(col.name for col in col_vals.keys())
  raise excs.Error(f'Primary key columns ({", ".join(missing_cols)}) missing in {row_spec}')
  row_updates.append(col_vals)
-
+ status = self._tbl_version.batch_update(
  row_updates, rowids, error_if_not_exists=if_not_exists == 'error',
  insert_if_not_exists=if_not_exists == 'insert', cascade=cascade)
+ FileCache.get().emit_eviction_warnings()
+ return status

  def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
  """Delete rows in this table.
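The hunks above all follow the convention stated in the new Table docstring: every user-invoked operation that (directly or indirectly) runs an ExecNode tree finishes by surfacing file-cache eviction warnings before returning. A minimal sketch of that pattern (method and attribute names are illustrative stand-ins, not the exact Pixeltable signatures):

def _user_invoked_op(self, *args, **kwargs):
    # run the operation that executes an ExecNode tree and collect its status
    status = self._tbl_version.run_some_exec_plan(*args, **kwargs)
    # always ask the file cache to emit any eviction warnings before returning
    FileCache.get().emit_eviction_warnings()
    return status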
pixeltable/env.py
CHANGED
@@ -8,6 +8,7 @@ import importlib.util
  import inspect
  import logging
  import os
+ import shutil
  import subprocess
  import sys
  import threading

@@ -15,12 +16,12 @@ import uuid
  import warnings
  from dataclasses import dataclass
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Callable, Optional
+ from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar
  from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

  import pixeltable_pgserver
  import sqlalchemy as sql
- import yaml
+ import toml
  from tqdm import TqdmWarning

  import pixeltable.exceptions as excs

@@ -64,7 +65,7 @@ class Env:
  _log_to_stdout: bool
  _module_log_level: dict[str, int]  # module name -> log level
  _config_file: Optional[Path]
- _config: Optional[
+ _config: Optional[Config]
  _stdout_handler: logging.StreamHandler
  _initialized: bool

@@ -110,6 +111,7 @@ class Env:
  self._log_to_stdout = False
  self._module_log_level = {}  # module name -> log level

+ # config
  self._config_file = None
  self._config = None

@@ -119,7 +121,8 @@ class Env:
  self._initialized = False

  @property
- def config(self):
+ def config(self) -> Config:
+ assert self._config is not None
  return self._config

  @property

@@ -227,30 +230,13 @@ class Env:
  home = Path(os.environ.get('PIXELTABLE_HOME', str(Path.home() / '.pixeltable')))
  assert self._home is None or self._home == home
  self._home = home
- self._config_file = Path(os.environ.get('PIXELTABLE_CONFIG', str(self._home / 'config.yaml')))
+ self._config_file = Path(os.environ.get('PIXELTABLE_CONFIG', str(self._home / 'config.toml')))
  self._media_dir = self._home / 'media'
  self._file_cache_dir = self._home / 'file_cache'
  self._dataset_cache_dir = self._home / 'dataset_cache'
  self._log_dir = self._home / 'logs'
  self._tmp_dir = self._home / 'tmp'

- # Read in the config
- if os.path.isfile(self._config_file):
- with open(self._config_file, 'r') as stream:
- try:
- self._config = yaml.safe_load(stream)
- except yaml.YAMLError as exc:
- self._logger.error(f'Could not read config file: {self._config_file}')
- self._config = {}
- else:
- self._config = {}
-
- # Disable spurious warnings
- warnings.simplefilter('ignore', category=TqdmWarning)
- if 'hide_warnings' in self._config and self._config['hide_warnings']:
- # Disable more warnings
- warnings.simplefilter('ignore', category=UserWarning)
-
  if self._home.exists() and not self._home.is_dir():
  raise RuntimeError(f'{self._home} is not a directory')

@@ -274,6 +260,22 @@ class Env:
  if not self._tmp_dir.exists():
  self._tmp_dir.mkdir()

+ # Read in the config
+ self._config = Config.from_file(self._config_file)
+ self._file_cache_size_g = self._config.get_float_value('file_cache_size_g')
+ if self._file_cache_size_g is None:
+ raise excs.Error(
+ 'pixeltable/file_cache_size_g is missing from configuration\n'
+ f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {self._config_file},\n'
+ 'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
+ )
+
+ # Disable spurious warnings
+ warnings.simplefilter('ignore', category=TqdmWarning)
+ if self._config.get_bool_value('hide_warnings'):
+ # Disable more warnings
+ warnings.simplefilter('ignore', category=UserWarning)
+
  # configure _logger to log to a file
  self._logfilename = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.log'
  fh = logging.FileHandler(self._log_dir / self._logfilename, mode='w')

@@ -313,7 +315,7 @@ class Env:
  self._db_server = pixeltable_pgserver.get_server(self._pgdata_dir, cleanup_mode=None)
  self._db_url = self._db_server.get_uri(database=self._db_name, driver='psycopg')

- tz_name =
+ tz_name = self.config.get_string_value('time_zone')
  if tz_name is not None:
  # Validate tzname
  if not isinstance(tz_name, str):

@@ -440,21 +442,18 @@ class Env:
  if cl.client_obj is not None:
  return cl.client_obj  # Already initialized

- # Construct a client
- # if not, look in Pixeltable config from `config.yaml`.
+ # Construct a client, retrieving each parameter from config.

  init_kwargs: dict[str, str] = {}
  for param in cl.param_names:
-
- if
- init_kwargs[param] =
-
- init_kwargs[param] = self._config[name.lower()][param.lower()]
- if param not in init_kwargs or init_kwargs[param] == '':
+ arg = self._config.get_string_value(param, section=name)
+ if arg is not None and len(arg) > 0:
+ init_kwargs[param] = arg
+ else:
  raise excs.Error(
  f'`{name}` client not initialized: parameter `{param}` is not configured.\n'
- f'To fix this, specify the `{
- f'the `{name.lower()}` section of $PIXELTABLE_HOME/config.yaml.'
+ f'To fix this, specify the `{name.upper()}_{param.upper()}` environment variable, or put `{param.lower()}` in '
+ f'the `{name.lower()}` section of $PIXELTABLE_HOME/config.toml.'
  )

  cl.client_obj = cl.init_fn(**init_kwargs)

@@ -506,7 +505,6 @@ class Env:
  self.__register_package('spacy')
  self.__register_package('tiktoken')
  self.__register_package('together')
- self.__register_package('toml')
  self.__register_package('torch')
  self.__register_package('torchvision')
  self.__register_package('transformers')

@@ -643,7 +641,7 @@ def register_client(name: str) -> Callable:
  Pixeltable will attempt to load the client parameters from config. For each
  config parameter:
  - If an environment variable named MY_CLIENT_API_KEY (for example) is set, use it;
- - Otherwise, look for 'api_key' in the 'my_client' section of config.yaml.
+ - Otherwise, look for 'api_key' in the 'my_client' section of config.toml.

  If all config parameters are found, Pixeltable calls the initialization function;
  otherwise it throws an exception.

@@ -660,6 +658,79 @@ def register_client(name: str) -> Callable:
  return decorator


+ class Config:
+ """
+ The (global) Pixeltable configuration, as loaded from `config.toml`. Provides methods for retrieving
+ configuration values, which can be set in the config file or as environment variables.
+ """
+ __config: dict[str, Any]
+
+ T = TypeVar('T')
+
+ @classmethod
+ def from_file(cls, path: Path) -> Config:
+ """
+ Loads configuration from the specified TOML file. If the file does not exist, it will be
+ created and populated with the default configuration.
+ """
+ if os.path.isfile(path):
+ with open(path, 'r') as stream:
+ try:
+ config_dict = toml.load(stream)
+ except Exception as exc:
+ raise excs.Error(f'Could not read config file: {str(path)}') from exc
+ else:
+ config_dict = cls.__create_default_config(path)
+ with open(path, 'w') as stream:
+ try:
+ toml.dump(config_dict, stream)
+ except Exception as exc:
+ raise excs.Error(f'Could not write config file: {str(path)}') from exc
+ logging.getLogger('pixeltable').info(f'Created default config file at: {str(path)}')
+ return cls(config_dict)
+
+ @classmethod
+ def __create_default_config(cls, config_path: Path) -> dict[str, Any]:
+ free_disk_space_bytes = shutil.disk_usage(config_path.parent).free
+ # Default cache size is 1/5 of free disk space
+ file_cache_size_g = free_disk_space_bytes / 5 / (1 << 30)
+ return {
+ 'pixeltable': {
+ 'file_cache_size_g': round(file_cache_size_g, 1),
+ 'hide_warnings': False,
+ }
+ }
+
+ def __init__(self, config: dict[str, Any]) -> None:
+ self.__config = config
+
+ def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
+ env_var = f'{section.upper()}_{key.upper()}'
+ if env_var in os.environ:
+ value = os.environ[env_var]
+ elif section in self.__config and key in self.__config[section]:
+ value = self.__config[section][key]
+ else:
+ return None
+
+ try:
+ return expected_type(value)  # type: ignore[call-arg]
+ except ValueError:
+ raise excs.Error(f'Invalid value for configuration parameter {section}.{key}: {value}')
+
+ def get_string_value(self, key: str, section: str = 'pixeltable') -> Optional[str]:
+ return self.get_value(key, str, section)
+
+ def get_int_value(self, key: str, section: str = 'pixeltable') -> Optional[int]:
+ return self.get_value(key, int, section)
+
+ def get_float_value(self, key: str, section: str = 'pixeltable') -> Optional[float]:
+ return self.get_value(key, float, section)
+
+ def get_bool_value(self, key: str, section: str = 'pixeltable') -> Optional[bool]:
+ return self.get_value(key, bool, section)
+
+
  _registered_clients: dict[str, ApiClient] = {}
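To make the new TOML-based configuration concrete, here is a small illustrative sketch (not part of the diff; the numeric value is hypothetical) of the default file that Config.from_file() writes on first run and of the environment-variable override implemented by Config.get_value():

import os
import toml

# Hypothetical default contents of ~/.pixeltable/config.toml; file_cache_size_g
# defaults to roughly 1/5 of free disk space, rounded to 0.1 GiB.
default_config = {'pixeltable': {'file_cache_size_g': 50.0, 'hide_warnings': False}}
print(toml.dumps(default_config))
# [pixeltable]
# file_cache_size_g = 50.0
# hide_warnings = false

# A variable named <SECTION>_<KEY> takes precedence over the file entry, so this
# overrides pixeltable/file_cache_size_g no matter what config.toml says:
os.environ['PIXELTABLE_FILE_CACHE_SIZE_G'] = '100'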
pixeltable/exceptions.py
CHANGED
@@ -1,6 +1,9 @@
- from typing import List, Any
- from types import TracebackType
  from dataclasses import dataclass
+ from types import TracebackType
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+ from pixeltable import exprs


  class Error(Exception):

@@ -9,11 +12,11 @@ class Error(Exception):

  @dataclass
  class ExprEvalError(Exception):
- expr:
+ expr: 'exprs.Expr'
  expr_msg: str
  exc: Exception
  exc_tb: TracebackType
- input_vals:
+ input_vals: list[Any]
  row_num: int

pixeltable/exec/component_iteration_node.py
CHANGED

@@ -1,10 +1,12 @@
-
+ import inspect
+ from typing import Iterator, Optional

- from .data_row_batch import DataRowBatch
- from .exec_node import ExecNode
  import pixeltable.catalog as catalog
- import pixeltable.exprs as exprs
  import pixeltable.exceptions as excs
+ import pixeltable.exprs as exprs
+
+ from .data_row_batch import DataRowBatch
+ from .exec_node import ExecNode


  class ComponentIterationNode(ExecNode):

@@ -12,7 +14,7 @@ class ComponentIterationNode(ExecNode):

  Returns row batches of OUTPUT_BATCH_SIZE size.
  """
-
+ __OUTPUT_BATCH_SIZE = 1024

  def __init__(self, view: catalog.TableVersion, input: ExecNode):
  assert view.is_component_view()

@@ -23,57 +25,76 @@
  self.iterator_args = iterator_args[0]
  assert isinstance(self.iterator_args, exprs.InlineDict)
  self.iterator_args_ctx = self.row_builder.create_eval_ctx([self.iterator_args])
- self.iterator_output_schema, self.unstored_column_names =
+ self.iterator_output_schema, self.unstored_column_names = (
  self.view.iterator_cls.output_schema(**self.iterator_args.to_kwargs())
+ )
  self.iterator_output_fields = list(self.iterator_output_schema.keys())
- self.iterator_output_cols =
-
+ self.iterator_output_cols = {
+ field_name: self.view.cols_by_name[field_name] for field_name in self.iterator_output_fields
+ }
  # referenced iterator output fields
  self.refd_output_slot_idxs = {
  e.col.name: e.slot_idx for e in self.row_builder.unique_exprs
  if isinstance(e, exprs.ColumnRef) and e.col.name in self.iterator_output_fields
  }
- self.
+ self.__output: Optional[Iterator[DataRowBatch]] = None

- def
+ def __output_batches(self) -> Iterator[DataRowBatch]:
  output_batch = DataRowBatch(self.view, self.row_builder)
  for input_batch in self.input:
  for input_row in input_batch:
  self.row_builder.eval(input_row, self.iterator_args_ctx)
  iterator_args = input_row[self.iterator_args.slot_idx]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- # we can ignore this
- continue
- output_col = self.iterator_output_cols[field_name]
- output_col.col_type.validate_literal(field_val)
- output_row[self.refd_output_slot_idxs[field_name]] = field_val
- if len(component_dict) != len(self.iterator_output_fields):
- missing_fields = set(self.refd_output_slot_idxs.keys()) - set(component_dict.keys())
- raise excs.Error(
- f'Invalid output of {self.view.iterator_cls.__name__}: '
- f'missing fields {", ".join(missing_fields)}')
-
- if len(output_batch) == self.OUTPUT_BATCH_SIZE:
- yield output_batch
- output_batch = DataRowBatch(self.view, self.row_builder)
+ assert isinstance(iterator_args, dict)
+ # We need to ensure that all of the required (non-nullable) parameters of the iterator are
+ # specified and are not null. If any of them are null, then we skip this row (i.e., we emit 0
+ # output rows for this input row).
+ if self.__non_nullable_args_specified(iterator_args):
+ iterator = self.view.iterator_cls(**iterator_args)
+ for pos, component_dict in enumerate(iterator):
+ output_row = output_batch.add_row()
+ input_row.copy(output_row)
+ # we're expanding the input and need to add the iterator position to the pk
+ self.__populate_output_row(output_row, pos, component_dict)
+ if len(output_batch) == self.__OUTPUT_BATCH_SIZE:
+ yield output_batch
+ output_batch = DataRowBatch(self.view, self.row_builder)

  if len(output_batch) > 0:
  yield output_batch

+ def __non_nullable_args_specified(self, iterator_args: dict) -> bool:
+ """
+ Returns true if all non-nullable iterator arguments are not `None`.
+ """
+ input_schema = self.view.iterator_cls.input_schema()
+ for arg_name, arg_value in iterator_args.items():
+ col_type = input_schema[arg_name]
+ if arg_value is None and not col_type.nullable:
+ return False
+ return True
+
+ def __populate_output_row(self, output_row: exprs.DataRow, pos: int, component_dict: dict) -> None:
+ pk = output_row.pk[:-1] + (pos,) + output_row.pk[-1:]
+ output_row.set_pk(pk)
+ # verify and copy component_dict fields to their respective slots in output_row
+ for field_name, field_val in component_dict.items():
+ if field_name not in self.iterator_output_fields:
+ raise excs.Error(
+ f'Invalid field name {field_name} in output of {self.view.iterator_cls.__name__}')
+ if field_name not in self.refd_output_slot_idxs:
+ # we can ignore this
+ continue
+ output_col = self.iterator_output_cols[field_name]
+ output_col.col_type.validate_literal(field_val)
+ output_row[self.refd_output_slot_idxs[field_name]] = field_val
+ if len(component_dict) != len(self.iterator_output_fields):
+ missing_fields = set(self.refd_output_slot_idxs.keys()) - set(component_dict.keys())
+ raise excs.Error(
+ f'Invalid output of {self.view.iterator_cls.__name__}: '
+ f'missing fields {", ".join(missing_fields)}')
+
  def __next__(self) -> DataRowBatch:
- if self.
- self.
- return next(self.
+ if self.__output is None:
+ self.__output = self.__output_batches()
+ return next(self.__output)
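A standalone sketch of the new skip-on-null behavior introduced above (the helper names and schema are hypothetical; in the real node the nullability check consults the iterator's input_schema()): an input row whose required, non-nullable iterator arguments include a None value produces zero output rows instead of raising.

from typing import Any, Dict, Iterator

def expand_row(iterator_args: Dict[str, Any], non_nullable: set,
               make_components) -> Iterator[dict]:
    # skip the whole input row if any required argument is missing
    if any(iterator_args.get(name) is None for name in non_nullable):
        return
    yield from make_components(**iterator_args)

# a row with document=None yields no components; a populated row is expanded normally
assert list(expand_row({'document': None}, {'document'}, lambda **kw: iter([]))) == []
assert list(expand_row({'document': 'a.pdf'}, {'document'},
                       lambda document: iter([{'text': 'chunk 1'}]))) == [{'text': 'chunk 1'}]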
pixeltable/exprs/json_path.py
CHANGED
@@ -105,12 +105,9 @@
  return JsonPath(self._anchor, self.path_elements + [name])

  def __getitem__(self, index: object) -> 'JsonPath':
- if isinstance(index, str):
-
-
- elif not isinstance(index, (int, slice)):
- raise excs.Error(f'Invalid json list index: {index}')
- return JsonPath(self._anchor, self.path_elements + [index])
+ if isinstance(index, (int, slice, str)):
+ return JsonPath(self._anchor, self.path_elements + [index])
+ raise excs.Error(f'Invalid json list index: {index}')

  def __rshift__(self, other: object) -> 'JsonMapper':
  rhs_expr = Expr.from_object(other)
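For illustration, the simplified __getitem__ accepts string keys, integer indices, and slices uniformly when building a JSON path expression (the table and column names below are hypothetical):

import pixeltable as pxt

t = pxt.get_table('docs')                 # hypothetical table with a JSON column `metadata`
first = t.metadata['detections'][0]       # string key, then integer list index
head = t.metadata['detections'][0:3]      # slices are accepted as well
# Any other index type, e.g. t.metadata[3.14], raises excs.Error('Invalid json list index: ...').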
pixeltable/functions/together.py
CHANGED
@@ -7,13 +7,15 @@ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tu

  import base64
  import io
- from typing import TYPE_CHECKING, Callable, Optional
+ from typing import TYPE_CHECKING, Callable, Optional, TypeVar

  import numpy as np
  import PIL.Image
+ import requests
  import tenacity

  import pixeltable as pxt
+ import pixeltable.exceptions as excs
  from pixeltable import env
  from pixeltable.func import Batch
  from pixeltable.utils.code import local_public_names

@@ -32,7 +34,10 @@ def _together_client() -> 'together.Together':
  return env.Env.get().get_client('together')


-
+ T = TypeVar('T')
+
+
+ def _retry(fn: Callable[..., T]) -> Callable[..., T]:
  import together
  return tenacity.retry(
  retry=tenacity.retry_if_exception_type(together.error.RateLimitError),

@@ -249,20 +254,29 @@
  The generated image.

  Examples:
- Add a computed column that applies the model `
+ Add a computed column that applies the model `stabilityai/stable-diffusion-xl-base-1.0`
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

- >>> tbl['response'] = image_generations(tbl.prompt, model='
+ >>> tbl['response'] = image_generations(tbl.prompt, model='stabilityai/stable-diffusion-xl-base-1.0')
  """
- # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
  result = _retry(_together_client().images.generate)(
  prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
  )
-
-
-
-
-
+ if result.data[0].b64_json is not None:
+ b64_bytes = base64.b64decode(result.data[0].b64_json)
+ img = PIL.Image.open(io.BytesIO(b64_bytes))
+ img.load()
+ return img
+ if result.data[0].url is not None:
+ try:
+ resp = requests.get(result.data[0].url)
+ with io.BytesIO(resp.content) as fp:
+ image = PIL.Image.open(fp)
+ image.load()
+ return image
+ except Exception as exc:
+ raise excs.Error('Failed to download generated image from together.ai.') from exc
+ raise excs.Error('Response does not contain a generated image.')


  __all__ = local_public_names(__name__)
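The new return path above builds a PIL image either from the base64 payload (b64_json) or by downloading the returned URL. A self-contained sketch of the base64 branch, with a locally fabricated payload standing in for the Together response:

import base64
import io
import PIL.Image

# fabricate a small PNG and base64-encode it, as a stand-in for result.data[0].b64_json
buf = io.BytesIO()
PIL.Image.new('RGB', (4, 4), color='red').save(buf, format='PNG')
b64_payload = base64.b64encode(buf.getvalue()).decode()

img = PIL.Image.open(io.BytesIO(base64.b64decode(b64_payload)))
img.load()              # force-decode while the underlying buffer is still alive
assert img.size == (4, 4)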
pixeltable/globals.py
CHANGED
@@ -16,6 +16,7 @@ from pixeltable.dataframe import DataFrameResultSet
  from pixeltable.env import Env
  from pixeltable.iterators import ComponentIterator
  from pixeltable.metadata import schema
+ from pixeltable.utils.filecache import FileCache

  _logger = logging.getLogger('pixeltable')

@@ -193,6 +194,7 @@
  )
  Catalog.get().paths[path] = view
  _logger.info(f'Created view `{path_str}`.')
+ FileCache.get().emit_eviction_warnings()
  return view
pixeltable/io/globals.py
CHANGED
@@ -43,7 +43,7 @@
  The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:

  * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
- * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
+ * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.toml`.

  __Requirements:__
pixeltable/io/hf_datasets.py
CHANGED
@@ -34,9 +34,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
  }


- def _to_pixeltable_type(
- feature_type: Union[datasets.ClassLabel, datasets.Value, datasets.Sequence],
- ) -> Optional[ts.ColumnType]:
+ def _to_pixeltable_type(feature_type: Any) -> Optional[ts.ColumnType]:
  """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
  import datasets

@@ -51,6 +49,8 @@ def _to_pixeltable_type(
  dtype = _to_pixeltable_type(feature_type.feature)
  length = feature_type.length if feature_type.length != -1 else None
  return ts.ArrayType(shape=(length,), dtype=dtype)
+ elif isinstance(feature_type, datasets.Image):
+ return ts.ImageType(nullable=True)
  else:
  return None
pixeltable/iterators/document.py
CHANGED
@@ -166,7 +166,7 @@ class DocumentSplitter(ComponentIterator):
  return {
  'document': DocumentType(nullable=False),
  'separators': StringType(nullable=False),
- 'metadata': StringType(nullable=True),
+ 'metadata': StringType(nullable=False),
  'limit': IntType(nullable=True),
  'overlap': IntType(nullable=True),
  'skip_tags': StringType(nullable=True),

pixeltable/tool/create_test_db_dump.py
CHANGED

@@ -36,7 +36,7 @@ class Dumper:
  mock_home_dir = self.output_dir / '.pixeltable'
  mock_home_dir.mkdir(parents=True, exist_ok=True)
  os.environ['PIXELTABLE_HOME'] = str(mock_home_dir)
- os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.yaml')
+ os.environ['PIXELTABLE_CONFIG'] = str(shared_home / 'config.toml')
  os.environ['PIXELTABLE_DB'] = db_name
  os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
pixeltable/utils/filecache.py
CHANGED
@@ -1,28 +1,33 @@
  from __future__ import annotations
-
- from collections import OrderedDict, defaultdict, namedtuple
- import os
+
  import glob
-
- from time import time
+ import hashlib
  import logging
+ import os
+ import warnings
+ from collections import OrderedDict, defaultdict, namedtuple
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Optional
  from uuid import UUID
- import hashlib

+ import pixeltable.exceptions as excs
  from pixeltable.env import Env

-
  _logger = logging.getLogger('pixeltable')

+ @dataclass
  class CacheEntry:
- def __init__(self, key: str, tbl_id: UUID, col_id: int, size: int, last_accessed_ts: int, ext: str):
- self.key = key
- self.tbl_id = tbl_id
- self.col_id = col_id
- self.size = size
- self.last_accessed_ts = last_accessed_ts
- self.ext = ext

+ key: str
+ tbl_id: UUID
+ col_id: int
+ size: int
+ last_used: datetime
+ ext: str
+
+ @property
  def path(self) -> Path:
  return Env.get().file_cache_dir / f'{self.tbl_id.hex}_{self.col_id}_{self.key}{self.ext}'

@@ -34,7 +39,11 @@
  col_id = int(components[1])
  key = components[2]
  file_info = os.stat(str(path))
-
+ # We use the last modified time (file_info.st_mtime) as the timestamp; `FileCache` will touch the file
+ # each time it is retrieved, so that the mtime of the file will always represent the last used time of
+ # the cache entry.
+ last_used = datetime.fromtimestamp(file_info.st_mtime, tz=timezone.utc)
+ return cls(key, tbl_id, col_id, file_info.st_size, last_used, path.suffix)


  class FileCache:

@@ -45,31 +54,60 @@
  access of a cache entries is its file's mtime.

  TODO:
- - enforce a maximum capacity with LRU eviction
  - implement MRU eviction for queries that exceed the capacity
  """
-
-
+ __instance: Optional[FileCache] = None
+
+ cache: OrderedDict[str, CacheEntry]
+ total_size: int
+ capacity_bytes: int
+ num_requests: int
+ num_hits: int
+ num_evictions: int
+ keys_retrieved: set[str]  # keys retrieved (downloaded or accessed) this session
+ keys_evicted_after_retrieval: set[str]  # keys that were evicted after having been retrieved this session
+
+ # A key is added to this set when it is already present in `keys_evicted_this_session` and is downloaded again.
+ # In other words, for a key to be added to this set, the following sequence of events must occur in this order:
+ # - It is retrieved during this session (either because it was newly downloaded, or because it was in the cache
+ #   at the start of the session and was accessed at some point during the session)
+ # - It is subsequently evicted
+ # - It is subsequently retrieved a second time ("download after a previous retrieval")
+ # The contents of this set will be used to generate a more informative warning.
+ evicted_working_set_keys: set[str]
+ new_redownload_witnessed: bool  # whether a new re-download has occurred since the last time a warning was issued
+
+ ColumnStats = namedtuple('FileCacheColumnStats', ('tbl_id', 'col_id', 'num_files', 'total_size'))
  CacheStats = namedtuple(
- 'FileCacheStats',
+ 'FileCacheStats',
+ ('total_size', 'num_requests', 'num_hits', 'num_evictions', 'column_stats')
+ )

  @classmethod
  def get(cls) -> FileCache:
- if cls.
- cls.
- return cls.
+ if cls.__instance is None:
+ cls.init()
+ return cls.__instance
+
+ @classmethod
+ def init(cls) -> None:
+ cls.__instance = cls()

  def __init__(self):
- self.cache
+ self.cache = OrderedDict()
  self.total_size = 0
-
+ self.capacity_bytes = Env.get()._file_cache_size_g * (1 << 30)
  self.num_requests = 0
  self.num_hits = 0
  self.num_evictions = 0
+ self.keys_retrieved = set()
+ self.keys_evicted_after_retrieval = set()
+ self.evicted_working_set_keys = set()
+ self.new_redownload_witnessed = False
  paths = glob.glob(str(Env.get().file_cache_dir / '*'))
  entries = [CacheEntry.from_file(Path(path_str)) for path_str in paths]
- # we need to insert entries in order
- entries.sort(key=lambda e: e.
+ # we need to insert entries in access order
+ entries.sort(key=lambda e: e.last_used)
  for entry in entries:
  self.cache[entry.key] = entry
  self.total_size += entry.size

@@ -82,30 +120,43 @@
  def num_files(self, tbl_id: Optional[UUID] = None) -> int:
  if tbl_id is None:
  return len(self.cache)
-
- return len(entries)
+ return sum(e.tbl_id == tbl_id for e in self.cache.values())

- def clear(self, tbl_id: Optional[UUID] = None
+ def clear(self, tbl_id: Optional[UUID] = None) -> None:
  """
  For testing purposes: allow resetting capacity and stats.
  """
-
-
-
-
-
+ if tbl_id is None:
+ # We need to store the entries to remove in a list, because we can't remove items from a dict while iterating
+ entries_to_remove = list(self.cache.values())
+ _logger.debug(f'clearing {self.num_files()} entries from file cache')
+ self.num_requests, self.num_hits, self.num_evictions = 0, 0, 0
+ self.keys_retrieved.clear()
+ self.keys_evicted_after_retrieval.clear()
+ self.new_redownload_witnessed = False
  else:
-
-
+ entries_to_remove = [e for e in self.cache.values() if e.tbl_id == tbl_id]
+ _logger.debug(f'clearing {self.num_files(tbl_id)} entries from file cache for table {tbl_id}')
+ for entry in entries_to_remove:
+ os.remove(entry.path)
  del self.cache[entry.key]
  self.total_size -= entry.size
-
-
-
-
-
-
-
+
+ def emit_eviction_warnings(self) -> None:
+ if self.new_redownload_witnessed:
+ # Compute the additional capacity that would be needed in order to retain all the re-downloaded files
+ extra_capacity_needed = sum(self.cache[key].size for key in self.evicted_working_set_keys)
+ suggested_cache_size = self.capacity_bytes + extra_capacity_needed + (1 << 30)
+ warnings.warn(
+ f'{len(self.evicted_working_set_keys)} media file(s) had to be downloaded multiple times this session, '
+ 'because they were evicted\nfrom the file cache after their first access. The total size '
+ f'of the evicted file(s) is {round(extra_capacity_needed / (1 << 30), 1)} GiB.\n'
+ f'Consider increasing the cache size to at least {round(suggested_cache_size / (1 << 30), 1)} GiB '
+ f'(it is currently {round(self.capacity_bytes / (1 << 30), 1)} GiB).\n'
+ f'You can do this by setting the value of `file_cache_size_g` in: {str(Env.get()._config_file)}',
+ excs.PixeltableWarning
+ )
+ self.new_redownload_witnessed = False

  def _url_hash(self, url: str) -> str:
  h = hashlib.sha256()

@@ -120,66 +171,62 @@
  _logger.debug(f'file cache miss for {url}')
  return None
  # update mtime and cache
- path = entry.path
+ path = entry.path
  path.touch(exist_ok=True)
  file_info = os.stat(str(path))
- entry.
+ entry.last_used = file_info.st_mtime
  self.cache.move_to_end(key, last=True)
  self.num_hits += 1
+ self.keys_retrieved.add(key)
  _logger.debug(f'file cache hit for {url}')
  return path

- # def can_admit(self, query_ts: int) -> bool:
- #     if self.total_size + self.avg_file_size <= self.capacity:
- #         return True
- #     assert len(self.cache) > 0
- #     # check whether we can evict the current lru entry
- #     lru_entry = next(iter(self.cache.values()))
- #     if lru_entry.last_accessed_ts >= query_ts:
- #         # the current query brought this entry in: we're not going to evict it
- #         return False
- #     return True
-
  def add(self, tbl_id: UUID, col_id: int, url: str, path: Path) -> Path:
  """Adds url at 'path' to cache and returns its new path.
  'path' will not be accessible after this call. Retains the extension of 'path'.
  """
  file_info = os.stat(str(path))
-
- #if self.total_size + file_info.st_size > self.capacity:
- if False:
- if len(self.cache) == 0:
- # nothing to evict
- return
- # evict entries until we're below the limit or until we run into entries the current query brought in
- while True:
- lru_entry = next(iter(self.cache.values()))
- if lru_entry.last_accessed_ts >= query_ts:
- # the current query brought this entry in: switch to MRU and ignore this put()
- _logger.debug('file cache switched to MRU')
- return
- self.cache.popitem(last=False)
- self.total_size -= lru_entry.size
- self.num_evictions += 1
- os.remove(str(lru_entry.path()))
- _logger.debug(f'evicted entry for cell {lru_entry.cell_id} from file cache')
- if self.total_size + file_info.st_size <= self.capacity:
- break
-
+ self.ensure_capacity(file_info.st_size)
  key = self._url_hash(url)
  assert key not in self.cache
+ if key in self.keys_evicted_after_retrieval:
+ # This key was evicted after being retrieved earlier this session, and is now being retrieved again.
+ # Add it to `keys_multiply_downloaded` so that we may generate a warning later.
+ self.evicted_working_set_keys.add(key)
+ self.new_redownload_witnessed = True
+ self.keys_retrieved.add(key)
  entry = CacheEntry(key, tbl_id, col_id, file_info.st_size, file_info.st_mtime, path.suffix)
  self.cache[key] = entry
  self.total_size += entry.size
- new_path = entry.path
+ new_path = entry.path
  os.rename(str(path), str(new_path))
+ new_path.touch(exist_ok=True)
  _logger.debug(f'added entry for cell {url} to file cache')
  return new_path

+ def ensure_capacity(self, size: int) -> None:
+ """
+ Evict entries from the cache until there is at least 'size' bytes of free space.
+ """
+ while len(self.cache) > 0 and self.total_size + size > self.capacity_bytes:
+ _, lru_entry = self.cache.popitem(last=False)
+ self.total_size -= lru_entry.size
+ self.num_evictions += 1
+ if lru_entry.key in self.keys_retrieved:
+ # This key was retrieved at some point earlier this session and is now being evicted.
+ # Make a record of the eviction, so that we can generate a warning later if the key is retrieved again.
+ self.keys_evicted_after_retrieval.add(lru_entry.key)
+ os.remove(str(lru_entry.path))
+ _logger.debug(f'evicted entry for cell {lru_entry.key} from file cache (of size {lru_entry.size // (1 << 20)} MiB)')
+
+ def set_capacity(self, capacity_bytes: int) -> None:
+ self.capacity_bytes = capacity_bytes
+ self.ensure_capacity(0)  # evict entries if necessary
+
  def stats(self) -> CacheStats:
  # collect column stats
  # (tbl_id, col_id) -> (num_files, total_size)
- d:
+ d: dict[tuple[int, int], list[int]] = defaultdict(lambda: [0, 0])
  for entry in self.cache.values():
  t = d[(entry.tbl_id, entry.col_id)]
  t[0] += 1
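To summarize the bookkeeping added above, here is a compact, standalone sketch of an LRU cache with the "retrieved, then evicted, then downloaded again" detection that drives emit_eviction_warnings() (the class and variable names are illustrative, not the actual FileCache API):

from collections import OrderedDict

class TinyLruCache:
    def __init__(self, capacity_bytes: int):
        self.cache: OrderedDict[str, int] = OrderedDict()   # key -> size
        self.total = 0
        self.capacity = capacity_bytes
        self.retrieved: set = set()               # keys accessed this session
        self.evicted_after_retrieval: set = set()
        self.redownloaded: set = set()            # evicted working-set keys seen again

    def add(self, key: str, size: int) -> None:
        # evict LRU entries until the new file fits
        while self.cache and self.total + size > self.capacity:
            old_key, old_size = self.cache.popitem(last=False)
            self.total -= old_size
            if old_key in self.retrieved:
                self.evicted_after_retrieval.add(old_key)
        if key in self.evicted_after_retrieval:
            self.redownloaded.add(key)            # this is what triggers the warning
        self.retrieved.add(key)
        self.cache[key] = size
        self.total += size

c = TinyLruCache(capacity_bytes=100)
c.add('a', 60)
c.add('b', 60)    # 'a' is evicted to make room for 'b'
c.add('a', 60)    # 'a' comes back: a re-download of the working set
assert c.redownloaded == {'a'}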
{pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pixeltable
- Version: 0.2.19
+ Version: 0.2.20
  Summary: Pixeltable: The Multimodal AI Data Plane
  Author: Pixeltable, Inc.
  Author-email: contact@pixeltable.com

@@ -31,6 +31,7 @@ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
  Requires-Dist: requests (>=2.31.0,<3.0.0)
  Requires-Dist: sqlalchemy (>=2.0.23,<3.0.0)
  Requires-Dist: tenacity (>=8.2,<9.0)
+ Requires-Dist: toml (>=0.10)
  Requires-Dist: tqdm (>=4.64)
  Description-Content-Type: text/markdown

@@ -46,10 +47,17 @@ Description-Content-Type: text/markdown
  [](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
  [](https://pypi.org/project/pixeltable/)

- [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
  </div>

- Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store
+ Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
+
+ Data transformations, model inference, and custom logic are embedded as **computed columns**.
+ - **Load/Query all data types**: Interact with [video data](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#import-media-data-into-pixeltable-videos-images-audio) at the [frame level](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#text-and-image-similarity-search-on-video-frames-with-embedding-indexes) and documents at the [chunk level](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#automate-data-operations-with-views-eg-split-documents-into-chunks)
+ - **Incremental updates for data transformation**: Maintain an [embedding index](https://docs.pixeltable.com/docs/embedding-vector-indexes) colocated with your data
+ - **Lazy evaluation and cache management**: Eliminates the need for [manual frame extraction](https://docs.pixeltable.com/docs/object-detection-in-videos)
+ - **Integrates with any Python libraries**: Use [built-in and custom functions (UDFs)](https://docs.pixeltable.com/docs/user-defined-functions-udfs) without complex pipelines
+ - **Data format agnostic and extensibility**: Access tables as Parquet files, [PyTorch datasets](https://pixeltable.github.io/pixeltable/api/data-frame/#pixeltable.DataFrame.to_pytorch_dataset), or [COCO annotations](https://pixeltable.github.io/pixeltable/api/table/#pixeltable.Table.to_coco_dataset)

  ## 💾 Installation

{pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/RECORD
CHANGED

@@ -1,26 +1,26 @@
  pixeltable/__init__.py,sha256=t1uRHKta7mPH9_KgkUpOWBu6AewA7DRdSGGyrm0OcSQ,1279
- pixeltable/__version__.py,sha256=
+ pixeltable/__version__.py,sha256=RrlUQ8lgLgO05DWPZnMK9eYJ0O2bfE8N7RBzpECP5f8,114
  pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
  pixeltable/catalog/catalog.py,sha256=tyDyI5wQw7vV6_FChrp9qgGCRClcjiSdW3eygYT0p9s,7849
  pixeltable/catalog/column.py,sha256=Be3WmOadMROS2s4IgtG_Ohjkr07eU9GJItl6WhNishQ,9683
  pixeltable/catalog/dir.py,sha256=fG_BQM-fLuABpTstMVH-9dvZPx7kqi3sgTQgKveVXJI,922
  pixeltable/catalog/globals.py,sha256=XeOeDqq1nDEcpqkY7PYBosoL6tXVAfkJSLJN9aQ_9Fg,1850
- pixeltable/catalog/insertable_table.py,sha256=
+ pixeltable/catalog/insertable_table.py,sha256=DFL93x8ihYEnK_yCR8EdYHDQqAomJdU11ygTF0jEFWY,6822
  pixeltable/catalog/named_function.py,sha256=W8vikP_3jMJ9pQQsksO2EfQAlaVxuQHBlo65M4924dc,1150
  pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
  pixeltable/catalog/path_dict.py,sha256=4b9_Ax7Q8tkmoCYPaKNedpQkU17pE0oGDd2XB53eNZA,5979
  pixeltable/catalog/schema_object.py,sha256=qhpeeUPOYT5doDbsyUNBcPm5QzAQPCAsikqh1PQ6d1k,2226
- pixeltable/catalog/table.py,sha256=
+ pixeltable/catalog/table.py,sha256=NQMZwG6wPu8DzJmZLXTkDm_Dth0AmNXhcixNqiXlPuc,41307
  pixeltable/catalog/table_version.py,sha256=4_djeYLGu9ljRSXe_f14c3HvXL0o0P2-sOZ-1bBQzYw,56991
  pixeltable/catalog/table_version_path.py,sha256=Ee6nPh5Jgbp91qFSKkCwdzIpQ3gJqv3SG06bFFLhbBE,6139
  pixeltable/catalog/view.py,sha256=RfQRldjPUZ7W8jMMdXJFSjbjCUe-3ynxDFvg4W27qXc,10642
  pixeltable/dataframe.py,sha256=kAPv9YjOEx0xZViFG3fi6eXsX6zUhm3F2x5U7qDOrJU,34378
- pixeltable/env.py,sha256=
- pixeltable/exceptions.py,sha256=
+ pixeltable/env.py,sha256=XHxv2P5Aj1dvUxwlvfoxahVemhJFyapmW7p3pf8Vq7g,30133
+ pixeltable/exceptions.py,sha256=NuFY2WtkQpLfLHT_J70kOw9Tr0kEDkkgo-u7As4Gaq4,410
  pixeltable/exec/__init__.py,sha256=VRENEONsAv3PPoBV0r7h-7nAB7SWM4Uglmu1FVQE5uQ,507
  pixeltable/exec/aggregation_node.py,sha256=-DunTLlVh3OflpwTIjkwKGczotl4i3oUqrvfyvRjv6Q,3452
  pixeltable/exec/cache_prefetch_node.py,sha256=d5pEuR6AtJQkEVy9X3XeYFI_q0szMtoNAH96vYdtBE0,5241
- pixeltable/exec/component_iteration_node.py,sha256=
+ pixeltable/exec/component_iteration_node.py,sha256=ABuXGbDRQWLGuaBnfK7bvOxCrz81vMMiAvXHHI8SX4c,4930
  pixeltable/exec/data_row_batch.py,sha256=1IDYHBkSQ60dwOnAGnS-Wpp3AsnbMqKcY40zUT7ku-Q,3392
  pixeltable/exec/exec_context.py,sha256=0rg5V8HzSy-BvqmSbGr-U4aJ4eOZg2JN0x6zjYQGtBc,1090
  pixeltable/exec/exec_node.py,sha256=ixkv3p_EfF53UDWgwLjQGKR1LNIQxzgDXsTzzJj6ea4,2211

@@ -45,7 +45,7 @@ pixeltable/exprs/in_predicate.py,sha256=vJwT07SlDXBYMbqpf-dgV2gr6je5DehrpkPBapnZ
  pixeltable/exprs/inline_expr.py,sha256=FIQsgwfz-9qmghnaTSTL3522Mhr9GQUKM_SDxzA4P5w,7055
  pixeltable/exprs/is_null.py,sha256=qkzxr0NPuID77gs-J_tXj0MYuoCPBEd3Iq6MUWJ_dSc,1101
  pixeltable/exprs/json_mapper.py,sha256=grr-9xVOU_TUL1wtON7wNqZ10-p3mGp66cTCofQKkqc,4590
- pixeltable/exprs/json_path.py,sha256=
+ pixeltable/exprs/json_path.py,sha256=xlwUeYL8D--dPTMhzoyCtkQVeik0sfwI7k_XlNs0eS4,6912
  pixeltable/exprs/literal.py,sha256=ofhMe2kiT4tWNuzf2zKOiGY5pml10dRqbV0e9HGVcbs,3780
  pixeltable/exprs/method_ref.py,sha256=6TQnl5JhsUqKNPFUbu2tzu5svF_BZf5rfm2cZo740Ts,2600
  pixeltable/exprs/object_ref.py,sha256=UDLfpFXrOTrYZOVWH6G5dx4Ax_BxFTpLOaIab3MuyyI,1282

@@ -81,26 +81,26 @@ pixeltable/functions/mistralai.py,sha256=U7f6g4EyHMsik8HMIdJIKn6xFSCdQH6950AAOYL
  pixeltable/functions/openai.py,sha256=yr2hgUa0ZtUJOezSC9aVqp-BoxADf-gmYoK8FE2jbVU,15930
  pixeltable/functions/string.py,sha256=RCGj5bXx7MWgcdcOuy1IMTn3vBvGzjgxudyUrDqWdAg,20153
  pixeltable/functions/timestamp.py,sha256=lyWPv2sCpejD2t9DB62nxJEm0kWLNsAW8yMiT5iEsOo,9121
- pixeltable/functions/together.py,sha256=
+ pixeltable/functions/together.py,sha256=pmd_Xo9XaJ9M8-Zx1bDb4pnomHGZ5swBENHYx-uhmPs,9480
  pixeltable/functions/util.py,sha256=F2iiIL7UfhYdCVzdCa3efYqWbaeLKFrbycKnuPkG57M,650
  pixeltable/functions/video.py,sha256=qaPkeU4qO_g_lQhiMcytAOiJbwtfO89amGVxsT86MZQ,7180
  pixeltable/functions/vision.py,sha256=K_E1Q-n2plPuFoOPlbKWRMiJp9dPgftIJ2T_o3TNL3I,15594
  pixeltable/functions/whisper.py,sha256=VvGVWEsANHH2oCabT1bFTXoDEn5g90gQT_PCh56W4n4,3377
- pixeltable/globals.py,sha256=
+ pixeltable/globals.py,sha256=dbLCAuobQAJgjlTASp9bGRLwOYEyBntKLl3-GP7GTgU,16755
  pixeltable/index/__init__.py,sha256=XBwetNQQwnz0fiKwonOKhyy_U32l_cjt77kNvEIdjWs,102
  pixeltable/index/base.py,sha256=YAQ5Dz1mfI0dfu9rxWHWroE8TjB90yKfPtXAzoADq38,1568
  pixeltable/index/btree.py,sha256=NE4GYhcJWYJhdKyeHI0sQBlFvUaIgGOF9KLyCZOfFjE,1822
  pixeltable/index/embedding_index.py,sha256=U1wAjcTYvw3uJf3QHIOzBV8FLOUn8IeaFsLzUb_QTmc,7829
  pixeltable/io/__init__.py,sha256=bJGWPhKfgoMrSKFdXhLGupjQQbIXt7JaoUPwilND2PE,519
  pixeltable/io/external_store.py,sha256=iRqvMx9QuCKmOKBe12hoY1KfXyGvDHL-q1CjaZr3Fkk,16466
- pixeltable/io/globals.py,sha256=
- pixeltable/io/hf_datasets.py,sha256=
+ pixeltable/io/globals.py,sha256=ZmjbLy9EMhJjXKeNVgPhsi1dmllwJ1rsHu1XHadHtgM,13330
+ pixeltable/io/hf_datasets.py,sha256=E5E2yfaHo9Hf9gFI9ZhzaztHtRC_xDL6mIyeeond2Uo,8284
  pixeltable/io/label_studio.py,sha256=m1-ayI7S8Lxv2R1agrO-32xXyB8Z-YPP_ErAqu22c7o,31023
  pixeltable/io/pandas.py,sha256=7eHg7wnAfRA9eBk4iC0iSSVTKOM59Ne4pXokKWdt3dY,9793
  pixeltable/io/parquet.py,sha256=bUBJmnTFrlBZ8yIesqUJ1JufXZ76pm7vQ3Fq48hVijA,7853
  pixeltable/iterators/__init__.py,sha256=sjldFckkT8aVRiKgEP6faeAK2NQBdzbmpwAeRhI1FkM,366
  pixeltable/iterators/base.py,sha256=cnEh1tNN2JAxRzrLTg3dhun3N1oNQ8vifCm6ts3_UiE,1687
- pixeltable/iterators/document.py,sha256=
+ pixeltable/iterators/document.py,sha256=dAJjCRY0HUxrdMlGjf19ZLChARmWonYoJ0QvgfxkQyQ,19455
  pixeltable/iterators/string.py,sha256=NG_fWc_GAITDfzl6MvrDOMrSoMcZdMZf6hPQztCSatE,1305
  pixeltable/iterators/video.py,sha256=Glp7qNjyrH8X5S4WJOEsZhCa4yChalTICiR9bbMsHlo,5734
  pixeltable/metadata/__init__.py,sha256=8mYxCsc_uvN3tqwrmIbB9iBkQ9r9ybsdpFCMsrzNaNw,2172

@@ -119,7 +119,7 @@ pixeltable/metadata/notes.py,sha256=1Hk6TGy69a4jgqqLoaUlQPtzANMvMGkifKC5rjqeOeA,
  pixeltable/metadata/schema.py,sha256=H2NjpNBxZNDw_VV3UK97fKs30dh81uQf8F3vexKeePo,8567
  pixeltable/plan.py,sha256=pHTJxv2WzsDXtnBd9RvXtUnEFvIQjXV7NX_BIQRQiHs,38544
  pixeltable/store.py,sha256=zlVG9rs5k0k8wcfYF2jcgAQgIOfanJ9YjIDs_kacRIQ,21106
- pixeltable/tool/create_test_db_dump.py,sha256=
+ pixeltable/tool/create_test_db_dump.py,sha256=y4LotPVbcQeqnarpISmVPWoURBVnjKjSl9Yi2MmCZE0,11980
  pixeltable/tool/create_test_video.py,sha256=OLfccymYReIpzE8osZn4rQvLXxxiPC_l0vc06U74hVM,2899
  pixeltable/tool/doc_plugins/griffe.py,sha256=Q6ARBlQNBm8J21G_p625TB5c8MQ8r6hJlm7I2LoBon0,3422
  pixeltable/tool/doc_plugins/mkdocstrings.py,sha256=afq7XOaSC5WRmugkh-FMFMK8PqOgIlDIsJdD8cuPhtE,207

@@ -131,7 +131,7 @@ pixeltable/utils/arrow.py,sha256=83_7aG5UR2qtTktw_otLkQs-RQbLk0VVM0JLJkbweNU,369
  pixeltable/utils/coco.py,sha256=ISpFBhR4eO1jOcg_SPb0thVI4KdS6H0RyNQauZIA5A4,7287
  pixeltable/utils/code.py,sha256=AOw1u2r8_DQXpX-lxJhyHWARGrCRDXOJHFVgKOi54Uc,1231
  pixeltable/utils/documents.py,sha256=UQq2F-W4stDuldFDSGHwUe5PK1dPoalN8SfYRoGqd14,2038
- pixeltable/utils/filecache.py,sha256=
+ pixeltable/utils/filecache.py,sha256=hQOSz5VmC2MBk0F4RaZKgG1OQFFXOyFfanp-cQMOsU4,10553
  pixeltable/utils/formatter.py,sha256=XOuNAhZKCvA9Dlj1QYHB_ovwWUuznvvvdkWgjl4bWq0,9239
  pixeltable/utils/help.py,sha256=cCnxJ4VP9MJ57iDqExmnDcM-JG3a1lw_q7g-D7bpSVI,252
  pixeltable/utils/http_server.py,sha256=WQ5ILMzlz4TlwI9j5YqAPgEZyhrN1GytMNDbLD9occk,2422

@@ -140,8 +140,8 @@ pixeltable/utils/pytorch.py,sha256=VWczSB_FT_aOU5Xqv4T5ONTsnQN6KDlZmMkuoBuji08,3
  pixeltable/utils/s3.py,sha256=DBfXp0SYubhiKckdAD7PsiVBX_YfVP8Rcu6DCG_3SaQ,433
  pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
  pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
- pixeltable-0.2.
- pixeltable-0.2.
- pixeltable-0.2.
- pixeltable-0.2.
- pixeltable-0.2.
+ pixeltable-0.2.20.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ pixeltable-0.2.20.dist-info/METADATA,sha256=QYhIK4U4RMLo_B3lTevJoPXFTM3hP8qfqO1A89R9Qjo,14972
+ pixeltable-0.2.20.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ pixeltable-0.2.20.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
+ pixeltable-0.2.20.dist-info/RECORD,,

{pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/LICENSE
File without changes

{pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/WHEEL
File without changes

{pixeltable-0.2.19.dist-info → pixeltable-0.2.20.dist-info}/entry_points.txt
File without changes