pixeltable 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +53 -0
  2. pixeltable/__version__.py +3 -0
  3. pixeltable/catalog/__init__.py +13 -0
  4. pixeltable/catalog/catalog.py +159 -0
  5. pixeltable/catalog/column.py +181 -0
  6. pixeltable/catalog/dir.py +32 -0
  7. pixeltable/catalog/globals.py +33 -0
  8. pixeltable/catalog/insertable_table.py +192 -0
  9. pixeltable/catalog/named_function.py +36 -0
  10. pixeltable/catalog/path.py +58 -0
  11. pixeltable/catalog/path_dict.py +139 -0
  12. pixeltable/catalog/schema_object.py +39 -0
  13. pixeltable/catalog/table.py +695 -0
  14. pixeltable/catalog/table_version.py +1026 -0
  15. pixeltable/catalog/table_version_path.py +133 -0
  16. pixeltable/catalog/view.py +203 -0
  17. pixeltable/dataframe.py +749 -0
  18. pixeltable/env.py +466 -0
  19. pixeltable/exceptions.py +17 -0
  20. pixeltable/exec/__init__.py +10 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +116 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +94 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +73 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +226 -0
  31. pixeltable/exprs/__init__.py +25 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +114 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +199 -0
  39. pixeltable/exprs/expr.py +594 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +382 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +96 -0
  44. pixeltable/exprs/in_predicate.py +96 -0
  45. pixeltable/exprs/inline_array.py +109 -0
  46. pixeltable/exprs/inline_dict.py +103 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +66 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +329 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/similarity_expr.py +65 -0
  56. pixeltable/exprs/type_cast.py +53 -0
  57. pixeltable/exprs/variable.py +45 -0
  58. pixeltable/ext/__init__.py +5 -0
  59. pixeltable/ext/functions/yolox.py +92 -0
  60. pixeltable/func/__init__.py +7 -0
  61. pixeltable/func/aggregate_function.py +197 -0
  62. pixeltable/func/callable_function.py +113 -0
  63. pixeltable/func/expr_template_function.py +99 -0
  64. pixeltable/func/function.py +141 -0
  65. pixeltable/func/function_registry.py +227 -0
  66. pixeltable/func/globals.py +46 -0
  67. pixeltable/func/nos_function.py +202 -0
  68. pixeltable/func/signature.py +162 -0
  69. pixeltable/func/udf.py +164 -0
  70. pixeltable/functions/__init__.py +95 -0
  71. pixeltable/functions/eval.py +215 -0
  72. pixeltable/functions/fireworks.py +34 -0
  73. pixeltable/functions/huggingface.py +167 -0
  74. pixeltable/functions/image.py +16 -0
  75. pixeltable/functions/openai.py +289 -0
  76. pixeltable/functions/pil/image.py +147 -0
  77. pixeltable/functions/string.py +13 -0
  78. pixeltable/functions/together.py +143 -0
  79. pixeltable/functions/util.py +52 -0
  80. pixeltable/functions/video.py +62 -0
  81. pixeltable/globals.py +425 -0
  82. pixeltable/index/__init__.py +2 -0
  83. pixeltable/index/base.py +51 -0
  84. pixeltable/index/embedding_index.py +168 -0
  85. pixeltable/io/__init__.py +3 -0
  86. pixeltable/io/hf_datasets.py +188 -0
  87. pixeltable/io/pandas.py +148 -0
  88. pixeltable/io/parquet.py +192 -0
  89. pixeltable/iterators/__init__.py +3 -0
  90. pixeltable/iterators/base.py +52 -0
  91. pixeltable/iterators/document.py +432 -0
  92. pixeltable/iterators/video.py +88 -0
  93. pixeltable/metadata/__init__.py +58 -0
  94. pixeltable/metadata/converters/convert_10.py +18 -0
  95. pixeltable/metadata/converters/convert_12.py +3 -0
  96. pixeltable/metadata/converters/convert_13.py +41 -0
  97. pixeltable/metadata/schema.py +234 -0
  98. pixeltable/plan.py +620 -0
  99. pixeltable/store.py +424 -0
  100. pixeltable/tool/create_test_db_dump.py +184 -0
  101. pixeltable/tool/create_test_video.py +81 -0
  102. pixeltable/type_system.py +846 -0
  103. pixeltable/utils/__init__.py +17 -0
  104. pixeltable/utils/arrow.py +98 -0
  105. pixeltable/utils/clip.py +18 -0
  106. pixeltable/utils/coco.py +136 -0
  107. pixeltable/utils/documents.py +69 -0
  108. pixeltable/utils/filecache.py +195 -0
  109. pixeltable/utils/help.py +11 -0
  110. pixeltable/utils/http_server.py +70 -0
  111. pixeltable/utils/media_store.py +76 -0
  112. pixeltable/utils/pytorch.py +91 -0
  113. pixeltable/utils/s3.py +13 -0
  114. pixeltable/utils/sql.py +17 -0
  115. pixeltable/utils/transactional_directory.py +35 -0
  116. pixeltable-0.0.0.dist-info/LICENSE +18 -0
  117. pixeltable-0.0.0.dist-info/METADATA +131 -0
  118. pixeltable-0.0.0.dist-info/RECORD +119 -0
  119. pixeltable-0.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,76 @@
1
+ import glob
2
+ import os
3
+ import re
4
+ import shutil
5
+ import uuid
6
+ from typing import Optional, List, Tuple, Dict
7
+ from pathlib import Path
8
+ from collections import defaultdict
9
+ from uuid import UUID
10
+
11
+ from pixeltable.env import Env
12
+
13
+
14
class MediaStore:
    """
    Utilities to manage media files stored in Env.media_dir

    Media file names are a composite of: table id, column id, version, uuid:
    the table id/column id/version are redundant but useful for identifying all files for a table
    or all files created for a particular version of a table
    """
    pattern = re.compile(r'([0-9a-fA-F]+)_(\d+)_(\d+)_([0-9a-fA-F]+)')  # tbl_id, col_id, version, uuid

    @classmethod
    def prepare_media_path(cls, tbl_id: UUID, col_id: int, version: int, ext: Optional[str] = None) -> Path:
        """
        Construct a new, unique Path name for a persisted media file, and create the parent directory
        for the new Path if it does not already exist. The Path will reside in
        the environment's media_dir.

        Args:
            tbl_id: id of the table the file belongs to
            col_id: id of the column the file belongs to
            version: table version the file is created for
            ext: optional file extension, appended verbatim to the file name

        Returns:
            The path of the new file; the file itself is not created.
        """
        id_hex = uuid.uuid4().hex
        # files are spread over two levels of subdirectories derived from the random uuid
        parent = Env.get().media_dir / tbl_id.hex / id_hex[0:2] / id_hex[0:4]
        parent.mkdir(parents=True, exist_ok=True)
        return parent / f'{tbl_id.hex}_{col_id}_{version}_{id_hex}{ext or ""}'

    @classmethod
    def delete(cls, tbl_id: UUID, version: Optional[int] = None) -> None:
        """Delete all files belonging to tbl_id. If version is not None, delete
        only those files belonging to the specified version."""
        assert tbl_id is not None
        tbl_dir = Env.get().media_dir / tbl_id.hex
        if version is None:
            # Remove the entire folder for this table id.
            if tbl_dir.exists():
                shutil.rmtree(tbl_dir)
            return
        # Remove only the elements for the specified version.
        paths = glob.glob(str(tbl_dir) + f'/**/{tbl_id.hex}_*_{version}_*', recursive=True)
        for path in paths:
            os.remove(path)

    @classmethod
    def count(cls, tbl_id: UUID) -> int:
        """
        Return number of files for given tbl_id.
        """
        paths = glob.glob(str(Env.get().media_dir / tbl_id.hex) + f'/**/{tbl_id.hex}_*', recursive=True)
        return len(paths)

    @classmethod
    def stats(cls) -> List[Tuple[UUID, int, int, int]]:
        """
        Return per-column storage statistics for all files found under media_dir.

        Returns:
            A list of (tbl_id, col_id, num_files, total_size_in_bytes) tuples, sorted by
            total size in descending order. Files whose names do not follow the media file
            naming pattern are ignored.
        """
        paths = glob.glob(str(Env.get().media_dir) + "/**", recursive=True)
        # key: (tbl_id, col_id), value: [num_files, size]
        d: Dict[Tuple[UUID, int], List[int]] = defaultdict(lambda: [0, 0])
        for p in paths:
            if os.path.isdir(p):
                continue
            matched = cls.pattern.match(Path(p).name)
            if matched is None:
                # not one of our media files (eg, a stray file created by the OS); skip it
                # rather than failing the entire scan
                continue
            tbl_id, col_id = UUID(hex=matched[1]), int(matched[2])
            file_info = os.stat(p)
            t = d[(tbl_id, col_id)]
            t[0] += 1
            t[1] += file_info.st_size
        result = [(tbl_id, col_id, num_files, size) for (tbl_id, col_id), (num_files, size) in d.items()]
        result.sort(key=lambda e: e[3], reverse=True)
        return result
@@ -0,0 +1,91 @@
1
+ import io
2
+ import pyarrow as pa
3
+ import pyarrow.parquet
4
+ import torch
5
+ import torch.utils.data
6
+ from pathlib import Path
7
+ import PIL.Image
8
+ import json
9
+ from typing import Dict, Iterator, Any
10
+ import datetime
11
+
12
+ from pixeltable.type_system import ColumnType
13
+ import numpy as np
14
+
15
class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
    """
    PyTorch dataset interface for pixeltable data.
    NB. This class must inherit from torch.utils.data.IterableDataset for it
    to work with torch.utils.data.DataLoader.
    """
    def __init__(
        self,
        path: Path,
        image_format: str,
    ):
        """
        Args:
            path: path to directory containing parquet files
            image_format: 'np' or 'pt'. 'np' is RGB uint8 array,
                            'pt' is result of torchvision.transforms.ToTensor()
        """
        super().__init__()

        self.path = path
        self.image_format = image_format
        assert image_format in ["np", "pt"]
        column_type_path = path / '.pixeltable.column_types.json'
        assert column_type_path.exists(), f"missing {column_type_path}"
        with column_type_path.open() as f:
            column_types = json.load(f)
        self.column_types = {k: ColumnType.from_dict(v) for k, v in column_types.items()}
        self.part_metadata = pa.parquet.ParquetDataset(path).files

    def _unmarshall(self, k: str, v: Any) -> Any:
        """Convert the stored value v of column k into the form handed to PyTorch."""
        col_type = self.column_types[k]
        if col_type.is_image_type():
            assert isinstance(v, bytes)
            im = PIL.Image.open(io.BytesIO(v))
            arr = np.array(im)  # will copy data to guarantee "WRITEABLE" flag assertion below.
            assert arr.flags["WRITEABLE"]

            if self.image_format == "np":
                return arr

            assert self.image_format == "pt"
            import torchvision  # pylint: disable = import-outside-toplevel

            # use arr instead of im in ToTensor() to guarantee array input
            # to torch.from_numpy is writable. Using im is a suspected cause of
            # https://github.com/pixeltable/pixeltable/issues/69
            return torchvision.transforms.ToTensor()(arr)
        elif col_type.is_json_type():
            assert isinstance(v, str)
            return json.loads(v)
        elif col_type.is_array_type():
            assert isinstance(v, np.ndarray)
            if not v.flags["WRITEABLE"]:
                v = v.copy()
            assert v.flags["WRITEABLE"]
            return v
        elif col_type.is_timestamp_type():
            # pytorch default collation only supports numeric types
            assert isinstance(v, datetime.datetime)
            return v.timestamp()
        else:
            assert not isinstance(v, np.ndarray)  # all array outputs should be handled above
            return v

    @staticmethod
    def _parts_for_worker(num_parts: int, worker_info: Any) -> Any:
        """
        Return the indices of the parquet parts that the current process should read.

        Args:
            num_parts: total number of parquet parts
            worker_info: result of torch.utils.data.get_worker_info(); None when iterating
                in the main process, otherwise an object with 'num_workers' and 'id' attributes

        Returns:
            All part indices when worker_info is None; otherwise the indices i with
            i % num_workers == id, so that the workers read disjoint sets of parts.
        """
        all_parts = range(num_parts)
        if worker_info is None:
            return all_parts
        return [i for i in all_parts if (i % worker_info.num_workers) == worker_info.id]

    def __iter__(self) -> Iterator[Dict[str, Any]]:
        """Yield one dict per row, mapping column name to the unmarshalled value."""
        import pixeltable.utils.arrow as arrow
        worker_info = torch.utils.data.get_worker_info()
        # the full list of parts has to be computed before it is filtered per worker
        part_list = self._parts_for_worker(len(self.part_metadata), worker_info)

        for part_no in part_list:
            pqf = pa.parquet.ParquetFile(self.part_metadata[part_no])
            for batch in pqf.iter_batches():
                for tup in arrow.iter_tuples(batch):
                    yield {k: self._unmarshall(k, v) for k, v in tup.items()}
pixeltable/utils/s3.py ADDED
@@ -0,0 +1,13 @@
1
+ from typing import Any
2
+
3
+
4
def get_client() -> Any:
    """
    Create a boto3 S3 client.

    Returns:
        A regular (signing) S3 client if the default boto3 session can find credentials,
        otherwise a client configured for unsigned requests.
    """
    import boto3
    import botocore
    import botocore.config

    credentials = boto3.Session().get_credentials()
    if credentials is None:
        # No credentials available, use unsigned mode
        config = botocore.config.Config(signature_version=botocore.UNSIGNED)
        return boto3.client('s3', config=config)
    # Resolve the credentials up front; errors raised here or by client creation now propagate
    # instead of being mistaken for "no credentials".
    credentials.get_frozen_credentials()
    return boto3.client('s3')  # credentials are available
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
+ import sqlalchemy as sql
4
+
5
+
6
def log_stmt(logger: logging.Logger, stmt) -> None:
    """
    Log the SQL text of stmt, compiled for the Postgres dialect, at DEBUG level.

    Args:
        logger: logger to write to
        stmt: statement object exposing a SQLAlchemy-style compile(dialect=...) method
    """
    if not logger.isEnabledFor(logging.DEBUG):
        # compiling the statement is wasted work when the message would be dropped anyway
        return
    logger.debug(f'executing {str(stmt.compile(dialect=sql.dialects.postgresql.dialect()))}')
8
+
9
def log_explain(logger: logging.Logger, stmt: 'sql.sql.ClauseElement', conn: 'sql.engine.Connection') -> None:
    """
    Run EXPLAIN for stmt on conn and log the resulting plan at DEBUG level.

    This is best-effort: any failure is logged as a warning, including the error, and is
    not propagated. Nothing is executed if DEBUG logging is disabled for logger.

    Args:
        logger: logger to write to
        stmt: statement to explain
        conn: connection used to execute the EXPLAIN
    """
    if not logger.isEnabledFor(logging.DEBUG):
        # the plan is only ever emitted at DEBUG level; don't hit the database for nothing
        return
    try:
        # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
        stmt_str = str(stmt.compile(compile_kwargs={'literal_binds': True}))
        explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
        explain_str = '\n'.join(str(row) for row in explain_result)
        logger.debug(f'SqlScanNode explain:\n{explain_str}')
    except Exception as e:
        # include the cause, otherwise the warning gives no hint about what went wrong
        logger.warning(f'EXPLAIN failed: {e}')
@@ -0,0 +1,35 @@
1
+ import shutil
2
+ from contextlib import contextmanager
3
+ from pathlib import Path
4
+ from typing import Any, Generator
5
+
6
+ import pixeltable.exceptions as excs
7
+
8
+
9
+ @contextmanager
10
+ def transactional_directory(folder_path: Path) -> Generator[Path, Any, Any]:
11
+ """
12
+ Args:
13
+ folder_path: path to the folder we want to create
14
+
15
+ Yields:
16
+ A pathlib.Path to a hidden temporary folder, which can be used to accumulate changes.
17
+ If everything succeeds, the changes are committed via an atomic move operation upon exiting the 'with' block (os.replace)
18
+ If an exception occurred, no changes are visible in the original folder.
19
+
20
+ Example:
21
+ folder_path = pathlib.Path("path/to/folder")
22
+ with transactional_folder(folder_path) as temp_folder:
23
+ (temp_folder / "subfolder1").mkdir()
24
+ (temp_folder / "subfolder2").mkdir()
25
+ """
26
+ if folder_path.exists():
27
+ raise excs.Error(f"Folder {folder_path} already exists")
28
+
29
+ tmp_folder = folder_path.parent / f".tmp_{folder_path.name}"
30
+ # Remove the temporary folder if it already exists, eg if the previous run crashed
31
+ shutil.rmtree(str(tmp_folder), ignore_errors=True)
32
+ tmp_folder.mkdir(parents=True)
33
+ yield tmp_folder
34
+ # If everything succeeds, `commit' the changes by moving the temporary folder
35
+ tmp_folder.rename(folder_path)
@@ -0,0 +1,18 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+
6
+ Copyright 2023 Marcel Kornacker
7
+
8
+ Licensed under the Apache License, Version 2.0 (the "License");
9
+ you may not use this file except in compliance with the License.
10
+ You may obtain a copy of the License at
11
+
12
+ http://www.apache.org/licenses/LICENSE-2.0
13
+
14
+ Unless required by applicable law or agreed to in writing, software
15
+ distributed under the License is distributed on an "AS IS" BASIS,
16
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ See the License for the specific language governing permissions and
18
+ limitations under the License.
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.0.0
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: numpy (>=1.25)
21
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
22
+ Requires-Dist: pandas (>=2.0,<3.0)
23
+ Requires-Dist: pgserver (==0.1.2)
24
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
25
+ Requires-Dist: pillow (>=9.3.0)
26
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
27
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
28
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
29
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
30
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
31
+ Requires-Dist: setuptools (==69.1.1)
32
+ Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64.1,<5.0.0)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ &nbsp;&nbsp;
45
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
46
+
47
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
48
+ </div>
49
+
50
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
51
+
52
+ ## What problems does Pixeltable solve?
53
+
54
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
55
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
56
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
57
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
58
+ building and productionizing AI applications.
59
+
60
+ ## ⚡Quick Start
61
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
62
+
63
+ ### Pixeltable Basics
64
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
65
+
66
+ [![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)&nbsp;&nbsp;
67
+ <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
68
+
69
+
70
+ ## 💾 Installation
71
+ Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, macOS, or Windows.
72
+
73
+ ```
74
+ pip install pixeltable
75
+ ```
76
+
77
+ To verify that it's working:
78
+
79
+ ```
80
+ import pixeltable as pxt
81
+ pxt.init()
82
+ ```
83
+
84
+ For more detailed installation instructions, see the
85
+ [Getting Started with Pixeltable](https://pixeltable.github.io/pixeltable/getting-started/)
86
+ guide. Then, check out the
87
+ [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
88
+ tutorial for a tour of its most important features.
89
+
90
+ ## Why should you use Pixeltable?
91
+
92
+ - It gives you transparency and reproducibility
93
+ - All generated data is automatically recorded and versioned
94
+ - You will never need to re-run a workload because you lost track of the input data
95
+ - It saves you money
96
+ - All data changes are automatically incremental
97
+ - You never need to re-run pipelines from scratch because you’re adding data
98
+ - It integrates with any existing Python code or libraries
99
+ - Bring your ever-changing code and workloads
100
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
101
+
102
+ ## Example Use Cases
103
+
104
+ * Interact with video data at the frame level without having to think about frame extraction,
105
+ intermediate file storage, or storage space explosion.
106
+ * Augment your data incrementally and interactively with built-in functions and UDFs, such as
107
+ image transformations, model inference, and visualizations, without having to think about data pipelines,
108
+ incremental updates, or capturing function output.
109
+ * Interact with all the data relevant to your AI application (video, images, documents, audio, structured data, JSON) through
110
+ a simple dataframe-style API directly in Python. This includes:
111
+ * similarity search on embeddings, supported by high-dimensional vector indexing
112
+ * path expressions and transformations on JSON data
113
+ * PIL and OpenCV image operations
114
+ * assembling frames into videos
115
+ * Perform keyword and image similarity search at the video frame level without having to worry about frame
116
+ storage.
117
+ * Access all Pixeltable-resident data directly as a PyTorch dataset in your training scripts.
118
+ * Understand the compute and storage costs of your data at the granularity of individual augmentations and
119
+ get cost projections before adding new data and new augmentations.
120
+ * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
121
+ and to ensure reproducibility.
122
+
123
+ ## Contributions & Feedback
124
+
125
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
126
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
127
+
128
+ ## :classical_building: License
129
+
130
+ This library is licensed under the Apache 2.0 License.
131
+
@@ -0,0 +1,119 @@
1
+ pixeltable/__init__.py,sha256=TGVKvEpabDRHNqsTYxdDgwU9rsDC9qLDL3D_tjyWemw,1040
2
+ pixeltable/__version__.py,sha256=8RQO_m3KZgcE-FrR49saPShq7cXAGY5pUQUQ8HUJSOc,112
3
+ pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
4
+ pixeltable/catalog/catalog.py,sha256=0TYWB1R6YBp9qCkWF7kCcX2Yw70UuburKKIemv5L1Js,7908
5
+ pixeltable/catalog/column.py,sha256=2qZ3Wxvi-kwiLseeZtXFiXWrHHzzktUa_2skPSP1FGs,8088
6
+ pixeltable/catalog/dir.py,sha256=pG1nMpG123POo6WMSHhAmnwXOQ26uUJfUcbzL-Jb4ws,919
7
+ pixeltable/catalog/globals.py,sha256=yLEGNbsSnLzjWNHVJacfjA9hbw13Q6QXLOSCRmdTlq0,943
8
+ pixeltable/catalog/insertable_table.py,sha256=1yGwuoMNbtBJQRDweEoH7hgxAI4uOB1k2VGpBAmsJxQ,8168
9
+ pixeltable/catalog/named_function.py,sha256=a96gnKtx-nz5_MzDIiD4t4Hxqdjkg9ZtijRQxvWA5WQ,1147
10
+ pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
11
+ pixeltable/catalog/path_dict.py,sha256=xfvxg1Ze5jZCARUGASF2DRbQPh7pRVTYhuJ_u82gYUo,5941
12
+ pixeltable/catalog/schema_object.py,sha256=-UxmPLbuEBqJiJi_GGRbFdr7arAFxTqs4bt6TFmSt3M,1059
13
+ pixeltable/catalog/table.py,sha256=LgTWRrZcqx85bkW1kW9wWyDvFp15m8e5PfBYkY6E8JE,31252
14
+ pixeltable/catalog/table_version.py,sha256=7_sRbeado0Xu6hbsIAwiz3D35s-RdD5KI6csxhg5DMg,48865
15
+ pixeltable/catalog/table_version_path.py,sha256=2Ofzd0n36flcNm86KWwIWDBAfgnV5Z-FxAHdMSPgMLc,5482
16
+ pixeltable/catalog/view.py,sha256=BIL3s4DV3tWbOcqtqnhn46B2UvLaBhppfJUlNEt5nec,9734
17
+ pixeltable/dataframe.py,sha256=lzSzR7mi9C4BO39fNXYo64k3KxILyG_Z7eET6DXTgKY,31922
18
+ pixeltable/env.py,sha256=8OjBv3Jvd-74KepYYyIpLmhl2S85g-S14qDIywcbRpo,17971
19
+ pixeltable/exceptions.py,sha256=MSP9zeL0AmXT93XqjdvgGN4rzno1_KRrGriq6hpemnw,376
20
+ pixeltable/exec/__init__.py,sha256=RK7SKvrQ7Ky3G_LXDP4Bf7lHmMM_uYZl8dJaZYs0FjY,454
21
+ pixeltable/exec/aggregation_node.py,sha256=cf6rVAgrGh_uaMrCIgXJIwQTmbcboJlnrH_MmPIQSd0,3321
22
+ pixeltable/exec/cache_prefetch_node.py,sha256=d5pEuR6AtJQkEVy9X3XeYFI_q0szMtoNAH96vYdtBE0,5241
23
+ pixeltable/exec/component_iteration_node.py,sha256=Uz6zEeaJMcbvF3S0W0qmLI_uWsZsaSspHKNzuAMrasg,4069
24
+ pixeltable/exec/data_row_batch.py,sha256=1IDYHBkSQ60dwOnAGnS-Wpp3AsnbMqKcY40zUT7ku-Q,3392
25
+ pixeltable/exec/exec_context.py,sha256=E82Q2bJMJ1ulud5L5D9dh2Z8vEUQ659SgT614YKDO34,924
26
+ pixeltable/exec/exec_node.py,sha256=Hji5NCPHfa50IWyjladXrBm4I0zseV7AV4cVdx0Q8Ew,2170
27
+ pixeltable/exec/expr_eval_node.py,sha256=fEzbeZ0J-kylRQ2M0nSlUeLFRTHlwNzlvBo1yqWQ2rg,10856
28
+ pixeltable/exec/in_memory_data_node.py,sha256=SNM2AbMQSjmGDWMNJUf_5MmlXWE3P80lsuUjNfzQckA,3171
29
+ pixeltable/exec/media_validation_node.py,sha256=OKfRyKpcn7AZdACy_HD4NsDC87ZfNFs1tdrQz2NiIVw,1514
30
+ pixeltable/exec/sql_scan_node.py,sha256=s2PVls7gfSL0zQsdDWz8dC7MAE6KWeV_EDBCilx8Ros,10250
31
+ pixeltable/exprs/__init__.py,sha256=7dwrdk-NpF66OT-m5yNtFEhq-o1T476dnXHjluw2K1s,951
32
+ pixeltable/exprs/arithmetic_expr.py,sha256=sWBYCBKI6IHj9ASwDcm2BlkQ5gleVtKtmpiPvzFNBJM,4386
33
+ pixeltable/exprs/array_slice.py,sha256=VmWc6iFusrM85MjyEBBCfXG1Jnt8-Gr6-J88BXxNoOE,2131
34
+ pixeltable/exprs/column_property_ref.py,sha256=0PHiBys0fxe2LgjaMId5UHob4E-ZggyPLnnW41RgA0E,2706
35
+ pixeltable/exprs/column_ref.py,sha256=t_iJzai-x1Ds2ca3u5Qh3lzBqidP23e80Y7KcO_BDkA,5333
36
+ pixeltable/exprs/comparison.py,sha256=rAlGUF0AuzkYGspewJPu-6aaQZa4dVMJYGbMwqKyBIc,2964
37
+ pixeltable/exprs/compound_predicate.py,sha256=Gh22MKi625m5A_RunVRd-a1XFi-fitikqBVz2VNXKrs,3830
38
+ pixeltable/exprs/data_row.py,sha256=2kGnZhDna4bkgzb2y9iDnkLFe8lXSk59QAf9zW2Z-Y0,8278
39
+ pixeltable/exprs/expr.py,sha256=VYo7CqZJLb9Rsna4FbMBy6_KAZdbUy1oh6AmORpSUGw,24190
40
+ pixeltable/exprs/expr_set.py,sha256=Q64Q2yI0CTq2Ma_E-BUYlMotSstVuMm4OFZnBCedHRk,1222
41
+ pixeltable/exprs/function_call.py,sha256=JO0QwolyI60aG3t0zCqAxsaRWacvw6V6adNtY5WbyTo,17207
42
+ pixeltable/exprs/globals.py,sha256=liPgUTccTkyDRs4kG2r9ehRkwGZERmrCSJDZRdJoMqk,1537
43
+ pixeltable/exprs/image_member_access.py,sha256=KSYdTIaLh53dNRjv3SJFchPMPo7o5diJSQkV1NsyB4Y,3547
44
+ pixeltable/exprs/in_predicate.py,sha256=burxrBCH1MXqU-wrNWJvD0PRGzJdWy85intOSftQK54,3696
45
+ pixeltable/exprs/inline_array.py,sha256=293WuUEhYXrcp8-AnPDVIWQBPQMrPviB88A619Ls_Es,4499
46
+ pixeltable/exprs/inline_dict.py,sha256=TWYokJ14Nq-evODcYFVO471WSEDbz6cJqIdRb2PkbZQ,3885
47
+ pixeltable/exprs/is_null.py,sha256=nvpOXtQj1UeYJpkCWzbaGuQElzrA2HSG3XNQugOv-pw,1041
48
+ pixeltable/exprs/json_mapper.py,sha256=I60VNgus64ai80gnFCIsRn0VRWYXMkqH5VNvnATsN9s,4559
49
+ pixeltable/exprs/json_path.py,sha256=Wz_5zFsyc9TPhsSbsDjDmQ3Nb0uVIwMCx5nh-cQYBiE,6526
50
+ pixeltable/exprs/literal.py,sha256=5NNza-WL1dd3hNznwwkr_yAcTGXSIRYUszGfy30lruI,2396
51
+ pixeltable/exprs/object_ref.py,sha256=eTcx84aWRI59fIiGvbdv3_cfL0XW4xEFQ4lwpLpJkM8,1250
52
+ pixeltable/exprs/predicate.py,sha256=OSDgjfSqiK7J_5GZMUXMvjfyomKEGi0JNxeB073SGXw,1859
53
+ pixeltable/exprs/row_builder.py,sha256=cpQa7GHR2dZYxhCAwZBfz-MqO0oP-NS44mAYoVUOt7A,15662
54
+ pixeltable/exprs/rowid_ref.py,sha256=74w4rEy21YysTVbyKNc3op-pYFqDAx8VJdtl7ZPpxHs,4268
55
+ pixeltable/exprs/similarity_expr.py,sha256=LERVkFU8BwIi_S9IhAKXdFJSizJ2wI_0uN4_1AMZb1c,2664
56
+ pixeltable/exprs/type_cast.py,sha256=JMg8p1qYoFfiAXfJPSbTEnfrK7lRO_JMaqlPHOrhNQU,1793
57
+ pixeltable/exprs/variable.py,sha256=Kg_O4ytcHYZFijIyMHYBJn063cTKU1-YE583FAz8Qaw,1361
58
+ pixeltable/ext/__init__.py,sha256=0uugfuME1FybVo-MdxaVNGagRjhcvNTnv5MZUem6Cyo,269
59
+ pixeltable/ext/functions/yolox.py,sha256=LwrOtXMT57AP6-IkmRZ_12yN5-EiFRpTuh4Sexm8x24,3131
60
+ pixeltable/func/__init__.py,sha256=LCB5iB2aZyMrX-hn_oNBYnB1SE60t50hE23av_v2F50,348
61
+ pixeltable/func/aggregate_function.py,sha256=nEZ3WuVx3oabVK8yvqq6NNneI9USOkB8bL7etwQCUh4,9356
62
+ pixeltable/func/callable_function.py,sha256=nEEmXFvd8TW9TBPbDnC3q8phj9ARokAsB-OJ1_hTkGo,4612
63
+ pixeltable/func/expr_template_function.py,sha256=r0ML3IVGDgGM-7KtutnwnHBCmcDMfpblrJugh26A7Uc,4266
64
+ pixeltable/func/function.py,sha256=fANPfafLwY0Mq6CF21VYbuF-hRxxsPLHn5waoj1mOGY,5611
65
+ pixeltable/func/function_registry.py,sha256=1ibSQxEPm3Zd3r497vSlckQiDG9sfCnyJx3zcSm9t7c,11456
66
+ pixeltable/func/globals.py,sha256=sEwn6lGgHMp6VQORb_P5qRd_-Q2_bUSqvqM9-XPN_ec,1483
67
+ pixeltable/func/nos_function.py,sha256=HzIKK4XjTo1E6pML-EbhuX3u_LYibFWUuTkIxoIih7c,9650
68
+ pixeltable/func/signature.py,sha256=erOPFuSuaxkXnRyFd3nCYLuprUWcYFox3Hk3ZKUPWfM,6697
69
+ pixeltable/func/udf.py,sha256=92v3ArcZShR5D5xVWm5XB8HumCrPgc7frUrbu1yEPyo,6484
70
+ pixeltable/functions/__init__.py,sha256=uO-XB4QUbx3Jjs9GoaTXoJY2jn0AuXTL32YLkL_3_CI,3297
71
+ pixeltable/functions/eval.py,sha256=_2FANDJqwtIDzTxtcKc0Yacf7b4LTAjyy2fPDw1FG_s,8404
72
+ pixeltable/functions/fireworks.py,sha256=e_rCITg18yNndNI8TJPXRSN6DR0hYWT-_dUavoPuyfc,908
73
+ pixeltable/functions/huggingface.py,sha256=-a679an4nQyHChgQvvsfIoYGMQ_AfDmKpg2Ifc4FuV8,6458
74
+ pixeltable/functions/image.py,sha256=xR_S_0BuX6Ycc5E366GpOfP0JptD7beQwHE_fLl8ZVM,431
75
+ pixeltable/functions/openai.py,sha256=yvlxRd-9ViC4osJH0YWu18CuX83__NPPUVazXM3kJ8o,7972
76
+ pixeltable/functions/pil/image.py,sha256=6eNdMy2lZliFb8Lw12aBRUaShH07VEsFmhHSG21Jjt4,5992
77
+ pixeltable/functions/string.py,sha256=RYOgZwifjC943YloEMi3PdflnjFqOYB2FddrUvzgtXs,516
78
+ pixeltable/functions/together.py,sha256=HeiLQm0GCSgv5Jvdmw_Bqd7vKRBx-r6UazdseoEKMVg,4173
79
+ pixeltable/functions/util.py,sha256=djVqro_W5M_jUgYWzZZaXXH3lWaAWj6q-hrpzFl_Ko8,1860
80
+ pixeltable/functions/video.py,sha256=WZF4G3tV-_LfRQHUinXe_rnu1-4N68Ht60JCR_s7Bew,2403
81
+ pixeltable/globals.py,sha256=HWuz_erzAlBLkAYRqTJKphonSDLdNBTm1ZvVcxeFtwU,13606
82
+ pixeltable/index/__init__.py,sha256=tlJENOzEq6p_8xu-nX1mN4Zt9asw4481Znl5ZXYIKwc,72
83
+ pixeltable/index/base.py,sha256=MM8jLlr68e9M_R27EVsNxC7W7OVay27TOrnxrOlXz2s,1431
84
+ pixeltable/index/embedding_index.py,sha256=jxCTmW-KSNDNbFHbkQHYPI-CKTA_b6rTETH2t_qEFvM,7565
85
+ pixeltable/io/__init__.py,sha256=ejAAeWC8gIvNVxsOj7yNl4-3NHileGm-FKar9xvCy48,148
86
+ pixeltable/io/hf_datasets.py,sha256=h5M1NkXOvEU8kaeT3AON1A18Vmhnc1lVo5a3TZ5AAic,8004
87
+ pixeltable/io/pandas.py,sha256=cDHUDW2CGiBbsEJB9zE5vkXopTKxDdI-CZxNcp0OnIk,6478
88
+ pixeltable/io/parquet.py,sha256=Z1b92gsPeCBf4P9_jgWWHAEHtu51nhuC8nSJgoKiywQ,8150
89
+ pixeltable/iterators/__init__.py,sha256=kokLguXBY_nxBTqUiXZVvCxTv-vGsX4cK8tgIbsW5G8,108
90
+ pixeltable/iterators/base.py,sha256=4vss-RUq_L7ZQx9o99jZvRtFqPjtVXdwsuPtZ4JW_4s,1676
91
+ pixeltable/iterators/document.py,sha256=KmnrBJ7W39Xknj_pna7H--HbNztPmYFAleGhk9qsegY,19318
92
+ pixeltable/iterators/video.py,sha256=K39ZAIMVvqzGkE30gF2CAbIOOgoJnlBpmIPl4AnWbmY,3474
93
+ pixeltable/metadata/__init__.py,sha256=rBX4sIEfUlv11hDqgAOUl067l--zEeu-HQuGCTRZrfM,2227
94
+ pixeltable/metadata/converters/convert_10.py,sha256=0mSGCn7vqtef63riPi9msUaaUvsSQIj-NFj9QFDYPdA,733
95
+ pixeltable/metadata/converters/convert_12.py,sha256=g9rHTcKlDQZbM3_k4eBv0FBdWmQXHWCnMwx1_l6KpMI,107
96
+ pixeltable/metadata/converters/convert_13.py,sha256=FEgOH5PKf05xVoCaioDDDHOSuoWPyBzodojmsSMMZ5U,1366
97
+ pixeltable/metadata/schema.py,sha256=uuk3rzCpYr99PzEO1pIXe8nMaOoTJtwRfhnqgQ_MdDs,8335
98
+ pixeltable/plan.py,sha256=isb2-ECB3TwvHxZIR9lEKZ0wRKTCvRFp9NQDYhULxdI,32342
99
+ pixeltable/store.py,sha256=Mau3tRfXn6z1J3rzvtU3R4_-UjD-TMTv1FK8OjPMqp0,19394
100
+ pixeltable/tool/create_test_db_dump.py,sha256=yI62rFk7StF1cI8BKN1_hf6mkB6e4ndJH95gXeJOQFA,6847
101
+ pixeltable/tool/create_test_video.py,sha256=OLfccymYReIpzE8osZn4rQvLXxxiPC_l0vc06U74hVM,2899
102
+ pixeltable/type_system.py,sha256=DdI6g-ouqny8PdokEClxKwEwqjbWFDAxNrhYGh7pQLo,29224
103
+ pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
104
+ pixeltable/utils/arrow.py,sha256=83_7aG5UR2qtTktw_otLkQs-RQbLk0VVM0JLJkbweNU,3692
105
+ pixeltable/utils/clip.py,sha256=HXXWFBJXW9XysdMk9_3hP1V1S-3B8Hwd5rNMbJFjjnI,720
106
+ pixeltable/utils/coco.py,sha256=mk1cxjKYQC0ABm2ZQ9SNu9MvBPECmmKvnASpxnFXdL0,5604
107
+ pixeltable/utils/documents.py,sha256=Q7e5U2Hk0go83MdKzD_MIiMscwbcFsLMgRw2IU_vQF4,2213
108
+ pixeltable/utils/filecache.py,sha256=UoNONG2VaAc2IBB0e3sQdsvyOPOes2XSDc5_CsA4qek,7839
109
+ pixeltable/utils/help.py,sha256=cCnxJ4VP9MJ57iDqExmnDcM-JG3a1lw_q7g-D7bpSVI,252
110
+ pixeltable/utils/http_server.py,sha256=WQ5ILMzlz4TlwI9j5YqAPgEZyhrN1GytMNDbLD9occk,2422
111
+ pixeltable/utils/media_store.py,sha256=x71wnJDZDHcdd13VCfL4AkHQ6IJB41gNA-zBvXJwFos,3116
112
+ pixeltable/utils/pytorch.py,sha256=BR4tgfUWw-2rwWTOgzXj5qdMBpe1Arpp5SK4ax6jjpk,3483
113
+ pixeltable/utils/s3.py,sha256=rkanuhk9DWvSfmbOLQW1j1Iov4sl2KhxGGKN-AJ8LSE,432
114
+ pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
115
+ pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
116
+ pixeltable-0.0.0.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
117
+ pixeltable-0.0.0.dist-info/METADATA,sha256=g3cAqcj2eWyr0ZbD9bXu5cJShOnXDKK_neFLXgUxcMo,6317
118
+ pixeltable-0.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
119
+ pixeltable-0.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any