pixeltable 0.4.12__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +11 -1
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/table.py +72 -116
- pixeltable/catalog/table_metadata.py +96 -0
- pixeltable/env.py +1 -0
- pixeltable/globals.py +3 -4
- pixeltable/io/__init__.py +2 -1
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/parquet.py +9 -89
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/utils/arrow.py +97 -2
- pixeltable/utils/lancedb.py +88 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/METADATA +162 -127
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/RECORD +17 -14
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.12.dist-info → pixeltable-0.4.13.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
# ruff: noqa: F401
|
|
2
2
|
|
|
3
3
|
from .__version__ import __version__, __version_tuple__
|
|
4
|
-
from .catalog import
|
|
4
|
+
from .catalog import (
|
|
5
|
+
Column,
|
|
6
|
+
ColumnMetadata,
|
|
7
|
+
IndexMetadata,
|
|
8
|
+
InsertableTable,
|
|
9
|
+
Table,
|
|
10
|
+
TableMetadata,
|
|
11
|
+
UpdateStatus,
|
|
12
|
+
VersionMetadata,
|
|
13
|
+
View,
|
|
14
|
+
)
|
|
5
15
|
from .dataframe import DataFrame
|
|
6
16
|
from .exceptions import Error, ExprEvalError, PixeltableWarning
|
|
7
17
|
from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -8,7 +8,8 @@ from .insertable_table import InsertableTable
|
|
|
8
8
|
from .named_function import NamedFunction
|
|
9
9
|
from .path import Path
|
|
10
10
|
from .schema_object import SchemaObject
|
|
11
|
-
from .table import
|
|
11
|
+
from .table import Table
|
|
12
|
+
from .table_metadata import ColumnMetadata, IndexMetadata, TableMetadata, VersionMetadata
|
|
12
13
|
from .table_version import TableVersion
|
|
13
14
|
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
14
15
|
from .table_version_path import TableVersionPath
|
pixeltable/catalog/table.py
CHANGED
|
@@ -7,9 +7,7 @@ import json
|
|
|
7
7
|
import logging
|
|
8
8
|
from keyword import iskeyword as is_python_keyword
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import TYPE_CHECKING, Any,
|
|
11
|
-
|
|
12
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
|
|
13
11
|
from uuid import UUID
|
|
14
12
|
|
|
15
13
|
import pandas as pd
|
|
@@ -17,6 +15,13 @@ import sqlalchemy as sql
|
|
|
17
15
|
|
|
18
16
|
import pixeltable as pxt
|
|
19
17
|
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
18
|
+
from pixeltable.catalog.table_metadata import (
|
|
19
|
+
ColumnMetadata,
|
|
20
|
+
EmbeddingIndexParams,
|
|
21
|
+
IndexMetadata,
|
|
22
|
+
TableMetadata,
|
|
23
|
+
VersionMetadata,
|
|
24
|
+
)
|
|
20
25
|
from pixeltable.metadata import schema
|
|
21
26
|
from pixeltable.metadata.utils import MetadataUtils
|
|
22
27
|
|
|
@@ -37,6 +42,9 @@ from .table_version_handle import TableVersionHandle
|
|
|
37
42
|
from .table_version_path import TableVersionPath
|
|
38
43
|
from .update_status import UpdateStatus
|
|
39
44
|
|
|
45
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
46
|
+
|
|
47
|
+
|
|
40
48
|
if TYPE_CHECKING:
|
|
41
49
|
import torch.utils.data
|
|
42
50
|
|
|
@@ -95,7 +103,7 @@ class Table(SchemaObject):
|
|
|
95
103
|
|
|
96
104
|
return op()
|
|
97
105
|
|
|
98
|
-
def _get_metadata(self) ->
|
|
106
|
+
def _get_metadata(self) -> TableMetadata:
|
|
99
107
|
columns = self._tbl_version_path.columns()
|
|
100
108
|
column_info: dict[str, ColumnMetadata] = {}
|
|
101
109
|
for col in columns:
|
|
@@ -1690,43 +1698,35 @@ class Table(SchemaObject):
|
|
|
1690
1698
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1691
1699
|
return list(self._get_schema().keys())
|
|
1692
1700
|
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
'updates': ts.IntType(nullable=True),
|
|
1700
|
-
'deletes': ts.IntType(nullable=True),
|
|
1701
|
-
'errors': ts.IntType(nullable=True),
|
|
1702
|
-
'computed': ts.IntType(),
|
|
1703
|
-
'schema_change': ts.StringType(),
|
|
1704
|
-
}
|
|
1705
|
-
|
|
1706
|
-
def history(self, n: Optional[int] = None) -> pixeltable.dataframe.DataFrameResultSet:
|
|
1707
|
-
"""Returns rows of information about the versions of this table, most recent first.
|
|
1701
|
+
def get_versions(self, n: Optional[int] = None) -> list[VersionMetadata]:
|
|
1702
|
+
"""
|
|
1703
|
+
Returns information about versions of this table, most recent first.
|
|
1704
|
+
|
|
1705
|
+
`get_versions()` is intended for programmatic access to version metadata; for human-readable
|
|
1706
|
+
output, use [`history()`][pixeltable.Table.history] instead.
|
|
1708
1707
|
|
|
1709
1708
|
Args:
|
|
1710
|
-
n:
|
|
1709
|
+
n: if specified, will return at most `n` versions
|
|
1711
1710
|
|
|
1712
|
-
|
|
1713
|
-
|
|
1711
|
+
Returns:
|
|
1712
|
+
A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
|
|
1713
|
+
recent first.
|
|
1714
1714
|
|
|
1715
|
-
|
|
1715
|
+
Examples:
|
|
1716
|
+
Retrieve metadata about all versions of the table `tbl`:
|
|
1716
1717
|
|
|
1717
|
-
|
|
1718
|
+
>>> tbl.get_versions()
|
|
1718
1719
|
|
|
1719
|
-
|
|
1720
|
+
Retrieve metadata about the most recent 5 versions of the table `tbl`:
|
|
1720
1721
|
|
|
1721
|
-
|
|
1722
|
-
A list of information about each version, ordered from most recent to oldest version.
|
|
1722
|
+
>>> tbl.get_versions(n=5)
|
|
1723
1723
|
"""
|
|
1724
1724
|
from pixeltable.catalog import Catalog
|
|
1725
1725
|
|
|
1726
1726
|
if n is None:
|
|
1727
1727
|
n = 1_000_000_000
|
|
1728
1728
|
if not isinstance(n, int) or n < 1:
|
|
1729
|
-
raise excs.Error(f'Invalid value for n
|
|
1729
|
+
raise excs.Error(f'Invalid value for `n`: {n}')
|
|
1730
1730
|
|
|
1731
1731
|
# Retrieve the table history components from the catalog
|
|
1732
1732
|
tbl_id = self._id
|
|
@@ -1744,104 +1744,60 @@ class Table(SchemaObject):
|
|
|
1744
1744
|
else:
|
|
1745
1745
|
over_count = 0
|
|
1746
1746
|
|
|
1747
|
-
|
|
1747
|
+
metadata_dicts: list[VersionMetadata] = []
|
|
1748
1748
|
for vers_md in vers_list[0 : len(vers_list) - over_count]:
|
|
1749
1749
|
version = vers_md.version_md.version
|
|
1750
|
-
schema_change = md_dict.get(version,
|
|
1750
|
+
schema_change = md_dict.get(version, None)
|
|
1751
1751
|
update_status = vers_md.version_md.update_status
|
|
1752
1752
|
if update_status is None:
|
|
1753
1753
|
update_status = UpdateStatus()
|
|
1754
|
-
change_type = 'schema' if schema_change
|
|
1755
|
-
if change_type == '':
|
|
1756
|
-
change_type = 'data'
|
|
1754
|
+
change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
|
|
1757
1755
|
rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1756
|
+
metadata_dicts.append(
|
|
1757
|
+
VersionMetadata(
|
|
1758
|
+
version=version,
|
|
1759
|
+
created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
|
|
1760
|
+
user=vers_md.version_md.user,
|
|
1761
|
+
change_type=change_type,
|
|
1762
|
+
inserts=rcs.ins_rows,
|
|
1763
|
+
updates=rcs.upd_rows,
|
|
1764
|
+
deletes=rcs.del_rows,
|
|
1765
|
+
errors=rcs.num_excs,
|
|
1766
|
+
computed=rcs.computed_values,
|
|
1767
|
+
schema_change=schema_change,
|
|
1768
|
+
)
|
|
1769
|
+
)
|
|
1771
1770
|
|
|
1772
|
-
return
|
|
1771
|
+
return metadata_dicts
|
|
1772
|
+
|
|
1773
|
+
def history(self, n: Optional[int] = None) -> pd.DataFrame:
|
|
1774
|
+
"""
|
|
1775
|
+
Returns a human-readable report about versions of this table.
|
|
1776
|
+
|
|
1777
|
+
`history()` is intended for human-readable output of version metadata; for programmatic access,
|
|
1778
|
+
use [`get_versions()`][pixeltable.Table.get_versions] instead.
|
|
1779
|
+
|
|
1780
|
+
Args:
|
|
1781
|
+
n: if specified, will return at most `n` versions
|
|
1782
|
+
|
|
1783
|
+
Returns:
|
|
1784
|
+
A report with information about each version, one per row, most recent first.
|
|
1785
|
+
|
|
1786
|
+
Examples:
|
|
1787
|
+
Report all versions of the table:
|
|
1788
|
+
|
|
1789
|
+
>>> tbl.history()
|
|
1790
|
+
|
|
1791
|
+
Report only the most recent 5 changes to the table:
|
|
1792
|
+
|
|
1793
|
+
>>> tbl.history(n=5)
|
|
1794
|
+
"""
|
|
1795
|
+
versions = self.get_versions(n)
|
|
1796
|
+
assert len(versions) > 0
|
|
1797
|
+
return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
|
|
1773
1798
|
|
|
1774
1799
|
def __check_mutable(self, op_descr: str) -> None:
|
|
1775
1800
|
if self._tbl_version_path.is_snapshot():
|
|
1776
1801
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|
|
1777
1802
|
if self._tbl_version_path.is_replica():
|
|
1778
1803
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
class ColumnMetadata(TypedDict):
|
|
1782
|
-
"""Metadata for a column of a Pixeltable table."""
|
|
1783
|
-
|
|
1784
|
-
name: str
|
|
1785
|
-
"""The name of the column."""
|
|
1786
|
-
type_: str
|
|
1787
|
-
"""The type specifier of the column."""
|
|
1788
|
-
version_added: int
|
|
1789
|
-
"""The table version when this column was added."""
|
|
1790
|
-
is_stored: bool
|
|
1791
|
-
"""`True` if this is a stored column; `False` if it is dynamically computed."""
|
|
1792
|
-
is_primary_key: bool
|
|
1793
|
-
"""`True` if this column is part of the table's primary key."""
|
|
1794
|
-
media_validation: Optional[Literal['on_read', 'on_write']]
|
|
1795
|
-
"""The media validation policy for this column."""
|
|
1796
|
-
computed_with: Optional[str]
|
|
1797
|
-
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
class IndexMetadata(TypedDict):
|
|
1801
|
-
"""Metadata for a column of a Pixeltable table."""
|
|
1802
|
-
|
|
1803
|
-
name: str
|
|
1804
|
-
"""The name of the index."""
|
|
1805
|
-
columns: list[str]
|
|
1806
|
-
"""The table columns that are indexed."""
|
|
1807
|
-
index_type: Literal['embedding']
|
|
1808
|
-
"""The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
|
|
1809
|
-
parameters: EmbeddingIndexParams
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
class EmbeddingIndexParams(TypedDict):
|
|
1813
|
-
metric: Literal['cosine', 'ip', 'l2']
|
|
1814
|
-
"""Index metric."""
|
|
1815
|
-
embeddings: list[str]
|
|
1816
|
-
"""List of embeddings defined for this index."""
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
class TableMetadata(TypedDict):
|
|
1820
|
-
"""Metadata for a Pixeltable table."""
|
|
1821
|
-
|
|
1822
|
-
name: str
|
|
1823
|
-
"""The name of the table (ex: `'my_table'`)."""
|
|
1824
|
-
path: str
|
|
1825
|
-
"""The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
|
|
1826
|
-
columns: dict[str, ColumnMetadata]
|
|
1827
|
-
"""Column metadata for all of the visible columns of the table."""
|
|
1828
|
-
indices: dict[str, IndexMetadata]
|
|
1829
|
-
"""Index metadata for all of the indices of the table."""
|
|
1830
|
-
is_replica: bool
|
|
1831
|
-
"""`True` if this table is a replica of another (shared) table."""
|
|
1832
|
-
is_view: bool
|
|
1833
|
-
"""`True` if this table is a view."""
|
|
1834
|
-
is_snapshot: bool
|
|
1835
|
-
"""`True` if this table is a snapshot."""
|
|
1836
|
-
version: int
|
|
1837
|
-
"""The current version of the table."""
|
|
1838
|
-
version_created: datetime.datetime
|
|
1839
|
-
"""The timestamp when this table version was created."""
|
|
1840
|
-
schema_version: int
|
|
1841
|
-
"""The current schema version of the table."""
|
|
1842
|
-
comment: Optional[str]
|
|
1843
|
-
"""User-provided table comment, if one exists."""
|
|
1844
|
-
media_validation: Literal['on_read', 'on_write']
|
|
1845
|
-
"""The media validation policy for this table."""
|
|
1846
|
-
base: Optional[str]
|
|
1847
|
-
"""If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import Literal, Optional, TypedDict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ColumnMetadata(TypedDict):
|
|
6
|
+
"""Metadata for a column of a Pixeltable table."""
|
|
7
|
+
|
|
8
|
+
name: str
|
|
9
|
+
"""The name of the column."""
|
|
10
|
+
type_: str
|
|
11
|
+
"""The type specifier of the column."""
|
|
12
|
+
version_added: int
|
|
13
|
+
"""The table version when this column was added."""
|
|
14
|
+
is_stored: bool
|
|
15
|
+
"""`True` if this is a stored column; `False` if it is dynamically computed."""
|
|
16
|
+
is_primary_key: bool
|
|
17
|
+
"""`True` if this column is part of the table's primary key."""
|
|
18
|
+
media_validation: Optional[Literal['on_read', 'on_write']]
|
|
19
|
+
"""The media validation policy for this column."""
|
|
20
|
+
computed_with: Optional[str]
|
|
21
|
+
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class EmbeddingIndexParams(TypedDict):
|
|
25
|
+
metric: Literal['cosine', 'ip', 'l2']
|
|
26
|
+
"""Index metric."""
|
|
27
|
+
embeddings: list[str]
|
|
28
|
+
"""List of embeddings defined for this index."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class IndexMetadata(TypedDict):
|
|
32
|
+
"""Metadata for a column of a Pixeltable table."""
|
|
33
|
+
|
|
34
|
+
name: str
|
|
35
|
+
"""The name of the index."""
|
|
36
|
+
columns: list[str]
|
|
37
|
+
"""The table columns that are indexed."""
|
|
38
|
+
index_type: Literal['embedding']
|
|
39
|
+
"""The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
|
|
40
|
+
parameters: EmbeddingIndexParams
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TableMetadata(TypedDict):
|
|
44
|
+
"""Metadata for a Pixeltable table."""
|
|
45
|
+
|
|
46
|
+
name: str
|
|
47
|
+
"""The name of the table (ex: `'my_table'`)."""
|
|
48
|
+
path: str
|
|
49
|
+
"""The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
|
|
50
|
+
columns: dict[str, ColumnMetadata]
|
|
51
|
+
"""Column metadata for all of the visible columns of the table."""
|
|
52
|
+
indices: dict[str, IndexMetadata]
|
|
53
|
+
"""Index metadata for all of the indices of the table."""
|
|
54
|
+
is_replica: bool
|
|
55
|
+
"""`True` if this table is a replica of another (shared) table."""
|
|
56
|
+
is_view: bool
|
|
57
|
+
"""`True` if this table is a view."""
|
|
58
|
+
is_snapshot: bool
|
|
59
|
+
"""`True` if this table is a snapshot."""
|
|
60
|
+
version: int
|
|
61
|
+
"""The current version of the table."""
|
|
62
|
+
version_created: datetime.datetime
|
|
63
|
+
"""The timestamp when this table version was created."""
|
|
64
|
+
schema_version: int
|
|
65
|
+
"""The current schema version of the table."""
|
|
66
|
+
comment: Optional[str]
|
|
67
|
+
"""User-provided table comment, if one exists."""
|
|
68
|
+
media_validation: Literal['on_read', 'on_write']
|
|
69
|
+
"""The media validation policy for this table."""
|
|
70
|
+
base: Optional[str]
|
|
71
|
+
"""If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class VersionMetadata(TypedDict):
|
|
75
|
+
"""Metadata for a specific version of a Pixeltable table."""
|
|
76
|
+
|
|
77
|
+
"""The version number."""
|
|
78
|
+
version: int
|
|
79
|
+
"""The timestamp when this version was created."""
|
|
80
|
+
created_at: datetime.datetime
|
|
81
|
+
"""The user who created this version, if defined."""
|
|
82
|
+
user: str | None
|
|
83
|
+
"""The type of table transformation that this version represents (`'data'` or `'schema'`)."""
|
|
84
|
+
change_type: Literal['data', 'schema']
|
|
85
|
+
"""The number of rows inserted in this version."""
|
|
86
|
+
inserts: int
|
|
87
|
+
"""The number of rows updated in this version."""
|
|
88
|
+
updates: int
|
|
89
|
+
"""The number of rows deleted in this version."""
|
|
90
|
+
deletes: int
|
|
91
|
+
"""The number of errors encountered during this version."""
|
|
92
|
+
errors: int
|
|
93
|
+
"""The number of computed values calculated in this version."""
|
|
94
|
+
computed: int
|
|
95
|
+
"""A description of the schema change that occurred in this version, if any."""
|
|
96
|
+
schema_change: str | None
|
pixeltable/env.py
CHANGED
|
@@ -743,6 +743,7 @@ class Env:
|
|
|
743
743
|
self.__register_package('whisper', library_name='openai-whisper')
|
|
744
744
|
self.__register_package('whisperx')
|
|
745
745
|
self.__register_package('yolox', library_name='pixeltable-yolox')
|
|
746
|
+
self.__register_package('lancedb')
|
|
746
747
|
|
|
747
748
|
def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
|
|
748
749
|
is_installed: bool
|
pixeltable/globals.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Iterable,
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Optional, Union
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import pydantic
|
|
@@ -24,9 +24,8 @@ if TYPE_CHECKING:
|
|
|
24
24
|
str,
|
|
25
25
|
os.PathLike,
|
|
26
26
|
Path, # OS paths, filenames, URLs
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
Sequence[pydantic.BaseModel], # list of Pydantic models
|
|
27
|
+
Iterable[dict[str, Any]], # dictionaries of values
|
|
28
|
+
Iterable[pydantic.BaseModel], # Pydantic model instances
|
|
30
29
|
DataFrame, # Pixeltable DataFrame
|
|
31
30
|
pd.DataFrame, # pandas DataFrame
|
|
32
31
|
datasets.Dataset,
|
pixeltable/io/__init__.py
CHANGED
|
@@ -4,11 +4,12 @@ from .datarows import import_json, import_rows
|
|
|
4
4
|
from .external_store import ExternalStore
|
|
5
5
|
from .globals import create_label_studio_project, export_images_as_fo_dataset
|
|
6
6
|
from .hf_datasets import import_huggingface_dataset
|
|
7
|
+
from .lancedb import export_lancedb
|
|
7
8
|
from .pandas import import_csv, import_excel, import_pandas
|
|
8
9
|
from .parquet import export_parquet, import_parquet
|
|
9
10
|
|
|
10
11
|
__default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
|
|
11
|
-
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows'}
|
|
12
|
+
__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows', 'lancedb'}
|
|
12
13
|
__all__ = sorted(__default_dir - __removed_symbols)
|
|
13
14
|
|
|
14
15
|
|
pixeltable/io/lancedb.py
ADDED
pixeltable/io/parquet.py
CHANGED
|
@@ -1,46 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import datetime
|
|
4
|
-
import io
|
|
5
3
|
import json
|
|
6
4
|
import logging
|
|
7
5
|
import typing
|
|
8
|
-
from collections import deque
|
|
9
6
|
from pathlib import Path
|
|
10
7
|
from typing import Any, Optional
|
|
11
8
|
|
|
12
|
-
import numpy as np
|
|
13
|
-
import PIL.Image
|
|
14
|
-
|
|
15
9
|
import pixeltable as pxt
|
|
16
10
|
import pixeltable.exceptions as excs
|
|
17
11
|
from pixeltable.catalog import Catalog
|
|
18
12
|
from pixeltable.utils.transactional_directory import transactional_directory
|
|
19
13
|
|
|
20
14
|
if typing.TYPE_CHECKING:
|
|
21
|
-
import pyarrow as pa
|
|
22
|
-
|
|
23
15
|
import pixeltable as pxt
|
|
24
16
|
|
|
25
17
|
_logger = logging.getLogger('pixeltable')
|
|
26
18
|
|
|
27
19
|
|
|
28
|
-
def _write_batch(value_batch: dict[str, deque], schema: pa.Schema, output_path: Path) -> None:
|
|
29
|
-
import pyarrow as pa
|
|
30
|
-
from pyarrow import parquet
|
|
31
|
-
|
|
32
|
-
pydict = {}
|
|
33
|
-
for field in schema:
|
|
34
|
-
if isinstance(field.type, pa.FixedShapeTensorType):
|
|
35
|
-
stacked_arr = np.stack(value_batch[field.name])
|
|
36
|
-
pydict[field.name] = pa.FixedShapeTensorArray.from_numpy_ndarray(stacked_arr)
|
|
37
|
-
else:
|
|
38
|
-
pydict[field.name] = value_batch[field.name]
|
|
39
|
-
|
|
40
|
-
tab = pa.Table.from_pydict(pydict, schema=schema)
|
|
41
|
-
parquet.write_table(tab, str(output_path))
|
|
42
|
-
|
|
43
|
-
|
|
44
20
|
def export_parquet(
|
|
45
21
|
table_or_df: pxt.Table | pxt.DataFrame,
|
|
46
22
|
parquet_path: Path,
|
|
@@ -63,7 +39,9 @@ def export_parquet(
|
|
|
63
39
|
If False, will raise an error if the Dataframe has any image column.
|
|
64
40
|
Default False.
|
|
65
41
|
"""
|
|
66
|
-
|
|
42
|
+
import pyarrow as pa
|
|
43
|
+
|
|
44
|
+
from pixeltable.utils.arrow import to_record_batches
|
|
67
45
|
|
|
68
46
|
df: pxt.DataFrame
|
|
69
47
|
if isinstance(table_or_df, pxt.catalog.Table):
|
|
@@ -71,9 +49,6 @@ def export_parquet(
|
|
|
71
49
|
else:
|
|
72
50
|
df = table_or_df
|
|
73
51
|
|
|
74
|
-
type_dict = {k: v.as_dict() for k, v in df.schema.items()}
|
|
75
|
-
arrow_schema = to_arrow_schema(df.schema)
|
|
76
|
-
|
|
77
52
|
if not inline_images and any(col_type.is_image_type() for col_type in df.schema.values()):
|
|
78
53
|
raise excs.Error('Cannot export Dataframe with image columns when inline_images is False')
|
|
79
54
|
|
|
@@ -81,70 +56,15 @@ def export_parquet(
|
|
|
81
56
|
with transactional_directory(parquet_path) as temp_path:
|
|
82
57
|
# dump metadata json file so we can inspect what was the source of the parquet file later on.
|
|
83
58
|
json.dump(df.as_dict(), (temp_path / '.pixeltable.json').open('w'))
|
|
59
|
+
type_dict = {k: v.as_dict() for k, v in df.schema.items()}
|
|
84
60
|
json.dump(type_dict, (temp_path / '.pixeltable.column_types.json').open('w')) # keep type metadata
|
|
85
|
-
|
|
86
61
|
batch_num = 0
|
|
87
|
-
current_value_batch: dict[str, deque] = {k: deque() for k in df.schema}
|
|
88
|
-
current_byte_estimate = 0
|
|
89
|
-
|
|
90
62
|
with Catalog.get().begin_xact(for_write=False):
|
|
91
|
-
for
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
continue
|
|
97
|
-
|
|
98
|
-
assert val is not None
|
|
99
|
-
if col_type.is_image_type():
|
|
100
|
-
# images get inlined into the parquet file
|
|
101
|
-
if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
|
|
102
|
-
# if there is a file, read directly to preserve information
|
|
103
|
-
with open(data_row.file_paths[e.slot_idx], 'rb') as f:
|
|
104
|
-
val = f.read()
|
|
105
|
-
elif isinstance(val, PIL.Image.Image):
|
|
106
|
-
# if no file available, eg. bc it is computed, convert to png
|
|
107
|
-
buf = io.BytesIO()
|
|
108
|
-
val.save(buf, format='PNG')
|
|
109
|
-
val = buf.getvalue()
|
|
110
|
-
else:
|
|
111
|
-
raise excs.Error(f'unknown image type {type(val)}')
|
|
112
|
-
length = len(val)
|
|
113
|
-
elif col_type.is_string_type():
|
|
114
|
-
length = len(val)
|
|
115
|
-
elif col_type.is_video_type() or col_type.is_audio_type():
|
|
116
|
-
if data_row.file_paths is not None and data_row.file_paths[e.slot_idx] is not None:
|
|
117
|
-
val = data_row.file_paths[e.slot_idx]
|
|
118
|
-
else:
|
|
119
|
-
raise excs.Error(f'unknown audio/video type {type(val)}')
|
|
120
|
-
length = len(val)
|
|
121
|
-
elif col_type.is_json_type():
|
|
122
|
-
val = json.dumps(val)
|
|
123
|
-
length = len(val)
|
|
124
|
-
elif col_type.is_array_type():
|
|
125
|
-
length = val.nbytes
|
|
126
|
-
elif col_type.is_int_type() or col_type.is_float_type():
|
|
127
|
-
length = 8
|
|
128
|
-
elif col_type.is_bool_type():
|
|
129
|
-
length = 1
|
|
130
|
-
elif col_type.is_date_type():
|
|
131
|
-
length = 4
|
|
132
|
-
elif col_type.is_timestamp_type():
|
|
133
|
-
val = val.astimezone(datetime.timezone.utc)
|
|
134
|
-
length = 8
|
|
135
|
-
else:
|
|
136
|
-
raise excs.Error(f'unknown type {col_type} for {col_name}')
|
|
137
|
-
|
|
138
|
-
current_value_batch[col_name].append(val)
|
|
139
|
-
current_byte_estimate += length
|
|
140
|
-
if current_byte_estimate > partition_size_bytes:
|
|
141
|
-
assert batch_num < 100_000, 'wrote too many parquet files, unclear ordering'
|
|
142
|
-
_write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
|
|
143
|
-
batch_num += 1
|
|
144
|
-
current_value_batch = {k: deque() for k in df.schema}
|
|
145
|
-
current_byte_estimate = 0
|
|
146
|
-
|
|
147
|
-
_write_batch(current_value_batch, arrow_schema, temp_path / f'part-{batch_num:05d}.parquet')
|
|
63
|
+
for record_batch in to_record_batches(df, partition_size_bytes):
|
|
64
|
+
output_path = temp_path / f'part-{batch_num:05d}.parquet'
|
|
65
|
+
arrow_tbl = pa.Table.from_batches([record_batch]) # type: ignore
|
|
66
|
+
pa.parquet.write_table(arrow_tbl, str(output_path))
|
|
67
|
+
batch_num += 1
|
|
148
68
|
|
|
149
69
|
|
|
150
70
|
def import_parquet(
|
|
@@ -469,12 +469,12 @@ class ParquetTableDataConduit(TableDataConduit):
|
|
|
469
469
|
return t
|
|
470
470
|
|
|
471
471
|
def infer_schema_part1(self) -> tuple[dict[str, ts.ColumnType], list[str]]:
|
|
472
|
-
from pixeltable.utils.arrow import
|
|
472
|
+
from pixeltable.utils.arrow import to_pxt_schema
|
|
473
473
|
|
|
474
474
|
if self.source_column_map is None:
|
|
475
475
|
if self.src_schema_overrides is None:
|
|
476
476
|
self.src_schema_overrides = {}
|
|
477
|
-
self.src_schema =
|
|
477
|
+
self.src_schema = to_pxt_schema(self.pq_ds.schema, self.src_schema_overrides, self.src_pk)
|
|
478
478
|
inferred_schema, inferred_pk, self.source_column_map = normalize_schema_names(
|
|
479
479
|
self.src_schema, self.src_pk, self.src_schema_overrides
|
|
480
480
|
)
|
pixeltable/utils/arrow.py
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Iterator, Optional, cast
|
|
3
5
|
|
|
4
6
|
import numpy as np
|
|
7
|
+
import PIL.Image
|
|
5
8
|
import pyarrow as pa
|
|
6
9
|
|
|
10
|
+
import pixeltable.exceptions as excs
|
|
7
11
|
import pixeltable.type_system as ts
|
|
8
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
import pixeltable as pxt
|
|
15
|
+
|
|
9
16
|
PA_TO_PXT_TYPES: dict[pa.DataType, ts.ColumnType] = {
|
|
10
17
|
pa.string(): ts.StringType(nullable=True),
|
|
11
18
|
pa.large_string(): ts.StringType(nullable=True),
|
|
@@ -71,7 +78,7 @@ def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
|
|
|
71
78
|
return None
|
|
72
79
|
|
|
73
80
|
|
|
74
|
-
def
|
|
81
|
+
def to_pxt_schema(
|
|
75
82
|
arrow_schema: pa.Schema, schema_overrides: dict[str, Any], primary_key: list[str]
|
|
76
83
|
) -> dict[str, ts.ColumnType]:
|
|
77
84
|
"""Convert a pyarrow Schema to a schema using pyarrow names and pixeltable types."""
|
|
@@ -88,6 +95,94 @@ def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
|
|
|
88
95
|
return pa.schema((name, to_arrow_type(typ)) for name, typ in pixeltable_schema.items()) # type: ignore[misc]
|
|
89
96
|
|
|
90
97
|
|
|
98
|
+
def _to_record_batch(column_vals: dict[str, list[Any]], schema: pa.Schema) -> pa.RecordBatch:
|
|
99
|
+
import pyarrow as pa
|
|
100
|
+
|
|
101
|
+
pa_arrays: list[pa.Array] = []
|
|
102
|
+
for field in schema:
|
|
103
|
+
if isinstance(field.type, pa.FixedShapeTensorType):
|
|
104
|
+
stacked_arr = np.stack(column_vals[field.name])
|
|
105
|
+
pa_arrays.append(pa.FixedShapeTensorArray.from_numpy_ndarray(stacked_arr))
|
|
106
|
+
else:
|
|
107
|
+
pa_array = cast(pa.Array, pa.array(column_vals[field.name]))
|
|
108
|
+
pa_arrays.append(pa_array)
|
|
109
|
+
return pa.RecordBatch.from_arrays(pa_arrays, schema=schema) # type: ignore
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def to_record_batches(df: 'pxt.DataFrame', batch_size_bytes: int) -> Iterator[pa.RecordBatch]:
|
|
113
|
+
arrow_schema = to_arrow_schema(df.schema)
|
|
114
|
+
batch_columns: dict[str, list[Any]] = {k: [] for k in df.schema}
|
|
115
|
+
current_byte_estimate = 0
|
|
116
|
+
num_batch_rows = 0
|
|
117
|
+
|
|
118
|
+
# TODO: in order to avoid having to deal with ExprEvalError here, DataFrameResultSet should be an iterator
|
|
119
|
+
# over _exec()
|
|
120
|
+
try:
|
|
121
|
+
for data_row in df._exec():
|
|
122
|
+
num_batch_rows += 1
|
|
123
|
+
for (col_name, col_type), e in zip(df.schema.items(), df._select_list_exprs):
|
|
124
|
+
val = data_row[e.slot_idx]
|
|
125
|
+
val_size_bytes: int
|
|
126
|
+
if val is None:
|
|
127
|
+
batch_columns[col_name].append(val)
|
|
128
|
+
continue
|
|
129
|
+
|
|
130
|
+
assert val is not None
|
|
131
|
+
if col_type.is_image_type():
|
|
132
|
+
# images get inlined into the parquet file
|
|
133
|
+
if data_row.file_paths[e.slot_idx] is not None:
|
|
134
|
+
# if there is a file, read directly to preserve information
|
|
135
|
+
with open(data_row.file_paths[e.slot_idx], 'rb') as f:
|
|
136
|
+
val = f.read()
|
|
137
|
+
elif isinstance(val, PIL.Image.Image):
|
|
138
|
+
# no file available: save as png
|
|
139
|
+
buf = io.BytesIO()
|
|
140
|
+
val.save(buf, format='png')
|
|
141
|
+
val = buf.getvalue()
|
|
142
|
+
else:
|
|
143
|
+
raise excs.Error(f'unknown image type {type(val)}')
|
|
144
|
+
val_size_bytes = len(val)
|
|
145
|
+
elif col_type.is_string_type():
|
|
146
|
+
val_size_bytes = len(val)
|
|
147
|
+
elif col_type.is_media_type():
|
|
148
|
+
assert data_row.file_paths[e.slot_idx] is not None
|
|
149
|
+
val = data_row.file_paths[e.slot_idx]
|
|
150
|
+
val_size_bytes = len(val)
|
|
151
|
+
elif col_type.is_json_type():
|
|
152
|
+
val = json.dumps(val)
|
|
153
|
+
val_size_bytes = len(val)
|
|
154
|
+
elif col_type.is_array_type():
|
|
155
|
+
val_size_bytes = val.nbytes
|
|
156
|
+
elif col_type.is_int_type() or col_type.is_float_type():
|
|
157
|
+
val_size_bytes = 8
|
|
158
|
+
elif col_type.is_bool_type():
|
|
159
|
+
val_size_bytes = 1
|
|
160
|
+
elif col_type.is_date_type():
|
|
161
|
+
val_size_bytes = 4
|
|
162
|
+
elif col_type.is_timestamp_type():
|
|
163
|
+
val = val.astimezone(datetime.timezone.utc)
|
|
164
|
+
val_size_bytes = 8
|
|
165
|
+
else:
|
|
166
|
+
raise excs.Error(f'unknown type {col_type} for {col_name}')
|
|
167
|
+
|
|
168
|
+
batch_columns[col_name].append(val)
|
|
169
|
+
current_byte_estimate += val_size_bytes
|
|
170
|
+
|
|
171
|
+
if current_byte_estimate > batch_size_bytes and num_batch_rows > 0:
|
|
172
|
+
record_batch = _to_record_batch(batch_columns, arrow_schema)
|
|
173
|
+
yield record_batch
|
|
174
|
+
batch_columns = {k: [] for k in df.schema}
|
|
175
|
+
current_byte_estimate = 0
|
|
176
|
+
num_batch_rows = 0
|
|
177
|
+
|
|
178
|
+
except excs.ExprEvalError as e:
|
|
179
|
+
df._raise_expr_eval_err(e)
|
|
180
|
+
|
|
181
|
+
if num_batch_rows > 0:
|
|
182
|
+
record_batch = _to_record_batch(batch_columns, arrow_schema)
|
|
183
|
+
yield record_batch
|
|
184
|
+
|
|
185
|
+
|
|
91
186
|
def to_pydict(batch: pa.Table | pa.RecordBatch) -> dict[str, list | np.ndarray]:
|
|
92
187
|
"""Convert a RecordBatch to a dictionary of lists, unlike pa.lib.RecordBatch.to_pydict,
|
|
93
188
|
this function will not convert numpy arrays to lists, and will preserve the original numpy dtype.
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
import pixeltable as pxt
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
from pixeltable.catalog import Catalog
|
|
11
|
+
from pixeltable.env import Env
|
|
12
|
+
|
|
13
|
+
# Module-level logger, obtained under the shared 'pixeltable' logger name.
_logger = logging.getLogger('pixeltable')
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def export_lancedb(
    table_or_df: pxt.Table | pxt.DataFrame,
    db_uri: Path | str,
    table_name: str,
    batch_size_bytes: int = 128 * 2**20,
    if_exists: Literal['error', 'overwrite', 'append'] = 'error',
) -> None:
    """
    Exports a dataframe's data to a LanceDB table.

    This utilizes LanceDB's streaming interface for efficient table creation, via a sequence of in-memory pyarrow
    `RecordBatches`, the size of which can be controlled with the `batch_size_bytes` parameter.

    __Requirements:__

    - `pip install lancedb`

    Args:
        table_or_df: Table or DataFrame to export.
        db_uri: Local path to the LanceDB database directory (a `Path` or a path string).
        table_name: Name of the table in the LanceDB database.
        batch_size_bytes: Maximum size in bytes for each batch.
        if_exists: Determines the behavior if the table already exists. Must be one of the following:

            - `'error'`: raise an error
            - `'overwrite'`: overwrite the existing table
            - `'append'`: append to the existing table

    Raises:
        Error: If `if_exists` is not one of the accepted values, if `db_uri` exists but is not a directory,
            or if the table already exists and `if_exists='error'`.
    """
    Env.get().require_package('lancedb')

    import lancedb  # type: ignore[import-untyped]

    from pixeltable.utils.arrow import to_arrow_schema, to_record_batches

    if if_exists not in ('error', 'overwrite', 'append'):
        raise excs.Error("export_lancedb(): 'if_exists' must be one of: ['error', 'overwrite', 'append']")

    # Accept plain path strings as well as Path objects; Path(Path) is a no-op.
    db_uri = Path(db_uri)

    df: pxt.DataFrame
    if isinstance(table_or_df, pxt.catalog.Table):
        df = table_or_df._df()
    else:
        df = table_or_df

    # Remember whether the database directory pre-existed, so that a failed export only removes
    # a directory that this call may have created.
    db_exists = False
    if db_uri.exists():
        if not db_uri.is_dir():
            raise excs.Error(f"export_lancedb(): '{db_uri!s}' exists and is not a directory")
        db_exists = True

    try:
        db = lancedb.connect(str(db_uri))
        lance_tbl: lancedb.LanceTable | None
        try:
            lance_tbl = db.open_table(table_name)
        except ValueError:
            # table doesn't exist
            lance_tbl = None

        if lance_tbl is not None and if_exists == 'error':
            raise excs.Error(f'export_lancedb(): table {table_name!r} already exists in {db_uri!r}')

        with Catalog.get().begin_xact(for_write=False):
            if lance_tbl is None or if_exists == 'overwrite':
                mode = 'overwrite' if lance_tbl is not None else 'create'
                arrow_schema = to_arrow_schema(df.schema)
                _ = db.create_table(table_name, to_record_batches(df, batch_size_bytes), schema=arrow_schema, mode=mode)
            else:
                lance_tbl.add(to_record_batches(df, batch_size_bytes))

    except Exception:
        # cleanup: the directory may not have been created yet when the failure occurred, so ignore
        # removal errors rather than letting them mask the original exception
        if not db_exists:
            shutil.rmtree(db_uri, ignore_errors=True)
        raise
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.13
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Project-URL: homepage, https://pixeltable.com/
|
|
6
6
|
Project-URL: repository, https://github.com/pixeltable/pixeltable
|
|
@@ -55,44 +55,41 @@ Requires-Dist: toml>=0.10
|
|
|
55
55
|
Requires-Dist: tqdm>=4.64
|
|
56
56
|
Description-Content-Type: text/markdown
|
|
57
57
|
|
|
58
|
-
<
|
|
59
|
-
<
|
|
60
|
-
|
|
61
|
-
<
|
|
58
|
+
<picture class="github-only">
|
|
59
|
+
<source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131">
|
|
60
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/c5ab123e-806c-49bf-93e7-151353719b16">
|
|
61
|
+
<img alt="Pixeltable Logo" src="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131" width="40%">
|
|
62
|
+
</picture>
|
|
62
63
|
|
|
63
|
-
<
|
|
64
|
+
<div>
|
|
65
|
+
<br>
|
|
66
|
+
</div>
|
|
67
|
+
|
|
68
|
+
The only open source Python library providing declarative data infrastructure for building multimodal AI applications, enabling incremental storage, transformation, indexing, retrieval, and orchestration of data.
|
|
64
69
|
|
|
65
70
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
66
|
-

|
|
67
|
-

|
|
68
|
-
<br>
|
|
69
71
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
|
|
70
72
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
|
|
71
73
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml)
|
|
72
74
|
[](https://pypi.org/project/pixeltable/)
|
|
73
75
|
[](https://discord.gg/QPyqFYx2UN)
|
|
74
76
|
|
|
75
|
-
[**Installation**](https://docs.pixeltable.com/docs/overview/installation) |
|
|
76
77
|
[**Quick Start**](https://docs.pixeltable.com/docs/overview/quick-start) |
|
|
77
78
|
[**Documentation**](https://docs.pixeltable.com/) |
|
|
78
79
|
[**API Reference**](https://pixeltable.github.io/pixeltable/) |
|
|
79
|
-
[**
|
|
80
|
+
[**Sample Apps**](https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps) |
|
|
80
81
|
[**Discord Community**](https://discord.gg/QPyqFYx2UN)
|
|
81
82
|
|
|
82
|
-
</div>
|
|
83
|
-
|
|
84
83
|
---
|
|
85
84
|
|
|
86
|
-
##
|
|
85
|
+
## Installation
|
|
87
86
|
|
|
88
87
|
```python
|
|
89
88
|
pip install pixeltable
|
|
90
89
|
```
|
|
90
|
+
Pixeltable replaces the complex multi-system architecture typically needed for AI applications (databases, file storage, vector DBs, APIs, orchestration) with a single declarative table interface that natively handles multimodal data like images, videos, and documents.
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
It stores metadata and computed results persistently, typically in a `.pixeltable` directory in your workspace.
|
|
94
|
-
|
|
95
|
-
## Pixeltable Demo
|
|
92
|
+
## Demo
|
|
96
93
|
|
|
97
94
|
https://github.com/user-attachments/assets/b50fd6df-5169-4881-9dbe-1b6e5d06cede
|
|
98
95
|
|
|
@@ -152,7 +149,7 @@ results = t.select(
|
|
|
152
149
|
).collect()
|
|
153
150
|
```
|
|
154
151
|
|
|
155
|
-
##
|
|
152
|
+
## What Happened?
|
|
156
153
|
|
|
157
154
|
* **Data Ingestion & Storage:** References [files](https://docs.pixeltable.com/docs/datastore/bringing-data)
|
|
158
155
|
(images, videos, audio, docs) in place, handles structured data.
|
|
@@ -174,7 +171,7 @@ as in the `insert` statement above, Pixeltable caches them locally before proces
|
|
|
174
171
|
[Working with External Files](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/feature-guides/working-with-external-files.ipynb)
|
|
175
172
|
notebook for more details.
|
|
176
173
|
|
|
177
|
-
##
|
|
174
|
+
## Where Did My Data Go?
|
|
178
175
|
|
|
179
176
|
Pixeltable workloads generate various outputs, including both structured outputs (such as bounding boxes for detected
|
|
180
177
|
objects) and/or unstructured outputs (such as generated images or video). By default, everything resides in your
|
|
@@ -186,125 +183,163 @@ a unified table interface over both structured and unstructured data.
|
|
|
186
183
|
In general, the user is not expected to interact directly with the data in `~/.pixeltable`; the data store is fully
|
|
187
184
|
managed by Pixeltable and is intended to be accessed through the Pixeltable Python SDK.
|
|
188
185
|
|
|
189
|
-
##
|
|
186
|
+
## Key Principles
|
|
190
187
|
|
|
191
|
-
|
|
192
|
-
|
|
188
|
+
**[Unified Multimodal Interface:](https://docs.pixeltable.com/docs/datastore/tables-and-operations)** `pxt.Image`,
|
|
189
|
+
`pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
|
|
193
190
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
191
|
+
```python
|
|
192
|
+
t = pxt.create_table(
|
|
193
|
+
'media',
|
|
194
|
+
{
|
|
195
|
+
'img': pxt.Image,
|
|
196
|
+
'video': pxt.Video
|
|
197
|
+
}
|
|
198
|
+
)
|
|
199
|
+
```
|
|
203
200
|
|
|
204
|
-
|
|
205
|
-
|
|
201
|
+
**[Declarative Computed Columns:](https://docs.pixeltable.com/docs/datastore/computed-columns)** Define processing
|
|
202
|
+
steps once; they run automatically on new/updated data.
|
|
206
203
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
204
|
+
```python
|
|
205
|
+
t.add_computed_column(
|
|
206
|
+
classification=huggingface.vit_for_image_classification(
|
|
207
|
+
t.image
|
|
208
|
+
)
|
|
209
|
+
)
|
|
210
|
+
```
|
|
214
211
|
|
|
215
|
-
|
|
216
|
-
|
|
212
|
+
**[Built-in Vector Search:](https://docs.pixeltable.com/docs/datastore/embedding-index)** Add embedding indexes and
|
|
213
|
+
perform similarity searches directly on tables/views.
|
|
217
214
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
215
|
+
```python
|
|
216
|
+
t.add_embedding_index(
|
|
217
|
+
'img',
|
|
218
|
+
embedding=clip.using(
|
|
219
|
+
model_id='openai/clip-vit-base-patch32'
|
|
220
|
+
)
|
|
221
|
+
)
|
|
225
222
|
|
|
226
|
-
|
|
227
|
-
|
|
223
|
+
sim = t.img.similarity("cat playing with yarn")
|
|
224
|
+
```
|
|
228
225
|
|
|
229
|
-
|
|
230
|
-
|
|
226
|
+
**[Incremental View Maintenance:](https://docs.pixeltable.com/docs/datastore/views)** Create virtual tables using iterators
|
|
227
|
+
for efficient processing without data duplication.
|
|
231
228
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
229
|
+
```python
|
|
230
|
+
# Document chunking with overlap & metadata and many more options to build your own iterator
|
|
231
|
+
chunks = pxt.create_view('chunks', docs,
|
|
232
|
+
iterator=DocumentSplitter.create(
|
|
233
|
+
document=docs.doc,
|
|
234
|
+
separators='sentence,token_limit',
|
|
235
|
+
overlap=50, limit=500
|
|
236
|
+
))
|
|
237
|
+
|
|
238
|
+
# Video frame extraction
|
|
239
|
+
frames = pxt.create_view('frames', videos,
|
|
240
|
+
iterator=FrameIterator.create(video=videos.video, fps=0.5))
|
|
241
|
+
```
|
|
242
242
|
|
|
243
|
-
|
|
244
|
-
|
|
243
|
+
**[Seamless AI Integration:](https://docs.pixeltable.com/docs/integrations/frameworks)** Built-in functions for
|
|
244
|
+
OpenAI, Anthropic, Hugging Face, CLIP, YOLOX, and more.
|
|
245
245
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
246
|
+
```python
|
|
247
|
+
# LLM integration (OpenAI, Anthropic, etc.)
|
|
248
|
+
t.add_computed_column(
|
|
249
|
+
response=openai.chat_completions(
|
|
250
|
+
messages=[{"role": "user", "content": t.prompt}], model='gpt-4o-mini'
|
|
251
|
+
)
|
|
252
|
+
)
|
|
253
253
|
|
|
254
|
-
|
|
255
|
-
|
|
254
|
+
# Computer vision (YOLOX object detection)
|
|
255
|
+
t.add_computed_column(
|
|
256
|
+
detections=yolox(t.image, model_id='yolox_s', threshold=0.5)
|
|
257
|
+
)
|
|
256
258
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
259
|
+
# Embedding models (Hugging Face, CLIP)
|
|
260
|
+
t.add_computed_column(
|
|
261
|
+
embeddings=huggingface.sentence_transformer(
|
|
262
|
+
t.text, model_id='all-MiniLM-L6-v2'
|
|
263
|
+
)
|
|
264
|
+
)
|
|
265
|
+
```
|
|
262
266
|
|
|
263
|
-
|
|
264
|
-
|
|
267
|
+
**[Bring Your Own Code:](https://docs.pixeltable.com/docs/datastore/custom-functions)** Extend Pixeltable with UDFs, batch processing, and custom aggregators.
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
@pxt.udf
|
|
271
|
+
def format_prompt(context: list, question: str) -> str:
|
|
272
|
+
return f"Context: {context}\nQuestion: {question}"
|
|
273
|
+
```
|
|
265
274
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
tools = pxt.tools(get_weather_udf, search_context_query)
|
|
275
|
+
**[Agentic Workflows / Tool Calling:](https://docs.pixeltable.com/docs/examples/chat/tools)** Register `@pxt.udf`,
|
|
276
|
+
`@pxt.query` functions, or **MCP tools** as tools.
|
|
269
277
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
278
|
+
```python
|
|
279
|
+
# Example tools: UDFs, Query functions, and MCP tools
|
|
280
|
+
mcp_tools = pxt.mcp_udfs('http://localhost:8000/mcp') # Load from MCP server
|
|
281
|
+
tools = pxt.tools(get_weather_udf, search_context_query, *mcp_tools)
|
|
282
|
+
|
|
283
|
+
# LLM decides which tool to call; Pixeltable executes it
|
|
284
|
+
t.add_computed_column(
|
|
285
|
+
tool_output=invoke_tools(tools, t.llm_tool_choice)
|
|
286
|
+
)
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**[Data Persistence:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** All data,
|
|
290
|
+
metadata, and computed results are automatically stored and versioned.
|
|
291
|
+
|
|
292
|
+
```python
|
|
293
|
+
t = pxt.get_table('my_table') # Get a handle to an existing table
|
|
294
|
+
t.select(t.account, t.balance).collect() # Query its contents
|
|
295
|
+
t.revert() # Undo the last modification to the table and restore its previous state
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
**[Time Travel:](https://docs.pixeltable.com/docs/datastore/tables-and-operations#data-operations)** By default,
|
|
299
|
+
Pixeltable preserves the full change history of each table, and any prior version can be selected and queried.
|
|
300
|
+
|
|
301
|
+
```python
|
|
302
|
+
t.history() # Display a human-readable list of all prior versions of the table
|
|
303
|
+
old_version = pxt.get_table('my_table:472') # Get a handle to a specific table version
|
|
304
|
+
old_version.select(t.account, t.balance).collect() # Query the older version
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
**[SQL-like Python Querying:](https://docs.pixeltable.com/docs/datastore/filtering-and-selecting)** Familiar syntax
|
|
308
|
+
combined with powerful AI capabilities.
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
results = (
|
|
312
|
+
t.where(t.score > 0.8)
|
|
313
|
+
.order_by(t.timestamp)
|
|
314
|
+
.select(t.image, score=t.score)
|
|
315
|
+
.limit(10)
|
|
316
|
+
.collect()
|
|
317
|
+
)
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
**[I/O & Integration:](https://pixeltable.github.io/pixeltable/pixeltable/io/)** Export to multiple
|
|
321
|
+
formats and integrate with the ML/AI tools ecosystem.
|
|
322
|
+
|
|
323
|
+
```python
|
|
324
|
+
# Export to analytics/ML formats
|
|
325
|
+
pxt.export_parquet(table, 'data.parquet', partition_size_bytes=100_000_000)
|
|
326
|
+
pxt.export_lancedb(table, 'vector_db')
|
|
327
|
+
|
|
328
|
+
# DataFrame conversions
|
|
329
|
+
results = table.select(table.image, table.labels).collect()
|
|
330
|
+
df = results.to_pandas() # → pandas DataFrame
|
|
331
|
+
models = results.to_pydantic(MyModel) # → Pydantic models
|
|
332
|
+
|
|
333
|
+
# Specialized ML dataset formats
|
|
334
|
+
coco_path = table.to_coco_dataset() # → COCO annotations
|
|
335
|
+
pytorch_ds = table.to_pytorch_dataset('pt') # → PyTorch DataLoader ready
|
|
336
|
+
|
|
337
|
+
# ML tool integrations
|
|
338
|
+
pxt.create_label_studio_project(table, label_config) # Annotation
|
|
339
|
+
pxt.export_images_as_fo_dataset(table, table.image) # FiftyOne
|
|
340
|
+
```
|
|
306
341
|
|
|
307
|
-
##
|
|
342
|
+
## Key Examples
|
|
308
343
|
|
|
309
344
|
*(See the [Full Quick Start](https://docs.pixeltable.com/docs/overview/quick-start) or
|
|
310
345
|
[Notebook Gallery](#-notebook-gallery) for more details)*
|
|
@@ -497,7 +532,7 @@ print("--- Final Answer ---")
|
|
|
497
532
|
print(qa.select(qa.answer).collect())
|
|
498
533
|
```
|
|
499
534
|
|
|
500
|
-
##
|
|
535
|
+
## Notebook Gallery
|
|
501
536
|
|
|
502
537
|
Explore Pixeltable's capabilities interactively:
|
|
503
538
|
|
|
@@ -514,7 +549,7 @@ Explore Pixeltable's capabilities interactively:
|
|
|
514
549
|
| Object Detection | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Image/Text Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi"> <img src="https://img.shields.io/badge/🖥️%20App-black.svg" alt="GitHub App"/> |
|
|
515
550
|
| Audio Transcription | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC%20Bot-%235865F2.svg" alt="GitHub App"/></a> |
|
|
516
551
|
|
|
517
|
-
##
|
|
552
|
+
## Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
|
|
518
553
|
|
|
519
554
|
Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/docs/datastore/bringing-data) ones,
|
|
520
555
|
requires stitching together numerous tools:
|
|
@@ -528,7 +563,7 @@ requires stitching together numerous tools:
|
|
|
528
563
|
|
|
529
564
|
This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
|
|
530
565
|
|
|
531
|
-
##
|
|
566
|
+
## Roadmap (2025)
|
|
532
567
|
|
|
533
568
|
### Cloud Infrastructure and Deployment
|
|
534
569
|
|
|
@@ -538,13 +573,13 @@ We're working on a hosted Pixeltable service that will:
|
|
|
538
573
|
* Provide a persistent cloud instance
|
|
539
574
|
* Turn Pixeltable workflows (Tables, Queries, UDFs) into API endpoints/[MCP Servers](https://github.com/pixeltable/pixeltable-mcp-server)
|
|
540
575
|
|
|
541
|
-
##
|
|
576
|
+
## Contributing
|
|
542
577
|
|
|
543
578
|
We love contributions! Whether it's reporting bugs, suggesting features, improving documentation, or submitting code
|
|
544
579
|
changes, please check out our [Contributing Guide](CONTRIBUTING.md) and join the
|
|
545
580
|
[Discussions](https://github.com/pixeltable/pixeltable/discussions) or our
|
|
546
581
|
[Discord Server](https://discord.gg/QPyqFYx2UN).
|
|
547
582
|
|
|
548
|
-
##
|
|
583
|
+
## License
|
|
549
584
|
|
|
550
585
|
Pixeltable is licensed under the Apache 2.0 License.
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
pixeltable/__init__.py,sha256=
|
|
1
|
+
pixeltable/__init__.py,sha256=PDfphK_WypPopRbBNhJ0wXiX5T9Vp4Vq9Hf8Oz_oXZA,1620
|
|
2
2
|
pixeltable/__version__.py,sha256=LnMIuAxx6nAQDMev_jnZyUdgsaiE3F8lulfXQBRl9qQ,112
|
|
3
3
|
pixeltable/config.py,sha256=-aoSVF0Aak83IC-u-XANw3if76TDq5VnnWNWoFDR5Hc,8390
|
|
4
4
|
pixeltable/dataframe.py,sha256=XbrzPjnPgZKJ5lVgPO71cK-nRHCpqGCGWFc52kUO8_E,64213
|
|
5
|
-
pixeltable/env.py,sha256=
|
|
5
|
+
pixeltable/env.py,sha256=LUTOi3DcinsVFoqiOmsG8Dlhe8yWBEfgIdY9rOlJMME,44203
|
|
6
6
|
pixeltable/exceptions.py,sha256=Gm8d3TL2iiv6Pj2DLd29wp_j41qNBhxXL9iTQnL4Nk4,1116
|
|
7
|
-
pixeltable/globals.py,sha256=
|
|
7
|
+
pixeltable/globals.py,sha256=dktqUbpsiLorB4-1VjYDp7LH0rfqfh_3c8OD819K_H4,39183
|
|
8
8
|
pixeltable/plan.py,sha256=4yAe7ExAqaSvkFxwK7LPH_HpmoumwqoLeOo7czJ8CyQ,48001
|
|
9
9
|
pixeltable/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
pixeltable/store.py,sha256=CneWUmgN-EwaPYLcizlAxONC7WYwMr8SNpSFeNBBmOA,22885
|
|
11
11
|
pixeltable/type_system.py,sha256=UfPZZy4zJ2kGvdHXI9rqxOGAjgIxCZ9QGvvidPWcq-M,56153
|
|
12
|
-
pixeltable/catalog/__init__.py,sha256=
|
|
12
|
+
pixeltable/catalog/__init__.py,sha256=GL0MLxqCBHlhKWqhC3e9B4kwTazagTOiqBHHRjyWbTg,726
|
|
13
13
|
pixeltable/catalog/catalog.py,sha256=gaq10XFwkr6jyv8yVi5xV3_oiDkPvqVe55vxOo14W6k,93853
|
|
14
14
|
pixeltable/catalog/column.py,sha256=MXa5o3ku94T8ZFEL7wnAvqvlk65fOmmHPqIvrUVf3uo,13514
|
|
15
15
|
pixeltable/catalog/dir.py,sha256=VYTscPlKR6XhupPTXlJ8txAHxS5GSpPJ3LIleDJagVQ,2047
|
|
@@ -18,7 +18,8 @@ pixeltable/catalog/insertable_table.py,sha256=VUuJ8z7OtMqgy_LMzkn1KzeLXdR-9poTtt
|
|
|
18
18
|
pixeltable/catalog/named_function.py,sha256=vZ-j7P4HugWh9OmUzBMwyRYvO3tQn9jWyJz_1stPavU,1210
|
|
19
19
|
pixeltable/catalog/path.py,sha256=O3FfxrvyX2crijBhp_2k4-3mG3BFxwba-tlPB74QtJQ,3780
|
|
20
20
|
pixeltable/catalog/schema_object.py,sha256=rQ6-3rzqnOHyEEHi97kai2S7BO3D9AkH7rirnfbGc14,1785
|
|
21
|
-
pixeltable/catalog/table.py,sha256=
|
|
21
|
+
pixeltable/catalog/table.py,sha256=phOf59IZJO7xPPR91F2trJpA4TC9lic-dd13mbiUz5Q,80222
|
|
22
|
+
pixeltable/catalog/table_metadata.py,sha256=MVxJLS6Tz2PVOerlnoOOjjhq6LxUdDLeN0BUJf42Smw,3518
|
|
22
23
|
pixeltable/catalog/table_version.py,sha256=SRF2ACp_DcPMLTbc4dbZSgYEfW6-o-UzDOBehecKbb0,65073
|
|
23
24
|
pixeltable/catalog/table_version_handle.py,sha256=FTPRqcGY-h-POcWyZbd9b8P2D5zIw5OSUvwF_dbyCGo,3608
|
|
24
25
|
pixeltable/catalog/table_version_path.py,sha256=IaFVDH06_6ZMuBv5eLNCRTlWizpvz95jgAzqp4OVx_o,9713
|
|
@@ -113,16 +114,17 @@ pixeltable/index/__init__.py,sha256=97aFuxiP_oz1ldn5iq8IWApkOV8XG6ZIBW5-9rkS0vM,
|
|
|
113
114
|
pixeltable/index/base.py,sha256=200s7v3Zy810bRlbSAYzxxaEjVssl6r8esTHiSvWRwQ,1704
|
|
114
115
|
pixeltable/index/btree.py,sha256=8B06D67ay0DFUtEBC5q4bLjxMq7ILpKyyoLAiSaamzA,2503
|
|
115
116
|
pixeltable/index/embedding_index.py,sha256=B_k_3UJmSv7t2ljUg8GC_D4t1jc03PVsTAvxqiTmHBA,11754
|
|
116
|
-
pixeltable/io/__init__.py,sha256=
|
|
117
|
+
pixeltable/io/__init__.py,sha256=SO9xvWuQHfg_YyVahDmstB3lSuMoPKRarW8qgUR81jM,655
|
|
117
118
|
pixeltable/io/datarows.py,sha256=s2fDQTttGxq7cS5JwKFEJRSKn6WsXTaGdmm9VJSl_2M,6154
|
|
118
119
|
pixeltable/io/external_store.py,sha256=rOYBwTqcZZVU2toWxJ_9Iy2w2YO0DhuABrM2xGmqHSo,14787
|
|
119
120
|
pixeltable/io/fiftyone.py,sha256=JcAL9zFszSTcsws6ioF1KZZJFmUeg-11W-c4Gyh3FyQ,6891
|
|
120
121
|
pixeltable/io/globals.py,sha256=B9ubI9Z0m2wGPZXWmZm10vlaP0UCuUsVyrMWvyudZSc,11360
|
|
121
122
|
pixeltable/io/hf_datasets.py,sha256=5WfWfXoQppG1Bx_pS5n44KO1Vo_mEb_S82PLB8cLfAU,5606
|
|
122
123
|
pixeltable/io/label_studio.py,sha256=OCQBVgGjXRSdukFQv2ZKdaBmpxanqH9ibDLxZd1L3mc,31469
|
|
124
|
+
pixeltable/io/lancedb.py,sha256=kNcYXptieMlJ6yxEIZHVFklEMOEB2mrSyp7XZmOw4qs,82
|
|
123
125
|
pixeltable/io/pandas.py,sha256=xQmkwbqE9_fjbbPUgeG5yNICrbVVK73UHxDL-cgrQw0,9007
|
|
124
|
-
pixeltable/io/parquet.py,sha256=
|
|
125
|
-
pixeltable/io/table_data_conduit.py,sha256
|
|
126
|
+
pixeltable/io/parquet.py,sha256=qVvg9nixJnK9gXYxZocD8HE13SznyLrgW9IsehtT4j4,4101
|
|
127
|
+
pixeltable/io/table_data_conduit.py,sha256=8jwQ3IOoOBS-8j2TEfgiqsFUD85kEP5IjoC0dg2uPEk,22058
|
|
126
128
|
pixeltable/io/utils.py,sha256=qzBTmqdIawXMt2bfXQOraYnEstL69eC2Z33nl8RrwJk,4244
|
|
127
129
|
pixeltable/iterators/__init__.py,sha256=hI937cmBRU3eWbfJ7miFthAGUo_xmcYciw6gAjOCg9g,470
|
|
128
130
|
pixeltable/iterators/audio.py,sha256=HYE8JcqaJsTGdrq4NkwV5tn7lcyMp6Fjrm59efOLzb0,9671
|
|
@@ -171,7 +173,7 @@ pixeltable/share/__init__.py,sha256=PTX1mw61Ss4acEOI-sUlu0HaoVsosLqwDfh0ldn8Hkg,
|
|
|
171
173
|
pixeltable/share/packager.py,sha256=5rSKnQCs3YP5h48d79bXEK4L8tLUSeTSbXaB8X9SmBI,31265
|
|
172
174
|
pixeltable/share/publish.py,sha256=VE_H3ux56gdSHd8_ganxCnNYtxrjaalMPgwAIYmdbE8,11300
|
|
173
175
|
pixeltable/utils/__init__.py,sha256=45qEM20L2VuIe-Cc3BTKWFqQb-S7A8qDtmmgl77zYK0,1728
|
|
174
|
-
pixeltable/utils/arrow.py,sha256=
|
|
176
|
+
pixeltable/utils/arrow.py,sha256=U7vb_ffPCR7zv-phyBMPMDosPdKN6LK4IVMpfm2mRy8,10424
|
|
175
177
|
pixeltable/utils/av.py,sha256=omJufz62dzaTTwlR7quKfcT7apf8KkBLJ9cQ9240dt0,4016
|
|
176
178
|
pixeltable/utils/coco.py,sha256=Y1DWVYguZD4VhKyf7JruYfHWvhkJLq39fzbiSm5cdyY,7304
|
|
177
179
|
pixeltable/utils/code.py,sha256=3CZMVJm69JIG5sxmd56mjB4Fo4L-s0_Y8YvQeJIj0F0,1280
|
|
@@ -185,14 +187,15 @@ pixeltable/utils/filecache.py,sha256=3TTEqhGg0pEAP_l0GKn34uspC4dha1jPab1Ka9_oTBM
|
|
|
185
187
|
pixeltable/utils/formatter.py,sha256=tbMxE9rBw6wdKUnJhNZ8h9uAF8dZKcihQ2KesqAag9A,10096
|
|
186
188
|
pixeltable/utils/http_server.py,sha256=6khOAtpVj1lDIm9Dx8VIECLm87cFEp4IFbAg8T92A2o,2441
|
|
187
189
|
pixeltable/utils/iceberg.py,sha256=COeNqqy5RRMkDGLS8CTnaUeAccG10x2fwP3e1veuqIA,522
|
|
190
|
+
pixeltable/utils/lancedb.py,sha256=Otr-t47YACRo0Cq9-FyelcUuan1Kgs4gxCOpLOckj3s,2988
|
|
188
191
|
pixeltable/utils/media_store.py,sha256=-rYfpZOUrWU1YtEFrxdrn9Na0NeyRW3HJYsOdH-kJO4,10898
|
|
189
192
|
pixeltable/utils/pydantic.py,sha256=-ztUsuRXA7B6bywb5Yy1h5pNQ2DnsT1d0oHMxqtK3WY,2011
|
|
190
193
|
pixeltable/utils/pytorch.py,sha256=564VHRdDHwD9h0v5lBHEDTJ8c6zx8wuzWYx8ZYjBxlI,3621
|
|
191
194
|
pixeltable/utils/s3.py,sha256=pxip2MlCqd2Qon2dzJXzfxvwtZyc-BAsjAnLL4J_OXY,587
|
|
192
195
|
pixeltable/utils/sql.py,sha256=Sa4Lh-VGe8GToU5W7DRiWf2lMl9B6saPqemiT0ZdHEc,806
|
|
193
196
|
pixeltable/utils/transactional_directory.py,sha256=OFKmu90oP7KwBAljwjnzP_w8euGdAXob3y4Nx9SCNHA,1357
|
|
194
|
-
pixeltable-0.4.
|
|
195
|
-
pixeltable-0.4.
|
|
196
|
-
pixeltable-0.4.
|
|
197
|
-
pixeltable-0.4.
|
|
198
|
-
pixeltable-0.4.
|
|
197
|
+
pixeltable-0.4.13.dist-info/METADATA,sha256=VSQp0eAebSMwoxcFkjAwTQbtuLISMx-PZ-LoCJo55hg,25631
|
|
198
|
+
pixeltable-0.4.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
199
|
+
pixeltable-0.4.13.dist-info/entry_points.txt,sha256=rrKugZmxDtGnXCnEQ5UJMaaSYY7-g1cLjUZ4W1moIhM,98
|
|
200
|
+
pixeltable-0.4.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
201
|
+
pixeltable-0.4.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|