pixeltable 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/dataframe.py +4 -9
- pixeltable/env.py +8 -3
- pixeltable/exec/component_iteration_node.py +1 -2
- pixeltable/exprs/expr.py +7 -0
- pixeltable/functions/openai.py +35 -10
- pixeltable/io/pandas.py +3 -14
- pixeltable/share/__init__.py +0 -0
- pixeltable/share/packager.py +218 -0
- pixeltable/type_system.py +47 -28
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/iceberg.py +14 -0
- pixeltable/utils/media_store.py +1 -1
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/METADATA +4 -2
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/RECORD +18 -15
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.3.3'
|
|
3
|
-
__version_tuple__ = (0, 3, 3)
|
|
2
|
+
__version__ = '0.3.4'
|
|
3
|
+
__version_tuple__ = (0, 3, 4)
|
pixeltable/dataframe.py
CHANGED
|
@@ -578,15 +578,9 @@ class DataFrame:
|
|
|
578
578
|
# analyze select list; wrap literals with the corresponding expressions
|
|
579
579
|
select_list: list[tuple[exprs.Expr, Optional[str]]] = []
|
|
580
580
|
for raw_expr, name in base_list:
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
select_list.append((exprs.Expr.from_object(raw_expr), name))
|
|
585
|
-
elif isinstance(raw_expr, np.ndarray):
|
|
586
|
-
select_list.append((exprs.Expr.from_array(raw_expr), name))
|
|
587
|
-
else:
|
|
588
|
-
select_list.append((exprs.Literal(raw_expr), name))
|
|
589
|
-
expr = select_list[-1][0]
|
|
581
|
+
expr = exprs.Expr.from_object(raw_expr)
|
|
582
|
+
if expr is None:
|
|
583
|
+
raise excs.Error(f'Invalid expression: {raw_expr}')
|
|
590
584
|
if expr.col_type.is_invalid_type():
|
|
591
585
|
raise excs.Error(f'Invalid type: {raw_expr}')
|
|
592
586
|
if not expr.is_bound_by(self._from_clause.tbls):
|
|
@@ -594,6 +588,7 @@ class DataFrame:
|
|
|
594
588
|
f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
|
|
595
589
|
f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
|
|
596
590
|
)
|
|
591
|
+
select_list.append((expr, name))
|
|
597
592
|
|
|
598
593
|
# check user provided names do not conflict among themselves or with auto-generated ones
|
|
599
594
|
seen: set[str] = set()
|
pixeltable/env.py
CHANGED
|
@@ -333,9 +333,7 @@ class Env:
|
|
|
333
333
|
http_logger.addHandler(http_fh)
|
|
334
334
|
http_logger.propagate = False
|
|
335
335
|
|
|
336
|
-
|
|
337
|
-
for path in glob.glob(f'{self._tmp_dir}/*'):
|
|
338
|
-
os.remove(path)
|
|
336
|
+
self.clear_tmp_dir()
|
|
339
337
|
|
|
340
338
|
self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
|
|
341
339
|
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
|
|
@@ -628,6 +626,13 @@ class Env:
|
|
|
628
626
|
)
|
|
629
627
|
self.__optional_packages['spacy'].is_installed = False
|
|
630
628
|
|
|
629
|
+
def clear_tmp_dir(self) -> None:
|
|
630
|
+
for path in glob.glob(f'{self._tmp_dir}/*'):
|
|
631
|
+
if os.path.isdir(path):
|
|
632
|
+
shutil.rmtree(path)
|
|
633
|
+
else:
|
|
634
|
+
os.remove(path)
|
|
635
|
+
|
|
631
636
|
def num_tmp_files(self) -> int:
|
|
632
637
|
return len(glob.glob(f'{self._tmp_dir}/*'))
|
|
633
638
|
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -10,6 +10,7 @@ import typing
|
|
|
10
10
|
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
from typing_extensions import Self, _AnnotatedAlias
|
|
15
16
|
|
|
@@ -379,6 +380,12 @@ class Expr(abc.ABC):
|
|
|
379
380
|
@classmethod
|
|
380
381
|
def from_array(cls, elements: Iterable) -> Optional[Expr]:
|
|
381
382
|
from .inline_expr import InlineArray
|
|
383
|
+
from .literal import Literal
|
|
384
|
+
|
|
385
|
+
if isinstance(elements, np.ndarray):
|
|
386
|
+
pxttype = ts.ArrayType.from_literal(elements)
|
|
387
|
+
if pxttype is not None:
|
|
388
|
+
return Literal(elements, col_type=pxttype)
|
|
382
389
|
|
|
383
390
|
inline_array = InlineArray(elements)
|
|
384
391
|
return inline_array.maybe_literal()
|
pixeltable/functions/openai.py
CHANGED
|
@@ -14,7 +14,7 @@ import math
|
|
|
14
14
|
import pathlib
|
|
15
15
|
import re
|
|
16
16
|
import uuid
|
|
17
|
-
from typing import TYPE_CHECKING, Any, Callable, Optional, Type, TypeVar, Union, cast
|
|
17
|
+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Type, TypeVar, Union, cast
|
|
18
18
|
|
|
19
19
|
import httpx
|
|
20
20
|
import numpy as np
|
|
@@ -324,10 +324,17 @@ async def translations(
|
|
|
324
324
|
# Chat Endpoints
|
|
325
325
|
|
|
326
326
|
|
|
327
|
+
def _default_max_tokens(model: str) -> int:
|
|
328
|
+
if model in ('o1', 'o3-mini'):
|
|
329
|
+
return 65536
|
|
330
|
+
else:
|
|
331
|
+
return 1024
|
|
332
|
+
|
|
333
|
+
|
|
327
334
|
def _chat_completions_get_request_resources(
|
|
328
|
-
messages: list, max_tokens: Optional[int], n: Optional[int]
|
|
335
|
+
messages: list, model: str, max_completion_tokens: Optional[int], max_tokens: Optional[int], n: Optional[int]
|
|
329
336
|
) -> dict[str, int]:
|
|
330
|
-
completion_tokens = n * max_tokens
|
|
337
|
+
completion_tokens = (n or 1) * (max_completion_tokens or max_tokens or _default_max_tokens(model))
|
|
331
338
|
|
|
332
339
|
num_tokens = 0.0
|
|
333
340
|
for message in messages:
|
|
@@ -349,16 +356,18 @@ async def chat_completions(
|
|
|
349
356
|
logit_bias: Optional[dict[str, int]] = None,
|
|
350
357
|
logprobs: Optional[bool] = None,
|
|
351
358
|
top_logprobs: Optional[int] = None,
|
|
352
|
-
|
|
353
|
-
|
|
359
|
+
max_completion_tokens: Optional[int] = None,
|
|
360
|
+
max_tokens: Optional[int] = None,
|
|
361
|
+
n: Optional[int] = None,
|
|
354
362
|
presence_penalty: Optional[float] = None,
|
|
363
|
+
reasoning_effort: Optional[Literal['low', 'medium', 'high']] = None,
|
|
355
364
|
response_format: Optional[dict] = None,
|
|
356
365
|
seed: Optional[int] = None,
|
|
357
366
|
stop: Optional[list[str]] = None,
|
|
358
367
|
temperature: Optional[float] = None,
|
|
359
|
-
top_p: Optional[float] = None,
|
|
360
368
|
tools: Optional[list[dict]] = None,
|
|
361
369
|
tool_choice: Optional[dict] = None,
|
|
370
|
+
top_p: Optional[float] = None,
|
|
362
371
|
user: Optional[str] = None,
|
|
363
372
|
timeout: Optional[float] = None,
|
|
364
373
|
) -> dict:
|
|
@@ -418,6 +427,9 @@ async def chat_completions(
|
|
|
418
427
|
resource_pool, lambda: OpenAIRateLimitsInfo(_chat_completions_get_request_resources)
|
|
419
428
|
)
|
|
420
429
|
|
|
430
|
+
if max_completion_tokens is None and max_tokens is None:
|
|
431
|
+
max_completion_tokens = _default_max_tokens(model)
|
|
432
|
+
|
|
421
433
|
# cast(Any, ...): avoid mypy errors
|
|
422
434
|
result = await _openai_client().chat.completions.with_raw_response.create(
|
|
423
435
|
messages=messages,
|
|
@@ -426,16 +438,18 @@ async def chat_completions(
|
|
|
426
438
|
logit_bias=_opt(logit_bias),
|
|
427
439
|
logprobs=_opt(logprobs),
|
|
428
440
|
top_logprobs=_opt(top_logprobs),
|
|
441
|
+
max_completion_tokens=_opt(max_completion_tokens),
|
|
429
442
|
max_tokens=_opt(max_tokens),
|
|
430
443
|
n=_opt(n),
|
|
431
444
|
presence_penalty=_opt(presence_penalty),
|
|
445
|
+
reasoning_effort=_opt(reasoning_effort),
|
|
432
446
|
response_format=_opt(cast(Any, response_format)),
|
|
433
447
|
seed=_opt(seed),
|
|
434
448
|
stop=_opt(stop),
|
|
435
449
|
temperature=_opt(temperature),
|
|
436
|
-
top_p=_opt(top_p),
|
|
437
450
|
tools=_opt(cast(Any, tools)),
|
|
438
451
|
tool_choice=_opt(cast(Any, tool_choice_)),
|
|
452
|
+
top_p=_opt(top_p),
|
|
439
453
|
user=_opt(user),
|
|
440
454
|
timeout=_opt(timeout),
|
|
441
455
|
extra_body=extra_body,
|
|
@@ -448,9 +462,14 @@ async def chat_completions(
|
|
|
448
462
|
|
|
449
463
|
|
|
450
464
|
def _vision_get_request_resources(
|
|
451
|
-
prompt: str,
|
|
465
|
+
prompt: str,
|
|
466
|
+
image: PIL.Image.Image,
|
|
467
|
+
model: str,
|
|
468
|
+
max_completion_tokens: Optional[int],
|
|
469
|
+
max_tokens: Optional[int],
|
|
470
|
+
n: Optional[int],
|
|
452
471
|
) -> dict[str, int]:
|
|
453
|
-
completion_tokens = n * max_tokens
|
|
472
|
+
completion_tokens = (n or 1) * (max_completion_tokens or max_tokens or _default_max_tokens(model))
|
|
454
473
|
prompt_tokens = len(prompt) / 4
|
|
455
474
|
|
|
456
475
|
# calculate image tokens based on
|
|
@@ -482,7 +501,8 @@ async def vision(
|
|
|
482
501
|
image: PIL.Image.Image,
|
|
483
502
|
*,
|
|
484
503
|
model: str,
|
|
485
|
-
|
|
504
|
+
max_completion_tokens: Optional[int] = None,
|
|
505
|
+
max_tokens: Optional[int] = None,
|
|
486
506
|
n: Optional[int] = 1,
|
|
487
507
|
timeout: Optional[float] = None,
|
|
488
508
|
) -> str:
|
|
@@ -534,9 +554,14 @@ async def vision(
|
|
|
534
554
|
rate_limits_info = env.Env.get().get_resource_pool_info(
|
|
535
555
|
resource_pool, lambda: OpenAIRateLimitsInfo(_vision_get_request_resources)
|
|
536
556
|
)
|
|
557
|
+
|
|
558
|
+
if max_completion_tokens is None and max_tokens is None:
|
|
559
|
+
max_completion_tokens = _default_max_tokens(model)
|
|
560
|
+
|
|
537
561
|
result = await _openai_client().chat.completions.with_raw_response.create(
|
|
538
562
|
messages=messages, # type: ignore
|
|
539
563
|
model=model,
|
|
564
|
+
max_completion_tokens=_opt(max_completion_tokens),
|
|
540
565
|
max_tokens=_opt(max_tokens),
|
|
541
566
|
n=_opt(n),
|
|
542
567
|
timeout=_opt(timeout),
|
pixeltable/io/pandas.py
CHANGED
|
@@ -185,20 +185,9 @@ def __np_dtype_to_pxt_type(np_dtype: np.dtype, data_col: pd.Series, nullable: bo
|
|
|
185
185
|
"""
|
|
186
186
|
Infers a Pixeltable type based on a Numpy dtype.
|
|
187
187
|
"""
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
if np.issubdtype(np_dtype, np.floating):
|
|
192
|
-
return pxt.FloatType(nullable=nullable)
|
|
193
|
-
|
|
194
|
-
if np.issubdtype(np_dtype, np.bool_):
|
|
195
|
-
return pxt.BoolType(nullable=nullable)
|
|
196
|
-
|
|
197
|
-
if np.issubdtype(np_dtype, np.character):
|
|
198
|
-
return pxt.StringType(nullable=nullable)
|
|
199
|
-
|
|
200
|
-
if np.issubdtype(np_dtype, np.datetime64):
|
|
201
|
-
return pxt.TimestampType(nullable=nullable)
|
|
188
|
+
pxttype = ts.ArrayType.from_np_dtype(np_dtype, nullable)
|
|
189
|
+
if pxttype is not None:
|
|
190
|
+
return pxttype
|
|
202
191
|
|
|
203
192
|
if np_dtype == np.object_:
|
|
204
193
|
# The `object_` dtype can mean all sorts of things; see if we can infer the Pixeltable type
|
|
pixeltable/share/__init__.py
File without changes
|
|
pixeltable/share/packager.py
ADDED
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import tarfile
|
|
5
|
+
import urllib.parse
|
|
6
|
+
import urllib.request
|
|
7
|
+
import uuid
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Iterator
|
|
10
|
+
|
|
11
|
+
import more_itertools
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pyarrow as pa
|
|
14
|
+
import pyiceberg.catalog
|
|
15
|
+
|
|
16
|
+
import pixeltable as pxt
|
|
17
|
+
import pixeltable.type_system as ts
|
|
18
|
+
from pixeltable import exprs
|
|
19
|
+
from pixeltable.env import Env
|
|
20
|
+
from pixeltable.utils.arrow import PXT_TO_PA_TYPES
|
|
21
|
+
from pixeltable.utils.iceberg import sqlite_catalog
|
|
22
|
+
|
|
23
|
+
_logger = logging.getLogger('pixeltable')
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TablePackager:
|
|
27
|
+
"""
|
|
28
|
+
Packages a pixeltable Table into a tarball containing Iceberg tables and media files. The structure of the tarball
|
|
29
|
+
is as follows:
|
|
30
|
+
|
|
31
|
+
warehouse/catalog.db # sqlite Iceberg catalog
|
|
32
|
+
warehouse/pxt.db/** # Iceberg metadata and data files (parquet/avro/json)
|
|
33
|
+
media/** # Local media files
|
|
34
|
+
|
|
35
|
+
If the table being archived is a view, then the Iceberg catalog will contain separate tables for the view and each
|
|
36
|
+
of its ancestors. All rows will be exported with additional _rowid and _v_min columns. Currently, only the most
|
|
37
|
+
recent version of the table can be exported, and only the full table contents.
|
|
38
|
+
|
|
39
|
+
If the table contains media columns, they are handled as follows:
|
|
40
|
+
- If a media file has an external URL (any URL scheme other than file://), then the URL will be preserved as-is and
|
|
41
|
+
stored in the Iceberg table.
|
|
42
|
+
- If a media file is a local file, then it will be copied into the tarball as a file of the form
|
|
43
|
+
'media/{uuid}{extension}', and the Iceberg table will contain the ephemeral URI 'pxtmedia://{uuid}{extension}'.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
table: pxt.Table # The table to be packaged
|
|
47
|
+
tmp_dir: Path # Temporary directory where the package will reside
|
|
48
|
+
iceberg_catalog: pyiceberg.catalog.Catalog
|
|
49
|
+
media_files: dict[Path, str] # Mapping from local media file paths to their tarball names
|
|
50
|
+
|
|
51
|
+
def __init__(self, table: pxt.Table) -> None:
|
|
52
|
+
self.table = table
|
|
53
|
+
self.tmp_dir = Path(Env.get().create_tmp_path())
|
|
54
|
+
self.media_files = {}
|
|
55
|
+
|
|
56
|
+
def package(self) -> Path:
|
|
57
|
+
"""
|
|
58
|
+
Export the table to a tarball containing Iceberg tables and media files.
|
|
59
|
+
"""
|
|
60
|
+
assert not self.tmp_dir.exists() # Packaging can only be done once per TablePackager instance
|
|
61
|
+
_logger.info(f"Packaging table '{self.table._path}' and its ancestors in: {self.tmp_dir}")
|
|
62
|
+
self.tmp_dir.mkdir()
|
|
63
|
+
self.iceberg_catalog = sqlite_catalog(self.tmp_dir / 'warehouse')
|
|
64
|
+
ancestors = [self.table] + self.table._bases
|
|
65
|
+
for t in ancestors:
|
|
66
|
+
_logger.info(f"Exporting table '{t._path}'.")
|
|
67
|
+
self.__export_table(t)
|
|
68
|
+
_logger.info(f'Building archive.')
|
|
69
|
+
bundle_path = self.__build_tarball()
|
|
70
|
+
_logger.info(f'Packaging complete: {bundle_path}')
|
|
71
|
+
return bundle_path
|
|
72
|
+
|
|
73
|
+
def __export_table(self, t: pxt.Table) -> None:
|
|
74
|
+
"""
|
|
75
|
+
Exports the data from `t` into an Iceberg table.
|
|
76
|
+
"""
|
|
77
|
+
# First generate a select list for the data we want to extract from `t`. This includes:
|
|
78
|
+
# - all stored columns, including computed columns;
|
|
79
|
+
# - errortype and errormsg fields whenever they're defined.
|
|
80
|
+
# We select only those columns that are defined in this table (columns inherited from ancestor tables will be
|
|
81
|
+
# handled separately).
|
|
82
|
+
# For media columns, we substitute `col.fileurl` so that we always get the URL (which may be a file:// URL;
|
|
83
|
+
# these will be specially handled later)
|
|
84
|
+
select_exprs: dict[str, exprs.Expr] = {}
|
|
85
|
+
|
|
86
|
+
# As we generate the select list, we construct a separate list of column types. We can't rely on df._schema
|
|
87
|
+
# to get the column types, since we'll be substituting `fileurl`s for media columns.
|
|
88
|
+
actual_col_types: list[ts.ColumnType] = []
|
|
89
|
+
|
|
90
|
+
for col_name, col in t._tbl_version.cols_by_name.items():
|
|
91
|
+
if not col.is_stored:
|
|
92
|
+
continue
|
|
93
|
+
if col.col_type.is_media_type():
|
|
94
|
+
select_exprs[col_name] = t[col_name].fileurl
|
|
95
|
+
else:
|
|
96
|
+
select_exprs[col_name] = t[col_name]
|
|
97
|
+
actual_col_types.append(col.col_type)
|
|
98
|
+
if col.records_errors:
|
|
99
|
+
select_exprs[f'{col_name}_errortype'] = t[col_name].errortype
|
|
100
|
+
actual_col_types.append(ts.StringType())
|
|
101
|
+
select_exprs[f'{col_name}_errormsg'] = t[col_name].errormsg
|
|
102
|
+
actual_col_types.append(ts.StringType())
|
|
103
|
+
|
|
104
|
+
# Run the select() on `self.table`, not `t`, so that we export only those rows that are actually present in
|
|
105
|
+
# `self.table`.
|
|
106
|
+
df = self.table.select(**select_exprs)
|
|
107
|
+
namespace = self.__iceberg_namespace(t)
|
|
108
|
+
self.iceberg_catalog.create_namespace_if_not_exists(namespace)
|
|
109
|
+
iceberg_schema = self.__to_iceberg_schema(df._schema)
|
|
110
|
+
iceberg_tbl = self.iceberg_catalog.create_table(f'{namespace}.{t._name}', schema=iceberg_schema)
|
|
111
|
+
|
|
112
|
+
# Populate the Iceberg table with data.
|
|
113
|
+
# The data is first loaded from the DataFrame into a sequence of pyarrow tables, batched in order to avoid
|
|
114
|
+
# excessive memory usage. The pyarrow tables are then amalgamated into the (single) Iceberg table on disk.
|
|
115
|
+
for pa_table in self.__to_pa_tables(df, actual_col_types, iceberg_schema):
|
|
116
|
+
iceberg_tbl.append(pa_table)
|
|
117
|
+
|
|
118
|
+
@classmethod
|
|
119
|
+
def __iceberg_namespace(cls, table: pxt.Table) -> str:
|
|
120
|
+
"""
|
|
121
|
+
Iceberg tables must have a namespace, which cannot be the empty string, so we prepend `pxt` to the table path.
|
|
122
|
+
"""
|
|
123
|
+
parent_path = table._parent._path
|
|
124
|
+
if len(parent_path) == 0:
|
|
125
|
+
return 'pxt'
|
|
126
|
+
else:
|
|
127
|
+
return f'pxt.{parent_path}'
|
|
128
|
+
|
|
129
|
+
# The following methods are responsible for schema and data conversion from Pixeltable to Iceberg. Some of this
|
|
130
|
+
# logic might be consolidated into arrow.py and unified with general Parquet export, but there are several
|
|
131
|
+
# major differences:
|
|
132
|
+
# - Iceberg has no array type; we export all arrays as binary blobs
|
|
133
|
+
# - We include _rowid and _v_min columns in the Iceberg table
|
|
134
|
+
# - Media columns are handled specially as indicated above
|
|
135
|
+
|
|
136
|
+
@classmethod
|
|
137
|
+
def __to_iceberg_schema(cls, pxt_schema: dict[str, ts.ColumnType]) -> pa.Schema:
|
|
138
|
+
entries = [(name, cls.__to_iceberg_type(col_type)) for name, col_type in pxt_schema.items()]
|
|
139
|
+
entries.append(('_rowid', pa.list_(pa.int64())))
|
|
140
|
+
entries.append(('_v_min', pa.int64()))
|
|
141
|
+
return pa.schema(entries) # type: ignore[arg-type]
|
|
142
|
+
|
|
143
|
+
@classmethod
|
|
144
|
+
def __to_iceberg_type(cls, col_type: ts.ColumnType) -> pa.DataType:
|
|
145
|
+
if col_type.is_array_type():
|
|
146
|
+
return pa.binary()
|
|
147
|
+
if col_type.is_media_type():
|
|
148
|
+
return pa.string()
|
|
149
|
+
return PXT_TO_PA_TYPES.get(col_type.__class__)
|
|
150
|
+
|
|
151
|
+
def __to_pa_tables(
|
|
152
|
+
self,
|
|
153
|
+
df: pxt.DataFrame,
|
|
154
|
+
actual_col_types: list[pxt.ColumnType],
|
|
155
|
+
arrow_schema: pa.Schema,
|
|
156
|
+
batch_size: int = 1_000,
|
|
157
|
+
) -> Iterator[pa.Table]:
|
|
158
|
+
"""
|
|
159
|
+
Load a DataFrame as a sequence of pyarrow tables. The pyarrow tables are batched into smaller chunks
|
|
160
|
+
to avoid excessive memory usage.
|
|
161
|
+
"""
|
|
162
|
+
for rows in more_itertools.batched(self.__to_pa_rows(df, actual_col_types), batch_size):
|
|
163
|
+
cols = {col_name: [row[idx] for row in rows] for idx, col_name in enumerate(df._schema.keys())}
|
|
164
|
+
cols['_rowid'] = [row[-2] for row in rows]
|
|
165
|
+
cols['_v_min'] = [row[-1] for row in rows]
|
|
166
|
+
yield pa.Table.from_pydict(cols, schema=arrow_schema)
|
|
167
|
+
|
|
168
|
+
def __to_pa_rows(self, df: pxt.DataFrame, actual_col_types: list[pxt.ColumnType]) -> Iterator[list]:
|
|
169
|
+
for row in df._exec():
|
|
170
|
+
vals = [row[e.slot_idx] for e in df._select_list_exprs]
|
|
171
|
+
result = [self.__to_pa_value(val, col_type) for val, col_type in zip(vals, actual_col_types)]
|
|
172
|
+
result.append(row.rowid)
|
|
173
|
+
result.append(row.v_min)
|
|
174
|
+
yield result
|
|
175
|
+
|
|
176
|
+
def __to_pa_value(self, val: Any, col_type: ts.ColumnType) -> Any:
|
|
177
|
+
if val is None:
|
|
178
|
+
return None
|
|
179
|
+
if col_type.is_array_type():
|
|
180
|
+
# Export arrays as binary
|
|
181
|
+
assert isinstance(val, np.ndarray)
|
|
182
|
+
arr = io.BytesIO()
|
|
183
|
+
np.save(arr, val)
|
|
184
|
+
return arr.getvalue()
|
|
185
|
+
if col_type.is_json_type():
|
|
186
|
+
# Export JSON as strings
|
|
187
|
+
return json.dumps(val)
|
|
188
|
+
if col_type.is_media_type():
|
|
189
|
+
# Handle media files as described above
|
|
190
|
+
assert isinstance(val, str) # Media columns are always referenced by `fileurl`
|
|
191
|
+
return self.__process_media_url(val)
|
|
192
|
+
return val
|
|
193
|
+
|
|
194
|
+
def __process_media_url(self, url: str) -> str:
|
|
195
|
+
parsed_url = urllib.parse.urlparse(url)
|
|
196
|
+
if parsed_url.scheme == 'file':
|
|
197
|
+
# It's the URL of a local file. Replace it with a pxtmedia:// URI.
|
|
198
|
+
# (We can't use an actual pxt:// URI, because the eventual pxt:// table name might not be known at this
|
|
199
|
+
# time. The pxtmedia:// URI serves as a relative reference into the tarball that can be replaced with an
|
|
200
|
+
# actual URL when the table is reconstituted.)
|
|
201
|
+
path = Path(urllib.parse.unquote(urllib.request.url2pathname(parsed_url.path)))
|
|
202
|
+
if path not in self.media_files:
|
|
203
|
+
# Create a new entry in the `media_files` dict so that we can copy the file into the tarball later.
|
|
204
|
+
dest_name = f'{uuid.uuid4().hex}{path.suffix}'
|
|
205
|
+
self.media_files[path] = dest_name
|
|
206
|
+
return f'pxtmedia://{self.media_files[path]}'
|
|
207
|
+
# For any type of URL other than a local file, just return the URL as-is.
|
|
208
|
+
return url
|
|
209
|
+
|
|
210
|
+
def __build_tarball(self) -> Path:
|
|
211
|
+
bundle_path = self.tmp_dir / 'bundle.tar.bz2'
|
|
212
|
+
with tarfile.open(bundle_path, 'w:bz2') as tf:
|
|
213
|
+
# Add the Iceberg warehouse dir (including the catalog)
|
|
214
|
+
tf.add(self.tmp_dir / 'warehouse', arcname='warehouse', recursive=True)
|
|
215
|
+
# Add the media files
|
|
216
|
+
for src_file, dest_name in self.media_files.items():
|
|
217
|
+
tf.add(src_file, arcname=f'media/{dest_name}')
|
|
218
|
+
return bundle_path
|
pixeltable/type_system.py
CHANGED
|
@@ -9,9 +9,7 @@ import typing
|
|
|
9
9
|
import urllib.parse
|
|
10
10
|
import urllib.request
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
|
|
13
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
14
|
-
from typing import Any, Iterable, Mapping, Optional, Sequence, Union
|
|
12
|
+
from typing import Any, Iterable, Literal, Mapping, Optional, Sequence, Union
|
|
15
13
|
|
|
16
14
|
import av # type: ignore
|
|
17
15
|
import jsonschema
|
|
@@ -25,6 +23,8 @@ from typing_extensions import _AnnotatedAlias
|
|
|
25
23
|
|
|
26
24
|
import pixeltable.exceptions as excs
|
|
27
25
|
|
|
26
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
27
|
+
|
|
28
28
|
|
|
29
29
|
class ColumnType:
|
|
30
30
|
@enum.unique
|
|
@@ -213,9 +213,9 @@ class ColumnType:
|
|
|
213
213
|
return self.copy(nullable=(self.nullable or other.nullable))
|
|
214
214
|
|
|
215
215
|
if self.is_invalid_type():
|
|
216
|
-
return other
|
|
216
|
+
return other.copy(nullable=(self.nullable or other.nullable))
|
|
217
217
|
if other.is_invalid_type():
|
|
218
|
-
return self
|
|
218
|
+
return self.copy(nullable=(self.nullable or other.nullable))
|
|
219
219
|
|
|
220
220
|
if self.is_scalar_type() and other.is_scalar_type():
|
|
221
221
|
t = self.Type.supertype(self._type, other._type, self.common_supertypes)
|
|
@@ -292,26 +292,24 @@ class ColumnType:
|
|
|
292
292
|
designations will be allowed regardless.
|
|
293
293
|
"""
|
|
294
294
|
origin = typing.get_origin(t)
|
|
295
|
+
type_args = typing.get_args(t)
|
|
295
296
|
if origin is typing.Union:
|
|
296
297
|
# Check if `t` has the form Optional[T].
|
|
297
|
-
|
|
298
|
-
if len(union_args) == 2 and type(None) in union_args:
|
|
298
|
+
if len(type_args) == 2 and type(None) in type_args:
|
|
299
299
|
# `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
|
|
300
300
|
# We treat it as the underlying type but with nullable=True.
|
|
301
|
-
underlying_py_type = union_args[0] if union_args[1] is type(None) else union_args[1]
|
|
301
|
+
underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
|
|
302
302
|
underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
|
|
303
303
|
if underlying is not None:
|
|
304
304
|
return underlying.copy(nullable=True)
|
|
305
305
|
elif origin is Required:
|
|
306
|
-
|
|
307
|
-
assert len(required_args) == 1
|
|
306
|
+
assert len(type_args) == 1
|
|
308
307
|
return cls.from_python_type(
|
|
309
|
-
|
|
310
|
-
)
|
|
308
|
+
type_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
|
|
309
|
+
).copy(nullable=False)
|
|
311
310
|
elif origin is typing.Annotated:
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
parameters = annotated_args[1]
|
|
311
|
+
origin = type_args[0]
|
|
312
|
+
parameters = type_args[1]
|
|
315
313
|
if isinstance(parameters, ColumnType):
|
|
316
314
|
return parameters.copy(nullable=nullable_default)
|
|
317
315
|
else:
|
|
@@ -323,6 +321,11 @@ class ColumnType:
|
|
|
323
321
|
if isinstance(t, type) and issubclass(t, _PxtType):
|
|
324
322
|
return t.as_col_type(nullable=nullable_default)
|
|
325
323
|
elif allow_builtin_types:
|
|
324
|
+
if t is Literal and len(type_args) > 0:
|
|
325
|
+
literal_type = cls.infer_common_literal_type(type_args)
|
|
326
|
+
if literal_type is None:
|
|
327
|
+
return None
|
|
328
|
+
return literal_type.copy(nullable=(literal_type.nullable or nullable_default))
|
|
326
329
|
if t is str:
|
|
327
330
|
return StringType(nullable=nullable_default)
|
|
328
331
|
if t is int:
|
|
@@ -335,7 +338,7 @@ class ColumnType:
|
|
|
335
338
|
return TimestampType(nullable=nullable_default)
|
|
336
339
|
if t is PIL.Image.Image:
|
|
337
340
|
return ImageType(nullable=nullable_default)
|
|
338
|
-
if
|
|
341
|
+
if isinstance(t, type) and issubclass(t, (Sequence, Mapping, pydantic.BaseModel)):
|
|
339
342
|
return JsonType(nullable=nullable_default)
|
|
340
343
|
return None
|
|
341
344
|
|
|
@@ -851,23 +854,39 @@ class ArrayType(ColumnType):
|
|
|
851
854
|
dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
|
|
852
855
|
return cls(shape, dtype, nullable=d['nullable'])
|
|
853
856
|
|
|
857
|
+
@classmethod
|
|
858
|
+
def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
|
|
859
|
+
"""
|
|
860
|
+
Return pixeltable type corresponding to a given simple numpy dtype
|
|
861
|
+
"""
|
|
862
|
+
if np.issubdtype(dtype, np.integer):
|
|
863
|
+
return IntType(nullable=nullable)
|
|
864
|
+
|
|
865
|
+
if np.issubdtype(dtype, np.floating):
|
|
866
|
+
return FloatType(nullable=nullable)
|
|
867
|
+
|
|
868
|
+
if dtype == np.bool_:
|
|
869
|
+
return BoolType(nullable=nullable)
|
|
870
|
+
|
|
871
|
+
if np.issubdtype(dtype, np.str_):
|
|
872
|
+
return StringType(nullable=nullable)
|
|
873
|
+
|
|
874
|
+
if np.issubdtype(dtype, np.character):
|
|
875
|
+
return StringType(nullable=nullable)
|
|
876
|
+
|
|
877
|
+
if np.issubdtype(dtype, np.datetime64):
|
|
878
|
+
return TimestampType(nullable=nullable)
|
|
879
|
+
|
|
880
|
+
return None
|
|
881
|
+
|
|
854
882
|
@classmethod
|
|
855
883
|
def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
|
|
856
884
|
# determine our dtype
|
|
857
885
|
assert isinstance(val, np.ndarray)
|
|
858
|
-
|
|
859
|
-
if
|
|
860
|
-
dtype = IntType()
|
|
861
|
-
elif np.issubdtype(val.dtype, np.floating):
|
|
862
|
-
dtype = FloatType()
|
|
863
|
-
elif val.dtype == np.bool_:
|
|
864
|
-
dtype = BoolType()
|
|
865
|
-
elif np.issubdtype(val.dtype, np.str_):
|
|
866
|
-
# Note that this includes NumPy types like '<U1' -- arrays of single Unicode characters
|
|
867
|
-
dtype = StringType()
|
|
868
|
-
else:
|
|
886
|
+
pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
|
|
887
|
+
if pxttype == None:
|
|
869
888
|
return None
|
|
870
|
-
return cls(val.shape, dtype=dtype, nullable=nullable)
|
|
889
|
+
return cls(val.shape, dtype=pxttype, nullable=nullable)
|
|
871
890
|
|
|
872
891
|
def is_valid_literal(self, val: np.ndarray) -> bool:
|
|
873
892
|
if not isinstance(val, np.ndarray):
|
pixeltable/utils/arrow.py
CHANGED
|
@@ -6,7 +6,7 @@ import pyarrow as pa
|
|
|
6
6
|
|
|
7
7
|
import pixeltable.type_system as ts
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
PA_TO_PXT_TYPES: dict[pa.DataType, ts.ColumnType] = {
|
|
10
10
|
pa.string(): ts.StringType(nullable=True),
|
|
11
11
|
pa.bool_(): ts.BoolType(nullable=True),
|
|
12
12
|
pa.uint8(): ts.IntType(nullable=True),
|
|
@@ -18,7 +18,7 @@ _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
|
|
|
18
18
|
pa.float32(): ts.FloatType(nullable=True),
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
PXT_TO_PA_TYPES: dict[type[ts.ColumnType], pa.DataType] = {
|
|
22
22
|
ts.StringType: pa.string(),
|
|
23
23
|
ts.TimestampType: pa.timestamp('us', tz=datetime.timezone.utc), # postgres timestamp is microseconds
|
|
24
24
|
ts.BoolType: pa.bool_(),
|
|
@@ -38,8 +38,8 @@ def to_pixeltable_type(arrow_type: pa.DataType) -> Optional[ts.ColumnType]:
|
|
|
38
38
|
"""
|
|
39
39
|
if isinstance(arrow_type, pa.TimestampType):
|
|
40
40
|
return ts.TimestampType(nullable=True)
|
|
41
|
-
elif arrow_type in _pa_to_pt:
|
|
42
|
-
return _pa_to_pt[arrow_type]
|
|
41
|
+
elif arrow_type in PA_TO_PXT_TYPES:
|
|
42
|
+
return PA_TO_PXT_TYPES[arrow_type]
|
|
43
43
|
elif isinstance(arrow_type, pa.FixedShapeTensorType):
|
|
44
44
|
dtype = to_pixeltable_type(arrow_type.value_type)
|
|
45
45
|
if dtype is None:
|
|
@@ -53,8 +53,8 @@ def to_arrow_type(pixeltable_type: ts.ColumnType) -> Optional[pa.DataType]:
|
|
|
53
53
|
"""Convert a pixeltable DataType to a pyarrow datatype if one is defined.
|
|
54
54
|
Returns None if no conversion is currently implemented.
|
|
55
55
|
"""
|
|
56
|
-
if pixeltable_type.__class__ in
|
|
57
|
-
return
|
|
56
|
+
if pixeltable_type.__class__ in PXT_TO_PA_TYPES:
|
|
57
|
+
return PXT_TO_PA_TYPES[pixeltable_type.__class__]
|
|
58
58
|
elif isinstance(pixeltable_type, ts.ArrayType):
|
|
59
59
|
return pa.fixed_shape_tensor(pa.from_numpy_dtype(pixeltable_type.numpy_dtype()), pixeltable_type.shape)
|
|
60
60
|
else:
|
|
pixeltable/utils/iceberg.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from pyiceberg.catalog.sql import SqlCatalog
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def sqlite_catalog(warehouse_path: Union[str, Path], name: str = 'pixeltable') -> SqlCatalog:
|
|
8
|
+
"""
|
|
9
|
+
Instantiate a sqlite Iceberg catalog at the specified path. If no catalog exists, one will be created.
|
|
10
|
+
"""
|
|
11
|
+
if isinstance(warehouse_path, str):
|
|
12
|
+
warehouse_path = Path(warehouse_path)
|
|
13
|
+
warehouse_path.mkdir(exist_ok=True)
|
|
14
|
+
return SqlCatalog(name, uri=f'sqlite:///{warehouse_path}/catalog.db', warehouse=f'file://{warehouse_path}')
|
pixeltable/utils/media_store.py
CHANGED
|
@@ -30,7 +30,7 @@ class MediaStore:
|
|
|
30
30
|
the environment's media_dir.
|
|
31
31
|
"""
|
|
32
32
|
id_hex = uuid.uuid4().hex
|
|
33
|
-
parent = Env.get().media_dir / tbl_id.hex / id_hex[
|
|
33
|
+
parent = Env.get().media_dir / tbl_id.hex / id_hex[:2] / id_hex[:4]
|
|
34
34
|
parent.mkdir(parents=True, exist_ok=True)
|
|
35
35
|
return parent / f'{tbl_id.hex}_{col_id}_{version}_{id_hex}{ext or ""}'
|
|
36
36
|
|
|
{pixeltable-0.3.3.dist-info → pixeltable-0.3.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Home-page: https://pixeltable.com/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -39,11 +39,13 @@ Requires-Dist: numpy (>=1.25,<2.0)
|
|
|
39
39
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
40
40
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
41
41
|
Requires-Dist: pillow (>=9.3.0)
|
|
42
|
-
Requires-Dist: pixeltable-pgserver (==0.
|
|
42
|
+
Requires-Dist: pixeltable-pgserver (==0.3.1)
|
|
43
43
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
44
44
|
Requires-Dist: psycopg[binary] (>=3.1.18)
|
|
45
45
|
Requires-Dist: puremagic (>=1.20)
|
|
46
|
+
Requires-Dist: pyarrow (>=13.0.0)
|
|
46
47
|
Requires-Dist: pydantic (>=2.7.4)
|
|
48
|
+
Requires-Dist: pyiceberg (>=0.6.0)
|
|
47
49
|
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
48
50
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
49
51
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
pixeltable/__init__.py,sha256=FeL_ABFaY6QiShtTao1cfhSAwXV_2dkhL_4-qXoHbPE,1616
|
|
2
|
-
pixeltable/__version__.py,sha256=
|
|
2
|
+
pixeltable/__version__.py,sha256=a50-dZlwYU667r1CN3zUS6OONPFGlyZFnAAe8vTD1k8,112
|
|
3
3
|
pixeltable/catalog/__init__.py,sha256=bACh33HpWQed86eV8t9of_ClSXqZx5blZi4y8vJ7-EA,517
|
|
4
4
|
pixeltable/catalog/catalog.py,sha256=LFaOtHoGJM306jDlyyQRqCaPR6K4nrN-jPu3_vyZNvc,8267
|
|
5
5
|
pixeltable/catalog/column.py,sha256=9Rm4DCP-uUCl3P44uTsD89P63jxmvv9emD2Rc7Bw_us,9684
|
|
@@ -14,13 +14,13 @@ pixeltable/catalog/table.py,sha256=qfTI7obvSanFt96-jbjSXU9PyninU3_B9K4pnaxlJdM,6
|
|
|
14
14
|
pixeltable/catalog/table_version.py,sha256=rWBtgnIepVgq5tZ4vb9RzAL5peHnze5ZMOr-7gqMpog,60354
|
|
15
15
|
pixeltable/catalog/table_version_path.py,sha256=yDU_KXriAckJqKPfKYhLVDig7glUc--_Fda9X7ekfGo,5810
|
|
16
16
|
pixeltable/catalog/view.py,sha256=cTL1jBYHa3RweODoD-y_I9NjAntqJPSofP4BJdSWaBA,11226
|
|
17
|
-
pixeltable/dataframe.py,sha256=
|
|
18
|
-
pixeltable/env.py,sha256=
|
|
17
|
+
pixeltable/dataframe.py,sha256=9eMkOUKYpcml6y_Nsj9nTY_UHaDyzo1GT1c6IfzWfXo,49177
|
|
18
|
+
pixeltable/env.py,sha256=1IN2Tju45H-ADNhMfVRDOQ11udBxo4L_euZ6gQKiRC8,35860
|
|
19
19
|
pixeltable/exceptions.py,sha256=NuFY2WtkQpLfLHT_J70kOw9Tr0kEDkkgo-u7As4Gaq4,410
|
|
20
20
|
pixeltable/exec/__init__.py,sha256=Qi0s2BEM8O8MPdYGQAIzclv2GNFsoCPJFvA6s5Tjc_o,489
|
|
21
21
|
pixeltable/exec/aggregation_node.py,sha256=KR7OLQOfAL4KTF6_vKSuJvFC2ntwWf0NJxhQ9i340-4,4072
|
|
22
22
|
pixeltable/exec/cache_prefetch_node.py,sha256=fwO-xUQfSOMWQMbrJplFXvjcKjLVjPz93O0HttSD3A8,12211
|
|
23
|
-
pixeltable/exec/component_iteration_node.py,sha256=
|
|
23
|
+
pixeltable/exec/component_iteration_node.py,sha256=b3tyspAuYLYHlb7BvAWqDpMGJojSeqtP-l8x72OGjvA,4678
|
|
24
24
|
pixeltable/exec/data_row_batch.py,sha256=E0SVjyOBc237DopT0TwqK7JzcgFTEpE3xOS9K0-WFh8,3407
|
|
25
25
|
pixeltable/exec/exec_context.py,sha256=l7GWAbt57H9VEksrDCeocmlc-MgUp8w_nDdAau8Cfqw,1115
|
|
26
26
|
pixeltable/exec/exec_node.py,sha256=RbMJLDy7jwphNCEphSL0w50Dy1lrpjtEEugzyL6pqlA,4006
|
|
@@ -41,7 +41,7 @@ pixeltable/exprs/column_ref.py,sha256=MBWrNwnbRe0Hswu0q_Arerm9JoQs_0pNSsCYVxXONx
|
|
|
41
41
|
pixeltable/exprs/comparison.py,sha256=5Bw6fEvVq-ynt3ciGLCouse7ZWFGPA-egsEkgUjUvsc,5132
|
|
42
42
|
pixeltable/exprs/compound_predicate.py,sha256=ZN_JL97OZfTwdfgXF2t57EGyTYrpsBHaduZWRuBAekk,3832
|
|
43
43
|
pixeltable/exprs/data_row.py,sha256=4lEyTxTw95v3ERuG9mFUBla8FfhPueoZyltcpTsWLK0,10577
|
|
44
|
-
pixeltable/exprs/expr.py,sha256=
|
|
44
|
+
pixeltable/exprs/expr.py,sha256=r7eS6-7RCHemYBv_Ap1U9IKcZHpVqAghpxHcCpuk6uY,32463
|
|
45
45
|
pixeltable/exprs/expr_dict.py,sha256=wf82K-aCPHZcM2A-VbE_0p5OzQFfVsI65uzMLp4Uwu4,1589
|
|
46
46
|
pixeltable/exprs/expr_set.py,sha256=kkcG9df8fQOblNIKz2xciw9qfu2CnTWb4qwJKYVTUx8,2578
|
|
47
47
|
pixeltable/exprs/function_call.py,sha256=3zjWl_vAKHpClR61-wpNNfPWYp5ccHO8CXD3Dts2bcs,28123
|
|
@@ -88,7 +88,7 @@ pixeltable/functions/llama_cpp.py,sha256=1nVXgU5ymuNblVNqRQv3iAEvlYpqzDZPAjYnAOH
|
|
|
88
88
|
pixeltable/functions/math.py,sha256=WPoH9zD9_GdwvBs-FSC3Sqb70gOPNouhPcBZABsuLwI,1541
|
|
89
89
|
pixeltable/functions/mistralai.py,sha256=H2onsnW1R_SaFN5SI_JWO0A5lJdlsnKxmtIu2m19cEg,6212
|
|
90
90
|
pixeltable/functions/ollama.py,sha256=Et0l7XEMaNLxDwy3qTblljomjCkOQroY1Z7a-Ajmshk,4218
|
|
91
|
-
pixeltable/functions/openai.py,sha256=
|
|
91
|
+
pixeltable/functions/openai.py,sha256=Oc_WApfR8M_-EgUEwV1BBuQwkmhunLUGqUVl5CWDTnA,29083
|
|
92
92
|
pixeltable/functions/replicate.py,sha256=BQ5iaFJnw5MioL3X08DQiH41xQ_Pi2H5DDEasux9-fE,2454
|
|
93
93
|
pixeltable/functions/string.py,sha256=1vFlbqKVm2n6jdh23BIA_8MBJJiNyxbQoFs5tJPgpy4,20433
|
|
94
94
|
pixeltable/functions/timestamp.py,sha256=KKOw7l1hErYp8QQfFiWVTf7QowZszOyHJu-OJDKaXSg,9114
|
|
@@ -108,7 +108,7 @@ pixeltable/io/fiftyone.py,sha256=nviYiqDOGS5Os374Tl5knGNXpjFlgqcKnSPsBzz60vU,685
|
|
|
108
108
|
pixeltable/io/globals.py,sha256=0X0sLpVrqPlgNna_vQX4KcBuerdUojZDTyTaX2sKV4I,17838
|
|
109
109
|
pixeltable/io/hf_datasets.py,sha256=DV_bHB-LOQB8YC9FK1KYTEgaBPFelk31fYpq8h72eEE,8321
|
|
110
110
|
pixeltable/io/label_studio.py,sha256=Dlq-2iVBadDnU0xOn3duLbpBJxiegY04XkWsmqQTXwk,31242
|
|
111
|
-
pixeltable/io/pandas.py,sha256=
|
|
111
|
+
pixeltable/io/pandas.py,sha256=eKoo0tTPnKJUGOIc8VUV1gamsoeOPO6pOtXJyEV_W84,9594
|
|
112
112
|
pixeltable/io/parquet.py,sha256=2i3YAQd-ZifxJv4JUU5Ysh7p6SemozBncd989bSl_qw,8745
|
|
113
113
|
pixeltable/iterators/__init__.py,sha256=r5NYNF7qsepOPJnywG5N7jTz3Z1ubrbSzD19JK97cCM,431
|
|
114
114
|
pixeltable/iterators/audio.py,sha256=UfWAzUAq33bqN5R7-kFK4LN2VUukhgZhAsnoHuOm2CU,9092
|
|
@@ -139,10 +139,12 @@ pixeltable/metadata/notes.py,sha256=2gQ0fAdAWOKxvzZ5DVBdmTk62q_KFGRFmv0tzi7tklE,
|
|
|
139
139
|
pixeltable/metadata/schema.py,sha256=kv-PIMfG_NysET1k71iwIkBVlK5HwdnotXUvFeLaxaY,9470
|
|
140
140
|
pixeltable/plan.py,sha256=ZTXpt10Rexvfm3_68CLQzUAS7YubZjbUJLbAN-RZDps,42385
|
|
141
141
|
pixeltable/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
142
|
+
pixeltable/share/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
+
pixeltable/share/packager.py,sha256=QcMRI5qihNzO9Wcku-KpA8N7jUCkygrJUyyHB5XAGAA,10233
|
|
142
144
|
pixeltable/store.py,sha256=uQKW9A3RWVVuP6bnQx22jhs5_WxQKx3rV0sGpdoBUzY,22555
|
|
143
|
-
pixeltable/type_system.py,sha256=
|
|
145
|
+
pixeltable/type_system.py,sha256=c1kVcnX2Siu_V4DDn6DVF7nnDSNzFlDFw583WnWsUIc,50927
|
|
144
146
|
pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
|
|
145
|
-
pixeltable/utils/arrow.py,sha256=
|
|
147
|
+
pixeltable/utils/arrow.py,sha256=EVFTHXt1r1b-rbvgG-TOjvl6GiAtm1hH-86A449cKTw,3901
|
|
146
148
|
pixeltable/utils/coco.py,sha256=dl-IYO4VgfFly4-TvvF9Rw9XK2yY6HGTuL7LcyQk_RA,7290
|
|
147
149
|
pixeltable/utils/code.py,sha256=SbG5OUF_fQAbOgGZHDuENijmbzisVqa4VS9guaZ0KtU,1231
|
|
148
150
|
pixeltable/utils/console_output.py,sha256=GJ1oJWanP8_an343CEB35rtc1kcVW1FQtT3vRT4SZPs,1148
|
|
@@ -151,13 +153,14 @@ pixeltable/utils/documents.py,sha256=APFujdYq1qe2Do4KAUI0te35jh4925geR9UB8GeFQ1w
|
|
|
151
153
|
pixeltable/utils/filecache.py,sha256=sYofh-6TwkQbwe8X64eUt27itSJ8o5rY10HYZJShbbI,10703
|
|
152
154
|
pixeltable/utils/formatter.py,sha256=5E_gDg11ClFI-5SthwkiqyE3hAok3JHDj4OSK9cJklM,9257
|
|
153
155
|
pixeltable/utils/http_server.py,sha256=zsESVjtG1P6hrz-d2N1m6_BChqPt8N3f-EO9sJbWnLs,2388
|
|
154
|
-
pixeltable/utils/
|
|
156
|
+
pixeltable/utils/iceberg.py,sha256=L_s9G9NMIGMQdRHtNkks6ntTVW4DKKAw97R9gRmtw5s,553
|
|
157
|
+
pixeltable/utils/media_store.py,sha256=kSQ6YwQPRQzOhhCChS2hYmY9HxXX1fRq_M_FgkfsYU8,3091
|
|
155
158
|
pixeltable/utils/pytorch.py,sha256=8lJT1SyP9jTMN7uLtrj9T_rGPEYRID44rWXbjBhRUrU,3422
|
|
156
159
|
pixeltable/utils/s3.py,sha256=pxip2MlCqd2Qon2dzJXzfxvwtZyc-BAsjAnLL4J_OXY,587
|
|
157
160
|
pixeltable/utils/sql.py,sha256=JX_fNI_SJWVUcXif5ho5qVhfJKFupOCFLLrHCMcbzLk,796
|
|
158
161
|
pixeltable/utils/transactional_directory.py,sha256=4Q8UTylEyw-aZa-NVjfjGR9_JHRJTGQH1k1LNFaZukY,1349
|
|
159
|
-
pixeltable-0.3.
|
|
160
|
-
pixeltable-0.3.
|
|
161
|
-
pixeltable-0.3.
|
|
162
|
-
pixeltable-0.3.
|
|
163
|
-
pixeltable-0.3.
|
|
162
|
+
pixeltable-0.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
163
|
+
pixeltable-0.3.4.dist-info/METADATA,sha256=nM9QtJyu9ljdyn9ktpCuNLf9uaReun1Lo83BG9zR9Z4,19428
|
|
164
|
+
pixeltable-0.3.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
165
|
+
pixeltable-0.3.4.dist-info/entry_points.txt,sha256=ToOd-pRgG7AitEBgYoBCRRB4-KVDQ0pj_9T4a1LgwA4,97
|
|
166
|
+
pixeltable-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|