pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/insertable_table.py +2 -2
- pixeltable/catalog/schema_object.py +28 -2
- pixeltable/catalog/table.py +68 -30
- pixeltable/catalog/table_version.py +14 -43
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +8 -7
- pixeltable/exec/expr_eval_node.py +8 -1
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/__init__.py +0 -1
- pixeltable/exprs/column_ref.py +2 -7
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +12 -12
- pixeltable/exprs/expr.py +32 -0
- pixeltable/exprs/in_predicate.py +3 -3
- pixeltable/exprs/is_null.py +5 -5
- pixeltable/exprs/similarity_expr.py +27 -16
- pixeltable/func/aggregate_function.py +10 -4
- pixeltable/func/callable_function.py +4 -0
- pixeltable/func/function_registry.py +2 -0
- pixeltable/functions/globals.py +36 -1
- pixeltable/functions/huggingface.py +62 -4
- pixeltable/functions/image.py +17 -0
- pixeltable/functions/openai.py +1 -1
- pixeltable/functions/string.py +622 -7
- pixeltable/functions/video.py +26 -8
- pixeltable/globals.py +54 -50
- pixeltable/index/embedding_index.py +28 -27
- pixeltable/io/external_store.py +2 -2
- pixeltable/io/globals.py +54 -5
- pixeltable/io/label_studio.py +45 -5
- pixeltable/io/pandas.py +18 -7
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_17.py +26 -0
- pixeltable/plan.py +6 -6
- pixeltable/tool/create_test_db_dump.py +2 -2
- pixeltable/tool/doc_plugins/griffe.py +77 -0
- pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- pixeltable/utils/s3.py +1 -1
- pixeltable-0.2.13.dist-info/METADATA +206 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
- pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
- pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.11.dist-info/METADATA +0 -137
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/plan.py
CHANGED
|
@@ -40,7 +40,7 @@ class Analyzer:
|
|
|
40
40
|
|
|
41
41
|
def __init__(
|
|
42
42
|
self, tbl: catalog.TableVersionPath, select_list: List[exprs.Expr],
|
|
43
|
-
where_clause: Optional[exprs.
|
|
43
|
+
where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
|
|
44
44
|
order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
|
|
45
45
|
if group_by_clause is None:
|
|
46
46
|
group_by_clause = []
|
|
@@ -58,7 +58,7 @@ class Analyzer:
|
|
|
58
58
|
# Where clause of the Select stmt of the SQL scan
|
|
59
59
|
self.sql_where_clause: Optional[exprs.Expr] = None
|
|
60
60
|
# filter predicate applied to output rows of the SQL scan
|
|
61
|
-
self.filter: Optional[exprs.
|
|
61
|
+
self.filter: Optional[exprs.Expr] = None
|
|
62
62
|
# not executable
|
|
63
63
|
#self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
|
|
64
64
|
if where_clause is not None:
|
|
@@ -183,7 +183,7 @@ class Planner:
|
|
|
183
183
|
# TODO: create an exec.CountNode and change this to create_count_plan()
|
|
184
184
|
@classmethod
|
|
185
185
|
def create_count_stmt(
|
|
186
|
-
cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.
|
|
186
|
+
cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
|
|
187
187
|
) -> sql.Select:
|
|
188
188
|
stmt = sql.select(sql.func.count('*'))
|
|
189
189
|
refd_tbl_ids: Set[UUID] = set()
|
|
@@ -239,7 +239,7 @@ class Planner:
|
|
|
239
239
|
cls, tbl: catalog.TableVersionPath,
|
|
240
240
|
update_targets: dict[catalog.Column, exprs.Expr],
|
|
241
241
|
recompute_targets: List[catalog.Column],
|
|
242
|
-
where_clause: Optional[exprs.
|
|
242
|
+
where_clause: Optional[exprs.Expr], cascade: bool
|
|
243
243
|
) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
|
|
244
244
|
"""Creates a plan to materialize updated rows.
|
|
245
245
|
The plan:
|
|
@@ -505,7 +505,7 @@ class Planner:
|
|
|
505
505
|
@classmethod
|
|
506
506
|
def create_query_plan(
|
|
507
507
|
cls, tbl: catalog.TableVersionPath, select_list: Optional[List[exprs.Expr]] = None,
|
|
508
|
-
where_clause: Optional[exprs.
|
|
508
|
+
where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
|
|
509
509
|
order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
|
|
510
510
|
with_pk: bool = False, ignore_errors: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
|
|
511
511
|
) -> exec.ExecNode:
|
|
@@ -597,7 +597,7 @@ class Planner:
|
|
|
597
597
|
return plan
|
|
598
598
|
|
|
599
599
|
@classmethod
|
|
600
|
-
def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.
|
|
600
|
+
def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr) -> Analyzer:
|
|
601
601
|
return Analyzer(tbl, [], where_clause=where_clause)
|
|
602
602
|
|
|
603
603
|
@classmethod
|
|
@@ -208,7 +208,7 @@ class Dumper:
|
|
|
208
208
|
add_column('not', ~(t.c2 > 20))
|
|
209
209
|
|
|
210
210
|
# function_call
|
|
211
|
-
add_column('function_call', pxt.functions.string.
|
|
211
|
+
add_column('function_call', pxt.functions.string.format('{0} {key}', t.c1, key=t.c1)) # library function
|
|
212
212
|
add_column('test_udf', test_udf_stored(t.c2)) # stored udf
|
|
213
213
|
add_column('test_udf_batched', test_udf_stored_batched(t.c1, upper=False)) # batched stored udf
|
|
214
214
|
if include_expensive_functions:
|
|
@@ -253,7 +253,7 @@ class Dumper:
|
|
|
253
253
|
add_column('c6_to_string', t.c6.apply(json.dumps))
|
|
254
254
|
add_column('c6_back_to_json', t[f'{col_prefix}_c6_to_string'].apply(json.loads))
|
|
255
255
|
|
|
256
|
-
t.add_embedding_index(f'{col_prefix}_function_call',
|
|
256
|
+
t.add_embedding_index(f'{col_prefix}_function_call', string_embed=embed_udf.clip_text_embed)
|
|
257
257
|
|
|
258
258
|
# query()
|
|
259
259
|
@t.query
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
from typing import Optional, Union
|
|
3
|
+
|
|
4
|
+
import griffe
|
|
5
|
+
import griffe.expressions
|
|
6
|
+
from griffe import Extension, Object, ObjectNode
|
|
7
|
+
|
|
8
|
+
import pixeltable as pxt
|
|
9
|
+
|
|
10
|
+
logger = griffe.get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
class PxtGriffeExtension(Extension):
    """Implementation of a Pixeltable custom griffe extension.

    For every documented function that carries a `@pxt.udf` decorator, the extension switches
    the doc snippet to the `udf.html.jinja` template and replaces the Python type hints with
    display strings derived from the UDF's Pixeltable signature.
    """

    # Canonical decorator paths that identify a Python function as a Pixeltable UDF.
    _UDF_DECORATOR_PATHS = ('pixeltable.func.udf', 'pixeltable.udf')

    # Ordered (ColumnType predicate method, display name) pairs; the first predicate that
    # returns True determines the display name.
    _TYPE_DISPLAY_NAMES = (
        ('is_string_type', 'str'),
        ('is_int_type', 'int'),
        ('is_float_type', 'float'),
        ('is_bool_type', 'bool'),
        ('is_array_type', 'ArrayT'),
        ('is_json_type', 'JsonT'),
        ('is_image_type', 'ImageT'),
        ('is_video_type', 'VideoT'),
        ('is_audio_type', 'AudioT'),
        ('is_document_type', 'DocumentT'),
    )

    def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
        """Rewrite the doc entry of `obj` if it is a documented function decorated with `@pxt.udf`.

        Args:
            node: The AST or object node that `obj` was created from (unused).
            obj: The griffe object to inspect and, if applicable, modify in place.
        """
        if obj.docstring is None:
            # Skip over entities without a docstring
            return

        if not isinstance(obj, griffe.Function):
            return

        # See if the (Python) function has a @pxt.udf decorator
        is_udf = any(
            isinstance(dec.value, griffe.expressions.Expr)
            and dec.value.canonical_path in self._UDF_DECORATOR_PATHS
            for dec in obj.decorators
        )
        if is_udf:
            # Update the template
            self.__modify_pxt_udf(obj)

    def __modify_pxt_udf(self, func: griffe.Function) -> None:
        """
        Instructs the doc snippet for `func` to use the custom Pixeltable UDF jinja template, and
        converts all type hints to Pixeltable column type references, in accordance with the @udf
        decorator behavior.

        Args:
            func: The griffe function object to modify in place.
        """
        func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
        # Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
        udf = griffe.dynamic_import(func.path)
        assert isinstance(udf, pxt.Function)
        # Convert the return type to a Pixeltable type reference
        func.returns = self.__column_type_to_display_str(udf.signature.get_return_type())
        # Convert the parameter types to Pixeltable type references
        for griffe_param in func.parameters:
            assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
            if griffe_param.name not in udf.signature.parameters:
                # Leave the original annotation untouched when the UDF signature has no such parameter
                logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
                continue
            pxt_param = udf.signature.parameters[griffe_param.name]
            griffe_param.annotation = self.__column_type_to_display_str(pxt_param.col_type)

    def __column_type_to_display_str(self, column_type: Optional[pxt.ColumnType]) -> str:
        """Return the display string for `column_type`.

        Args:
            column_type: The Pixeltable column type to render, or None.

        Returns:
            'None' if `column_type` is None; otherwise the display name of the type, wrapped in
            `Optional[...]` when the type is nullable.

        Raises:
            AssertionError: If `column_type` matches none of the known type predicates.
        """
        # TODO: When we enhance the Pixeltable type system, we may want to refactor some of this logic out.
        #     I'm putting it here for now though.
        if column_type is None:
            return 'None'
        for predicate, display_name in self._TYPE_DISPLAY_NAMES:
            if getattr(column_type, predicate)():
                base = display_name
                break
        else:
            # Raised explicitly (rather than `assert False`) so that the failure is still reported
            # when assertions are stripped with `python -O`.
            raise AssertionError(f'Unhandled column type: {column_type!r}')
        return f'Optional[{base}]' if column_type.nullable else base
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
{#- Template for Pixeltable UDFs. Cargo-culted (with modification) from _base/function.html.jinja. -#}
|
|
2
|
+
|
|
3
|
+
{% block logs scoped %}
|
|
4
|
+
{#- Logging block.
|
|
5
|
+
|
|
6
|
+
This block can be used to log debug messages, deprecation messages, warnings, etc.
|
|
7
|
+
-#}
|
|
8
|
+
{{ log.debug("Rendering " + function.path) }}
|
|
9
|
+
{% endblock logs %}
|
|
10
|
+
|
|
11
|
+
{% import "language"|get_template as lang with context %}
|
|
12
|
+
{#- Language module providing the `t` translation method. -#}
|
|
13
|
+
|
|
14
|
+
<div class="doc doc-object doc-function">
|
|
15
|
+
{% with obj = function, html_id = function.path %}
|
|
16
|
+
|
|
17
|
+
{% if root %}
|
|
18
|
+
{% set show_full_path = config.show_root_full_path %}
|
|
19
|
+
{% set root_members = True %}
|
|
20
|
+
{% elif root_members %}
|
|
21
|
+
{% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %}
|
|
22
|
+
{% set root_members = False %}
|
|
23
|
+
{% else %}
|
|
24
|
+
{% set show_full_path = config.show_object_full_path %}
|
|
25
|
+
{% endif %}
|
|
26
|
+
|
|
27
|
+
{% set function_name = function.path if show_full_path else function.name %}
|
|
28
|
+
{#- Brief or full function name depending on configuration. -#}
|
|
29
|
+
{% set symbol_type = "udf" %}
|
|
30
|
+
{#- Symbol type: always "udf", since this template only renders Pixeltable UDFs. -#}
|
|
31
|
+
|
|
32
|
+
{% if not root or config.show_root_heading %}
|
|
33
|
+
{% filter heading(
|
|
34
|
+
heading_level,
|
|
35
|
+
role="function",
|
|
36
|
+
id=html_id,
|
|
37
|
+
class="doc doc-heading",
|
|
38
|
+
toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code> ')|safe if config.show_symbol_type_toc else '') + function.name,
|
|
39
|
+
) %}
|
|
40
|
+
|
|
41
|
+
{% block heading scoped %}
|
|
42
|
+
{#- Heading block.
|
|
43
|
+
|
|
44
|
+
This block renders the heading for the function.
|
|
45
|
+
-#}
|
|
46
|
+
{% if config.show_symbol_type_heading %}<code class="doc-symbol doc-symbol-heading doc-symbol-{{ symbol_type }}"></code>{% endif %}
|
|
47
|
+
{% if config.separate_signature %}
|
|
48
|
+
<span class="doc doc-object-name doc-function-name">{{ function_name }}</span>
|
|
49
|
+
{% else %}
|
|
50
|
+
{%+ filter highlight(language="python", inline=True) %}
|
|
51
|
+
{{ function_name }}{% include "signature"|get_template with context %}
|
|
52
|
+
{% endfilter %}
|
|
53
|
+
{% endif %}
|
|
54
|
+
{% endblock heading %}
|
|
55
|
+
|
|
56
|
+
{% block labels scoped %}
|
|
57
|
+
{#- Labels block.
|
|
58
|
+
|
|
59
|
+
This block renders the labels for the function.
|
|
60
|
+
-#}
|
|
61
|
+
{% with labels = function.labels %}
|
|
62
|
+
{% include "labels"|get_template with context %}
|
|
63
|
+
{% endwith %}
|
|
64
|
+
{% endblock labels %}
|
|
65
|
+
|
|
66
|
+
{% endfilter %}
|
|
67
|
+
|
|
68
|
+
{% block signature scoped %}
|
|
69
|
+
{#- Signature block.
|
|
70
|
+
|
|
71
|
+
This block renders the signature for the function.
|
|
72
|
+
-#}
|
|
73
|
+
{% if config.separate_signature %}
|
|
74
|
+
{% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
|
|
75
|
+
{{ function.name }}
|
|
76
|
+
{% endfilter %}
|
|
77
|
+
{% endif %}
|
|
78
|
+
{% endblock signature %}
|
|
79
|
+
|
|
80
|
+
{% else %}
|
|
81
|
+
|
|
82
|
+
{% if config.show_root_toc_entry %}
|
|
83
|
+
{% filter heading(
|
|
84
|
+
heading_level,
|
|
85
|
+
role="function",
|
|
86
|
+
id=html_id,
|
|
87
|
+
toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code> ')|safe if config.show_symbol_type_toc else '') + function.name,
|
|
88
|
+
hidden=True,
|
|
89
|
+
) %}
|
|
90
|
+
{% endfilter %}
|
|
91
|
+
{% endif %}
|
|
92
|
+
{% set heading_level = heading_level - 1 %}
|
|
93
|
+
{% endif %}
|
|
94
|
+
|
|
95
|
+
<div class="doc doc-contents {% if root %}first{% endif %}">
|
|
96
|
+
{% block contents scoped %}
|
|
97
|
+
{#- Contents block.
|
|
98
|
+
|
|
99
|
+
This block renders the contents of the function.
|
|
100
|
+
It contains other blocks that users can override.
|
|
101
|
+
Overriding the contents block allows to rearrange the order of the blocks.
|
|
102
|
+
-#}
|
|
103
|
+
{% block docstring scoped %}
|
|
104
|
+
{#- Docstring block.
|
|
105
|
+
|
|
106
|
+
This block renders the docstring for the function.
|
|
107
|
+
-#}
|
|
108
|
+
{% with docstring_sections = function.docstring.parsed %}
|
|
109
|
+
{% include "docstring"|get_template with context %}
|
|
110
|
+
{% endwith %}
|
|
111
|
+
{% endblock docstring %}
|
|
112
|
+
|
|
113
|
+
{% block source scoped %}
|
|
114
|
+
{#- Source block.
|
|
115
|
+
|
|
116
|
+
This block renders the source code for the function.
|
|
117
|
+
-#}
|
|
118
|
+
{% if config.show_source and function.source %}
|
|
119
|
+
<details class="quote">
|
|
120
|
+
<summary>{{ lang.t("Source code in") }} <code>
|
|
121
|
+
{%- if function.relative_filepath.is_absolute() -%}
|
|
122
|
+
{{ function.relative_package_filepath }}
|
|
123
|
+
{%- else -%}
|
|
124
|
+
{{ function.relative_filepath }}
|
|
125
|
+
{%- endif -%}
|
|
126
|
+
</code></summary>
|
|
127
|
+
{{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }}
|
|
128
|
+
</details>
|
|
129
|
+
{% endif %}
|
|
130
|
+
{% endblock source %}
|
|
131
|
+
{% endblock contents %}
|
|
132
|
+
</div>
|
|
133
|
+
|
|
134
|
+
{% endwith %}
|
|
135
|
+
</div>
|
pixeltable/utils/s3.py
CHANGED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pixeltable
|
|
3
|
+
Version: 0.2.13
|
|
4
|
+
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
|
+
Author: Pixeltable, Inc.
|
|
6
|
+
Author-email: contact@pixeltable.com
|
|
7
|
+
Requires-Python: >=3.9,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: av (>=10.0.0)
|
|
14
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
15
|
+
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
16
|
+
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
17
|
+
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
18
|
+
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
19
|
+
Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
|
20
|
+
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
21
|
+
Requires-Dist: numpy (>=1.25)
|
|
22
|
+
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
|
+
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
+
Requires-Dist: pgserver (==0.1.4)
|
|
25
|
+
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
|
+
Requires-Dist: pillow (>=9.3.0)
|
|
27
|
+
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
|
+
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
29
|
+
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
30
|
+
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
31
|
+
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
32
|
+
Requires-Dist: setuptools (==69.1.1)
|
|
33
|
+
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
34
|
+
Requires-Dist: tenacity (>=8.2,<9.0)
|
|
35
|
+
Requires-Dist: tqdm (>=4.64)
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<div align="center">
|
|
39
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
40
|
+
|
|
41
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
42
|
+
|
|
43
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
44
|
+

|
|
45
|
+
[]()
|
|
46
|
+
[](https://github.com/pixeltable/pixeltable/actions)
|
|
47
|
+
[](https://pypi.org/project/pixeltable/)
|
|
48
|
+
|
|
49
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
53
|
+
|
|
54
|
+
### What problems does Pixeltable solve?
|
|
55
|
+
|
|
56
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
|
|
57
|
+
|
|
58
|
+
## 💾 Installation
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install pixeltable
|
|
62
|
+
```
|
|
63
|
+
> [!IMPORTANT]
|
|
64
|
+
> Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
|
|
65
|
+
|
|
66
|
+
## 💡 Getting Started
|
|
67
|
+
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
|
|
68
|
+
|
|
69
|
+
| Topic | Notebook | Topic | Notebook |
|
|
70
|
+
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
71
|
+
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
72
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
73
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
74
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
75
|
+
|
|
76
|
+
## 🧱 Code Samples
|
|
77
|
+
|
|
78
|
+
### Import media data into Pixeltable (videos, images, audio...)
|
|
79
|
+
```python
|
|
80
|
+
import pixeltable as pxt
|
|
81
|
+
|
|
82
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
|
|
83
|
+
|
|
84
|
+
prefix = 's3://multimedia-commons/'
|
|
85
|
+
paths = [
|
|
86
|
+
'data/videos/mp4/ffe/ffb/ffeffbef41bbc269810b2a1a888de.mp4',
|
|
87
|
+
'data/videos/mp4/ffe/feb/ffefebb41485539f964760e6115fbc44.mp4',
|
|
88
|
+
'data/videos/mp4/ffe/f73/ffef7384d698b5f70d411c696247169.mp4'
|
|
89
|
+
]
|
|
90
|
+
v.insert({'video': prefix + p} for p in paths)
|
|
91
|
+
```
|
|
92
|
+
Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
|
|
93
|
+
|
|
94
|
+
### Add an object detection model to your workflow
|
|
95
|
+
```python
|
|
96
|
+
table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
|
|
97
|
+
```
|
|
98
|
+
Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
99
|
+
|
|
100
|
+
### Extend Pixeltable's capabilities with user-defined functions
|
|
101
|
+
```python
|
|
102
|
+
@pxt.udf
|
|
103
|
+
def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image:
|
|
104
|
+
result = img.copy() # Create a copy of `img`
|
|
105
|
+
d = PIL.ImageDraw.Draw(result)
|
|
106
|
+
for box in boxes:
|
|
107
|
+
d.rectangle(box, width=3) # Draw bounding box rectangles on the copied image
|
|
108
|
+
return result
|
|
109
|
+
```
|
|
110
|
+
Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
|
|
111
|
+
|
|
112
|
+
### Automate data operations with views
|
|
113
|
+
```python
|
|
114
|
+
# In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
|
|
115
|
+
chunks_table = pxt.create_view(
|
|
116
|
+
'rag_demo.chunks',
|
|
117
|
+
documents_table,
|
|
118
|
+
iterator=DocumentSplitter.create(
|
|
119
|
+
document=documents_table.document,
|
|
120
|
+
separators='token_limit', limit=300)
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
Learn how to leverage views to build your [RAG workflow](https://pixeltable.readme.io/docs/document-indexing-and-rag).
|
|
124
|
+
|
|
125
|
+
### Evaluate model performance
|
|
126
|
+
```python
|
|
127
|
+
# The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
|
|
128
|
+
frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
|
|
129
|
+
```
|
|
130
|
+
Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
131
|
+
|
|
132
|
+
### Working with inference services
|
|
133
|
+
```python
|
|
134
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
|
|
135
|
+
|
|
136
|
+
# The chat-completions API expects JSON-formatted input:
|
|
137
|
+
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
138
|
+
|
|
139
|
+
# This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
|
|
140
|
+
chat_table['output'] = chat_completions(
|
|
141
|
+
messages=messages,
|
|
142
|
+
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
143
|
+
max_tokens=300,
|
|
144
|
+
stop=['\n'],
|
|
145
|
+
temperature=0.7,
|
|
146
|
+
top_p=0.9,
|
|
147
|
+
top_k=40,
|
|
148
|
+
repetition_penalty=1.1,
|
|
149
|
+
logprobs=1,
|
|
150
|
+
echo=True
|
|
151
|
+
)
|
|
152
|
+
chat_table['response'] = chat_table.output.choices[0].message.content
|
|
153
|
+
|
|
154
|
+
# Start a conversation
|
|
155
|
+
chat_table.insert([
|
|
156
|
+
{'input': 'How many species of felids have been classified?'},
|
|
157
|
+
{'input': 'Can you make me a coffee?'}
|
|
158
|
+
])
|
|
159
|
+
chat_table.select(chat_table.input, chat_table.response).head()
|
|
160
|
+
```
|
|
161
|
+
Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
|
|
162
|
+
|
|
163
|
+
## ❓ FAQ
|
|
164
|
+
|
|
165
|
+
### What is Pixeltable?
|
|
166
|
+
|
|
167
|
+
Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
|
|
168
|
+
|
|
169
|
+
### What does Pixeltable provide me with? Pixeltable provides:
|
|
170
|
+
|
|
171
|
+
- Data storage and versioning
|
|
172
|
+
- Combined Data and Model Lineage
|
|
173
|
+
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
174
|
+
- Orchestration of multimodal workloads
|
|
175
|
+
- Incremental updates
|
|
176
|
+
- Code is automatically production-ready
|
|
177
|
+
|
|
178
|
+
### Why should you use Pixeltable?
|
|
179
|
+
|
|
180
|
+
- **It gives you transparency and reproducibility**
|
|
181
|
+
- All generated data is automatically recorded and versioned
|
|
182
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
183
|
+
- **It saves you money**
|
|
184
|
+
- All data changes are automatically incremental
|
|
185
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
186
|
+
- **It integrates with any existing Python code or libraries**
|
|
187
|
+
- Bring your ever-changing code and workloads
|
|
188
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
189
|
+
|
|
190
|
+
### What is Pixeltable not providing?
|
|
191
|
+
|
|
192
|
+
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|
|
193
|
+
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
|
|
194
|
+
|
|
195
|
+
> [!TIP]
|
|
196
|
+
> Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
|
|
197
|
+
|
|
198
|
+
## 🐛 Contributions & Feedback
|
|
199
|
+
|
|
200
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
201
|
+
<br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
202
|
+
|
|
203
|
+
## 🏛️ License
|
|
204
|
+
|
|
205
|
+
This library is licensed under the Apache 2.0 License.
|
|
206
|
+
|