pixeltable 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (48) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/insertable_table.py +2 -2
  4. pixeltable/catalog/schema_object.py +28 -2
  5. pixeltable/catalog/table.py +68 -30
  6. pixeltable/catalog/table_version.py +14 -43
  7. pixeltable/catalog/view.py +2 -2
  8. pixeltable/dataframe.py +8 -7
  9. pixeltable/exec/expr_eval_node.py +8 -1
  10. pixeltable/exec/sql_scan_node.py +1 -1
  11. pixeltable/exprs/__init__.py +0 -1
  12. pixeltable/exprs/column_ref.py +2 -7
  13. pixeltable/exprs/comparison.py +5 -5
  14. pixeltable/exprs/compound_predicate.py +12 -12
  15. pixeltable/exprs/expr.py +32 -0
  16. pixeltable/exprs/in_predicate.py +3 -3
  17. pixeltable/exprs/is_null.py +5 -5
  18. pixeltable/exprs/similarity_expr.py +27 -16
  19. pixeltable/func/aggregate_function.py +10 -4
  20. pixeltable/func/callable_function.py +4 -0
  21. pixeltable/func/function_registry.py +2 -0
  22. pixeltable/functions/globals.py +36 -1
  23. pixeltable/functions/huggingface.py +62 -4
  24. pixeltable/functions/image.py +17 -0
  25. pixeltable/functions/openai.py +1 -1
  26. pixeltable/functions/string.py +622 -7
  27. pixeltable/functions/video.py +26 -8
  28. pixeltable/globals.py +54 -50
  29. pixeltable/index/embedding_index.py +28 -27
  30. pixeltable/io/external_store.py +2 -2
  31. pixeltable/io/globals.py +54 -5
  32. pixeltable/io/label_studio.py +45 -5
  33. pixeltable/io/pandas.py +18 -7
  34. pixeltable/metadata/__init__.py +1 -1
  35. pixeltable/metadata/converters/convert_17.py +26 -0
  36. pixeltable/plan.py +6 -6
  37. pixeltable/tool/create_test_db_dump.py +2 -2
  38. pixeltable/tool/doc_plugins/griffe.py +77 -0
  39. pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
  40. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
  41. pixeltable/utils/s3.py +1 -1
  42. pixeltable-0.2.13.dist-info/METADATA +206 -0
  43. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/RECORD +46 -42
  44. pixeltable-0.2.13.dist-info/entry_points.txt +3 -0
  45. pixeltable/exprs/predicate.py +0 -44
  46. pixeltable-0.2.11.dist-info/METADATA +0 -137
  47. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/LICENSE +0 -0
  48. {pixeltable-0.2.11.dist-info → pixeltable-0.2.13.dist-info}/WHEEL +0 -0
pixeltable/plan.py CHANGED
@@ -40,7 +40,7 @@ class Analyzer:
40
40
 
41
41
  def __init__(
42
42
  self, tbl: catalog.TableVersionPath, select_list: List[exprs.Expr],
43
- where_clause: Optional[exprs.Predicate] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
43
+ where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
44
44
  order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None):
45
45
  if group_by_clause is None:
46
46
  group_by_clause = []
@@ -58,7 +58,7 @@ class Analyzer:
58
58
  # Where clause of the Select stmt of the SQL scan
59
59
  self.sql_where_clause: Optional[exprs.Expr] = None
60
60
  # filter predicate applied to output rows of the SQL scan
61
- self.filter: Optional[exprs.Predicate] = None
61
+ self.filter: Optional[exprs.Expr] = None
62
62
  # not executable
63
63
  #self.similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None
64
64
  if where_clause is not None:
@@ -183,7 +183,7 @@ class Planner:
183
183
  # TODO: create an exec.CountNode and change this to create_count_plan()
184
184
  @classmethod
185
185
  def create_count_stmt(
186
- cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Predicate] = None
186
+ cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None
187
187
  ) -> sql.Select:
188
188
  stmt = sql.select(sql.func.count('*'))
189
189
  refd_tbl_ids: Set[UUID] = set()
@@ -239,7 +239,7 @@ class Planner:
239
239
  cls, tbl: catalog.TableVersionPath,
240
240
  update_targets: dict[catalog.Column, exprs.Expr],
241
241
  recompute_targets: List[catalog.Column],
242
- where_clause: Optional[exprs.Predicate], cascade: bool
242
+ where_clause: Optional[exprs.Expr], cascade: bool
243
243
  ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
244
244
  """Creates a plan to materialize updated rows.
245
245
  The plan:
@@ -505,7 +505,7 @@ class Planner:
505
505
  @classmethod
506
506
  def create_query_plan(
507
507
  cls, tbl: catalog.TableVersionPath, select_list: Optional[List[exprs.Expr]] = None,
508
- where_clause: Optional[exprs.Predicate] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
508
+ where_clause: Optional[exprs.Expr] = None, group_by_clause: Optional[List[exprs.Expr]] = None,
509
509
  order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, limit: Optional[int] = None,
510
510
  with_pk: bool = False, ignore_errors: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
511
511
  ) -> exec.ExecNode:
@@ -597,7 +597,7 @@ class Planner:
597
597
  return plan
598
598
 
599
599
  @classmethod
600
- def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Predicate) -> Analyzer:
600
+ def analyze(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr) -> Analyzer:
601
601
  return Analyzer(tbl, [], where_clause=where_clause)
602
602
 
603
603
  @classmethod
@@ -208,7 +208,7 @@ class Dumper:
208
208
  add_column('not', ~(t.c2 > 20))
209
209
 
210
210
  # function_call
211
- add_column('function_call', pxt.functions.string.str_format('{0} {key}', t.c1, key=t.c1)) # library function
211
+ add_column('function_call', pxt.functions.string.format('{0} {key}', t.c1, key=t.c1)) # library function
212
212
  add_column('test_udf', test_udf_stored(t.c2)) # stored udf
213
213
  add_column('test_udf_batched', test_udf_stored_batched(t.c1, upper=False)) # batched stored udf
214
214
  if include_expensive_functions:
@@ -253,7 +253,7 @@ class Dumper:
253
253
  add_column('c6_to_string', t.c6.apply(json.dumps))
254
254
  add_column('c6_back_to_json', t[f'{col_prefix}_c6_to_string'].apply(json.loads))
255
255
 
256
- t.add_embedding_index(f'{col_prefix}_function_call', text_embed=embed_udf.clip_text_embed)
256
+ t.add_embedding_index(f'{col_prefix}_function_call', string_embed=embed_udf.clip_text_embed)
257
257
 
258
258
  # query()
259
259
  @t.query
@@ -0,0 +1,77 @@
1
+ import ast
2
+ from typing import Optional, Union
3
+
4
+ import griffe
5
+ import griffe.expressions
6
+ from griffe import Extension, Object, ObjectNode
7
+
8
+ import pixeltable as pxt
9
+
10
+ logger = griffe.get_logger(__name__)
11
+
12
+ class PxtGriffeExtension(Extension):
13
+ """Implementation of a Pixeltable custom griffe extension."""
14
+
15
+ def on_instance(self, node: Union[ast.AST, ObjectNode], obj: Object) -> None:
16
+ if obj.docstring is None:
17
+ # Skip over entities without a docstring
18
+ return
19
+
20
+ if isinstance(obj, griffe.Function):
21
+ # See if the (Python) function has a @pxt.udf decorator
22
+ if any(
23
+ isinstance(dec.value, griffe.expressions.Expr) and dec.value.canonical_path in ['pixeltable.func.udf', 'pixeltable.udf']
24
+ for dec in obj.decorators
25
+ ):
26
+ # Update the template
27
+ self.__modify_pxt_udf(obj)
28
+
29
+ def __modify_pxt_udf(self, func: griffe.Function) -> None:
30
+ """
31
+ Instructs the doc snippet for `func` to use the custom Pixeltable UDF jinja template, and
32
+ converts all type hints to Pixeltable column type references, in accordance with the @udf
33
+ decorator behavior.
34
+ """
35
+ func.extra['mkdocstrings']['template'] = 'udf.html.jinja'
36
+ # Dynamically load the UDF reference so we can inspect the Pixeltable signature directly
37
+ udf = griffe.dynamic_import(func.path)
38
+ assert isinstance(udf, pxt.Function)
39
+ # Convert the return type to a Pixeltable type reference
40
+ func.returns = self.__column_type_to_display_str(udf.signature.get_return_type())
41
+ # Convert the parameter types to Pixeltable type references
42
+ for griffe_param in func.parameters:
43
+ assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
44
+ if griffe_param.name not in udf.signature.parameters:
45
+ logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
46
+ continue
47
+ pxt_param = udf.signature.parameters[griffe_param.name]
48
+ griffe_param.annotation = self.__column_type_to_display_str(pxt_param.col_type)
49
+
50
+ def __column_type_to_display_str(self, column_type: Optional[pxt.ColumnType]) -> str:
51
+ # TODO: When we enhance the Pixeltable type system, we may want to refactor some of this logic out.
52
+ # I'm putting it here for now though.
53
+ if column_type is None:
54
+ return 'None'
55
+ if column_type.is_string_type():
56
+ base = 'str'
57
+ elif column_type.is_int_type():
58
+ base = 'int'
59
+ elif column_type.is_float_type():
60
+ base = 'float'
61
+ elif column_type.is_bool_type():
62
+ base = 'bool'
63
+ elif column_type.is_array_type():
64
+ base = 'ArrayT'
65
+ elif column_type.is_json_type():
66
+ base = 'JsonT'
67
+ elif column_type.is_image_type():
68
+ base = 'ImageT'
69
+ elif column_type.is_video_type():
70
+ base = 'VideoT'
71
+ elif column_type.is_audio_type():
72
+ base = 'AudioT'
73
+ elif column_type.is_document_type():
74
+ base = 'DocumentT'
75
+ else:
76
+ assert False
77
+ return f'Optional[{base}]' if column_type.nullable else base
@@ -0,0 +1,6 @@
1
+ from pathlib import Path
2
+
3
+
4
+ def get_templates_path() -> Path:
5
+ """Implementation of the 'mkdocstrings.python.templates' plugin for custom jinja templates."""
6
+ return Path(__file__).parent / "templates"
@@ -0,0 +1,135 @@
1
+ {#- Template for Pixeltable UDFs. Cargo-culted (with modification) from _base/function.html.jinja. -#}
2
+
3
+ {% block logs scoped %}
4
+ {#- Logging block.
5
+
6
+ This block can be used to log debug messages, deprecation messages, warnings, etc.
7
+ -#}
8
+ {{ log.debug("Rendering " + function.path) }}
9
+ {% endblock logs %}
10
+
11
+ {% import "language"|get_template as lang with context %}
12
+ {#- Language module providing the `t` translation method. -#}
13
+
14
+ <div class="doc doc-object doc-function">
15
+ {% with obj = function, html_id = function.path %}
16
+
17
+ {% if root %}
18
+ {% set show_full_path = config.show_root_full_path %}
19
+ {% set root_members = True %}
20
+ {% elif root_members %}
21
+ {% set show_full_path = config.show_root_members_full_path or config.show_object_full_path %}
22
+ {% set root_members = False %}
23
+ {% else %}
24
+ {% set show_full_path = config.show_object_full_path %}
25
+ {% endif %}
26
+
27
+ {% set function_name = function.path if show_full_path else function.name %}
28
+ {#- Brief or full function name depending on configuration. -#}
29
+ {% set symbol_type = "udf" %}
30
+ {#- Symbol type: always "udf" in this template, since it is only used for Pixeltable UDFs. -#}
31
+
32
+ {% if not root or config.show_root_heading %}
33
+ {% filter heading(
34
+ heading_level,
35
+ role="function",
36
+ id=html_id,
37
+ class="doc doc-heading",
38
+ toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
39
+ ) %}
40
+
41
+ {% block heading scoped %}
42
+ {#- Heading block.
43
+
44
+ This block renders the heading for the function.
45
+ -#}
46
+ {% if config.show_symbol_type_heading %}<code class="doc-symbol doc-symbol-heading doc-symbol-{{ symbol_type }}"></code>{% endif %}
47
+ {% if config.separate_signature %}
48
+ <span class="doc doc-object-name doc-function-name">{{ function_name }}</span>
49
+ {% else %}
50
+ {%+ filter highlight(language="python", inline=True) %}
51
+ {{ function_name }}{% include "signature"|get_template with context %}
52
+ {% endfilter %}
53
+ {% endif %}
54
+ {% endblock heading %}
55
+
56
+ {% block labels scoped %}
57
+ {#- Labels block.
58
+
59
+ This block renders the labels for the function.
60
+ -#}
61
+ {% with labels = function.labels %}
62
+ {% include "labels"|get_template with context %}
63
+ {% endwith %}
64
+ {% endblock labels %}
65
+
66
+ {% endfilter %}
67
+
68
+ {% block signature scoped %}
69
+ {#- Signature block.
70
+
71
+ This block renders the signature for the function.
72
+ -#}
73
+ {% if config.separate_signature %}
74
+ {% filter format_signature(function, config.line_length, crossrefs=config.signature_crossrefs) %}
75
+ {{ function.name }}
76
+ {% endfilter %}
77
+ {% endif %}
78
+ {% endblock signature %}
79
+
80
+ {% else %}
81
+
82
+ {% if config.show_root_toc_entry %}
83
+ {% filter heading(
84
+ heading_level,
85
+ role="function",
86
+ id=html_id,
87
+ toc_label=(('<code class="doc-symbol doc-symbol-toc doc-symbol-' + symbol_type + '"></code>&nbsp;')|safe if config.show_symbol_type_toc else '') + function.name,
88
+ hidden=True,
89
+ ) %}
90
+ {% endfilter %}
91
+ {% endif %}
92
+ {% set heading_level = heading_level - 1 %}
93
+ {% endif %}
94
+
95
+ <div class="doc doc-contents {% if root %}first{% endif %}">
96
+ {% block contents scoped %}
97
+ {#- Contents block.
98
+
99
+ This block renders the contents of the function.
100
+ It contains other blocks that users can override.
101
+ Overriding the contents block allows to rearrange the order of the blocks.
102
+ -#}
103
+ {% block docstring scoped %}
104
+ {#- Docstring block.
105
+
106
+ This block renders the docstring for the function.
107
+ -#}
108
+ {% with docstring_sections = function.docstring.parsed %}
109
+ {% include "docstring"|get_template with context %}
110
+ {% endwith %}
111
+ {% endblock docstring %}
112
+
113
+ {% block source scoped %}
114
+ {#- Source block.
115
+
116
+ This block renders the source code for the function.
117
+ -#}
118
+ {% if config.show_source and function.source %}
119
+ <details class="quote">
120
+ <summary>{{ lang.t("Source code in") }} <code>
121
+ {%- if function.relative_filepath.is_absolute() -%}
122
+ {{ function.relative_package_filepath }}
123
+ {%- else -%}
124
+ {{ function.relative_filepath }}
125
+ {%- endif -%}
126
+ </code></summary>
127
+ {{ function.source|highlight(language="python", linestart=function.lineno, linenums=True) }}
128
+ </details>
129
+ {% endif %}
130
+ {% endblock source %}
131
+ {% endblock contents %}
132
+ </div>
133
+
134
+ {% endwith %}
135
+ </div>
pixeltable/utils/s3.py CHANGED
@@ -10,4 +10,4 @@ def get_client() -> Any:
10
10
  except AttributeError:
11
11
  # No credentials available, use unsigned mode
12
12
  config = botocore.config.Config(signature_version=botocore.UNSIGNED)
13
- return boto3.client('s3', config=config)
13
+ return boto3.client('s3', config=config)
@@ -0,0 +1,206 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.2.13
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Pixeltable, Inc.
6
+ Author-email: contact@pixeltable.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: more-itertools (>=10.2,<11.0)
21
+ Requires-Dist: numpy (>=1.25)
22
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
+ Requires-Dist: pandas (>=2.0,<3.0)
24
+ Requires-Dist: pgserver (==0.1.4)
25
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
+ Requires-Dist: pillow (>=9.3.0)
27
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: setuptools (==69.1.1)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
45
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
46
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
+
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
50
+ </div>
51
+
52
+ Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
+
54
+ ### What problems does Pixeltable solve?
55
+
56
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
57
+
58
+ ## 💾 Installation
59
+
60
+ ```bash
61
+ pip install pixeltable
62
+ ```
63
+ > [!IMPORTANT]
64
+ > Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
65
+
66
+ ## 💡 Getting Started
67
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
68
+
69
+ | Topic | Notebook | Topic | Notebook |
70
+ |:----------|:-----------------|:-------------------------|:---------------------------------:|
71
+ | 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
72
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
73
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
74
+ | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
75
+
76
+ ## 🧱 Code Samples
77
+
78
+ ### Import media data into Pixeltable (videos, images, audio...)
79
+ ```python
80
+ import pixeltable as pxt
81
+
82
+ v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
83
+
84
+ prefix = 's3://multimedia-commons/'
85
+ paths = [
86
+ 'data/videos/mp4/ffe/ffb/ffeffbef41bbc269810b2a1a888de.mp4',
87
+ 'data/videos/mp4/ffe/feb/ffefebb41485539f964760e6115fbc44.mp4',
88
+ 'data/videos/mp4/ffe/f73/ffef7384d698b5f70d411c696247169.mp4'
89
+ ]
90
+ v.insert({'video': prefix + p} for p in paths)
91
+ ```
92
+ Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
93
+
94
+ ### Add an object detection model to your workflow
95
+ ```python
96
+ table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
97
+ ```
98
+ Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
99
+
100
+ ### Extend Pixeltable's capabilities with user-defined functions
101
+ ```python
102
+ @pxt.udf
103
+ def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image:
104
+ result = img.copy() # Create a copy of `img`
105
+ d = PIL.ImageDraw.Draw(result)
106
+ for box in boxes:
107
+ d.rectangle(box, width=3) # Draw bounding box rectangles on the copied image
108
+ return result
109
+ ```
110
+ Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
111
+
112
+ ### Automate data operations with views
113
+ ```python
114
+ # In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
115
+ chunks_table = pxt.create_view(
116
+ 'rag_demo.chunks',
117
+ documents_table,
118
+ iterator=DocumentSplitter.create(
119
+ document=documents_table.document,
120
+ separators='token_limit', limit=300)
121
+ )
122
+ ```
123
+ Learn how to leverage views to build your [RAG workflow](https://pixeltable.readme.io/docs/document-indexing-and-rag).
124
+
125
+ ### Evaluate model performance
126
+ ```python
127
+ # The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
128
+ frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
129
+ ```
130
+ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
131
+
132
+ ### Working with inference services
133
+ ```python
134
+ chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
135
+
136
+ # The chat-completions API expects JSON-formatted input:
137
+ messages = [{'role': 'user', 'content': chat_table.input}]
138
+
139
+ # This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
140
+ chat_table['output'] = chat_completions(
141
+ messages=messages,
142
+ model='mistralai/Mixtral-8x7B-Instruct-v0.1',
143
+ max_tokens=300,
144
+ stop=['\n'],
145
+ temperature=0.7,
146
+ top_p=0.9,
147
+ top_k=40,
148
+ repetition_penalty=1.1,
149
+ logprobs=1,
150
+ echo=True
151
+ )
152
+ chat_table['response'] = chat_table.output.choices[0].message.content
153
+
154
+ # Start a conversation
155
+ chat_table.insert([
156
+ {'input': 'How many species of felids have been classified?'},
157
+ {'input': 'Can you make me a coffee?'}
158
+ ])
159
+ chat_table.select(chat_table.input, chat_table.response).head()
160
+ ```
161
+ Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
162
+
163
+ ## ❓ FAQ
164
+
165
+ ### What is Pixeltable?
166
+
167
+ Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
168
+
169
+ ### What does Pixeltable provide me with? Pixeltable provides:
170
+
171
+ - Data storage and versioning
172
+ - Combined Data and Model Lineage
173
+ - Indexing (e.g. embedding vectors) and Data Retrieval
174
+ - Orchestration of multimodal workloads
175
+ - Incremental updates
176
+ - Code is automatically production-ready
177
+
178
+ ### Why should you use Pixeltable?
179
+
180
+ - **It gives you transparency and reproducibility**
181
+ - All generated data is automatically recorded and versioned
182
+ - You will never need to re-run a workload because you lost track of the input data
183
+ - **It saves you money**
184
+ - All data changes are automatically incremental
185
+ - You never need to re-run pipelines from scratch because you’re adding data
186
+ - **It integrates with any existing Python code or libraries**
187
+ - Bring your ever-changing code and workloads
188
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
189
+
190
+ ### What is Pixeltable not providing?
191
+
192
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
193
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
194
+
195
+ > [!TIP]
196
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
197
+
198
+ ## 🐛 Contributions & Feedback
199
+
200
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
201
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
202
+
203
+ ## 🏛️ License
204
+
205
+ This library is licensed under the Apache 2.0 License.
206
+