pixeltable 0.2.17__tar.gz → 0.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (148) hide show
  1. {pixeltable-0.2.17 → pixeltable-0.2.18}/PKG-INFO +79 -21
  2. {pixeltable-0.2.17 → pixeltable-0.2.18}/README.md +76 -17
  3. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/__version__.py +2 -2
  4. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/catalog.py +8 -7
  5. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/column.py +11 -8
  6. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/insertable_table.py +1 -1
  7. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/path_dict.py +8 -6
  8. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/table.py +20 -13
  9. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/table_version.py +91 -54
  10. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/table_version_path.py +7 -9
  11. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/view.py +2 -1
  12. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/dataframe.py +1 -1
  13. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/env.py +173 -83
  14. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/aggregation_node.py +2 -1
  15. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/component_iteration_node.py +1 -1
  16. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/sql_node.py +11 -8
  17. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/__init__.py +1 -0
  18. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/arithmetic_expr.py +4 -4
  19. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/array_slice.py +2 -1
  20. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/column_property_ref.py +9 -7
  21. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/column_ref.py +2 -1
  22. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/comparison.py +10 -7
  23. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/compound_predicate.py +3 -2
  24. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/data_row.py +19 -4
  25. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/expr.py +46 -35
  26. pixeltable-0.2.18/pixeltable/exprs/expr_set.py +62 -0
  27. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/function_call.py +56 -32
  28. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/in_predicate.py +3 -2
  29. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/inline_array.py +2 -1
  30. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/inline_dict.py +2 -1
  31. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/is_null.py +3 -2
  32. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/json_mapper.py +5 -4
  33. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/json_path.py +7 -1
  34. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/literal.py +34 -7
  35. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/method_ref.py +3 -3
  36. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/object_ref.py +6 -5
  37. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/row_builder.py +25 -17
  38. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/rowid_ref.py +2 -1
  39. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/similarity_expr.py +2 -1
  40. pixeltable-0.2.18/pixeltable/exprs/sql_element_cache.py +30 -0
  41. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/type_cast.py +3 -3
  42. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/variable.py +2 -1
  43. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/ext/functions/whisperx.py +4 -4
  44. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/ext/functions/yolox.py +6 -6
  45. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/aggregate_function.py +1 -0
  46. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/function.py +28 -4
  47. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/__init__.py +4 -2
  48. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/anthropic.py +15 -5
  49. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/fireworks.py +1 -1
  50. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/globals.py +6 -1
  51. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/huggingface.py +2 -2
  52. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/image.py +17 -2
  53. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/json.py +5 -5
  54. pixeltable-0.2.18/pixeltable/functions/mistralai.py +188 -0
  55. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/openai.py +6 -10
  56. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/string.py +3 -2
  57. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/timestamp.py +95 -7
  58. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/together.py +4 -4
  59. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/video.py +2 -2
  60. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/vision.py +27 -17
  61. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/whisper.py +1 -1
  62. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/hf_datasets.py +17 -15
  63. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/pandas.py +0 -2
  64. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/parquet.py +15 -14
  65. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/iterators/document.py +16 -15
  66. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/__init__.py +1 -1
  67. pixeltable-0.2.18/pixeltable/metadata/converters/convert_19.py +46 -0
  68. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/notes.py +1 -0
  69. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/schema.py +5 -4
  70. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/plan.py +100 -78
  71. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/store.py +5 -1
  72. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/create_test_db_dump.py +4 -3
  73. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/type_system.py +12 -14
  74. pixeltable-0.2.18/pixeltable/utils/documents.py +72 -0
  75. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/formatter.py +2 -2
  76. {pixeltable-0.2.17 → pixeltable-0.2.18}/pyproject.toml +10 -10
  77. pixeltable-0.2.17/pixeltable/exprs/expr_set.py +0 -39
  78. pixeltable-0.2.17/pixeltable/utils/documents.py +0 -69
  79. {pixeltable-0.2.17 → pixeltable-0.2.18}/LICENSE +0 -0
  80. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/__init__.py +0 -0
  81. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/__init__.py +0 -0
  82. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/dir.py +0 -0
  83. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/globals.py +0 -0
  84. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/named_function.py +0 -0
  85. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/path.py +0 -0
  86. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/catalog/schema_object.py +0 -0
  87. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exceptions.py +0 -0
  88. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/__init__.py +0 -0
  89. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/cache_prefetch_node.py +0 -0
  90. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/data_row_batch.py +0 -0
  91. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/exec_context.py +0 -0
  92. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/exec_node.py +0 -0
  93. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/expr_eval_node.py +0 -0
  94. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/in_memory_data_node.py +0 -0
  95. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/media_validation_node.py +0 -0
  96. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exec/row_update_node.py +0 -0
  97. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/exprs/globals.py +0 -0
  98. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/ext/__init__.py +0 -0
  99. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/ext/functions/__init__.py +0 -0
  100. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/__init__.py +0 -0
  101. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/callable_function.py +0 -0
  102. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/expr_template_function.py +0 -0
  103. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/function_registry.py +0 -0
  104. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/globals.py +0 -0
  105. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/query_template_function.py +0 -0
  106. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/signature.py +0 -0
  107. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/func/udf.py +0 -0
  108. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/audio.py +0 -0
  109. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/functions/util.py +0 -0
  110. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/globals.py +0 -0
  111. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/index/__init__.py +0 -0
  112. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/index/base.py +0 -0
  113. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/index/btree.py +0 -0
  114. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/index/embedding_index.py +0 -0
  115. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/__init__.py +0 -0
  116. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/external_store.py +0 -0
  117. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/globals.py +0 -0
  118. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/io/label_studio.py +0 -0
  119. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/iterators/__init__.py +0 -0
  120. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/iterators/base.py +0 -0
  121. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/iterators/string.py +0 -0
  122. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/iterators/video.py +0 -0
  123. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_10.py +0 -0
  124. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_12.py +0 -0
  125. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_13.py +0 -0
  126. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_14.py +0 -0
  127. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_15.py +0 -0
  128. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_16.py +0 -0
  129. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_17.py +0 -0
  130. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/convert_18.py +0 -0
  131. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/metadata/converters/util.py +0 -0
  132. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/create_test_video.py +0 -0
  133. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/doc_plugins/griffe.py +0 -0
  134. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/doc_plugins/mkdocstrings.py +0 -0
  135. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -0
  136. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/tool/embed_udf.py +0 -0
  137. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/__init__.py +0 -0
  138. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/arrow.py +0 -0
  139. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/coco.py +0 -0
  140. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/code.py +0 -0
  141. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/filecache.py +0 -0
  142. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/help.py +0 -0
  143. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/http_server.py +0 -0
  144. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/media_store.py +0 -0
  145. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/pytorch.py +0 -0
  146. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/s3.py +0 -0
  147. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/sql.py +0 -0
  148. {pixeltable-0.2.17 → pixeltable-0.2.18}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Pixeltable, Inc.
6
6
  Author-email: contact@pixeltable.com
@@ -16,7 +16,6 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
16
  Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
17
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
18
  Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
- Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
19
  Requires-Dist: more-itertools (>=10.2,<11.0)
21
20
  Requires-Dist: numpy (>=1.25)
22
21
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
@@ -26,19 +25,18 @@ Requires-Dist: pillow (>=9.3.0)
26
25
  Requires-Dist: pixeltable-pgserver (==0.2.7)
27
26
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
27
  Requires-Dist: psycopg[binary] (==3.1.18)
28
+ Requires-Dist: puremagic (>=1.20)
29
29
  Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
30
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
31
  Requires-Dist: requests (>=2.31.0,<3.0.0)
32
- Requires-Dist: setuptools (==69.1.1)
33
- Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
32
+ Requires-Dist: sqlalchemy (>=2.0.23,<3.0.0)
34
33
  Requires-Dist: tenacity (>=8.2,<9.0)
35
34
  Requires-Dist: tqdm (>=4.64)
36
35
  Description-Content-Type: text/markdown
37
36
 
38
37
  <div align="center">
39
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
-
41
- # Unifying Data, Models, and Orchestration for AI Products
38
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
39
+ <br></br>
42
40
 
43
41
  [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
42
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
@@ -49,22 +47,17 @@ Description-Content-Type: text/markdown
49
47
  [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/release/docs/release/tutorials)
50
48
  </div>
51
49
 
52
- Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
-
54
- ### What problems does Pixeltable solve?
55
-
56
- Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
50
+ Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
57
51
 
58
52
  ## 💾 Installation
59
53
 
60
54
  ```python
61
55
  pip install pixeltable
62
56
  ```
63
- > [!IMPORTANT]
64
- > Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
57
+ **Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database.**
65
58
 
66
59
  ## 💡 Getting Started
67
- Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
60
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations.
68
61
 
69
62
  | Topic | Notebook | Topic | Notebook |
70
63
  |:----------|:-----------------|:-------------------------|:---------------------------------:|
@@ -91,9 +84,32 @@ v.insert({'video': prefix + p} for p in paths)
91
84
  ```
92
85
  Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
93
86
 
94
- ### Add an object detection model to your workflow
87
+ ### Object detection in images using the DETR model
95
88
  ```python
96
- table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
89
+ import pixeltable as pxt
90
+ from pixeltable.functions import huggingface
91
+
92
+ # Create a table to store data persistently
93
+ t = pxt.create_table('image', {'image': pxt.ImageType()})
94
+
95
+ # Insert some images
96
+ prefix = 'https://upload.wikimedia.org/wikipedia/commons'
97
+ paths = [
98
+ '/1/15/Cat_August_2010-4.jpg',
99
+ '/e/e1/Example_of_a_Dog.jpg',
100
+ '/thumb/b/bf/Bird_Diversity_2013.png/300px-Bird_Diversity_2013.png'
101
+ ]
102
+ t.insert({'image': prefix + p} for p in paths)
103
+
104
+ # Add a computed column for image classification
105
+ t['classification'] = huggingface.detr_for_object_detection(
106
+ (t.image), model_id='facebook/detr-resnet-50'
107
+ )
108
+
109
+ # Retrieve the rows where cats have been identified
110
+ t.select(animal = t.image,
111
+ classification = t.classification.label_text[0]) \
112
+ .where(t.classification.label_text[0]=='cat').head()
97
113
  ```
98
114
  Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
99
115
 
@@ -109,9 +125,9 @@ def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Imag
109
125
  ```
110
126
  Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
111
127
 
112
- ### Automate data operations with views
128
+ ### Automate data operations with views, e.g., split documents into chunks
113
129
  ```python
114
- # In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
130
+ # In this example, the view is defined by iteration over the chunks of a DocumentSplitter
115
131
  chunks_table = pxt.create_view(
116
132
  'rag_demo.chunks',
117
133
  documents_table,
@@ -124,7 +140,7 @@ Learn how to leverage views to build your [RAG workflow](https://pixeltable.read
124
140
 
125
141
  ### Evaluate model performance
126
142
  ```python
127
- # The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
143
+ # The computation of the mAP metric can become a query over the evaluation output
128
144
  frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
129
145
  ```
130
146
  Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
@@ -136,7 +152,7 @@ chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
136
152
  # The chat-completions API expects JSON-formatted input:
137
153
  messages = [{'role': 'user', 'content': chat_table.input}]
138
154
 
139
- # This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
155
+ # This example shows how additional parameters from the Together API can be used in Pixeltable
140
156
  chat_table['output'] = chat_completions(
141
157
  messages=messages,
142
158
  model='mistralai/Mixtral-8x7B-Instruct-v0.1',
@@ -160,12 +176,54 @@ chat_table.select(chat_table.input, chat_table.response).head()
160
176
  ```
161
177
  Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
162
178
 
179
+ ### Text and image similarity search on video frames with embedding indexes
180
+ ```python
181
+ import pixeltable as pxt
182
+ from pixeltable.functions.huggingface import clip_image, clip_text
183
+ from pixeltable.iterators import FrameIterator
184
+ import PIL.Image
185
+
186
+ video_table = pxt.create_table('videos', {'video': pxt.VideoType()})
187
+
188
+ video_table.insert([{'video': '/video.mp4'}])
189
+
190
+ frames_view = pxt.create_view(
191
+ 'frames', video_table, iterator=FrameIterator.create(video=video_table.video))
192
+
193
+ @pxt.expr_udf
194
+ def embed_image(img: PIL.Image.Image):
195
+ return clip_image(img, model_id='openai/clip-vit-base-patch32')
196
+
197
+ @pxt.expr_udf
198
+ def str_embed(s: str):
199
+ return clip_text(s, model_id='openai/clip-vit-base-patch32')
200
+
201
+ # Create an index on the 'frame' column that allows text and image search
202
+ frames_view.add_embedding_index('frame', string_embed=str_embed, image_embed=embed_image)
203
+
204
+ # Now we will retrieve images based on a sample image
205
+ sample_image = '/image.jpeg'
206
+ sim = frames_view.frame.similarity(sample_image)
207
+ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
208
+
209
+ # Now we will retrieve images based on a string
210
+ sample_text = 'red truck'
211
+ sim = frames_view.frame.similarity(sample_text)
212
+ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
213
+
214
+ ```
215
+ Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
216
+
163
217
  ## ❓ FAQ
164
218
 
165
219
  ### What is Pixeltable?
166
220
 
167
221
  Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
168
222
 
223
+ ### What problems does Pixeltable solve?
224
+
225
+ Today's solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployments is cumbersome. Pixeltable lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
226
+
169
227
  ### What does Pixeltable provide me with? Pixeltable provides:
170
228
 
171
229
  - Data storage and versioning
@@ -1,7 +1,6 @@
1
1
  <div align="center">
2
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
3
-
4
- # Unifying Data, Models, and Orchestration for AI Products
2
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
3
+ <br></br>
5
4
 
6
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
7
6
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
@@ -12,22 +11,17 @@
12
11
  [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/release/docs/release/tutorials)
13
12
  </div>
14
13
 
15
- Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
16
-
17
- ### What problems does Pixeltable solve?
18
-
19
- Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
14
+ Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
20
15
 
21
16
  ## 💾 Installation
22
17
 
23
18
  ```python
24
19
  pip install pixeltable
25
20
  ```
26
- > [!IMPORTANT]
27
- > Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
21
+ **Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database.**
28
22
 
29
23
  ## 💡 Getting Started
30
- Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
24
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations.
31
25
 
32
26
  | Topic | Notebook | Topic | Notebook |
33
27
  |:----------|:-----------------|:-------------------------|:---------------------------------:|
@@ -54,9 +48,32 @@ v.insert({'video': prefix + p} for p in paths)
54
48
  ```
55
49
  Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
56
50
 
57
- ### Add an object detection model to your workflow
51
+ ### Object detection in images using the DETR model
58
52
  ```python
59
- table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
53
+ import pixeltable as pxt
54
+ from pixeltable.functions import huggingface
55
+
56
+ # Create a table to store data persistently
57
+ t = pxt.create_table('image', {'image': pxt.ImageType()})
58
+
59
+ # Insert some images
60
+ prefix = 'https://upload.wikimedia.org/wikipedia/commons'
61
+ paths = [
62
+ '/1/15/Cat_August_2010-4.jpg',
63
+ '/e/e1/Example_of_a_Dog.jpg',
64
+ '/thumb/b/bf/Bird_Diversity_2013.png/300px-Bird_Diversity_2013.png'
65
+ ]
66
+ t.insert({'image': prefix + p} for p in paths)
67
+
68
+ # Add a computed column for image classification
69
+ t['classification'] = huggingface.detr_for_object_detection(
70
+ (t.image), model_id='facebook/detr-resnet-50'
71
+ )
72
+
73
+ # Retrieve the rows where cats have been identified
74
+ t.select(animal = t.image,
75
+ classification = t.classification.label_text[0]) \
76
+ .where(t.classification.label_text[0]=='cat').head()
60
77
  ```
61
78
  Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
62
79
 
@@ -72,9 +89,9 @@ def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Imag
72
89
  ```
73
90
  Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
74
91
 
75
- ### Automate data operations with views
92
+ ### Automate data operations with views, e.g., split documents into chunks
76
93
  ```python
77
- # In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
94
+ # In this example, the view is defined by iteration over the chunks of a DocumentSplitter
78
95
  chunks_table = pxt.create_view(
79
96
  'rag_demo.chunks',
80
97
  documents_table,
@@ -87,7 +104,7 @@ Learn how to leverage views to build your [RAG workflow](https://pixeltable.read
87
104
 
88
105
  ### Evaluate model performance
89
106
  ```python
90
- # The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
107
+ # The computation of the mAP metric can become a query over the evaluation output
91
108
  frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
92
109
  ```
93
110
  Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
@@ -99,7 +116,7 @@ chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
99
116
  # The chat-completions API expects JSON-formatted input:
100
117
  messages = [{'role': 'user', 'content': chat_table.input}]
101
118
 
102
- # This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
119
+ # This example shows how additional parameters from the Together API can be used in Pixeltable
103
120
  chat_table['output'] = chat_completions(
104
121
  messages=messages,
105
122
  model='mistralai/Mixtral-8x7B-Instruct-v0.1',
@@ -123,12 +140,54 @@ chat_table.select(chat_table.input, chat_table.response).head()
123
140
  ```
124
141
  Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
125
142
 
143
+ ### Text and image similarity search on video frames with embedding indexes
144
+ ```python
145
+ import pixeltable as pxt
146
+ from pixeltable.functions.huggingface import clip_image, clip_text
147
+ from pixeltable.iterators import FrameIterator
148
+ import PIL.Image
149
+
150
+ video_table = pxt.create_table('videos', {'video': pxt.VideoType()})
151
+
152
+ video_table.insert([{'video': '/video.mp4'}])
153
+
154
+ frames_view = pxt.create_view(
155
+ 'frames', video_table, iterator=FrameIterator.create(video=video_table.video))
156
+
157
+ @pxt.expr_udf
158
+ def embed_image(img: PIL.Image.Image):
159
+ return clip_image(img, model_id='openai/clip-vit-base-patch32')
160
+
161
+ @pxt.expr_udf
162
+ def str_embed(s: str):
163
+ return clip_text(s, model_id='openai/clip-vit-base-patch32')
164
+
165
+ # Create an index on the 'frame' column that allows text and image search
166
+ frames_view.add_embedding_index('frame', string_embed=str_embed, image_embed=embed_image)
167
+
168
+ # Now we will retrieve images based on a sample image
169
+ sample_image = '/image.jpeg'
170
+ sim = frames_view.frame.similarity(sample_image)
171
+ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
172
+
173
+ # Now we will retrieve images based on a string
174
+ sample_text = 'red truck'
175
+ sim = frames_view.frame.similarity(sample_text)
176
+ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
177
+
178
+ ```
179
+ Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
180
+
126
181
  ## ❓ FAQ
127
182
 
128
183
  ### What is Pixeltable?
129
184
 
130
185
  Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
131
186
 
187
+ ### What problems does Pixeltable solve?
188
+
189
+ Today's solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployments is cumbersome. Pixeltable lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
190
+
132
191
  ### What does Pixeltable provide me with? Pixeltable provides:
133
192
 
134
193
  - Data storage and versioning
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.17"
3
- __version_tuple__ = (0, 2, 17)
2
+ __version__ = "0.2.18"
3
+ __version_tuple__ = (0, 2, 18)
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Dict, Tuple
3
- from uuid import UUID
2
+
4
3
  import dataclasses
5
4
  import logging
5
+ from typing import Optional
6
+ from uuid import UUID
6
7
 
7
8
  import sqlalchemy as sql
8
9
  import sqlalchemy.orm as orm
@@ -10,8 +11,8 @@ import sqlalchemy.orm as orm
10
11
  from .table_version import TableVersion
11
12
  from .table_version_path import TableVersionPath
12
13
  from .table import Table
13
- from .named_function import NamedFunction
14
14
  from .path_dict import PathDict
15
+
15
16
  import pixeltable.env as env
16
17
  import pixeltable.metadata.schema as schema
17
18
 
@@ -39,10 +40,10 @@ class Catalog:
39
40
  # key: [id, version]
40
41
  # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
41
42
  # - snapshot versions: records the version of the snapshot
42
- self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}
43
+ self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
43
44
 
44
- self.tbls: Dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
45
- self.tbl_dependents: Dict[UUID, List[Table]] = {}
45
+ self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
46
+ self.tbl_dependents: dict[UUID, list[Table]] = {}
46
47
 
47
48
  self._init_store()
48
49
  self.paths = PathDict() # do this after _init_store()
@@ -133,7 +134,7 @@ class Catalog:
133
134
  base_path=base_path if not is_snapshot else None)
134
135
  view_path = TableVersionPath(tbl_version, base=base_path)
135
136
 
136
- tbl = View(
137
+ tbl: Table = View(
137
138
  tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
138
139
  snapshot_only=snapshot_only)
139
140
  self.tbl_dependents[base_tbl_id].append(tbl)
@@ -1,15 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Any, Callable, Optional, Union
4
+ from typing import TYPE_CHECKING, Any, Callable, Optional, Union
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
+ from pixeltable import exprs
10
11
 
11
12
  from .globals import is_valid_identifier
12
13
 
14
+ if TYPE_CHECKING:
15
+ from .table_version import TableVersion
16
+
13
17
  _logger = logging.getLogger('pixeltable')
14
18
 
15
19
  class Column:
@@ -20,7 +24,7 @@ class Column:
20
24
  """
21
25
  def __init__(
22
26
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
23
- computed_with: Optional[Union['Expr', Callable]] = None,
27
+ computed_with: Optional[Union[exprs.Expr, Callable]] = None,
24
28
  is_pk: bool = False, stored: bool = True,
25
29
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
26
30
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
@@ -57,15 +61,14 @@ class Column:
57
61
  if col_type is None and computed_with is None:
58
62
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
59
63
 
60
- self._value_expr: Optional['Expr'] = None
64
+ self._value_expr: Optional[exprs.Expr] = None
61
65
  self.compute_func: Optional[Callable] = None
62
66
  self.value_expr_dict = value_expr_dict
63
- from pixeltable import exprs
64
67
  if computed_with is not None:
65
68
  value_expr = exprs.Expr.from_object(computed_with)
66
69
  if value_expr is None:
67
70
  # computed_with needs to be a Callable
68
- if not isinstance(computed_with, Callable):
71
+ if not callable(computed_with):
69
72
  raise excs.Error(
70
73
  f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
71
74
  f'but it is a {type(computed_with)}')
@@ -103,7 +106,7 @@ class Column:
103
106
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
104
107
 
105
108
  @property
106
- def value_expr(self) -> Optional['Expr']:
109
+ def value_expr(self) -> Optional[exprs.Expr]:
107
110
  """Instantiate value_expr on-demand"""
108
111
  # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
109
112
  # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
@@ -112,7 +115,7 @@ class Column:
112
115
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
113
116
  return self._value_expr
114
117
 
115
- def set_value_expr(self, value_expr: 'Expr') -> None:
118
+ def set_value_expr(self, value_expr: exprs.Expr) -> None:
116
119
  self._value_expr = value_expr
117
120
  self.value_expr_dict = None
118
121
 
@@ -130,7 +133,7 @@ class Column:
130
133
  l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
131
134
  return len(l) > 0
132
135
 
133
- def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
136
+ def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
134
137
  assert self.tbl is not None
135
138
  return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
136
139
 
@@ -82,7 +82,7 @@ class InsertableTable(Table):
82
82
  @overload
83
83
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
84
84
 
85
- def insert(
85
+ def insert( # type: ignore[misc]
86
86
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
87
87
  fail_on_exception: bool = True, **kwargs: Any
88
88
  ) -> UpdateStatus:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import copy
4
4
  import logging
5
- from typing import Optional, List, Dict, Type
5
+ from typing import Optional
6
6
  from uuid import UUID
7
7
 
8
8
  import sqlalchemy.orm as orm
@@ -10,6 +10,7 @@ import sqlalchemy.orm as orm
10
10
  from pixeltable import exceptions as excs
11
11
  from pixeltable.env import Env
12
12
  from pixeltable.metadata import schema
13
+
13
14
  from .dir import Dir
14
15
  from .path import Path
15
16
  from .schema_object import SchemaObject
@@ -19,8 +20,8 @@ _logger = logging.getLogger('pixeltable')
19
20
  class PathDict:
20
21
  """Keep track of all paths in a Db instance"""
21
22
  def __init__(self):
22
- self.dir_contents: Dict[UUID, Dict[str, SchemaObject]] = {}
23
- self.schema_objs: Dict[UUID, SchemaObject] = {}
23
+ self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
24
+ self.schema_objs: dict[UUID, SchemaObject] = {}
24
25
 
25
26
  # load dirs
26
27
  with orm.Session(Env.get().engine, future=True) as session:
@@ -36,7 +37,8 @@ class PathDict:
36
37
  self.root_dir = root_dirs[0]
37
38
 
38
39
  # build dir_contents
39
- def record_dir(dir: Dir) -> None:
40
+ def record_dir(dir: SchemaObject) -> None:
41
+ assert isinstance(dir, Dir)
40
42
  if dir._id in self.dir_contents:
41
43
  return
42
44
  else:
@@ -99,7 +101,7 @@ class PathDict:
99
101
  assert to_path.name not in self.dir_contents[to_dir._id]
100
102
  self.dir_contents[to_dir._id][to_path.name] = obj
101
103
 
102
- def check_is_valid(self, path: Path, expected: Optional[Type[SchemaObject]]) -> None:
104
+ def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
103
105
  """Check that path is valid and that the object at path has the expected type.
104
106
 
105
107
  Args:
@@ -124,7 +126,7 @@ class PathDict:
124
126
  obj = self.dir_contents[parent_obj._id][path.name]
125
127
  raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
126
128
 
127
- def get_children(self, parent: Path, child_type: Optional[Type[SchemaObject]], recursive: bool) -> List[Path]:
129
+ def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
128
130
  dir = self._resolve_path(parent)
129
131
  if not isinstance(dir, Dir):
130
132
  raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
@@ -1,14 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import abc
4
+ import builtins
4
5
  import itertools
5
6
  import json
6
7
  import logging
7
8
  from pathlib import Path
8
- from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
9
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
9
10
  from uuid import UUID
10
11
 
11
12
  import pandas as pd
13
+ import pandas.io.formats.style
12
14
  import sqlalchemy as sql
13
15
 
14
16
  import pixeltable
@@ -26,6 +28,9 @@ from .schema_object import SchemaObject
26
28
  from .table_version import TableVersion
27
29
  from .table_version_path import TableVersionPath
28
30
 
31
+ if TYPE_CHECKING:
32
+ import torch.utils.data
33
+
29
34
  _logger = logging.getLogger('pixeltable')
30
35
 
31
36
  class Table(SchemaObject):
@@ -211,23 +216,24 @@ class Table(SchemaObject):
211
216
  })
212
217
  return df
213
218
 
214
- def _description_html(self) -> pd.DataFrame:
219
+ def _description_html(self) -> pandas.io.formats.style.Styler:
215
220
  pd_df = self._description()
216
221
  # white-space: pre-wrap: print \n as newline
217
222
  # th: center-align headings
218
- return pd_df.style.set_properties(**{'white-space': 'pre-wrap', 'text-align': 'left'}) \
219
- .set_table_styles([dict(selector='th', props=[('text-align', 'center')])]) \
223
+ return (
224
+ pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
225
+ .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
220
226
  .hide(axis='index')
227
+ )
221
228
 
222
229
  def describe(self) -> None:
223
230
  """
224
231
  Print the table schema.
225
232
  """
226
- try:
227
- __IPYTHON__
233
+ if getattr(builtins, '__IPYTHON__', False):
228
234
  from IPython.display import display
229
235
  display(self._description_html())
230
- except NameError:
236
+ else:
231
237
  print(self.__repr__())
232
238
 
233
239
  # TODO: Display comments in _repr_html_()
@@ -240,7 +246,7 @@ class Table(SchemaObject):
240
246
  return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
241
247
 
242
248
  def _repr_html_(self) -> str:
243
- return self._description_html()._repr_html_()
249
+ return self._description_html()._repr_html_() # type: ignore[attr-defined]
244
250
 
245
251
  def _drop(self) -> None:
246
252
  self._check_is_dropped()
@@ -282,7 +288,7 @@ class Table(SchemaObject):
282
288
  raise excs.Error(f'Column name must be a string, got {type(col_name)}')
283
289
  if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
284
290
  raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
285
- self.add_column(**{col_name: spec})
291
+ self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
286
292
 
287
293
  def add_column(
288
294
  self,
@@ -368,7 +374,7 @@ class Table(SchemaObject):
368
374
  col_schema['stored'] = stored
369
375
 
370
376
  new_col = self._create_columns({col_name: col_schema})[0]
371
- self._verify_column(new_col, set(self._schema.keys()), self._query_names)
377
+ self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
372
378
  return self._tbl_version.add_column(new_col, print_stats=print_stats)
373
379
 
374
380
  @classmethod
@@ -395,7 +401,7 @@ class Table(SchemaObject):
395
401
  value_expr = exprs.Expr.from_object(value_spec)
396
402
  if value_expr is None:
397
403
  # needs to be a Callable
398
- if not isinstance(value_spec, Callable):
404
+ if not callable(value_spec):
399
405
  raise excs.Error(
400
406
  f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
401
407
  f'but it is a {type(value_spec)}')
@@ -427,7 +433,7 @@ class Table(SchemaObject):
427
433
  elif isinstance(spec, exprs.Expr):
428
434
  # create copy so we can modify it
429
435
  value_expr = spec.copy()
430
- elif isinstance(spec, Callable):
436
+ elif callable(spec):
431
437
  raise excs.Error((
432
438
  f'Column {name} computed with a Callable: specify using a dictionary with '
433
439
  f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
@@ -546,6 +552,7 @@ class Table(SchemaObject):
546
552
  metric: str = 'cosine'
547
553
  ) -> None:
548
554
  """Add an index to the table.
555
+
549
556
  Args:
550
557
  col_name: name of column to index
551
558
  idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
@@ -656,7 +663,7 @@ class Table(SchemaObject):
656
663
  @overload
657
664
  def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
658
665
 
659
- @abc.abstractmethod
666
+ @abc.abstractmethod # type: ignore[misc]
660
667
  def insert(
661
668
  self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
662
669
  fail_on_exception: bool = True, **kwargs: Any