pixeltable 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (145) hide show
  1. {pixeltable-0.2.3 → pixeltable-0.2.5}/PKG-INFO +35 -28
  2. {pixeltable-0.2.3 → pixeltable-0.2.5}/README.md +30 -24
  3. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/column.py +26 -49
  4. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/insertable_table.py +7 -4
  5. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table.py +163 -57
  6. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table_version.py +416 -140
  7. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table_version_path.py +2 -2
  8. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/client.py +72 -6
  9. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/dataframe.py +65 -21
  10. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/env.py +52 -53
  11. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/cache_prefetch_node.py +1 -1
  12. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/in_memory_data_node.py +11 -7
  13. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/comparison.py +3 -3
  14. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/data_row.py +5 -1
  15. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/literal.py +16 -4
  16. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/row_builder.py +8 -40
  17. pixeltable-0.2.5/pixeltable/ext/__init__.py +5 -0
  18. pixeltable-0.2.5/pixeltable/ext/functions/yolox.py +92 -0
  19. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/aggregate_function.py +15 -15
  20. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/expr_template_function.py +9 -1
  21. pixeltable-0.2.5/pixeltable/func/globals.py +46 -0
  22. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/signature.py +18 -12
  23. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/udf.py +7 -2
  24. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/__init__.py +9 -9
  25. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/eval.py +7 -8
  26. pixeltable-0.2.5/pixeltable/functions/fireworks.py +34 -0
  27. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/huggingface.py +47 -19
  28. pixeltable-0.2.5/pixeltable/functions/openai.py +256 -0
  29. pixeltable-0.2.5/pixeltable/functions/together.py +122 -0
  30. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/util.py +11 -0
  31. pixeltable-0.2.5/pixeltable/index/__init__.py +2 -0
  32. pixeltable-0.2.5/pixeltable/index/base.py +49 -0
  33. pixeltable-0.2.5/pixeltable/index/embedding_index.py +95 -0
  34. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/schema.py +45 -22
  35. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/plan.py +15 -34
  36. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/store.py +38 -41
  37. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/conftest.py +8 -14
  38. pixeltable-0.2.5/pixeltable/tests/ext/test_yolox.py +21 -0
  39. pixeltable-0.2.5/pixeltable/tests/functions/test_fireworks.py +43 -0
  40. pixeltable-0.2.5/pixeltable/tests/functions/test_functions.py +60 -0
  41. pixeltable-0.2.3/pixeltable/tests/test_functions.py → pixeltable-0.2.5/pixeltable/tests/functions/test_huggingface.py +7 -143
  42. pixeltable-0.2.5/pixeltable/tests/functions/test_openai.py +162 -0
  43. pixeltable-0.2.5/pixeltable/tests/functions/test_together.py +112 -0
  44. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_component_view.py +14 -5
  45. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_dataframe.py +23 -22
  46. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_exprs.py +99 -102
  47. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_function.py +51 -43
  48. pixeltable-0.2.5/pixeltable/tests/test_index.py +138 -0
  49. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_migration.py +2 -1
  50. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_snapshot.py +24 -1
  51. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_table.py +205 -26
  52. pixeltable-0.2.5/pixeltable/tests/test_types.py +52 -0
  53. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_video.py +16 -16
  54. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_view.py +5 -0
  55. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/utils.py +171 -14
  56. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tool/create_test_db_dump.py +16 -0
  57. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/type_system.py +77 -128
  58. pixeltable-0.2.5/pixeltable/utils/arrow.py +98 -0
  59. pixeltable-0.2.5/pixeltable/utils/hf_datasets.py +157 -0
  60. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/parquet.py +68 -27
  61. pixeltable-0.2.5/pixeltable/utils/pytorch.py +91 -0
  62. {pixeltable-0.2.3 → pixeltable-0.2.5}/pyproject.toml +15 -6
  63. pixeltable-0.2.3/pixeltable/func/globals.py +0 -36
  64. pixeltable-0.2.3/pixeltable/functions/fireworks.py +0 -61
  65. pixeltable-0.2.3/pixeltable/functions/openai.py +0 -88
  66. pixeltable-0.2.3/pixeltable/functions/together.py +0 -27
  67. pixeltable-0.2.3/pixeltable/tests/test_types.py +0 -22
  68. pixeltable-0.2.3/pixeltable/utils/pytorch.py +0 -172
  69. {pixeltable-0.2.3 → pixeltable-0.2.5}/LICENSE +0 -0
  70. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/__init__.py +0 -0
  71. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/__init__.py +0 -0
  72. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/catalog.py +0 -0
  73. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/dir.py +0 -0
  74. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/globals.py +0 -0
  75. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/named_function.py +0 -0
  76. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/path.py +0 -0
  77. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/path_dict.py +0 -0
  78. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/schema_object.py +0 -0
  79. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/view.py +0 -0
  80. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exceptions.py +0 -0
  81. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/__init__.py +0 -0
  82. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/aggregation_node.py +0 -0
  83. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/component_iteration_node.py +0 -0
  84. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/data_row_batch.py +0 -0
  85. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/exec_context.py +0 -0
  86. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/exec_node.py +0 -0
  87. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/expr_eval_node.py +0 -0
  88. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/media_validation_node.py +0 -0
  89. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/sql_scan_node.py +0 -0
  90. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/__init__.py +0 -0
  91. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/arithmetic_expr.py +0 -0
  92. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/array_slice.py +0 -0
  93. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/column_property_ref.py +0 -0
  94. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/column_ref.py +0 -0
  95. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/compound_predicate.py +0 -0
  96. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/expr.py +0 -0
  97. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/expr_set.py +0 -0
  98. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/function_call.py +0 -0
  99. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/globals.py +0 -0
  100. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/image_member_access.py +0 -0
  101. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/image_similarity_predicate.py +0 -0
  102. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/inline_array.py +0 -0
  103. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/inline_dict.py +0 -0
  104. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/is_null.py +0 -0
  105. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/json_mapper.py +0 -0
  106. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/json_path.py +0 -0
  107. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/object_ref.py +0 -0
  108. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/predicate.py +0 -0
  109. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/rowid_ref.py +0 -0
  110. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/type_cast.py +0 -0
  111. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/variable.py +0 -0
  112. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/__init__.py +0 -0
  113. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/batched_function.py +0 -0
  114. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/callable_function.py +0 -0
  115. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/function.py +0 -0
  116. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/function_registry.py +0 -0
  117. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/nos_function.py +0 -0
  118. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/image.py +0 -0
  119. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/pil/image.py +0 -0
  120. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/string.py +0 -0
  121. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/video.py +0 -0
  122. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/__init__.py +0 -0
  123. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/base.py +0 -0
  124. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/document.py +0 -0
  125. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/video.py +0 -0
  126. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/__init__.py +0 -0
  127. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/converters/convert_10.py +0 -0
  128. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_audio.py +0 -0
  129. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_catalog.py +0 -0
  130. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_client.py +0 -0
  131. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_dirs.py +0 -0
  132. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_document.py +0 -0
  133. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_nos.py +0 -0
  134. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_transactional_directory.py +0 -0
  135. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tool/create_test_video.py +0 -0
  136. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/__init__.py +0 -0
  137. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/clip.py +0 -0
  138. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/coco.py +0 -0
  139. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/documents.py +0 -0
  140. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/filecache.py +0 -0
  141. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/help.py +0 -0
  142. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/media_store.py +0 -0
  143. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/s3.py +0 -0
  144. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/sql.py +0 -0
  145. {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Marcel Kornacker
6
6
  Author-email: marcelk@gmail.com
@@ -15,12 +15,12 @@ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
15
  Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
16
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
17
17
  Requires-Dist: jmespath (>=1.0.1,<2.0.0)
18
- Requires-Dist: numpy (>=1.26,<2.0)
18
+ Requires-Dist: numpy (>=1.26)
19
19
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
20
20
  Requires-Dist: pandas (>=2.0,<3.0)
21
- Requires-Dist: pgserver (==0.0.9)
21
+ Requires-Dist: pgserver (==0.1.2)
22
22
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
23
- Requires-Dist: pillow (>=9.4.0,<10.0.0)
23
+ Requires-Dist: pillow (>=10.0)
24
24
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
25
25
  Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
26
26
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
@@ -28,38 +28,44 @@ Requires-Dist: regex (>=2022.10.31,<2023.0.0)
28
28
  Requires-Dist: requests (>=2.31.0,<3.0.0)
29
29
  Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
30
30
  Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
31
+ Requires-Dist: tenacity (>=8.2,<9.0)
31
32
  Requires-Dist: tqdm (>=4.64.1,<5.0.0)
32
33
  Description-Content-Type: text/markdown
33
34
 
35
+ <div align="center">
34
36
  <img src="docs/pixeltable-banner.png" width="45%"/>
35
37
 
36
- # Pixeltable: The Multimodal AI Data Plane
38
+ # Unifying Data, Models, and Orchestration for AI Products
37
39
 
38
40
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
39
41
  &nbsp;&nbsp;
40
42
  ![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)
41
43
 
42
- Pixeltable is a Python library that lets AI engineers and data scientists focus on
43
- exploration, modeling, and app development without having to deal with the customary
44
- data plumbing.
44
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
45
+ </div>
45
46
 
46
- **Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
47
- It brings together data storage, versioning, and indexing with orchestration and model
48
- versioning under a declarative table interface, with transformations, model inference,
49
- and custom logic represented as computed columns.
47
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
50
48
 
51
- ## Quick Start
49
+ ## What problems does Pixeltable solve?
50
+
51
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
52
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
53
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
54
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
55
+ building and productionizing AI applications.
56
+
57
+ ## ⚡ Quick Start
58
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
52
59
 
53
- If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
54
- the Pixeltable Basics tutorial in colab:
60
+ ### Pixeltable Basics
61
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
55
62
 
56
- <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
57
- <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
58
- </a>
63
+ [![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)&nbsp;&nbsp;
64
+ <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
59
65
 
60
- ## Installation
61
66
 
62
- Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
67
+ ## 💾 Installation
68
+ Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, macOS, or Windows.
63
69
 
64
70
  ```
65
71
  pip install pixeltable
@@ -78,14 +84,6 @@ guide. Then, check out the
78
84
  [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
79
85
  tutorial for a tour of its most important features.
80
86
 
81
- ## What problems does Pixeltable solve?
82
-
83
- Today’s solutions for AI app development require extensive custom coding and infrastructure
84
- plumbing. Tracking lineage and versions between and across data transformations, models, and
85
- deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
86
- a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
87
- building and productionizing AI applications.
88
-
89
87
  ## Why should you use Pixeltable?
90
88
 
91
89
  - It gives you transparency and reproducibility
@@ -119,3 +117,12 @@ get cost projections before adding new data and new augmentations.
119
117
  * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
120
118
  and to ensure reproducibility.
121
119
 
120
+ ## Contributions & Feedback
121
+
122
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
123
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
124
+
125
+ ## :classical_building: License
126
+
127
+ This library is licensed under the Apache 2.0 License.
128
+
@@ -1,32 +1,37 @@
1
+ <div align="center">
1
2
  <img src="docs/pixeltable-banner.png" width="45%"/>
2
3
 
3
- # Pixeltable: The Multimodal AI Data Plane
4
+ # Unifying Data, Models, and Orchestration for AI Products
4
5
 
5
6
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
6
7
  &nbsp;&nbsp;
7
8
  ![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)
8
9
 
9
- Pixeltable is a Python library that lets AI engineers and data scientists focus on
10
- exploration, modeling, and app development without having to deal with the customary
11
- data plumbing.
10
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
11
+ </div>
12
12
 
13
- **Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
14
- It brings together data storage, versioning, and indexing with orchestration and model
15
- versioning under a declarative table interface, with transformations, model inference,
16
- and custom logic represented as computed columns.
13
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
17
14
 
18
- ## Quick Start
15
+ ## What problems does Pixeltable solve?
16
+
17
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
18
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
19
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
20
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
21
+ building and productionizing AI applications.
22
+
23
+ ## ⚡ Quick Start
24
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
19
25
 
20
- If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
21
- the Pixeltable Basics tutorial in colab:
26
+ ### Pixeltable Basics
27
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
22
28
 
23
- <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
24
- <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
25
- </a>
29
+ [![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)&nbsp;&nbsp;
30
+ <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
26
31
 
27
- ## Installation
28
32
 
29
- Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
33
+ ## 💾 Installation
34
+ Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, macOS, or Windows.
30
35
 
31
36
  ```
32
37
  pip install pixeltable
@@ -45,14 +50,6 @@ guide. Then, check out the
45
50
  [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
46
51
  tutorial for a tour of its most important features.
47
52
 
48
- ## What problems does Pixeltable solve?
49
-
50
- Today’s solutions for AI app development require extensive custom coding and infrastructure
51
- plumbing. Tracking lineage and versions between and across data transformations, models, and
52
- deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
53
- a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
54
- building and productionizing AI applications.
55
-
56
53
  ## Why should you use Pixeltable?
57
54
 
58
55
  - It gives you transparency and reproducibility
@@ -85,3 +82,12 @@ storage.
85
82
  get cost projections before adding new data and new augmentations.
86
83
  * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
87
84
  and to ensure reproducibility.
85
+
86
+ ## Contributions & Feedback
87
+
88
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
89
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
90
+
91
+ ## :classical_building: License
92
+
93
+ This library is licensed under the Apache 2.0 License.
@@ -4,10 +4,8 @@ import logging
4
4
  from typing import Optional, Union, Callable, Set
5
5
 
6
6
  import sqlalchemy as sql
7
- from pgvector.sqlalchemy import Vector
8
7
 
9
8
  from pixeltable import exceptions as excs
10
- from pixeltable.metadata import schema
11
9
  from pixeltable.type_system import ColumnType, StringType
12
10
  from .globals import is_valid_identifier
13
11
 
@@ -20,48 +18,42 @@ class Column:
20
18
  table/view.
21
19
  """
22
20
  def __init__(
23
- self, name: str, col_type: Optional[ColumnType] = None,
21
+ self, name: Optional[str], col_type: Optional[ColumnType] = None,
24
22
  computed_with: Optional[Union['Expr', Callable]] = None,
25
- primary_key: bool = False, stored: Optional[bool] = None,
26
- indexed: bool = False,
27
- # these parameters aren't set by users
28
- col_id: Optional[int] = None):
23
+ is_pk: bool = False, stored: Optional[bool] = None,
24
+ col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
25
+ schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
26
+ ):
29
27
  """Column constructor.
30
28
 
31
29
  Args:
32
- name: column name
30
+ name: column name; None for system columns (e.g., index columns)
33
31
  col_type: column type; can be None if the type can be derived from ``computed_with``
34
32
  computed_with: a callable or an Expr object that computes the column value
35
- primary_key: if True, this column is part of the primary key
33
+ is_pk: if True, this column is part of the primary key
36
34
  stored: determines whether a computed column is present in the stored table or recomputed on demand
37
- indexed: if True, this column has a nearest neighbor index (only valid for image columns)
38
35
  col_id: column ID (only used internally)
39
36
 
40
37
  Computed columns: those have a non-None ``computed_with`` argument
41
-
42
38
  - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
43
39
  col_type is None
44
40
  - when loaded from md store: ``computed_with`` is set and col_type is set
45
41
 
46
42
  ``computed_with`` is a Callable:
47
-
48
43
  - the callable's parameter names must correspond to existing columns in the table for which this Column
49
44
  is being used
50
45
  - ``col_type`` needs to be set to the callable's return type
51
46
 
52
47
  ``stored`` (only valid for computed image columns):
53
-
54
48
  - if True: the column is present in the stored table
55
49
  - if False: the column is not present in the stored table and recomputed during a query
56
50
  - if None: the system chooses for you (at present, this is always False, but this may change in the future)
57
-
58
- indexed: only valid for image columns; if true, maintains an NN index for this column
59
51
  """
60
- if not is_valid_identifier(name):
52
+ if name is not None and not is_valid_identifier(name):
61
53
  raise excs.Error(f"Invalid column name: '{name}'")
62
54
  self.name = name
63
55
  if col_type is None and computed_with is None:
64
- raise excs.Error(f'Column {name}: col_type is required if computed_with is not specified')
56
+ raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
65
57
 
66
58
  self.value_expr: Optional['Expr'] = None
67
59
  self.compute_func: Optional[Callable] = None
@@ -90,35 +82,20 @@ class Column:
90
82
  self.stored = stored
91
83
  self.dependent_cols: Set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
92
84
  self.id = col_id
93
- self.primary_key = primary_key
85
+ self.is_pk = is_pk
86
+ self.schema_version_add = schema_version_add
87
+ self.schema_version_drop = schema_version_drop
94
88
 
95
89
  # column in the stored table for the values of this Column
96
90
  self.sa_col: Optional[sql.schema.Column] = None
91
+ self.sa_col_type = sa_col_type
97
92
 
98
93
  # computed cols also have storage columns for the exception string and type
99
94
  self.sa_errormsg_col: Optional[sql.schema.Column] = None
100
95
  self.sa_errortype_col: Optional[sql.schema.Column] = None
101
- # indexed columns also have a column for the embeddings
102
- self.sa_idx_col: Optional[sql.schema.Column] = None
103
96
  from .table_version import TableVersion
104
97
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
105
98
 
106
- if indexed and not self.col_type.is_image_type():
107
- raise excs.Error(f'Column {name}: indexed=True requires ImageType')
108
- self.is_indexed = indexed
109
-
110
- @classmethod
111
- def from_md(cls, col_id: int, md: schema.SchemaColumn, tbl: 'TableVersion') -> Column:
112
- """Construct a Column from metadata.
113
-
114
- Leaves out value_expr, because that requires TableVersion.cols to be complete.
115
- """
116
- col = cls(
117
- md.name, col_type=ColumnType.from_dict(md.col_type), primary_key=md.is_pk,
118
- stored=md.stored, indexed=md.is_indexed, col_id=col_id)
119
- col.tbl = tbl
120
- return col
121
-
122
99
  def __hash__(self) -> int:
123
100
  assert self.tbl is not None
124
101
  return hash((self.tbl.id, self.id))
@@ -167,26 +144,26 @@ class Column:
167
144
  """
168
145
  assert self.is_stored
169
146
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
170
- self.sa_col = sql.Column(self.storage_name(), self.col_type.to_sa_type(), nullable=True)
147
+ self.sa_col = sql.Column(
148
+ self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
149
+ nullable=True)
171
150
  if self.is_computed or self.col_type.is_media_type():
172
- self.sa_errormsg_col = sql.Column(self.errormsg_storage_name(), StringType().to_sa_type(), nullable=True)
173
- self.sa_errortype_col = sql.Column(self.errortype_storage_name(), StringType().to_sa_type(), nullable=True)
174
- if self.is_indexed:
175
- self.sa_idx_col = sql.Column(self.index_storage_name(), Vector(512), nullable=True)
151
+ self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
152
+ self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
176
153
 
177
- def storage_name(self) -> str:
154
+ def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
155
+ return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
156
+
157
+ def store_name(self) -> str:
178
158
  assert self.id is not None
179
159
  assert self.is_stored
180
160
  return f'col_{self.id}'
181
161
 
182
- def errormsg_storage_name(self) -> str:
183
- return f'{self.storage_name()}_errormsg'
184
-
185
- def errortype_storage_name(self) -> str:
186
- return f'{self.storage_name()}_errortype'
162
+ def errormsg_store_name(self) -> str:
163
+ return f'{self.store_name()}_errormsg'
187
164
 
188
- def index_storage_name(self) -> str:
189
- return f'{self.storage_name()}_idx_0'
165
+ def errortype_store_name(self) -> str:
166
+ return f'{self.store_name()}_errortype'
190
167
 
191
168
  def __str__(self) -> str:
192
169
  return f'{self.name}: {self.col_type}'
@@ -11,14 +11,17 @@ import pixeltable.type_system as ts
11
11
  from pixeltable import exceptions as excs
12
12
  from pixeltable.env import Env
13
13
  from .catalog import Catalog
14
+ from .globals import UpdateStatus
14
15
  from .table import Table
15
16
  from .table_version import TableVersion
16
17
  from .table_version_path import TableVersionPath
17
18
 
18
19
  _logger = logging.getLogger('pixeltable')
19
20
 
21
+
20
22
  class InsertableTable(Table):
21
23
  """A `Table` that allows inserting and deleting rows."""
24
+
22
25
  def __init__(self, dir_id: UUID, tbl_version: TableVersion):
23
26
  tbl_version_path = TableVersionPath(tbl_version)
24
27
  super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
@@ -42,7 +45,7 @@ class InsertableTable(Table):
42
45
  col = columns[column_names.index(pk_col)]
43
46
  if col.col_type.nullable:
44
47
  raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
45
- col.primary_key = True
48
+ col.is_pk = True
46
49
 
47
50
  with orm.Session(Env.get().engine, future=True) as session:
48
51
  _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
@@ -62,7 +65,7 @@ class InsertableTable(Table):
62
65
  @overload
63
66
  def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
64
67
 
65
- def insert(self, *args, **kwargs) -> Table.UpdateStatus:
68
+ def insert(self, *args, **kwargs) -> UpdateStatus:
66
69
  """Insert rows into table.
67
70
 
68
71
  To insert multiple rows at a time:
@@ -161,7 +164,7 @@ class InsertableTable(Table):
161
164
  msg = str(e)
162
165
  raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
163
166
 
164
- def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> Table.UpdateStatus:
167
+ def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
165
168
  """Delete rows in this table.
166
169
 
167
170
  Args:
@@ -181,7 +184,7 @@ class InsertableTable(Table):
181
184
  if where is not None:
182
185
  if not isinstance(where, Predicate):
183
186
  raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
184
- analysis_info = Planner.analyze(self.tbl_version, where)
187
+ analysis_info = Planner.analyze(self.tbl_version_path, where)
185
188
  if analysis_info.similarity_clause is not None:
186
189
  raise excs.Error('nearest() cannot be used with delete()')
187
190
  # for now we require that the updated rows can be identified via SQL, rather than via a Python filter