pixeltable 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (136) hide show
  1. {pixeltable-0.2.2 → pixeltable-0.2.4}/PKG-INFO +35 -27
  2. {pixeltable-0.2.2 → pixeltable-0.2.4}/README.md +28 -23
  3. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/column.py +1 -1
  4. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/client.py +72 -2
  5. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/env.py +36 -52
  6. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/cache_prefetch_node.py +14 -11
  7. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/data_row.py +14 -6
  8. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/__init__.py +1 -1
  9. pixeltable-0.2.4/pixeltable/functions/fireworks.py +34 -0
  10. pixeltable-0.2.4/pixeltable/functions/openai.py +256 -0
  11. pixeltable-0.2.4/pixeltable/functions/together.py +122 -0
  12. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/iterators/document.py +1 -1
  13. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/store.py +15 -10
  14. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/conftest.py +4 -4
  15. pixeltable-0.2.4/pixeltable/tests/functions/test_fireworks.py +42 -0
  16. pixeltable-0.2.4/pixeltable/tests/functions/test_functions.py +60 -0
  17. pixeltable-0.2.2/pixeltable/tests/test_functions.py → pixeltable-0.2.4/pixeltable/tests/functions/test_huggingface.py +5 -141
  18. pixeltable-0.2.4/pixeltable/tests/functions/test_openai.py +152 -0
  19. pixeltable-0.2.4/pixeltable/tests/functions/test_together.py +111 -0
  20. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_dataframe.py +11 -5
  21. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_document.py +3 -0
  22. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_exprs.py +2 -1
  23. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_table.py +111 -2
  24. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_video.py +2 -0
  25. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/utils.py +134 -9
  26. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/type_system.py +42 -85
  27. pixeltable-0.2.4/pixeltable/utils/arrow.py +98 -0
  28. pixeltable-0.2.4/pixeltable/utils/hf_datasets.py +157 -0
  29. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/parquet.py +68 -27
  30. pixeltable-0.2.4/pixeltable/utils/pytorch.py +91 -0
  31. {pixeltable-0.2.2 → pixeltable-0.2.4}/pyproject.toml +11 -9
  32. pixeltable-0.2.2/pixeltable/functions/fireworks.py +0 -61
  33. pixeltable-0.2.2/pixeltable/functions/openai.py +0 -88
  34. pixeltable-0.2.2/pixeltable/functions/together.py +0 -27
  35. pixeltable-0.2.2/pixeltable/utils/pytorch.py +0 -172
  36. {pixeltable-0.2.2 → pixeltable-0.2.4}/LICENSE +0 -0
  37. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/__init__.py +0 -0
  38. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/__init__.py +0 -0
  39. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/catalog.py +0 -0
  40. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/dir.py +0 -0
  41. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/globals.py +0 -0
  42. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/insertable_table.py +0 -0
  43. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/named_function.py +0 -0
  44. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/path.py +0 -0
  45. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/path_dict.py +0 -0
  46. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/schema_object.py +0 -0
  47. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/table.py +0 -0
  48. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/table_version.py +0 -0
  49. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/table_version_path.py +0 -0
  50. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/catalog/view.py +0 -0
  51. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/dataframe.py +0 -0
  52. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exceptions.py +0 -0
  53. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/__init__.py +0 -0
  54. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/aggregation_node.py +0 -0
  55. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/component_iteration_node.py +0 -0
  56. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/data_row_batch.py +0 -0
  57. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/exec_context.py +0 -0
  58. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/exec_node.py +0 -0
  59. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/expr_eval_node.py +0 -0
  60. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/in_memory_data_node.py +0 -0
  61. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/media_validation_node.py +0 -0
  62. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exec/sql_scan_node.py +0 -0
  63. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/__init__.py +0 -0
  64. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/arithmetic_expr.py +0 -0
  65. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/array_slice.py +0 -0
  66. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/column_property_ref.py +0 -0
  67. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/column_ref.py +0 -0
  68. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/comparison.py +0 -0
  69. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/compound_predicate.py +0 -0
  70. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/expr.py +0 -0
  71. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/expr_set.py +0 -0
  72. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/function_call.py +0 -0
  73. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/globals.py +0 -0
  74. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/image_member_access.py +0 -0
  75. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/image_similarity_predicate.py +0 -0
  76. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/inline_array.py +0 -0
  77. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/inline_dict.py +0 -0
  78. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/is_null.py +0 -0
  79. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/json_mapper.py +0 -0
  80. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/json_path.py +0 -0
  81. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/literal.py +0 -0
  82. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/object_ref.py +0 -0
  83. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/predicate.py +0 -0
  84. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/row_builder.py +0 -0
  85. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/rowid_ref.py +0 -0
  86. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/type_cast.py +0 -0
  87. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/exprs/variable.py +0 -0
  88. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/__init__.py +0 -0
  89. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/aggregate_function.py +0 -0
  90. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/batched_function.py +0 -0
  91. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/callable_function.py +0 -0
  92. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/expr_template_function.py +0 -0
  93. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/function.py +0 -0
  94. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/function_registry.py +0 -0
  95. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/globals.py +0 -0
  96. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/nos_function.py +0 -0
  97. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/signature.py +0 -0
  98. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/func/udf.py +0 -0
  99. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/eval.py +0 -0
  100. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/huggingface.py +0 -0
  101. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/image.py +0 -0
  102. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/pil/image.py +0 -0
  103. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/string.py +0 -0
  104. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/util.py +0 -0
  105. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/functions/video.py +0 -0
  106. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/iterators/__init__.py +0 -0
  107. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/iterators/base.py +0 -0
  108. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/iterators/video.py +0 -0
  109. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/metadata/__init__.py +0 -0
  110. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/metadata/converters/convert_10.py +0 -0
  111. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/metadata/schema.py +0 -0
  112. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/plan.py +0 -0
  113. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_audio.py +0 -0
  114. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_catalog.py +0 -0
  115. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_client.py +0 -0
  116. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_component_view.py +0 -0
  117. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_dirs.py +0 -0
  118. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_function.py +0 -0
  119. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_migration.py +0 -0
  120. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_nos.py +0 -0
  121. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_snapshot.py +0 -0
  122. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_transactional_directory.py +0 -0
  123. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_types.py +0 -0
  124. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tests/test_view.py +0 -0
  125. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tool/create_test_db_dump.py +0 -0
  126. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/tool/create_test_video.py +0 -0
  127. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/__init__.py +0 -0
  128. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/clip.py +0 -0
  129. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/coco.py +0 -0
  130. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/documents.py +0 -0
  131. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/filecache.py +0 -0
  132. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/help.py +0 -0
  133. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/media_store.py +0 -0
  134. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/s3.py +0 -0
  135. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/sql.py +0 -0
  136. {pixeltable-0.2.2 → pixeltable-0.2.4}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Marcel Kornacker
6
6
  Author-email: marcelk@gmail.com
@@ -11,52 +11,59 @@ Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
14
15
  Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
15
16
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
16
17
  Requires-Dist: jmespath (>=1.0.1,<2.0.0)
17
- Requires-Dist: numpy (>=1.24.1,<2.0.0)
18
+ Requires-Dist: numpy (>=1.26)
18
19
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
19
20
  Requires-Dist: pandas (>=2.0,<3.0)
20
- Requires-Dist: pgserver (==0.0.7)
21
+ Requires-Dist: pgserver (==0.1.0)
21
22
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
22
- Requires-Dist: pillow (>=9.4.0,<10.0.0)
23
+ Requires-Dist: pillow (>=10.0)
23
24
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
24
25
  Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
25
26
  Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
26
27
  Requires-Dist: regex (>=2022.10.31,<2023.0.0)
28
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
27
29
  Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
28
30
  Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
31
+ Requires-Dist: tenacity (>=8.2,<9.0)
29
32
  Requires-Dist: tqdm (>=4.64.1,<5.0.0)
30
33
  Description-Content-Type: text/markdown
31
34
 
35
+ <div align="center">
32
36
  <img src="docs/pixeltable-banner.png" width="45%"/>
33
37
 
34
- # Pixeltable: The Multimodal AI Data Plane
38
+ # Unifying Data, Models, and Orchestration for AI Products
35
39
 
36
40
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
37
41
  &nbsp;&nbsp;
38
42
  ![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)
39
43
 
40
- Pixeltable is a Python library that lets AI engineers and data scientists focus on
41
- exploration, modeling, and app development without having to deal with the customary
42
- data plumbing.
44
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
45
+ </div>
43
46
 
44
- **Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
45
- It brings together data storage, versioning, and indexing with orchestration and model
46
- versioning under a declarative table interface, with transformations, model inference,
47
- and custom logic represented as computed columns.
47
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
48
48
 
49
- ## Quick Start
49
+ ## What problems does Pixeltable solve?
50
+
51
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
52
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
53
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
54
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
55
+ building and productionizing AI applications.
56
+
57
+ ## ⚡Quick Start
58
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
50
59
 
51
- If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
52
- the Pixeltable Basics tutorial in colab:
60
+ ### Pixeltable Basics
61
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
53
62
 
54
- <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
55
- <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
56
- </a>
63
+ [![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/brunep/pixeltable-basics) <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
57
64
 
58
- ## Installation
59
65
 
66
+ ## 💾 Installation
60
67
  Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
61
68
 
62
69
  ```
@@ -76,14 +83,6 @@ guide. Then, check out the
76
83
  [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
77
84
  tutorial for a tour of its most important features.
78
85
 
79
- ## What problems does Pixeltable solve?
80
-
81
- Today’s solutions for AI app development require extensive custom coding and infrastructure
82
- plumbing. Tracking lineage and versions between and across data transformations, models, and
83
- deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
84
- a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
85
- building and productionizing AI applications.
86
-
87
86
  ## Why should you use Pixeltable?
88
87
 
89
88
  - It gives you transparency and reproducibility
@@ -117,3 +116,12 @@ get cost projections before adding new data and new augmentations.
117
116
  * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
118
117
  and to ensure reproducibility.
119
118
 
119
+ ## Contributions & Feedback
120
+
121
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
122
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
123
+
124
+ ## :classical_building: License
125
+
126
+ This library is licensed under the Apache 2.0 License.
127
+
@@ -1,31 +1,35 @@
1
+ <div align="center">
1
2
  <img src="docs/pixeltable-banner.png" width="45%"/>
2
3
 
3
- # Pixeltable: The Multimodal AI Data Plane
4
+ # Unifying Data, Models, and Orchestration for AI Products
4
5
 
5
6
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
6
7
  &nbsp;&nbsp;
7
8
  ![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)
8
9
 
9
- Pixeltable is a Python library that lets AI engineers and data scientists focus on
10
- exploration, modeling, and app development without having to deal with the customary
11
- data plumbing.
10
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
11
+ </div>
12
12
 
13
- **Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
14
- It brings together data storage, versioning, and indexing with orchestration and model
15
- versioning under a declarative table interface, with transformations, model inference,
16
- and custom logic represented as computed columns.
13
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
17
14
 
18
- ## Quick Start
15
+ ## What problems does Pixeltable solve?
16
+
17
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
18
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
19
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
20
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
21
+ building and productionizing AI applications.
22
+
23
+ ## ⚡Quick Start
24
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
19
25
 
20
- If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
21
- the Pixeltable Basics tutorial in colab:
26
+ ### Pixeltable Basics
27
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
22
28
 
23
- <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb">
24
- <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
25
- </a>
29
+ [![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/brunep/pixeltable-basics) <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
26
30
 
27
- ## Installation
28
31
 
32
+ ## 💾 Installation
29
33
  Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or MacOS.
30
34
 
31
35
  ```
@@ -45,14 +49,6 @@ guide. Then, check out the
45
49
  [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
46
50
  tutorial for a tour of its most important features.
47
51
 
48
- ## What problems does Pixeltable solve?
49
-
50
- Today’s solutions for AI app development require extensive custom coding and infrastructure
51
- plumbing. Tracking lineage and versions between and across data transformations, models, and
52
- deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
53
- a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
54
- building and productionizing AI applications.
55
-
56
52
  ## Why should you use Pixeltable?
57
53
 
58
54
  - It gives you transparency and reproducibility
@@ -85,3 +81,12 @@ storage.
85
81
  get cost projections before adding new data and new augmentations.
86
82
  * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
87
83
  and to ensure reproducibility.
84
+
85
+ ## Contributions & Feedback
86
+
87
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
88
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
89
+
90
+ ## :classical_building: License
91
+
92
+ This library is licensed under the Apache 2.0 License.
@@ -61,7 +61,7 @@ class Column:
61
61
  raise excs.Error(f"Invalid column name: '{name}'")
62
62
  self.name = name
63
63
  if col_type is None and computed_with is None:
64
- raise excs.Error(f'Column {name}: col_type is required if computed_with is not specified')
64
+ raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
65
65
 
66
66
  self.value_expr: Optional['Expr'] = None
67
67
  self.compute_func: Optional[Callable] = None
@@ -2,12 +2,11 @@ from typing import List, Optional, Dict, Type, Any, Union
2
2
  import pandas as pd
3
3
  import logging
4
4
  import dataclasses
5
- from uuid import UUID
6
- from collections import defaultdict
7
5
 
8
6
  import sqlalchemy as sql
9
7
  import sqlalchemy.orm as orm
10
8
 
9
+ import pixeltable
11
10
  from pixeltable.metadata import schema
12
11
  from pixeltable.env import Env
13
12
  import pixeltable.func as func
@@ -16,6 +15,10 @@ from pixeltable import exceptions as excs
16
15
  from pixeltable.exprs import Predicate
17
16
  from pixeltable.iterators import ComponentIterator
18
17
 
18
+ from typing import TYPE_CHECKING
19
+ if TYPE_CHECKING:
20
+ import datasets
21
+
19
22
  __all__ = [
20
23
  'Client',
21
24
  ]
@@ -155,6 +158,73 @@ class Client:
155
158
  _logger.info(f'Created table `{path_str}`.')
156
159
  return tbl
157
160
 
161
+ def import_parquet(
162
+ self,
163
+ table_path: str,
164
+ *,
165
+ parquet_path: str,
166
+ schema_override: Optional[Dict[str, Any]] = None,
167
+ **kwargs,
168
+ ) -> catalog.InsertableTable:
169
+ """Create a new `InsertableTable` from a Parquet file or set of files. Requires pyarrow to be installed.
170
+ Args:
171
+ table_path: Path to the table within pixeltable.
172
+ parquet_path: Path to an individual Parquet file or directory of Parquet files.
173
+ schema_override: Optional dictionary mapping column names to column type to override the default
174
+ schema inferred from the Parquet file. The column type should be a pixeltable ColumnType.
175
+ For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
176
+ Any fields not provided explicitly will map to types with `pixeltable.utils.parquet.parquet_schema_to_pixeltable_schema`
177
+ kwargs: Additional arguments to pass to `Client.create_table`.
178
+
179
+ Returns:
180
+ The newly created table. The table will have loaded the data from the Parquet file(s).
181
+ """
182
+ from pixeltable.utils import parquet
183
+
184
+ return parquet.import_parquet(
185
+ self,
186
+ table_path=table_path,
187
+ parquet_path=parquet_path,
188
+ schema_override=schema_override,
189
+ **kwargs,
190
+ )
191
+
192
+ def import_huggingface_dataset(
193
+ self,
194
+ table_path: str,
195
+ dataset: Union['datasets.Dataset', 'datasets.DatasetDict'],
196
+ *,
197
+ column_name_for_split: Optional[str] = 'split',
198
+ schema_override: Optional[Dict[str, Any]] = None,
199
+ **kwargs
200
+ ) -> catalog.InsertableTable:
201
+ """Create a new `InsertableTable` from a Huggingface dataset, or dataset dict with multiple splits.
202
+ Requires datasets library to be installed.
203
+
204
+ Args:
205
+ table_path: Path to the table.
206
+ dataset: Huggingface datasets.Dataset or datasets.DatasetDict to insert into the table.
207
+ column_name_for_split: column name to use for split information. If None, no split information will be stored.
208
+ schema_override: Optional dictionary mapping column names to column type to override the corresponding defaults from
209
+ `pixeltable.utils.hf_datasets.huggingface_schema_to_pixeltable_schema`. The column type should be a pixeltable ColumnType.
210
+ For example, {'col_vid': VideoType()}, rather than {'col_vid': StringType()}.
211
+
212
+ kwargs: Additional arguments to pass to `create_table`.
213
+
214
+ Returns:
215
+ The newly created table. The table will have loaded the data from the dataset.
216
+ """
217
+ from pixeltable.utils import hf_datasets
218
+
219
+ return hf_datasets.import_huggingface_dataset(
220
+ self,
221
+ table_path,
222
+ dataset,
223
+ column_name_for_split=column_name_for_split,
224
+ schema_override=schema_override,
225
+ **kwargs,
226
+ )
227
+
158
228
  def create_view(
159
229
  self, path_str: str, base: catalog.Table, *, schema: Optional[Dict[str, Any]] = None,
160
230
  filter: Optional[Predicate] = None,
@@ -1,33 +1,28 @@
1
1
  from __future__ import annotations
2
+
2
3
  import datetime
3
- import os
4
- from typing import Optional, Dict, Any, List
5
- from pathlib import Path
6
- import sqlalchemy as sql
7
- import uuid
4
+ import glob
5
+ import http.server
8
6
  import importlib
9
7
  import importlib.util
10
-
11
- import http.server
8
+ import logging
9
+ import os
12
10
  import socketserver
11
+ import sys
13
12
  import threading
14
13
  import typing
15
14
  import uuid
16
15
  from pathlib import Path
17
- from typing import Optional, Dict, Any, List
16
+ from typing import Callable, Optional, Dict, Any, List
18
17
 
18
+ import pgserver
19
+ import sqlalchemy as sql
19
20
  import yaml
20
21
  from sqlalchemy_utils.functions import database_exists, create_database, drop_database
21
- import pgserver
22
- import logging
23
- import sys
24
- import glob
25
22
 
26
- from pixeltable import metadata
27
23
  import pixeltable.exceptions as excs
24
+ from pixeltable import metadata
28
25
 
29
- if typing.TYPE_CHECKING:
30
- import openai
31
26
 
32
27
  class Env:
33
28
  """
@@ -59,12 +54,12 @@ class Env:
59
54
  # package name -> version; version == []: package is installed, but we haven't determined the version yet
60
55
  self._installed_packages: Dict[str, Optional[List[int]]] = {}
61
56
  self._nos_client: Optional[Any] = None
62
- self._openai_client: Optional['openai.OpenAI'] = None
63
- self._has_together_client: bool = False
64
57
  self._spacy_nlp: Optional[Any] = None # spacy.Language
65
58
  self._httpd: Optional[socketserver.TCPServer] = None
66
59
  self._http_address: Optional[str] = None
67
60
 
61
+ self._registered_clients: dict[str, Any] = {}
62
+
68
63
  # logging-related state
69
64
  self._logger = logging.getLogger('pixeltable')
70
65
  self._logger.setLevel(logging.DEBUG) # allow everything to pass, we filter in _log_filter()
@@ -256,31 +251,32 @@ class Env:
256
251
  from pixeltable.functions.util import create_nos_modules
257
252
  _ = create_nos_modules()
258
253
 
259
- def _create_openai_client(self) -> None:
260
- if not self.is_installed_package('openai'):
261
- raise excs.Error('OpenAI client not initialized (cannot find package `openai`: `pip install openai`?)')
262
- import openai
263
- if 'openai' in self._config and 'api_key' in self._config['openai']:
264
- api_key = self._config['openai']['api_key']
265
- else:
266
- api_key = os.environ.get('OPENAI_API_KEY')
267
- if api_key is None or api_key == '':
268
- raise excs.Error('OpenAI client not initialized (no API key configured).')
269
- self._openai_client = openai.OpenAI(api_key=api_key)
270
- self._logger.info('Initialized OpenAI client.')
254
+ def get_client(self, name: str, init: Callable, environ: Optional[str] = None) -> Any:
255
+ """
256
+ Gets the client with the specified name, using `init` to construct one if necessary.
257
+
258
+ - name: The name of the client
259
+ - init: A `Callable` with signature `fn(api_key: str) -> Any` that constructs a client object
260
+ - environ: The name of the environment variable to use for the API key, if no API key is found in config
261
+ (defaults to f'{name.upper()}_API_KEY')
262
+ """
263
+ if name in self._registered_clients:
264
+ return self._registered_clients[name]
265
+
266
+ if environ is None:
267
+ environ = f'{name.upper()}_API_KEY'
271
268
 
272
- def _create_together_client(self) -> None:
273
- if 'together' in self._config and 'api_key' in self._config['together']:
274
- api_key = self._config['together']['api_key']
269
+ if name in self._config and 'api_key' in self._config[name]:
270
+ api_key = self._config[name]['api_key']
275
271
  else:
276
- api_key = os.environ.get('TOGETHER_API_KEY')
272
+ api_key = os.environ.get(environ)
277
273
  if api_key is None or api_key == '':
278
- self._logger.info('Together client not initialized (no API key configured).')
279
- return
280
- import together
281
- self._logger.info('Initializing Together client.')
282
- together.api_key = api_key
283
- self._has_together_client = True
274
+ raise excs.Error(f'`{name}` client not initialized (no API key configured).')
275
+
276
+ client = init(api_key)
277
+ self._registered_clients[name] = client
278
+ self._logger.info(f'Initialized `{name}` client.')
279
+ return client
284
280
 
285
281
  def _start_web_server(self) -> None:
286
282
  """
@@ -319,6 +315,7 @@ class Env:
319
315
  else:
320
316
  self._installed_packages[package] = None
321
317
 
318
+ check('datasets')
322
319
  check('torch')
323
320
  check('torchvision')
324
321
  check('transformers')
@@ -332,8 +329,6 @@ class Env:
332
329
  check('tiktoken')
333
330
  check('openai')
334
331
  check('together')
335
- if self.is_installed_package('together'):
336
- self._create_together_client()
337
332
  check('fireworks')
338
333
  check('nos')
339
334
  if self.is_installed_package('nos'):
@@ -399,17 +394,6 @@ class Env:
399
394
  def nos_client(self) -> Any:
400
395
  return self._nos_client
401
396
 
402
- @property
403
- def openai_client(self) -> 'openai.OpenAI':
404
- if self._openai_client is None:
405
- self._create_openai_client()
406
- assert self._openai_client is not None
407
- return self._openai_client
408
-
409
- @property
410
- def has_together_client(self) -> bool:
411
- return self._has_together_client
412
-
413
397
  @property
414
398
  def spacy_nlp(self) -> Any:
415
399
  assert self._spacy_nlp is not None
@@ -1,19 +1,21 @@
1
1
  from __future__ import annotations
2
- from typing import List, Optional, Any, Tuple, Dict
2
+
3
+ import concurrent.futures
4
+ import logging
3
5
  import threading
6
+ import urllib.parse
7
+ import urllib.request
4
8
  from collections import defaultdict
5
- from uuid import UUID
6
- import concurrent
7
- import logging
8
- import urllib
9
9
  from pathlib import Path
10
+ from typing import List, Optional, Any, Tuple, Dict
11
+ from uuid import UUID
10
12
 
11
- from .data_row_batch import DataRowBatch
12
- from .exec_node import ExecNode
13
- import pixeltable.exprs as exprs
14
- from pixeltable.utils.filecache import FileCache
15
13
  import pixeltable.env as env
16
14
  import pixeltable.exceptions as excs
15
+ import pixeltable.exprs as exprs
16
+ from pixeltable.utils.filecache import FileCache
17
+ from .data_row_batch import DataRowBatch
18
+ from .exec_node import ExecNode
17
19
 
18
20
  _logger = logging.getLogger('pixeltable')
19
21
 
@@ -81,7 +83,9 @@ class CachePrefetchNode(ExecNode):
81
83
  """Fetches a remote URL into Env.tmp_dir and returns its path"""
82
84
  url = row.file_urls[slot_idx]
83
85
  parsed = urllib.parse.urlparse(url)
84
- assert parsed.scheme != '' and parsed.scheme != 'file'
86
+ # Use len(parsed.scheme) > 1 here to ensure we're not being passed
87
+ # a Windows filename
88
+ assert len(parsed.scheme) > 1 and parsed.scheme != 'file'
85
89
  # preserve the file extension, if there is one
86
90
  extension = ''
87
91
  if parsed.path != '':
@@ -95,7 +99,6 @@ class CachePrefetchNode(ExecNode):
95
99
  if self.boto_client is None:
96
100
  self.boto_client = get_client()
97
101
  self.boto_client.download_file(parsed.netloc, parsed.path.lstrip('/'), str(tmp_path))
98
- return tmp_path
99
102
  elif parsed.scheme == 'http' or parsed.scheme == 'https':
100
103
  with urllib.request.urlopen(url) as resp, open(tmp_path, 'wb') as f:
101
104
  data = resp.read()
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
- from typing import Optional, List, Any, Tuple
2
+
3
3
  import io
4
- import urllib
4
+ import urllib.parse
5
+ import urllib.request
6
+ from typing import Optional, List, Any, Tuple
5
7
 
6
8
  import PIL
7
9
  import numpy as np
@@ -104,6 +106,7 @@ class DataRow:
104
106
  assert self.file_paths[index] is not None
105
107
  if self.vals[index] is None:
106
108
  self.vals[index] = PIL.Image.open(self.file_paths[index])
109
+ self.vals[index].load()
107
110
 
108
111
  return self.vals[index]
109
112
 
@@ -137,14 +140,19 @@ class DataRow:
137
140
  if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
138
141
  # this is either a local file path or a URL
139
142
  parsed = urllib.parse.urlparse(val)
140
- if parsed.scheme == '' or parsed.scheme == 'file':
143
+ # Determine if this is a local file or a remote URL. If the scheme length is <= 1,
144
+ # we assume it's a local file. (This is because a Windows path will be interpreted
145
+ # by urllib as a URL with scheme equal to the drive letter.)
146
+ if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
141
147
  # local file path
142
148
  assert self.file_urls[idx] is None and self.file_paths[idx] is None
143
- if parsed.scheme == '':
144
- self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(parsed.path))
149
+ if len(parsed.scheme) <= 1:
150
+ self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(val))
151
+ self.file_paths[idx] = val
145
152
  else:
146
153
  self.file_urls[idx] = val
147
- self.file_paths[idx] = urllib.parse.unquote(parsed.path)
154
+ # Wrap the path in a url2pathname() call to ensure proper handling on Windows.
155
+ self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
148
156
  else:
149
157
  # URL
150
158
  assert self.file_urls[idx] is None
@@ -15,7 +15,7 @@ import pixeltable.functions.pil.image
15
15
  from pixeltable import exprs
16
16
  from pixeltable.type_system import IntType, ColumnType, FloatType, ImageType, VideoType
17
17
  # automatically import all submodules so that the udfs get registered
18
- from . import image, string, video, openai, together, fireworks, huggingface
18
+ from . import image, string, video, huggingface
19
19
 
20
20
  # TODO: remove and replace calls with astype()
21
21
  def cast(expr: exprs.Expr, target_type: ColumnType) -> exprs.Expr:
@@ -0,0 +1,34 @@
1
+ from typing import Optional
2
+
3
+ import fireworks.client
4
+
5
+ import pixeltable as pxt
6
+ from pixeltable import env
7
+
8
+
9
def fireworks_client() -> fireworks.client.Fireworks:
    """Fetch the 'fireworks' client from the pixeltable environment.

    The request goes through ``env.Env.get().get_client`` and supplies a
    factory that builds a ``fireworks.client.Fireworks`` from an API key.
    """
    def _build_client(api_key):
        # Factory handed to the environment; it receives the API key to use.
        return fireworks.client.Fireworks(api_key=api_key)

    return env.Env.get().get_client('fireworks', _build_client)
11
+
12
+
13
@pxt.udf
def chat_completions(
    messages: list[dict[str, str]],
    *,
    model: str,
    max_tokens: Optional[int] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    temperature: Optional[float] = None
) -> dict:
    """Create a Fireworks chat completion and return the response as a dict.

    Args:
        messages: The chat messages, passed through as ``messages``.
        model: The model identifier, passed through as ``model``.
        max_tokens: Optional sampling parameter; omitted from the request
            when ``None``.
        top_k: Optional sampling parameter; omitted from the request when
            ``None``.
        top_p: Optional sampling parameter; omitted from the request when
            ``None``.
        temperature: Optional sampling parameter; omitted from the request
            when ``None``.

    Returns:
        The result of calling ``.dict()`` on the response object returned by
        ``chat.completions.create``.
    """
    optional_args = {
        'max_tokens': max_tokens,
        'top_k': top_k,
        'top_p': top_p,
        'temperature': temperature
    }
    # Only forward the options the caller actually set; unset ones are
    # dropped from the call rather than sent as None.
    supplied_args = {key: value for key, value in optional_args.items() if value is not None}
    response = fireworks_client().chat.completions.create(
        model=model,
        messages=messages,
        **supplied_args
    )
    return response.dict()