pixeltable 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable-0.0.0/LICENSE +18 -0
  2. pixeltable-0.0.0/PKG-INFO +131 -0
  3. pixeltable-0.0.0/README.md +93 -0
  4. pixeltable-0.0.0/pixeltable/__init__.py +53 -0
  5. pixeltable-0.0.0/pixeltable/__version__.py +3 -0
  6. pixeltable-0.0.0/pixeltable/catalog/__init__.py +13 -0
  7. pixeltable-0.0.0/pixeltable/catalog/catalog.py +159 -0
  8. pixeltable-0.0.0/pixeltable/catalog/column.py +181 -0
  9. pixeltable-0.0.0/pixeltable/catalog/dir.py +32 -0
  10. pixeltable-0.0.0/pixeltable/catalog/globals.py +33 -0
  11. pixeltable-0.0.0/pixeltable/catalog/insertable_table.py +192 -0
  12. pixeltable-0.0.0/pixeltable/catalog/named_function.py +36 -0
  13. pixeltable-0.0.0/pixeltable/catalog/path.py +58 -0
  14. pixeltable-0.0.0/pixeltable/catalog/path_dict.py +139 -0
  15. pixeltable-0.0.0/pixeltable/catalog/schema_object.py +39 -0
  16. pixeltable-0.0.0/pixeltable/catalog/table.py +695 -0
  17. pixeltable-0.0.0/pixeltable/catalog/table_version.py +1026 -0
  18. pixeltable-0.0.0/pixeltable/catalog/table_version_path.py +133 -0
  19. pixeltable-0.0.0/pixeltable/catalog/view.py +203 -0
  20. pixeltable-0.0.0/pixeltable/dataframe.py +749 -0
  21. pixeltable-0.0.0/pixeltable/env.py +466 -0
  22. pixeltable-0.0.0/pixeltable/exceptions.py +17 -0
  23. pixeltable-0.0.0/pixeltable/exec/__init__.py +10 -0
  24. pixeltable-0.0.0/pixeltable/exec/aggregation_node.py +78 -0
  25. pixeltable-0.0.0/pixeltable/exec/cache_prefetch_node.py +116 -0
  26. pixeltable-0.0.0/pixeltable/exec/component_iteration_node.py +79 -0
  27. pixeltable-0.0.0/pixeltable/exec/data_row_batch.py +94 -0
  28. pixeltable-0.0.0/pixeltable/exec/exec_context.py +22 -0
  29. pixeltable-0.0.0/pixeltable/exec/exec_node.py +61 -0
  30. pixeltable-0.0.0/pixeltable/exec/expr_eval_node.py +217 -0
  31. pixeltable-0.0.0/pixeltable/exec/in_memory_data_node.py +73 -0
  32. pixeltable-0.0.0/pixeltable/exec/media_validation_node.py +43 -0
  33. pixeltable-0.0.0/pixeltable/exec/sql_scan_node.py +226 -0
  34. pixeltable-0.0.0/pixeltable/exprs/__init__.py +25 -0
  35. pixeltable-0.0.0/pixeltable/exprs/arithmetic_expr.py +102 -0
  36. pixeltable-0.0.0/pixeltable/exprs/array_slice.py +71 -0
  37. pixeltable-0.0.0/pixeltable/exprs/column_property_ref.py +77 -0
  38. pixeltable-0.0.0/pixeltable/exprs/column_ref.py +114 -0
  39. pixeltable-0.0.0/pixeltable/exprs/comparison.py +77 -0
  40. pixeltable-0.0.0/pixeltable/exprs/compound_predicate.py +98 -0
  41. pixeltable-0.0.0/pixeltable/exprs/data_row.py +199 -0
  42. pixeltable-0.0.0/pixeltable/exprs/expr.py +594 -0
  43. pixeltable-0.0.0/pixeltable/exprs/expr_set.py +39 -0
  44. pixeltable-0.0.0/pixeltable/exprs/function_call.py +382 -0
  45. pixeltable-0.0.0/pixeltable/exprs/globals.py +69 -0
  46. pixeltable-0.0.0/pixeltable/exprs/image_member_access.py +96 -0
  47. pixeltable-0.0.0/pixeltable/exprs/in_predicate.py +96 -0
  48. pixeltable-0.0.0/pixeltable/exprs/inline_array.py +109 -0
  49. pixeltable-0.0.0/pixeltable/exprs/inline_dict.py +103 -0
  50. pixeltable-0.0.0/pixeltable/exprs/is_null.py +38 -0
  51. pixeltable-0.0.0/pixeltable/exprs/json_mapper.py +121 -0
  52. pixeltable-0.0.0/pixeltable/exprs/json_path.py +159 -0
  53. pixeltable-0.0.0/pixeltable/exprs/literal.py +66 -0
  54. pixeltable-0.0.0/pixeltable/exprs/object_ref.py +41 -0
  55. pixeltable-0.0.0/pixeltable/exprs/predicate.py +44 -0
  56. pixeltable-0.0.0/pixeltable/exprs/row_builder.py +329 -0
  57. pixeltable-0.0.0/pixeltable/exprs/rowid_ref.py +94 -0
  58. pixeltable-0.0.0/pixeltable/exprs/similarity_expr.py +65 -0
  59. pixeltable-0.0.0/pixeltable/exprs/type_cast.py +53 -0
  60. pixeltable-0.0.0/pixeltable/exprs/variable.py +45 -0
  61. pixeltable-0.0.0/pixeltable/ext/__init__.py +5 -0
  62. pixeltable-0.0.0/pixeltable/ext/functions/yolox.py +92 -0
  63. pixeltable-0.0.0/pixeltable/func/__init__.py +7 -0
  64. pixeltable-0.0.0/pixeltable/func/aggregate_function.py +197 -0
  65. pixeltable-0.0.0/pixeltable/func/callable_function.py +113 -0
  66. pixeltable-0.0.0/pixeltable/func/expr_template_function.py +99 -0
  67. pixeltable-0.0.0/pixeltable/func/function.py +141 -0
  68. pixeltable-0.0.0/pixeltable/func/function_registry.py +227 -0
  69. pixeltable-0.0.0/pixeltable/func/globals.py +46 -0
  70. pixeltable-0.0.0/pixeltable/func/nos_function.py +202 -0
  71. pixeltable-0.0.0/pixeltable/func/signature.py +162 -0
  72. pixeltable-0.0.0/pixeltable/func/udf.py +164 -0
  73. pixeltable-0.0.0/pixeltable/functions/__init__.py +95 -0
  74. pixeltable-0.0.0/pixeltable/functions/eval.py +215 -0
  75. pixeltable-0.0.0/pixeltable/functions/fireworks.py +34 -0
  76. pixeltable-0.0.0/pixeltable/functions/huggingface.py +167 -0
  77. pixeltable-0.0.0/pixeltable/functions/image.py +16 -0
  78. pixeltable-0.0.0/pixeltable/functions/openai.py +289 -0
  79. pixeltable-0.0.0/pixeltable/functions/pil/image.py +147 -0
  80. pixeltable-0.0.0/pixeltable/functions/string.py +13 -0
  81. pixeltable-0.0.0/pixeltable/functions/together.py +143 -0
  82. pixeltable-0.0.0/pixeltable/functions/util.py +52 -0
  83. pixeltable-0.0.0/pixeltable/functions/video.py +62 -0
  84. pixeltable-0.0.0/pixeltable/globals.py +425 -0
  85. pixeltable-0.0.0/pixeltable/index/__init__.py +2 -0
  86. pixeltable-0.0.0/pixeltable/index/base.py +51 -0
  87. pixeltable-0.0.0/pixeltable/index/embedding_index.py +168 -0
  88. pixeltable-0.0.0/pixeltable/io/__init__.py +3 -0
  89. pixeltable-0.0.0/pixeltable/io/hf_datasets.py +188 -0
  90. pixeltable-0.0.0/pixeltable/io/pandas.py +148 -0
  91. pixeltable-0.0.0/pixeltable/io/parquet.py +192 -0
  92. pixeltable-0.0.0/pixeltable/iterators/__init__.py +3 -0
  93. pixeltable-0.0.0/pixeltable/iterators/base.py +52 -0
  94. pixeltable-0.0.0/pixeltable/iterators/document.py +432 -0
  95. pixeltable-0.0.0/pixeltable/iterators/video.py +88 -0
  96. pixeltable-0.0.0/pixeltable/metadata/__init__.py +58 -0
  97. pixeltable-0.0.0/pixeltable/metadata/converters/convert_10.py +18 -0
  98. pixeltable-0.0.0/pixeltable/metadata/converters/convert_12.py +3 -0
  99. pixeltable-0.0.0/pixeltable/metadata/converters/convert_13.py +41 -0
  100. pixeltable-0.0.0/pixeltable/metadata/schema.py +234 -0
  101. pixeltable-0.0.0/pixeltable/plan.py +620 -0
  102. pixeltable-0.0.0/pixeltable/store.py +424 -0
  103. pixeltable-0.0.0/pixeltable/tool/create_test_db_dump.py +184 -0
  104. pixeltable-0.0.0/pixeltable/tool/create_test_video.py +81 -0
  105. pixeltable-0.0.0/pixeltable/type_system.py +846 -0
  106. pixeltable-0.0.0/pixeltable/utils/__init__.py +17 -0
  107. pixeltable-0.0.0/pixeltable/utils/arrow.py +98 -0
  108. pixeltable-0.0.0/pixeltable/utils/clip.py +18 -0
  109. pixeltable-0.0.0/pixeltable/utils/coco.py +136 -0
  110. pixeltable-0.0.0/pixeltable/utils/documents.py +69 -0
  111. pixeltable-0.0.0/pixeltable/utils/filecache.py +195 -0
  112. pixeltable-0.0.0/pixeltable/utils/help.py +11 -0
  113. pixeltable-0.0.0/pixeltable/utils/http_server.py +70 -0
  114. pixeltable-0.0.0/pixeltable/utils/media_store.py +76 -0
  115. pixeltable-0.0.0/pixeltable/utils/pytorch.py +91 -0
  116. pixeltable-0.0.0/pixeltable/utils/s3.py +13 -0
  117. pixeltable-0.0.0/pixeltable/utils/sql.py +17 -0
  118. pixeltable-0.0.0/pixeltable/utils/transactional_directory.py +35 -0
  119. pixeltable-0.0.0/pyproject.toml +146 -0
@@ -0,0 +1,18 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+
6
+ Copyright 2023 Marcel Kornacker
7
+
8
+ Licensed under the Apache License, Version 2.0 (the "License");
9
+ you may not use this file except in compliance with the License.
10
+ You may obtain a copy of the License at
11
+
12
+ http://www.apache.org/licenses/LICENSE-2.0
13
+
14
+ Unless required by applicable law or agreed to in writing, software
15
+ distributed under the License is distributed on an "AS IS" BASIS,
16
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ See the License for the specific language governing permissions and
18
+ limitations under the License.
@@ -0,0 +1,131 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.0.0
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: numpy (>=1.25)
21
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
22
+ Requires-Dist: pandas (>=2.0,<3.0)
23
+ Requires-Dist: pgserver (==0.1.2)
24
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
25
+ Requires-Dist: pillow (>=9.3.0)
26
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
27
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
28
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
29
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
30
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
31
+ Requires-Dist: setuptools (==69.1.1)
32
+ Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64.1,<5.0.0)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ &nbsp;&nbsp;
45
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
46
+
47
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
48
+ </div>
49
+
50
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
51
+
52
+ ## What problems does Pixeltable solve?
53
+
54
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
55
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
56
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
57
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
58
+ building and productionizing AI applications.
59
+
60
+ ## ⚡Quick Start
61
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
62
+
63
+ ### Pixeltable Basics
64
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
65
+
66
+ [![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)&nbsp;&nbsp;
67
+ <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
68
+
69
+
70
+ ## 💾 Installation
71
+ Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, macOS, or Windows.
72
+
73
+ ```
74
+ pip install pixeltable
75
+ ```
76
+
77
+ To verify that it's working:
78
+
79
+ ```
80
+ import pixeltable as pxt
81
+ pxt.init()
82
+ ```
83
+
84
+ For more detailed installation instructions, see the
85
+ [Getting Started with Pixeltable](https://pixeltable.github.io/pixeltable/getting-started/)
86
+ guide. Then, check out the
87
+ [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
88
+ tutorial for a tour of its most important features.
89
+
90
+ ## Why should you use Pixeltable?
91
+
92
+ - It gives you transparency and reproducibility
93
+ - All generated data is automatically recorded and versioned
94
+ - You will never need to re-run a workload because you lost track of the input data
95
+ - It saves you money
96
+ - All data changes are automatically incremental
97
+ - You never need to re-run pipelines from scratch because you’re adding data
98
+ - It integrates with any existing Python code or libraries
99
+ - Bring your ever-changing code and workloads
100
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
101
+
102
+ ## Example Use Cases
103
+
104
+ * Interact with video data at the frame level without having to think about frame extraction,
105
+ intermediate file storage, or storage space explosion.
106
+ * Augment your data incrementally and interactively with built-in functions and UDFs, such as
107
+ image transformations, model inference, and visualizations, without having to think about data pipelines,
108
+ incremental updates, or capturing function output.
109
+ * Interact with all the data relevant to your AI application (video, images, documents, audio, structured data, JSON) through
110
+ a simple dataframe-style API directly in Python. This includes:
111
+ * similarity search on embeddings, supported by high-dimensional vector indexing
112
+ * path expressions and transformations on JSON data
113
+ * PIL and OpenCV image operations
114
+ * assembling frames into videos
115
+ * Perform keyword and image similarity search at the video frame level without having to worry about frame
116
+ storage.
117
+ * Access all Pixeltable-resident data directly as a PyTorch dataset in your training scripts.
118
+ * Understand the compute and storage costs of your data at the granularity of individual augmentations and
119
+ get cost projections before adding new data and new augmentations.
120
+ * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
121
+ and to ensure reproducibility.
122
+
123
+ ## Contributions & Feedback
124
+
125
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
126
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
127
+
128
+ ## :classical_building: License
129
+
130
+ This library is licensed under the Apache 2.0 License.
131
+
@@ -0,0 +1,93 @@
1
+ <div align="center">
2
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/pixeltable-banner.png" alt="Pixeltable" width="45%" />
3
+
4
+ # Unifying Data, Models, and Orchestration for AI Products
5
+
6
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
7
+ &nbsp;&nbsp;
8
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
9
+
10
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
11
+ </div>
12
+
13
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
14
+
15
+ ## What problems does Pixeltable solve?
16
+
17
+ Today’s solutions for AI app development require extensive custom coding and infrastructure
18
+ plumbing. Tracking lineage and versions between and across data transformations, models, and
19
+ deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
20
+ a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
21
+ building and productionizing AI applications.
22
+
23
+ ## ⚡Quick Start
24
+ Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
25
+
26
+ ### Pixeltable Basics
27
+ In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
28
+
29
+ [![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)&nbsp;&nbsp;
30
+ <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
31
+
32
+
33
+ ## 💾 Installation
34
+ Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, macOS, or Windows.
35
+
36
+ ```
37
+ pip install pixeltable
38
+ ```
39
+
40
+ To verify that it's working:
41
+
42
+ ```
43
+ import pixeltable as pxt
44
+ pxt.init()
45
+ ```
46
+
47
+ For more detailed installation instructions, see the
48
+ [Getting Started with Pixeltable](https://pixeltable.github.io/pixeltable/getting-started/)
49
+ guide. Then, check out the
50
+ [Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
51
+ tutorial for a tour of its most important features.
52
+
53
+ ## Why should you use Pixeltable?
54
+
55
+ - It gives you transparency and reproducibility
56
+ - All generated data is automatically recorded and versioned
57
+ - You will never need to re-run a workload because you lost track of the input data
58
+ - It saves you money
59
+ - All data changes are automatically incremental
60
+ - You never need to re-run pipelines from scratch because you’re adding data
61
+ - It integrates with any existing Python code or libraries
62
+ - Bring your ever-changing code and workloads
63
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
64
+
65
+ ## Example Use Cases
66
+
67
+ * Interact with video data at the frame level without having to think about frame extraction,
68
+ intermediate file storage, or storage space explosion.
69
+ * Augment your data incrementally and interactively with built-in functions and UDFs, such as
70
+ image transformations, model inference, and visualizations, without having to think about data pipelines,
71
+ incremental updates, or capturing function output.
72
+ * Interact with all the data relevant to your AI application (video, images, documents, audio, structured data, JSON) through
73
+ a simple dataframe-style API directly in Python. This includes:
74
+ * similarity search on embeddings, supported by high-dimensional vector indexing
75
+ * path expressions and transformations on JSON data
76
+ * PIL and OpenCV image operations
77
+ * assembling frames into videos
78
+ * Perform keyword and image similarity search at the video frame level without having to worry about frame
79
+ storage.
80
+ * Access all Pixeltable-resident data directly as a PyTorch dataset in your training scripts.
81
+ * Understand the compute and storage costs of your data at the granularity of individual augmentations and
82
+ get cost projections before adding new data and new augmentations.
83
+ * Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
84
+ and to ensure reproducibility.
85
+
86
+ ## Contributions & Feedback
87
+
88
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
89
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
90
+
91
+ ## :classical_building: License
92
+
93
+ This library is licensed under the Apache 2.0 License.
@@ -0,0 +1,53 @@
1
# Top-level package module: re-exports the names that make up the user-facing API.

from .catalog import Column, Table, InsertableTable, View
from .dataframe import DataFrame
# `Error` was previously listed twice in this import; a single import is equivalent.
from .exceptions import Error
from .exprs import RELATIVE_PATH_ROOT
from .func import Function, udf, uda, Aggregator, expr_udf
# Star import: pulls every public name of `.globals` into the package namespace.
# Those names are reachable as `pixeltable.<name>` but are not listed in `__all__` below.
from .globals import *
from .type_system import (
    ColumnType,
    StringType,
    IntType,
    FloatType,
    BoolType,
    TimestampType,
    JsonType,
    ArrayType,
    ImageType,
    VideoType,
    AudioType,
    DocumentType,
)
from .utils.help import help

# noinspection PyUnresolvedReferences
from . import functions, io
from .__version__ import __version__, __version_tuple__

# Names exported by `from pixeltable import *`.
__all__ = [
    'DataFrame',
    'Column',
    'Table',
    'InsertableTable',
    'View',
    'Error',
    'ColumnType',
    'StringType',
    'IntType',
    'FloatType',
    'BoolType',
    'TimestampType',
    'JsonType',
    'RELATIVE_PATH_ROOT',
    'ArrayType',
    'ImageType',
    'VideoType',
    'AudioType',
    'DocumentType',
    'Function',
    'help',
    'udf',
    'Aggregator',
    'uda',
    'expr_udf',
]
@@ -0,0 +1,3 @@
1
# These version placeholders will be replaced during build.
# NOTE(review): the build step that performs the replacement is not visible here; presumably it
# rewrites these literals textually, so confirm before changing how they are written.
__version__ = "0.0.0"
__version_tuple__ = (0, 0, 0)
@@ -0,0 +1,13 @@
1
+ from .catalog import Catalog
2
+ from .column import Column
3
+ from .table_version_path import TableVersionPath
4
+ from .table_version import TableVersion
5
+ from .schema_object import SchemaObject
6
+ from .named_function import NamedFunction
7
+ from .dir import Dir
8
+ from .table import Table
9
+ from .insertable_table import InsertableTable
10
+ from .view import View
11
+ from .path import Path
12
+ from .path_dict import PathDict
13
+ from .globals import is_valid_identifier, is_valid_path
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ from uuid import UUID
4
+ import dataclasses
5
+ import logging
6
+
7
+ import sqlalchemy as sql
8
+ import sqlalchemy.orm as orm
9
+
10
+ from .table_version import TableVersion
11
+ from .table_version_path import TableVersionPath
12
+ from .table import Table
13
+ from .named_function import NamedFunction
14
+ from .path_dict import PathDict
15
+ import pixeltable.env as env
16
+ import pixeltable.metadata.schema as schema
17
+
18
+ _logger = logging.getLogger('pixeltable')
19
+
20
class Catalog:
    """A repository of catalog objects.

    Used as a singleton: ``Catalog.get()`` creates the instance on first use and loads all tables and views
    from the metadata store; ``Catalog.clear()`` discards the instance.
    """
    _instance: Optional[Catalog] = None

    @classmethod
    def get(cls) -> Catalog:
        """Return the singleton instance, creating it and loading its tables/views on the first call."""
        if cls._instance is None:
            cls._instance = cls()
            with orm.Session(env.Env.get().engine, future=True) as session:
                cls._instance._load_table_versions(session)
                #cls._instance._load_functions(session)
        return cls._instance

    @classmethod
    def clear(cls) -> None:
        """Remove the instance. Used for testing."""
        cls._instance = None

    def __init__(self) -> None:
        # key: [id, version]
        # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
        # - snapshot versions: records the version of the snapshot
        self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}

        self.tbls: Dict[UUID, Table] = {}  # don't use a defaultdict here, it doesn't cooperate with the debugger
        self.tbl_dependents: Dict[UUID, List[Table]] = {}

        self._init_store()
        # do this after _init_store()
        # NOTE(review): presumably PathDict relies on the root directory created there -- confirm
        self.paths = PathDict()

    def _init_store(self) -> None:
        """One-time initialization of the stored catalog. Idempotent.

        Creates the top-level directory record (name ``''``, no parent) unless at least one directory
        record already exists.
        """
        with orm.Session(env.Env.get().engine, future=True) as session:
            if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
                return
            # create a top-level directory, so that every schema object has a directory
            dir_md = schema.DirMd(name='')
            dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
            session.add(dir_record)
            # commit() flushes pending state itself, so no explicit flush() is needed
            session.commit()
            _logger.info('Initialized catalog')

    def _load_snapshot_version(
            self, tbl_id: UUID, version: int, base: Optional[TableVersion], session: orm.Session
    ) -> TableVersion:
        """Load a specific version of a table as a snapshot TableVersion.

        Args:
            tbl_id: id of the table to load
            version: the table version to load
            base: the TableVersion to use as the snapshot's base (takes the place of the stored base reference)
            session: session used to query the metadata store

        Returns:
            A TableVersion constructed with ``is_snapshot=True`` from the stored table metadata and the schema
            version that was current at ``version``.

        Raises:
            sqlalchemy.exc.NoResultFound / MultipleResultsFound: via ``Query.one()`` if the query does not match
                exactly one row.
        """
        q = session.query(schema.Table, schema.TableSchemaVersion) \
            .select_from(schema.Table) \
            .join(schema.TableVersion) \
            .join(schema.TableSchemaVersion) \
            .where(schema.Table.id == tbl_id) \
            .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {version}")) \
            .where(sql.text((
                f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
                f"{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}")))
        tbl_record, schema_version_record = q.one()
        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
        # we ignore tbl_record.base_tbl_id/base_snapshot_id and use 'base' instead: if the base is a snapshot
        # we'd have to look that up first
        return TableVersion(tbl_record.id, tbl_md, version, schema_version_md, is_snapshot=True, base=base)

    def _load_table_versions(self, session: orm.Session) -> None:
        """Load all tables and views from the metadata store and register them in this catalog.

        For every stored table this creates an InsertableTable or a View, records it in ``self.tbls`` and
        ``self.paths``, creates its (initially empty) entry in ``self.tbl_dependents`` and, for views, appends
        the view to the dependents of its base table.
        """
        from .insertable_table import InsertableTable
        from .view import View

        # load tables/views;
        # do this in ascending order of creation ts so that we can resolve base references in one pass
        # (the creation ts is taken from each table's version-0 TableVersion record; the schema used is the
        # table's current schema version)
        q = session.query(schema.Table, schema.TableSchemaVersion) \
            .select_from(schema.Table) \
            .join(schema.TableVersion) \
            .join(schema.TableSchemaVersion) \
            .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = 0")) \
            .where(sql.text((
                f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
                f"{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}"))) \
            .order_by(sql.text(f"({schema.TableVersion.__table__}.md->>'created_at')::float"))

        for tbl_record, schema_version_record in q.all():
            tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
            schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
            view_md = tbl_md.view_md

            if view_md is not None:
                assert len(view_md.base_versions) > 0
                # construct a TableVersionPath for the view
                refd_versions = [(UUID(tbl_id), version) for tbl_id, version in view_md.base_versions]
                base_path: Optional[TableVersionPath] = None
                base: Optional[TableVersion] = None
                # go through the versions in reverse order, so we can construct TableVersionPaths
                for base_id, version in refd_versions[::-1]:
                    base_version = self.tbl_versions.get((base_id, version))
                    if base_version is None:
                        # if this is a reference to a mutable table, we should have loaded it already
                        assert version is not None
                        base_version = self._load_snapshot_version(base_id, version, base, session)
                    base_path = TableVersionPath(base_version, base=base_path)
                    base = base_version
                assert base_path is not None

                base_tbl = self.tbls[base_path.tbl_version.id]
                is_snapshot = view_md.is_snapshot
                snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
                if snapshot_only:
                    # this is a pure snapshot, without a physical table backing it
                    view_path = base_path
                else:
                    tbl_version = TableVersion(
                        tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md, is_snapshot=is_snapshot,
                        base=base_path.tbl_version if is_snapshot else None,
                        base_path=base_path if not is_snapshot else None)
                    view_path = TableVersionPath(tbl_version, base=base_path)

                tbl = View(
                    tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl,
                    snapshot_only=snapshot_only)
                self.tbl_dependents[base_tbl._id].append(tbl)

            else:
                tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
                tbl = InsertableTable(tbl_record.dir_id, tbl_version)

            self.tbls[tbl._id] = tbl
            self.tbl_dependents[tbl._id] = []
            self.paths.add_schema_obj(tbl._dir_id, tbl_md.name, tbl)
148
+
149
+ # def _load_functions(self, session: orm.Session) -> None:
150
+ # # load Function metadata; doesn't load the actual callable, which can be large and is only done on-demand by the
151
+ # # FunctionRegistry
152
+ # q = session.query(schema.Function.id, schema.Function.dir_id, schema.Function.md) \
153
+ # .where(sql.text(f"({schema.Function.__table__}.md->>'name')::text IS NOT NULL"))
154
+ # for id, dir_id, md in q.all():
155
+ # assert 'name' in md
156
+ # name = md['name']
157
+ # assert name is not None
158
+ # named_fn = NamedFunction(id, dir_id, name)
159
+ # self.paths.add_schema_obj(dir_id, name, named_fn)
@@ -0,0 +1,181 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Optional, Union, Callable, Set
5
+
6
+ import sqlalchemy as sql
7
+
8
+ import pixeltable.exceptions as excs
9
+ import pixeltable.type_system as ts
10
+ from .globals import is_valid_identifier
11
+
12
+ _logger = logging.getLogger('pixeltable')
13
+
14
class Column:
    """Representation of a column in the schema of a Table/DataFrame.

    A Column contains all the metadata necessary for executing queries and updates against a particular version of a
    table/view.

    Equality and hashing are both based on the pair ``(tbl.id, id)``, so a Column can only be compared or hashed
    after ``tbl`` has been assigned.
    """

    def __init__(
            self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
            computed_with: Optional[Union['Expr', Callable]] = None,
            is_pk: bool = False, stored: Optional[bool] = None,
            col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
            schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
    ):
        """Column constructor.

        Args:
            name: column name; None for system columns (eg, index columns)
            col_type: column type; can be None if the type can be derived from ``computed_with``
            computed_with: a callable or an Expr object that computes the column value
            is_pk: if True, this column is part of the primary key
            stored: determines whether a computed column is present in the stored table or recomputed on demand
            col_id: column ID (only used internally)
            schema_version_add: stored as-is on the instance; presumably the schema version in which the column
                was added (not interpreted in this class; confirm against TableVersion)
            schema_version_drop: stored as-is on the instance; presumably the schema version in which the column
                was dropped (not interpreted in this class; confirm against TableVersion)
            sa_col_type: if not None, used for the storage column instead of ``col_type.to_sa_type()``

        Raises:
            excs.Error: if ``name`` is not a valid identifier, if neither ``col_type`` nor ``computed_with`` is
                given, if ``computed_with`` is neither convertible to an Expr nor callable, or if ``computed_with``
                is a callable and ``col_type`` is missing.

        Computed columns: those have a non-None ``computed_with`` argument
        - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
          col_type is None
        - when loaded from md store: ``computed_with`` is set and col_type is set

        ``computed_with`` is a Callable:
        - the callable's parameter names must correspond to existing columns in the table for which this Column
          is being used
        - ``col_type`` needs to be set to the callable's return type

        ``stored`` (only valid for computed image columns):
        - if True: the column is present in the stored table
        - if False: the column is not present in the stored table and recomputed during a query
        - if None: the system chooses for you (at present, this is always False, but this may change in the future)
        """
        if name is not None and not is_valid_identifier(name):
            raise excs.Error(f"Invalid column name: '{name}'")
        self.name = name
        if col_type is None and computed_with is None:
            raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')

        self.value_expr: Optional['Expr'] = None
        self.compute_func: Optional[Callable] = None
        # imported at function scope; presumably to avoid a circular import at module load time
        from pixeltable import exprs
        if computed_with is not None:
            value_expr = exprs.Expr.from_object(computed_with)
            if value_expr is None:
                # computed_with could not be turned into an Expr, so it needs to be a Callable
                if not callable(computed_with):
                    raise excs.Error(
                        f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
                        f'but it is a {type(computed_with)}')
                if col_type is None:
                    raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
                # we need to turn the computed_with function into an Expr, but this requires resolving
                # column name references and for that we need to wait until we're assigned to a Table
                self.compute_func = computed_with
            else:
                # keep a private copy of the expression and default the column type to the expression's type
                self.value_expr = value_expr.copy()
                self.col_type = self.value_expr.col_type

        # an explicitly supplied col_type takes precedence over the type derived from the expression
        if col_type is not None:
            self.col_type = col_type
        assert self.col_type is not None

        self.stored = stored
        self.dependent_cols: Set[Column] = set()  # cols with value_exprs that reference us; set by TableVersion
        self.id = col_id
        self.is_pk = is_pk
        self.schema_version_add = schema_version_add
        self.schema_version_drop = schema_version_drop

        # column in the stored table for the values of this Column
        self.sa_col: Optional[sql.schema.Column] = None
        self.sa_col_type = sa_col_type

        # computed cols also have storage columns for the exception string and type
        self.sa_errormsg_col: Optional[sql.schema.Column] = None
        self.sa_errortype_col: Optional[sql.schema.Column] = None
        from .table_version import TableVersion
        self.tbl: Optional[TableVersion] = None  # set by owning TableVersion

    def __hash__(self) -> int:
        """Hash on ``(tbl.id, id)``, consistent with ``__eq__``; requires ``tbl`` to be set."""
        assert self.tbl is not None
        return hash((self.tbl.id, self.id))

    def check_value_expr(self) -> None:
        """Reject ``stored=False`` for a column whose value expression contains a window function call.

        Raises:
            excs.Error: if ``stored`` is False and the value expression contains a window function call.
        """
        assert self.value_expr is not None
        # value_expr is non-None here, so the column is necessarily computed
        if self.stored is False and self.has_window_fn_call():
            raise excs.Error(
                f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
                f'\n{self.value_expr}')

    def has_window_fn_call(self) -> bool:
        """Return True if the value expression contains at least one window function call."""
        if self.value_expr is None:
            return False
        from pixeltable import exprs
        window_fn_calls = self.value_expr.subexprs(
            filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
        # stop at the first match instead of materializing every matching subexpression
        return any(True for _ in window_fn_calls)

    def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
        """Return the entries of the owning table's ``idxs_by_name`` whose ``col`` is this column."""
        assert self.tbl is not None
        return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}

    @property
    def is_computed(self) -> bool:
        """True if the column has either a compute function or a value expression."""
        return self.compute_func is not None or self.value_expr is not None

    @property
    def is_stored(self) -> bool:
        """Returns True if column is materialized in the stored table."""
        assert self.stored is not None
        return self.stored

    @property
    def records_errors(self) -> bool:
        """True if this column also stores error information."""
        return self.is_stored and (self.is_computed or self.col_type.is_media_type())

    def source(self) -> None:
        """
        If this is a computed col and the top-level expr is a function call, print the source, if possible.
        """
        if self.value_expr is None:
            return
        from pixeltable import exprs
        if not isinstance(self.value_expr, exprs.FunctionCall):
            return
        self.value_expr.fn.source()

    def create_sa_cols(self) -> None:
        """
        These need to be recreated for every new table schema version.
        """
        assert self.is_stored
        # all storage columns are nullable (we deal with null errors in Pixeltable directly)
        self.sa_col = sql.Column(self.store_name(), self.get_sa_col_type(), nullable=True)
        if self.records_errors:
            self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
            self.sa_errortype_col = sql.Column(
                self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)

    def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
        """Return the storage column type: ``sa_col_type`` if set, otherwise ``col_type.to_sa_type()``."""
        return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type

    def store_name(self) -> str:
        """Return the name of the storage column for this column's values (``col_<id>``)."""
        assert self.id is not None
        assert self.is_stored
        return f'col_{self.id}'

    def errormsg_store_name(self) -> str:
        """Return the name of the storage column that holds the error message."""
        return f'{self.store_name()}_errormsg'

    def errortype_store_name(self) -> str:
        """Return the name of the storage column that holds the error type."""
        return f'{self.store_name()}_errortype'

    def __str__(self) -> str:
        return f'{self.name}: {self.col_type}'

    def __eq__(self, other: object) -> bool:
        """Two Columns are equal if they have the same table id and column id; requires ``tbl`` on both."""
        if not isinstance(other, Column):
            return False
        assert self.tbl is not None
        assert other.tbl is not None
        return self.tbl.id == other.tbl.id and self.id == other.id
181
+