pixeltable 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (175)
  1. {pixeltable-0.3.5 → pixeltable-0.3.7}/PKG-INFO +1 -1
  2. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/__init__.py +5 -3
  3. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/__version__.py +2 -2
  4. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/__init__.py +1 -0
  5. pixeltable-0.3.7/pixeltable/catalog/catalog.py +397 -0
  6. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/column.py +21 -5
  7. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/dir.py +19 -6
  8. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/insertable_table.py +34 -37
  9. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/named_function.py +0 -4
  10. pixeltable-0.3.7/pixeltable/catalog/schema_object.py +65 -0
  11. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/table.py +195 -158
  12. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/table_version.py +187 -232
  13. pixeltable-0.3.7/pixeltable/catalog/table_version_handle.py +50 -0
  14. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/table_version_path.py +49 -33
  15. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/view.py +56 -96
  16. pixeltable-0.3.7/pixeltable/config.py +103 -0
  17. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/dataframe.py +90 -90
  18. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/env.py +98 -168
  19. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/aggregation_node.py +5 -4
  20. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/cache_prefetch_node.py +1 -1
  21. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/component_iteration_node.py +13 -9
  22. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/data_row_batch.py +3 -3
  23. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/exec_context.py +0 -4
  24. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/exec_node.py +3 -2
  25. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/schedulers.py +2 -1
  26. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/in_memory_data_node.py +9 -4
  27. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/row_update_node.py +1 -2
  28. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/sql_node.py +20 -16
  29. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/column_ref.py +9 -9
  30. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/comparison.py +1 -1
  31. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/data_row.py +4 -4
  32. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/expr.py +20 -5
  33. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/function_call.py +98 -58
  34. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/json_mapper.py +25 -8
  35. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/json_path.py +6 -5
  36. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/object_ref.py +16 -5
  37. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/row_builder.py +15 -15
  38. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/rowid_ref.py +21 -7
  39. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/__init__.py +1 -1
  40. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/function.py +38 -6
  41. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/query_template_function.py +3 -6
  42. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/tools.py +26 -26
  43. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/udf.py +1 -1
  44. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/__init__.py +2 -0
  45. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/anthropic.py +9 -3
  46. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/fireworks.py +7 -4
  47. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/globals.py +4 -5
  48. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/huggingface.py +1 -5
  49. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/image.py +17 -7
  50. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/llama_cpp.py +1 -1
  51. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/mistralai.py +1 -1
  52. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/ollama.py +4 -4
  53. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/openai.py +26 -23
  54. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/string.py +23 -30
  55. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/timestamp.py +11 -6
  56. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/together.py +14 -12
  57. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/util.py +1 -1
  58. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/video.py +5 -4
  59. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/vision.py +6 -9
  60. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/whisper.py +3 -3
  61. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/globals.py +246 -260
  62. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/index/__init__.py +2 -0
  63. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/index/base.py +1 -1
  64. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/index/btree.py +3 -1
  65. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/index/embedding_index.py +11 -5
  66. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/external_store.py +11 -12
  67. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/label_studio.py +4 -3
  68. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/parquet.py +57 -56
  69. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/__init__.py +4 -2
  70. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/audio.py +11 -11
  71. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/document.py +10 -10
  72. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/string.py +1 -2
  73. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/video.py +14 -15
  74. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/__init__.py +9 -5
  75. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_10.py +0 -1
  76. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_15.py +0 -2
  77. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_23.py +0 -2
  78. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_24.py +3 -3
  79. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_25.py +1 -1
  80. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_27.py +0 -2
  81. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_28.py +0 -2
  82. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_29.py +7 -8
  83. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/util.py +7 -7
  84. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/schema.py +27 -19
  85. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/plan.py +68 -40
  86. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/share/packager.py +12 -9
  87. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/store.py +37 -38
  88. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/type_system.py +41 -28
  89. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/filecache.py +2 -1
  90. {pixeltable-0.3.5 → pixeltable-0.3.7}/pyproject.toml +20 -23
  91. pixeltable-0.3.5/pixeltable/catalog/catalog.py +0 -190
  92. pixeltable-0.3.5/pixeltable/catalog/schema_object.py +0 -79
  93. {pixeltable-0.3.5 → pixeltable-0.3.7}/LICENSE +0 -0
  94. {pixeltable-0.3.5 → pixeltable-0.3.7}/README.md +0 -0
  95. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/globals.py +0 -0
  96. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/path.py +0 -0
  97. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/catalog/path_dict.py +0 -0
  98. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exceptions.py +0 -0
  99. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/__init__.py +0 -0
  100. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/__init__.py +0 -0
  101. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/evaluators.py +0 -0
  102. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/expr_eval_node.py +0 -0
  103. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/globals.py +0 -0
  104. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exec/expr_eval/row_buffer.py +0 -0
  105. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/__init__.py +0 -0
  106. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/arithmetic_expr.py +0 -0
  107. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/array_slice.py +0 -0
  108. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/column_property_ref.py +0 -0
  109. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/compound_predicate.py +0 -0
  110. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/expr_dict.py +0 -0
  111. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/expr_set.py +0 -0
  112. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/globals.py +0 -0
  113. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/in_predicate.py +0 -0
  114. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/inline_expr.py +0 -0
  115. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/is_null.py +0 -0
  116. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/literal.py +0 -0
  117. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/method_ref.py +0 -0
  118. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/similarity_expr.py +0 -0
  119. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/sql_element_cache.py +0 -0
  120. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/type_cast.py +0 -0
  121. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/exprs/variable.py +0 -0
  122. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/ext/__init__.py +0 -0
  123. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/ext/functions/__init__.py +0 -0
  124. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/ext/functions/whisperx.py +0 -0
  125. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/ext/functions/yolox.py +0 -0
  126. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/aggregate_function.py +0 -0
  127. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/callable_function.py +0 -0
  128. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/expr_template_function.py +0 -0
  129. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/function_registry.py +0 -0
  130. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/globals.py +0 -0
  131. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/func/signature.py +0 -0
  132. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/audio.py +0 -0
  133. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/deepseek.py +0 -0
  134. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/gemini.py +0 -0
  135. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/json.py +0 -0
  136. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/math.py +0 -0
  137. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/functions/replicate.py +0 -0
  138. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/__init__.py +0 -0
  139. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/fiftyone.py +0 -0
  140. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/globals.py +0 -0
  141. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/hf_datasets.py +0 -0
  142. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/pandas.py +0 -0
  143. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/io/utils.py +0 -0
  144. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/base.py +0 -0
  145. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/iterators/image.py +0 -0
  146. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_12.py +0 -0
  147. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_13.py +0 -0
  148. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_14.py +0 -0
  149. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_16.py +0 -0
  150. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_17.py +0 -0
  151. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_18.py +0 -0
  152. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_19.py +0 -0
  153. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_20.py +0 -0
  154. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_21.py +0 -0
  155. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_22.py +0 -0
  156. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/converters/convert_26.py +0 -0
  157. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/metadata/notes.py +0 -0
  158. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/py.typed +0 -0
  159. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/share/__init__.py +0 -0
  160. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/share/publish.py +0 -0
  161. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/__init__.py +0 -0
  162. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/arrow.py +0 -0
  163. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/coco.py +0 -0
  164. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/code.py +0 -0
  165. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/console_output.py +0 -0
  166. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/description_helper.py +0 -0
  167. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/documents.py +0 -0
  168. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/formatter.py +0 -0
  169. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/http_server.py +0 -0
  170. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/iceberg.py +0 -0
  171. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/media_store.py +0 -0
  172. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/pytorch.py +0 -0
  173. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/s3.py +0 -0
  174. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/sql.py +0 -0
  175. {pixeltable-0.3.5 → pixeltable-0.3.7}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pixeltable
3
- Version: 0.3.5
3
+ Version: 0.3.7
4
4
  Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
5
5
  License: Apache-2.0
6
6
  Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
@@ -1,7 +1,9 @@
1
+ # ruff: noqa: F401
2
+
1
3
  from .__version__ import __version__, __version_tuple__
2
4
  from .catalog import Column, InsertableTable, Table, UpdateStatus, View
3
5
  from .dataframe import DataFrame
4
- from .exceptions import Error
6
+ from .exceptions import Error, PixeltableWarning
5
7
  from .exprs import RELATIVE_PATH_ROOT
6
8
  from .func import Aggregator, Function, expr_udf, query, uda, udf
7
9
  from .globals import (
@@ -56,7 +58,7 @@ from . import ext, functions, io, iterators # isort: skip
56
58
  # This is the safest / most maintainable way to construct __all__: start with the default and "blacklist"
57
59
  # stuff that we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
58
60
 
59
- __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
61
+ __default_dir = {symbol for symbol in dir() if not symbol.startswith('_')}
60
62
  __removed_symbols = {
61
63
  'catalog',
62
64
  'dataframe',
@@ -72,7 +74,7 @@ __removed_symbols = {
72
74
  'type_system',
73
75
  'utils',
74
76
  }
75
- __all__ = sorted(list(__default_dir - __removed_symbols))
77
+ __all__ = sorted(__default_dir - __removed_symbols)
76
78
 
77
79
 
78
80
  def __dir__():
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.3.5'
3
- __version_tuple__ = (0, 3, 5)
2
+ __version__ = '0.3.7'
3
+ __version_tuple__ = (0, 3, 7)
@@ -9,5 +9,6 @@ from .path_dict import PathDict
9
9
  from .schema_object import SchemaObject
10
10
  from .table import Table
11
11
  from .table_version import TableVersion
12
+ from .table_version_handle import TableVersionHandle
12
13
  from .table_version_path import TableVersionPath
13
14
  from .view import View
@@ -0,0 +1,397 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import logging
5
+ from typing import Optional, Type
6
+ from uuid import UUID
7
+
8
+ import sqlalchemy as sql
9
+
10
+ import pixeltable.env as env
11
+ import pixeltable.exceptions as excs
12
+ import pixeltable.metadata.schema as schema
13
+ from pixeltable.env import Env
14
+
15
+ from .dir import Dir
16
+ from .schema_object import SchemaObject
17
+ from .table import Table
18
+ from .table_version import TableVersion
19
+ from .table_version_handle import TableVersionHandle
20
+ from .table_version_path import TableVersionPath
21
+
22
+ # from .. import InsertableTable
23
+
24
+ _logger = logging.getLogger('pixeltable')
25
+
26
+
27
+ def _join_path(path: str, name: str) -> str:
28
+ """Append name to path, if path is not empty."""
29
+ return name if path == '' else f'{path}.{name}'
30
+
31
+
32
class Catalog:
    """The functional interface to getting access to catalog objects

    All interface functions must be called in the context of a transaction (see Env.begin_xact(), which
    _init_store() uses).

    Table and TableVersion objects that have been loaded are cached on the instance (_tbls, _tbl_versions).
    A failed load (None) is never cached, so a later lookup retries against the store.
    """

    _instance: Optional[Catalog] = None

    # key: [id, version]
    # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
    # - snapshot versions: records the version of the snapshot
    _tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion]
    _tbls: dict[UUID, Table]

    @classmethod
    def get(cls) -> Catalog:
        """Return the singleton Catalog instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @classmethod
    def clear(cls) -> None:
        """Remove the instance. Used for testing."""
        cls._instance = None

    def __init__(self) -> None:
        self._tbl_versions = {}
        self._tbls = {}  # don't use a defaultdict here, it doesn't cooperate with the debugger
        self._init_store()

    def get_dir_path(self, dir_id: UUID) -> str:
        """Return path for directory with given id

        Walks the parent chain up to the root directory (the one whose name is '') and joins the names
        with '.'. The root itself contributes nothing, so its path is ''.
        """
        session = env.Env.get().session
        names: list[str] = []  # collected leaf-first, reversed when joining
        while True:
            dir_record = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one()
            if dir_record.md['name'] == '':
                break
            names.append(dir_record.md['name'])
            dir_id = dir_record.parent_id
            assert dir_id is not None
        return '.'.join(reversed(names))

    def get_tbl_path(self, tbl_id: UUID) -> str:
        """Return path for table with given id"""
        session = env.Env.get().session
        tbl_record = session.query(schema.Table).filter(schema.Table.id == tbl_id).one()
        dir_path = self.get_dir_path(tbl_record.dir_id)
        return _join_path(dir_path, tbl_record.md['name'])

    @dataclasses.dataclass
    class DirEntry:
        # exactly one of dir/table is set by get_dir_contents(); dir_entries is only populated for directories
        dir: Optional[schema.Dir]
        dir_entries: dict[str, Catalog.DirEntry]
        table: Optional[schema.Table]

    def get_dir_contents(self, dir_id: UUID, recursive: bool = False) -> dict[str, DirEntry]:
        """Returns a dict mapping the entry names to DirEntry objects

        Args:
            dir_id: id of the directory to list
            recursive: if True, the dir_entries of each sub-directory are populated as well
        """
        session = env.Env.get().session
        result: dict[str, Catalog.DirEntry] = {}

        sub_dirs = session.query(schema.Dir).filter(schema.Dir.parent_id == dir_id).all()
        for sub_dir in sub_dirs:
            sub_dir_contents: dict[str, Catalog.DirEntry] = {}
            if recursive:
                sub_dir_contents = self.get_dir_contents(sub_dir.id, recursive=True)
            result[sub_dir.md['name']] = self.DirEntry(dir=sub_dir, dir_entries=sub_dir_contents, table=None)

        tbls = session.query(schema.Table).filter(schema.Table.dir_id == dir_id).all()
        for tbl in tbls:
            result[tbl.md['name']] = self.DirEntry(dir=None, dir_entries={}, table=tbl)

        return result

    def drop_dir(self, dir_id: UUID) -> None:
        """Delete the directory with the given id"""
        session = env.Env.get().session
        session.query(schema.Dir).filter(schema.Dir.id == dir_id).delete()

    def get_schema_object(
        self,
        path: str,
        expected: Optional[Type[SchemaObject]] = None,
        raise_if_exists: bool = False,
        raise_if_not_exists: bool = False,
    ) -> Optional[SchemaObject]:
        """Return the schema object at the given path, or None if it doesn't exist.

        Raises Error if
        - the parent directory doesn't exist
        - raise_if_exists is True and the path exists
        - raise_if_not_exists is True and the path does not exist
        - expected is not None and the existing object has a different type
        """
        session = env.Env.get().session
        if path == '':
            # the root dir
            if expected is not None and expected is not Dir:
                raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {Dir._display_name()}')
            root = self._get_dir(path)
            return Dir(root.id, root.parent_id, root.md['name'])

        components = path.split('.')
        parent_path = '.'.join(components[:-1])
        parent_dir = self._get_dir(parent_path)
        if parent_dir is None:
            raise excs.Error(f'Directory {parent_path!r} does not exist')
        name = components[-1]

        # check if path points to a directory
        obj: Optional[SchemaObject] = None
        dir_record = (
            session.query(schema.Dir)
            .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
            .one_or_none()
        )
        if dir_record is not None:
            obj = Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])
        else:
            # check if it's a table
            row = (
                session.query(schema.Table.id)
                .filter(schema.Table.dir_id == parent_dir.id, schema.Table.md['name'].astext == name)
                .one_or_none()
            )
            if row is not None:
                # go through get_tbl() so that a failed load is not recorded in the cache as None
                obj = self.get_tbl(row[0])

        if obj is None and raise_if_not_exists:
            raise excs.Error(f'Path {path!r} does not exist')
        elif obj is not None and raise_if_exists:
            raise excs.Error(f'Path {path!r} is an existing {type(obj)._display_name()}')
        elif obj is not None and expected is not None and not isinstance(obj, expected):
            raise excs.Error(f'{path!r} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}')
        return obj

    def get_tbl(self, tbl_id: UUID) -> Optional[Table]:
        """Return the (cached) Table with the given id, loading it if necessary; None if it can't be loaded"""
        if tbl_id not in self._tbls:
            tbl = self._load_tbl(tbl_id)
            if tbl is None:
                return None
            self._tbls[tbl_id] = tbl
        return self._tbls[tbl_id]

    def add_tbl(self, tbl: Table) -> None:
        """Explicitly add a Table"""
        self._tbls[tbl._id] = tbl

    def get_views(self, tbl_id: UUID) -> list[UUID]:
        """Return the ids of views that directly reference the given table"""
        session = env.Env.get().session
        # tbl_id is a UUID, so its hex form consists of hex digits only and can be inlined into the SQL text
        q = session.query(schema.Table.id).filter(sql.text(f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r}"))
        return [r[0] for r in q.all()]

    def remove_tbl(self, tbl_id: UUID) -> None:
        """Remove the Table with the given id from the cache; it must be present"""
        assert tbl_id in self._tbls
        del self._tbls[tbl_id]

    def get_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
        """Return the (cached) TableVersion for (tbl_id, effective_version), loading it if necessary

        Returns None if no matching record is found; that outcome is not cached.
        """
        key = (tbl_id, effective_version)
        if key not in self._tbl_versions:
            tbl_version = self._load_tbl_version(tbl_id, effective_version)
            if tbl_version is None:
                return None
            self._tbl_versions[key] = tbl_version
        return self._tbl_versions[key]

    def add_tbl_version(self, tbl_version: TableVersion) -> None:
        """Explicitly add a TableVersion"""
        self._tbl_versions[(tbl_version.id, tbl_version.effective_version)] = tbl_version
        # if this is a mutable view, also record it in the base
        if tbl_version.is_view and tbl_version.effective_version is None:
            base = tbl_version.base.get()
            base.mutable_views.append(TableVersionHandle(tbl_version.id, tbl_version.effective_version))

    def remove_tbl_version(self, tbl_version: TableVersion) -> None:
        """Remove the given TableVersion from the cache; it must be present"""
        key = (tbl_version.id, tbl_version.effective_version)
        assert key in self._tbl_versions
        del self._tbl_versions[key]

    def get_dir(self, dir_id: UUID) -> Optional[Dir]:
        """Return the Dir with the given id, or None if it doesn't exist"""
        session = env.Env.get().session
        dir_record = session.query(schema.Dir).filter(schema.Dir.id == dir_id).one_or_none()
        if dir_record is None:
            return None
        return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])

    def _get_dir(self, path: str) -> Optional[schema.Dir]:
        """Return the schema.Dir record for the given path, or None if any component doesn't exist

        The empty path denotes the root directory (the record without a parent).
        """
        session = env.Env.get().session
        assert session is not None
        if path == '':
            return session.query(schema.Dir).filter(schema.Dir.parent_id.is_(None)).one()

        # resolve the parent first, then look up the last component among its children
        components = path.split('.')
        parent_path = '.'.join(components[:-1])
        parent_dir = self._get_dir(parent_path)
        if parent_dir is None:
            return None
        name = components[-1]
        return (
            session.query(schema.Dir)
            .filter(schema.Dir.parent_id == parent_dir.id, schema.Dir.md['name'].astext == name)
            .one_or_none()
        )

    def _load_tbl(self, tbl_id: UUID) -> Optional[Table]:
        """Load the Table (InsertableTable or View) with the given id from the store

        Also loads and caches the TableVersions the table depends on. Returns None if there is no
        matching table record.
        """
        _logger.info(f'Loading table {tbl_id}')
        from .insertable_table import InsertableTable
        from .view import View

        session = env.Env.get().session
        row = (
            session.query(schema.Table, schema.TableSchemaVersion)
            .join(schema.TableSchemaVersion)
            .where(schema.Table.id == schema.TableSchemaVersion.tbl_id)
            # Table.md['current_schema_version'] == TableSchemaVersion.schema_version
            .where(
                sql.text(
                    f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
                    f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
                )
            )
            .where(schema.Table.id == tbl_id)
            .one_or_none()
        )
        if row is None:
            # one_or_none() returns None (not a tuple) when nothing matches, so check before unpacking
            return None
        tbl_record, schema_version_record = row

        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
        view_md = tbl_md.view_md
        if view_md is None:
            # this is a base table
            if (tbl_id, None) not in self._tbl_versions:
                self._tbl_versions[(tbl_id, None)] = self._load_tbl_version(tbl_id, None)
            return InsertableTable(tbl_record.dir_id, TableVersionHandle(tbl_id, None))

        # this is a view; determine the sequence of TableVersions to load
        tbl_version_path: list[tuple[UUID, Optional[int]]] = []
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
        pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
        if not pure_snapshot:
            effective_version = 0 if view_md.is_snapshot else None  # snapshots only have version 0
            tbl_version_path.append((tbl_id, effective_version))
        # else: a pure snapshot has no physical table backing it; we only need the bases
        tbl_version_path.extend((UUID(base_id), base_version) for base_id, base_version in view_md.base_versions)

        # load TableVersions, starting at the root
        base_path: Optional[TableVersionPath] = None
        view_path: Optional[TableVersionPath] = None
        for path_tbl_id, path_version in reversed(tbl_version_path):
            if (path_tbl_id, path_version) not in self._tbl_versions:
                self._tbl_versions[(path_tbl_id, path_version)] = self._load_tbl_version(path_tbl_id, path_version)
            view_path = TableVersionPath(TableVersionHandle(path_tbl_id, path_version), base=base_path)
            base_path = view_path
        view = View(tbl_id, tbl_record.dir_id, tbl_md.name, view_path, snapshot_only=pure_snapshot)
        # TODO: also load mutable views
        return view

    def _load_tbl_version(self, tbl_id: UUID, effective_version: Optional[int]) -> Optional[TableVersion]:
        """Load the TableVersion for (tbl_id, effective_version) from the store

        effective_version == None loads the current version. Returns None if there is no matching record.
        """
        _logger.info(f'Loading table version: {tbl_id}:{effective_version}')
        session = env.Env.get().session
        q = (
            session.query(schema.Table, schema.TableSchemaVersion)
            .select_from(schema.Table)
            .where(schema.Table.id == tbl_id)
            .join(schema.TableSchemaVersion)
            .where(schema.TableSchemaVersion.tbl_id == tbl_id)
        )

        if effective_version is not None:
            # we are loading a specific version
            # SELECT *
            # FROM Table t
            # JOIN TableVersion tv ON (tv.tbl_id = tbl_id AND tv.version = effective_version)
            # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND tv.md.schema_version = tsv.schema_version)
            # WHERE t.id = tbl_id
            q = (
                q.join(schema.TableVersion)
                .where(schema.TableVersion.tbl_id == tbl_id)
                .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {effective_version}"))
                .where(
                    sql.text(
                        f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
                        f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
                    )
                )
            )
        else:
            # we are loading the current version
            # SELECT *
            # FROM Table t
            # JOIN TableSchemaVersion tsv ON (tsv.tbl_id = tbl_id AND t.current_schema_version = tsv.schema_version)
            # WHERE t.id = tbl_id
            q = q.where(
                sql.text(
                    f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
                    f'{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}'
                )
            )

        row = q.one_or_none()
        if row is None:
            # one_or_none() returns None (not a tuple) when nothing matches, so check before unpacking
            return None
        tbl_record, schema_version_record = row
        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
        view_md = tbl_md.view_md

        # load mutable view ids
        view_q = session.query(schema.Table.id).filter(
            sql.text(
                f"md->'view_md'->'base_versions'->0->>0 = {tbl_id.hex!r} "
                "AND md->'view_md'->'base_versions'->0->1 IS NULL"
            )
        )
        mutable_views = [TableVersionHandle(r[0], None) for r in view_q.all()]

        if view_md is None:
            # this is a base table
            return TableVersion(
                tbl_record.id, tbl_md, effective_version, schema_version_md, mutable_views=mutable_views
            )

        assert len(view_md.base_versions) > 0  # a view needs to have a base
        pure_snapshot = view_md.is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
        assert not pure_snapshot  # a pure snapshot doesn't have a physical table backing it, no point in loading it

        base: TableVersionHandle
        base_path: Optional[TableVersionPath] = None  # needed for live view
        if view_md.is_snapshot:
            base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
        else:
            base_path = TableVersionPath.from_md(view_md.base_versions)
            base = base_path.tbl_version

        return TableVersion(
            tbl_record.id,
            tbl_md,
            effective_version,
            schema_version_md,
            base_path=base_path,
            base=base,
            mutable_views=mutable_views,
        )

    def _init_store(self) -> None:
        """One-time initialization of the stored catalog. Idempotent."""
        with env.Env.get().begin_xact():
            session = env.Env.get().session
            if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
                return
            # create a top-level directory, so that every schema object has a directory
            dir_md = schema.DirMd(name='', user=None, additional_md={})
            dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
            session.add(dir_record)
            session.flush()
            session.commit()
            _logger.info('Initialized catalog')
@@ -1,6 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import warnings
5
+ from textwrap import dedent
4
6
  from typing import TYPE_CHECKING, Any, Optional
5
7
 
6
8
  import sqlalchemy as sql
@@ -13,6 +15,7 @@ from .globals import MediaValidation, is_valid_identifier
13
15
 
14
16
  if TYPE_CHECKING:
15
17
  from .table_version import TableVersion
18
+ from .table_version_handle import TableVersionHandle
16
19
 
17
20
  _logger = logging.getLogger('pixeltable')
18
21
 
@@ -40,7 +43,7 @@ class Column:
40
43
  _value_expr: Optional[exprs.Expr]
41
44
  value_expr_dict: Optional[dict[str, Any]]
42
45
  dependent_cols: set[Column]
43
- tbl: Optional[TableVersion]
46
+ tbl: Optional[TableVersionHandle]
44
47
 
45
48
  def __init__(
46
49
  self,
@@ -129,6 +132,19 @@ class Column:
129
132
  from pixeltable import exprs
130
133
 
131
134
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
135
+ if not self._value_expr.is_valid:
136
+ message = (
137
+ dedent(
138
+ f"""
139
+ The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
140
+ {{validation_error}}
141
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
142
+ """
143
+ )
144
+ .strip()
145
+ .format(validation_error=self._value_expr.validation_error)
146
+ )
147
+ warnings.warn(message, category=excs.PixeltableWarning)
132
148
  return self._value_expr
133
149
 
134
150
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
@@ -153,7 +169,7 @@ class Column:
153
169
 
154
170
  def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
155
171
  assert self.tbl is not None
156
- return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
172
+ return {name: info for name, info in self.tbl.get().idxs_by_name.items() if info.col == self}
157
173
 
158
174
  @property
159
175
  def is_computed(self) -> bool:
@@ -176,14 +192,14 @@ class Column:
176
192
  @property
177
193
  def qualified_name(self) -> str:
178
194
  assert self.tbl is not None
179
- return f'{self.tbl.name}.{self.name}'
195
+ return f'{self.tbl.get().name}.{self.name}'
180
196
 
181
197
  @property
182
198
  def media_validation(self) -> MediaValidation:
183
199
  if self._media_validation is not None:
184
200
  return self._media_validation
185
201
  assert self.tbl is not None
186
- return self.tbl.media_validation
202
+ return self.tbl.get().media_validation
187
203
 
188
204
  def source(self) -> None:
189
205
  """
@@ -228,7 +244,7 @@ class Column:
228
244
  return f'{self.name}: {self.col_type}'
229
245
 
230
246
  def __repr__(self) -> str:
231
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
247
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.get().name!r})'
232
248
 
233
249
  def __hash__(self) -> int:
234
250
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
@@ -18,16 +18,29 @@ class Dir(SchemaObject):
18
18
  def __init__(self, id: UUID, parent_id: UUID, name: str):
19
19
  super().__init__(id, name, parent_id)
20
20
 
21
@classmethod
def _create(cls, parent_id: UUID, name: str) -> Dir:
    """Add a directory record named `name` under `parent_id` and return the matching Dir object.

    The record is added to the current Env session and flushed; no commit is issued here.
    """
    db_session = Env.get().session
    assert db_session is not None
    md = schema.DirMd(name=name, user=None, additional_md={})
    record = schema.Dir(parent_id=parent_id, md=dataclasses.asdict(md))
    db_session.add(record)
    db_session.flush()
    # the record id is expected to be populated after the flush (checked below)
    assert record.id is not None
    assert isinstance(record.id, UUID)
    return cls(record.id, parent_id, name)
33
+
21
34
  @classmethod
22
35
  def _display_name(cls) -> str:
23
36
  return 'directory'
24
37
 
25
- @property
26
- def _has_dependents(self) -> bool:
27
- """Returns True if this directory has any children."""
28
- from pixeltable.catalog import Catalog, Path
29
-
30
- return len(Catalog.get().paths.get_children(Path(self._path), child_type=None, recursive=False)) > 0
38
def _path(self) -> str:
    """Return the path of this directory; the root directory (no parent directory id) has the empty path."""
    is_root = self._dir_id is None
    return '' if is_root else super()._path()
31
44
 
32
45
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
33
46
  super()._move(new_name, new_dir_id)