pixeltable 0.3.9__tar.gz → 0.3.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (180)
  1. {pixeltable-0.3.9 → pixeltable-0.3.10}/PKG-INFO +1 -1
  2. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/__init__.py +1 -2
  3. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/__version__.py +2 -2
  4. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/column.py +5 -0
  5. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/globals.py +16 -0
  6. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/insertable_table.py +82 -41
  7. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/table.py +78 -55
  8. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/table_version.py +18 -3
  9. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/view.py +9 -2
  10. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/env.py +1 -1
  11. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/exec_node.py +1 -1
  12. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/__init__.py +2 -1
  13. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/arithmetic_expr.py +2 -0
  14. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/column_ref.py +36 -0
  15. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/expr.py +39 -9
  16. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/globals.py +12 -0
  17. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/json_mapper.py +1 -1
  18. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/json_path.py +0 -6
  19. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/similarity_expr.py +5 -20
  20. pixeltable-0.3.10/pixeltable/exprs/string_op.py +107 -0
  21. pixeltable-0.3.10/pixeltable/ext/functions/yolox.py +114 -0
  22. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/tools.py +2 -2
  23. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/__init__.py +1 -1
  24. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/globals.py +16 -5
  25. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/globals.py +85 -33
  26. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/__init__.py +3 -2
  27. pixeltable-0.3.10/pixeltable/io/datarows.py +138 -0
  28. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/external_store.py +8 -5
  29. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/globals.py +7 -160
  30. pixeltable-0.3.10/pixeltable/io/hf_datasets.py +114 -0
  31. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/pandas.py +29 -43
  32. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/parquet.py +17 -42
  33. pixeltable-0.3.10/pixeltable/io/table_data_conduit.py +569 -0
  34. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/utils.py +6 -21
  35. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/__init__.py +1 -1
  36. pixeltable-0.3.10/pixeltable/metadata/converters/convert_30.py +50 -0
  37. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/util.py +26 -1
  38. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/notes.py +1 -0
  39. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/schema.py +3 -0
  40. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/arrow.py +32 -7
  41. {pixeltable-0.3.9 → pixeltable-0.3.10}/pyproject.toml +2 -1
  42. pixeltable-0.3.9/pixeltable/ext/functions/yolox.py +0 -157
  43. pixeltable-0.3.9/pixeltable/io/hf_datasets.py +0 -191
  44. {pixeltable-0.3.9 → pixeltable-0.3.10}/LICENSE +0 -0
  45. {pixeltable-0.3.9 → pixeltable-0.3.10}/README.md +0 -0
  46. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/__init__.py +0 -0
  47. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/catalog.py +0 -0
  48. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/dir.py +0 -0
  49. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/named_function.py +0 -0
  50. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/path.py +0 -0
  51. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/path_dict.py +0 -0
  52. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/schema_object.py +0 -0
  53. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/table_version_handle.py +0 -0
  54. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/catalog/table_version_path.py +0 -0
  55. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/config.py +0 -0
  56. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/dataframe.py +0 -0
  57. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exceptions.py +0 -0
  58. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/__init__.py +0 -0
  59. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/aggregation_node.py +0 -0
  60. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/cache_prefetch_node.py +0 -0
  61. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/component_iteration_node.py +0 -0
  62. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/data_row_batch.py +0 -0
  63. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/exec_context.py +0 -0
  64. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/__init__.py +0 -0
  65. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/evaluators.py +0 -0
  66. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/expr_eval_node.py +0 -0
  67. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/globals.py +0 -0
  68. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/row_buffer.py +0 -0
  69. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/expr_eval/schedulers.py +0 -0
  70. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/in_memory_data_node.py +0 -0
  71. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/row_update_node.py +0 -0
  72. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exec/sql_node.py +0 -0
  73. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/array_slice.py +0 -0
  74. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/column_property_ref.py +0 -0
  75. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/comparison.py +0 -0
  76. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/compound_predicate.py +0 -0
  77. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/data_row.py +0 -0
  78. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/expr_dict.py +0 -0
  79. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/expr_set.py +0 -0
  80. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/function_call.py +0 -0
  81. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/in_predicate.py +0 -0
  82. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/inline_expr.py +0 -0
  83. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/is_null.py +0 -0
  84. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/literal.py +0 -0
  85. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/method_ref.py +0 -0
  86. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/object_ref.py +0 -0
  87. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/row_builder.py +0 -0
  88. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/rowid_ref.py +0 -0
  89. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/sql_element_cache.py +0 -0
  90. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/type_cast.py +0 -0
  91. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/exprs/variable.py +0 -0
  92. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/ext/__init__.py +0 -0
  93. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/ext/functions/__init__.py +0 -0
  94. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/ext/functions/whisperx.py +0 -0
  95. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/__init__.py +0 -0
  96. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/aggregate_function.py +0 -0
  97. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/callable_function.py +0 -0
  98. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/expr_template_function.py +0 -0
  99. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/function.py +0 -0
  100. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/function_registry.py +0 -0
  101. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/globals.py +0 -0
  102. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/query_template_function.py +0 -0
  103. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/signature.py +0 -0
  104. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/func/udf.py +0 -0
  105. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/anthropic.py +0 -0
  106. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/audio.py +0 -0
  107. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/deepseek.py +0 -0
  108. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/fireworks.py +0 -0
  109. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/gemini.py +0 -0
  110. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/huggingface.py +0 -0
  111. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/image.py +0 -0
  112. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/json.py +0 -0
  113. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/llama_cpp.py +0 -0
  114. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/math.py +0 -0
  115. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/mistralai.py +0 -0
  116. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/ollama.py +0 -0
  117. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/openai.py +0 -0
  118. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/replicate.py +0 -0
  119. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/string.py +0 -0
  120. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/timestamp.py +0 -0
  121. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/together.py +0 -0
  122. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/util.py +0 -0
  123. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/video.py +0 -0
  124. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/vision.py +0 -0
  125. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/functions/whisper.py +0 -0
  126. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/index/__init__.py +0 -0
  127. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/index/base.py +0 -0
  128. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/index/btree.py +0 -0
  129. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/index/embedding_index.py +0 -0
  130. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/fiftyone.py +0 -0
  131. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/io/label_studio.py +0 -0
  132. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/__init__.py +0 -0
  133. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/audio.py +0 -0
  134. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/base.py +0 -0
  135. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/document.py +0 -0
  136. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/image.py +0 -0
  137. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/string.py +0 -0
  138. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/iterators/video.py +0 -0
  139. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_10.py +0 -0
  140. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_12.py +0 -0
  141. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_13.py +0 -0
  142. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_14.py +0 -0
  143. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_15.py +0 -0
  144. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_16.py +0 -0
  145. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_17.py +0 -0
  146. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_18.py +0 -0
  147. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_19.py +0 -0
  148. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_20.py +0 -0
  149. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_21.py +0 -0
  150. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_22.py +0 -0
  151. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_23.py +0 -0
  152. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_24.py +0 -0
  153. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_25.py +0 -0
  154. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_26.py +0 -0
  155. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_27.py +0 -0
  156. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_28.py +0 -0
  157. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/metadata/converters/convert_29.py +0 -0
  158. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/plan.py +0 -0
  159. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/py.typed +0 -0
  160. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/share/__init__.py +0 -0
  161. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/share/packager.py +0 -0
  162. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/share/publish.py +0 -0
  163. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/store.py +0 -0
  164. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/type_system.py +0 -0
  165. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/__init__.py +0 -0
  166. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/coco.py +0 -0
  167. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/code.py +0 -0
  168. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/console_output.py +0 -0
  169. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/coroutine.py +0 -0
  170. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/description_helper.py +0 -0
  171. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/documents.py +0 -0
  172. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/filecache.py +0 -0
  173. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/formatter.py +0 -0
  174. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/http_server.py +0 -0
  175. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/iceberg.py +0 -0
  176. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/media_store.py +0 -0
  177. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/pytorch.py +0 -0
  178. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/s3.py +0 -0
  179. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/sql.py +0 -0
  180. {pixeltable-0.3.9 → pixeltable-0.3.10}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pixeltable
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
5
5
  License: Apache-2.0
6
6
  Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
@@ -3,8 +3,7 @@
3
3
  from .__version__ import __version__, __version_tuple__
4
4
  from .catalog import Column, InsertableTable, Table, UpdateStatus, View
5
5
  from .dataframe import DataFrame
6
- from .exceptions import Error, PixeltableWarning
7
- from .exprs import RELATIVE_PATH_ROOT
6
+ from .exceptions import Error, ExprEvalError, PixeltableWarning
8
7
  from .func import Aggregator, Function, expr_udf, query, uda, udf
9
8
  from .globals import (
10
9
  array,
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.3.9'
3
- __version_tuple__ = (0, 3, 9)
2
+ __version__ = '0.3.10'
3
+ __version_tuple__ = (0, 3, 10)
@@ -202,6 +202,11 @@ class Column:
202
202
  assert self.tbl is not None
203
203
  return self.tbl.get().media_validation
204
204
 
205
+ @property
206
+ def is_required_for_insert(self) -> bool:
207
+ """Returns True if column is required when inserting rows."""
208
+ return not self.col_type.nullable and not self.is_computed
209
+
205
210
  def source(self) -> None:
206
211
  """
207
212
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
@@ -40,6 +40,22 @@ class UpdateStatus:
40
40
  self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
41
41
  return self
42
42
 
43
+ @property
44
+ def insert_msg(self) -> str:
45
+ """Return a message describing the results of an insert operation."""
46
+ if self.num_excs == 0:
47
+ cols_with_excs_str = ''
48
+ else:
49
+ cols_with_excs_str = (
50
+ f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
51
+ )
52
+ cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
53
+ msg = (
54
+ f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
55
+ f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
56
+ )
57
+ return msg
58
+
43
59
 
44
60
  class MediaValidation(enum.Enum):
45
61
  ON_READ = 0
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import enum
3
4
  import logging
4
- from typing import Any, Iterable, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, overload
5
6
  from uuid import UUID
6
7
 
7
8
  import pixeltable as pxt
@@ -16,9 +17,36 @@ from .table_version import TableVersion
16
17
  from .table_version_handle import TableVersionHandle
17
18
  from .table_version_path import TableVersionPath
18
19
 
20
+ if TYPE_CHECKING:
21
+ import datasets # type: ignore[import-untyped]
22
+
23
+ from pixeltable.globals import RowData, TableDataSource
24
+ from pixeltable.io.table_data_conduit import TableDataConduit
25
+
19
26
  _logger = logging.getLogger('pixeltable')
20
27
 
21
28
 
29
+ class OnErrorParameter(enum.Enum):
30
+ """Supported values for the on_error parameter"""
31
+
32
+ ABORT = 'abort'
33
+ IGNORE = 'ignore'
34
+
35
+ @classmethod
36
+ def is_valid(cls, v: Any) -> bool:
37
+ if isinstance(v, str):
38
+ return v.lower() in [c.value for c in cls]
39
+ return False
40
+
41
+ @classmethod
42
+ def fail_on_exception(cls, v: Any) -> bool:
43
+ if not cls.is_valid(v):
44
+ raise ValueError(f'Invalid value for on_error: {v}')
45
+ if isinstance(v, str):
46
+ return v.lower() != cls.IGNORE.value
47
+ return True
48
+
49
+
22
50
  class InsertableTable(Table):
23
51
  """A `Table` that allows inserting and deleting rows."""
24
52
 
@@ -86,62 +114,75 @@ class InsertableTable(Table):
86
114
  @overload
87
115
  def insert(
88
116
  self,
89
- rows: Iterable[dict[str, Any]],
117
+ source: Optional[TableDataSource] = None,
90
118
  /,
91
119
  *,
92
- print_stats: bool = False,
120
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
121
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
93
122
  on_error: Literal['abort', 'ignore'] = 'abort',
123
+ print_stats: bool = False,
124
+ **kwargs: Any,
94
125
  ) -> UpdateStatus: ...
95
126
 
96
127
  @overload
97
128
  def insert(
98
- self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
129
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
99
130
  ) -> UpdateStatus: ...
100
131
 
101
- def insert( # type: ignore[misc]
132
+ def insert(
102
133
  self,
103
- rows: Optional[Iterable[dict[str, Any]]] = None,
134
+ source: Optional[TableDataSource] = None,
104
135
  /,
105
136
  *,
106
- print_stats: bool = False,
137
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
138
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
107
139
  on_error: Literal['abort', 'ignore'] = 'abort',
140
+ print_stats: bool = False,
108
141
  **kwargs: Any,
109
142
  ) -> UpdateStatus:
110
- if rows is None:
111
- rows = [kwargs]
112
- else:
113
- rows = list(rows)
114
- if len(kwargs) > 0:
115
- raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
116
-
117
- fail_on_exception = on_error == 'abort'
118
-
119
- if not isinstance(rows, list):
120
- raise excs.Error('rows must be a list of dictionaries')
121
- if len(rows) == 0:
122
- raise excs.Error('rows must not be empty')
123
- for row in rows:
124
- if not isinstance(row, dict):
125
- raise excs.Error('rows must be a list of dictionaries')
126
- self._validate_input_rows(rows)
127
- with Env.get().begin_xact():
128
- status = self._tbl_version.get().insert(
129
- rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception
130
- )
131
-
132
- if status.num_excs == 0:
133
- cols_with_excs_str = ''
134
- else:
135
- cols_with_excs_str = (
136
- f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
137
- )
138
- cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
139
- msg = (
140
- f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
141
- f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
143
+ from pixeltable.io.table_data_conduit import UnkTableDataConduit
144
+
145
+ table = self
146
+ if source is None:
147
+ source = [kwargs]
148
+ kwargs = None
149
+
150
+ tds = UnkTableDataConduit(
151
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
142
152
  )
143
- Env.get().console_logger.info(msg)
144
- _logger.info(f'InsertableTable {self._name}: {msg}')
153
+ data_source = tds.specialize()
154
+ if data_source.source_column_map is None:
155
+ data_source.src_pk = []
156
+
157
+ assert isinstance(table, Table)
158
+ data_source.add_table_info(table)
159
+ data_source.prepare_for_insert_into_table()
160
+
161
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
162
+ return table.insert_table_data_source(
163
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
164
+ )
165
+
166
+ def insert_table_data_source(
167
+ self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
168
+ ) -> pxt.UpdateStatus:
169
+ """Insert row batches into this table from a `TableDataConduit`."""
170
+ from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
171
+
172
+ status = pxt.UpdateStatus()
173
+ with Env.get().begin_xact():
174
+ if isinstance(data_source, DFTableDataConduit):
175
+ status += self._tbl_version.get().insert(
176
+ rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
177
+ )
178
+ else:
179
+ for row_batch in data_source.valid_row_batch():
180
+ status += self._tbl_version.get().insert(
181
+ rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
182
+ )
183
+
184
+ Env.get().console_logger.info(status.insert_msg)
185
+
145
186
  FileCache.get().emit_eviction_warnings()
146
187
  return status
147
188
 
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
9
9
 
10
10
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
+ from keyword import iskeyword as is_python_keyword
11
12
  from uuid import UUID
12
13
 
13
14
  import pandas as pd
@@ -42,9 +43,11 @@ from .table_version_handle import TableVersionHandle
42
43
  from .table_version_path import TableVersionPath
43
44
 
44
45
  if TYPE_CHECKING:
46
+ import datasets # type: ignore[import-untyped]
45
47
  import torch.utils.data
46
48
 
47
49
  import pixeltable.plan
50
+ from pixeltable.globals import RowData, TableDataSource
48
51
 
49
52
  _logger = logging.getLogger('pixeltable')
50
53
 
@@ -720,13 +723,18 @@ class Table(SchemaObject):
720
723
  columns.append(column)
721
724
  return columns
722
725
 
726
+ @classmethod
727
+ def validate_column_name(cls, name: str) -> None:
728
+ """Check that a name is usable as a pixeltable column name"""
729
+ if is_system_column_name(name) or is_python_keyword(name):
730
+ raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
731
+ if not is_valid_identifier(name):
732
+ raise excs.Error(f'Invalid column name: {name!r}')
733
+
723
734
  @classmethod
724
735
  def _verify_column(cls, col: Column) -> None:
725
736
  """Check integrity of user-supplied Column and supply defaults"""
726
- if is_system_column_name(col.name):
727
- raise excs.Error(f'{col.name!r} is a reserved name in Pixeltable; please choose a different column name.')
728
- if not is_valid_identifier(col.name):
729
- raise excs.Error(f'Invalid column name: {col.name!r}')
737
+ cls.validate_column_name(col.name)
730
738
  if col.stored is False and not col.is_computed:
731
739
  raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
732
740
  if col.stored is False and col.has_window_fn_call():
@@ -745,16 +753,6 @@ class Table(SchemaObject):
745
753
  cls._verify_column(col)
746
754
  column_names.add(col.name)
747
755
 
748
- def __check_column_name_exists(self, column_name: str, include_bases: bool = False) -> None:
749
- col = self._tbl_version_path.get_column(column_name, include_bases)
750
- if col is None:
751
- raise excs.Error(f'Column {column_name!r} unknown')
752
-
753
- def __check_column_ref_exists(self, col_ref: ColumnRef, include_bases: bool = False) -> None:
754
- exists = self._tbl_version_path.has_column(col_ref.col, include_bases)
755
- if not exists:
756
- raise excs.Error(f'Unknown column: {col_ref.col.qualified_name}')
757
-
758
756
  def drop_column(self, column: Union[str, ColumnRef], if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
759
757
  """Drop a column from the table.
760
758
 
@@ -907,7 +905,7 @@ class Table(SchemaObject):
907
905
  Args:
908
906
  column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
909
907
  idx_name: An optional name for the index. If not specified, a name such as `'idx0'` will be generated
910
- automatically. If specified, the name must be unique for this table.
908
+ automatically. If specified, the name must be unique for this table and a valid pixeltable column name.
911
909
  embedding: The UDF to use for the embedding. Must be a UDF that accepts a single argument of type `String`
912
910
  or `Image` (as appropriate for the column being indexed) and returns a fixed-size 1-dimensional
913
911
  array of floats.
@@ -960,13 +958,7 @@ class Table(SchemaObject):
960
958
  """
961
959
  if self._tbl_version_path.is_snapshot():
962
960
  raise excs.Error('Cannot add an index to a snapshot')
963
- col: Column
964
- if isinstance(column, str):
965
- self.__check_column_name_exists(column, include_bases=True)
966
- col = self._tbl_version_path.get_column(column, include_bases=True)
967
- else:
968
- self.__check_column_ref_exists(column, include_bases=True)
969
- col = column.col
961
+ col = self._resolve_column_parameter(column)
970
962
 
971
963
  with Env.get().begin_xact():
972
964
  if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
@@ -986,6 +978,10 @@ class Table(SchemaObject):
986
978
  assert idx_name not in self._tbl_version.get().idxs_by_name
987
979
  from pixeltable.index import EmbeddingIndex
988
980
 
981
+ # idx_name must be a valid pixeltable column name
982
+ if idx_name is not None:
983
+ Table.validate_column_name(idx_name)
984
+
989
985
  # create the EmbeddingIndex instance to verify args
990
986
  idx = EmbeddingIndex(
991
987
  col, metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed
@@ -1049,17 +1045,28 @@ class Table(SchemaObject):
1049
1045
 
1050
1046
  col: Column = None
1051
1047
  if idx_name is None:
1052
- if isinstance(column, str):
1053
- self.__check_column_name_exists(column, include_bases=True)
1054
- col = self._tbl_version_path.get_column(column, include_bases=True)
1055
- else:
1056
- self.__check_column_ref_exists(column, include_bases=True)
1057
- col = column.col
1048
+ col = self._resolve_column_parameter(column)
1058
1049
  assert col is not None
1059
1050
 
1060
1051
  with Env.get().begin_xact():
1061
1052
  self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
1062
1053
 
1054
+ def _resolve_column_parameter(self, column: Union[str, ColumnRef]) -> Column:
1055
+ """Resolve a column parameter to a Column object"""
1056
+ col: Column = None
1057
+ if isinstance(column, str):
1058
+ col = self._tbl_version_path.get_column(column, include_bases=True)
1059
+ if col is None:
1060
+ raise excs.Error(f'Column {column!r} unknown')
1061
+ elif isinstance(column, ColumnRef):
1062
+ exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1063
+ if not exists:
1064
+ raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1065
+ col = column.col
1066
+ else:
1067
+ raise excs.Error(f'Invalid column parameter type: {type(column)}')
1068
+ return col
1069
+
1063
1070
  def drop_index(
1064
1071
  self,
1065
1072
  *,
@@ -1115,12 +1122,7 @@ class Table(SchemaObject):
1115
1122
 
1116
1123
  col: Column = None
1117
1124
  if idx_name is None:
1118
- if isinstance(column, str):
1119
- self.__check_column_name_exists(column, include_bases=True)
1120
- col = self._tbl_version_path.get_column(column, include_bases=True)
1121
- else:
1122
- self.__check_column_ref_exists(column, include_bases=True)
1123
- col = column.col
1125
+ col = self._resolve_column_parameter(column)
1124
1126
  assert col is not None
1125
1127
 
1126
1128
  with Env.get().begin_xact():
@@ -1145,49 +1147,62 @@ class Table(SchemaObject):
1145
1147
  raise excs.Error(f'Index {idx_name!r} does not exist')
1146
1148
  assert _if_not_exists == IfNotExistsParam.IGNORE
1147
1149
  return
1148
- idx_id = self._tbl_version.get().idxs_by_name[idx_name].id
1150
+ idx_info = self._tbl_version.get().idxs_by_name[idx_name]
1149
1151
  else:
1150
1152
  if col.tbl.id != self._tbl_version.id:
1151
1153
  raise excs.Error(
1152
1154
  f'Column {col.name!r}: cannot drop index from column that belongs to base ({col.tbl.get().name}!r)'
1153
1155
  )
1154
- idx_info = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1156
+ idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
1155
1157
  if _idx_class is not None:
1156
- idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
1157
- if len(idx_info) == 0:
1158
+ idx_info_list = [info for info in idx_info_list if isinstance(info.idx, _idx_class)]
1159
+ if len(idx_info_list) == 0:
1158
1160
  _if_not_exists = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
1159
1161
  if _if_not_exists == IfNotExistsParam.ERROR:
1160
1162
  raise excs.Error(f'Column {col.name!r} does not have an index')
1161
1163
  assert _if_not_exists == IfNotExistsParam.IGNORE
1162
1164
  return
1163
- if len(idx_info) > 1:
1165
+ if len(idx_info_list) > 1:
1164
1166
  raise excs.Error(f"Column {col.name!r} has multiple indices; specify 'idx_name' instead")
1165
- idx_id = idx_info[0].id
1166
- self._tbl_version.get().drop_index(idx_id)
1167
+ idx_info = idx_info_list[0]
1168
+
1169
+ # Find out if anything depends on this index
1170
+ dependent_user_cols = [c for c in idx_info.val_col.dependent_cols if c.name is not None]
1171
+ if len(dependent_user_cols) > 0:
1172
+ raise excs.Error(
1173
+ f'Cannot drop index because the following columns depend on it:\n'
1174
+ f'{", ".join(c.name for c in dependent_user_cols)}'
1175
+ )
1176
+ self._tbl_version.get().drop_index(idx_info.id)
1167
1177
 
1168
1178
  @overload
1169
1179
  def insert(
1170
1180
  self,
1171
- rows: Iterable[dict[str, Any]],
1181
+ source: TableDataSource,
1172
1182
  /,
1173
1183
  *,
1174
- print_stats: bool = False,
1184
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1185
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1175
1186
  on_error: Literal['abort', 'ignore'] = 'abort',
1187
+ print_stats: bool = False,
1188
+ **kwargs: Any,
1176
1189
  ) -> UpdateStatus: ...
1177
1190
 
1178
1191
  @overload
1179
1192
  def insert(
1180
- self, *, print_stats: bool = False, on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
1193
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
1181
1194
  ) -> UpdateStatus: ...
1182
1195
 
1183
- @abc.abstractmethod # type: ignore[misc]
1196
+ @abc.abstractmethod
1184
1197
  def insert(
1185
1198
  self,
1186
- rows: Optional[Iterable[dict[str, Any]]] = None,
1199
+ source: Optional[TableDataSource] = None,
1187
1200
  /,
1188
1201
  *,
1189
- print_stats: bool = False,
1202
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
1203
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
1190
1204
  on_error: Literal['abort', 'ignore'] = 'abort',
1205
+ print_stats: bool = False,
1191
1206
  **kwargs: Any,
1192
1207
  ) -> UpdateStatus:
1193
1208
  """Inserts rows into this table. There are two mutually exclusive call patterns:
@@ -1196,11 +1211,12 @@ class Table(SchemaObject):
1196
1211
 
1197
1212
  ```python
1198
1213
  insert(
1199
- rows: Iterable[dict[str, Any]],
1214
+ source: TableSourceDataType,
1200
1215
  /,
1201
1216
  *,
1217
+ on_error: Literal['abort', 'ignore'] = 'abort',
1202
1218
  print_stats: bool = False,
1203
- on_error: Literal['abort', 'ignore'] = 'abort'
1219
+ **kwargs: Any,
1204
1220
  )```
1205
1221
 
1206
1222
  To insert just a single row, you can use the more concise syntax:
@@ -1208,23 +1224,25 @@ class Table(SchemaObject):
1208
1224
  ```python
1209
1225
  insert(
1210
1226
  *,
1211
- print_stats: bool = False,
1212
1227
  on_error: Literal['abort', 'ignore'] = 'abort',
1228
+ print_stats: bool = False,
1213
1229
  **kwargs: Any
1214
1230
  )```
1215
1231
 
1216
1232
  Args:
1217
- rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
1218
- names to values.
1233
+ source: A data source from which data can be imported.
1219
1234
  kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
1220
- print_stats: If `True`, print statistics about the cost of computed columns.
1235
+ (if inserting multiple rows) Additional keyword arguments are passed to the data source.
1236
+ source_format: A hint about the format of the source data
1237
+ schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
1221
1238
  on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
1222
1239
  invalid media file (such as a corrupt image) for one of the inserted rows.
1223
1240
 
1224
1241
  - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
1225
1242
  - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
1226
- with errors will have a `None` value for that cell, with information about the error stored in the
1227
- corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1243
+ with errors will have a `None` value for that cell, with information about the error stored in the
1244
+ corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
1245
+ print_stats: If `True`, print statistics about the cost of computed columns.
1228
1246
 
1229
1247
  Returns:
1230
1248
  An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
@@ -1236,6 +1254,7 @@ class Table(SchemaObject):
1236
1254
  - The table has been dropped.
1237
1255
  - One of the rows being inserted does not conform to the table schema.
1238
1256
  - An error occurs during processing of computed columns, and `on_error='ignore'`.
1257
+ - An error occurs while importing data from a source, and `on_error='abort'`.
1239
1258
 
1240
1259
  Examples:
1241
1260
  Insert two rows into the table `my_table` with three int columns ``a``, ``b``, and ``c``.
@@ -1247,6 +1266,10 @@ class Table(SchemaObject):
1247
1266
  Insert a single row using the alternative syntax:
1248
1267
 
1249
1268
  >>> tbl.insert(a=3, b=3, c=3)
1269
+
1270
+ Insert rows from a CSV file:
1271
+
1272
+ >>> tbl.insert(source='path/to/file.csv')
1250
1273
  """
1251
1274
  raise NotImplementedError
1252
1275
 
@@ -225,7 +225,9 @@ class TableVersion:
225
225
  # create schema.Table
226
226
  # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
227
227
  column_md = cls._create_column_md(cols)
228
+ tbl_id = uuid.uuid4()
228
229
  table_md = schema.TableMd(
230
+ tbl_id=str(tbl_id),
229
231
  name=name,
230
232
  user=None,
231
233
  current_version=0,
@@ -241,11 +243,12 @@ class TableVersion:
241
243
  )
242
244
  # create a schema.Table here, we need it to call our c'tor;
243
245
  # don't add it to the session yet, we might add index metadata
244
- tbl_id = uuid.uuid4()
245
246
  tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
246
247
 
247
248
  # create schema.TableVersion
248
- table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
249
+ table_version_md = schema.TableVersionMd(
250
+ tbl_id=str(tbl_record.id), created_at=timestamp, version=0, schema_version=0, additional_md={}
251
+ )
249
252
  tbl_version_record = schema.TableVersion(
250
253
  tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
251
254
  )
@@ -261,6 +264,7 @@ class TableVersion:
261
264
  schema_col_md[col.id] = md
262
265
 
263
266
  schema_version_md = schema.TableSchemaVersionMd(
267
+ tbl_id=str(tbl_record.id),
264
268
  schema_version=0,
265
269
  preceding_schema_version=None,
266
270
  columns=schema_col_md,
@@ -1240,6 +1244,11 @@ class TableVersion:
1240
1244
  """Return all non-system columns"""
1241
1245
  return [c for c in self.cols if c.is_pk]
1242
1246
 
1247
+ @property
1248
+ def primary_key(self) -> list[str]:
1249
+ """Return the names of the primary key columns"""
1250
+ return [c.name for c in self.cols if c.is_pk]
1251
+
1243
1252
  def get_required_col_names(self) -> list[str]:
1244
1253
  """Return the names of all columns for which values must be specified in insert()"""
1245
1254
  assert not self.is_view
@@ -1306,6 +1315,7 @@ class TableVersion:
1306
1315
 
1307
1316
  def _create_tbl_md(self) -> schema.TableMd:
1308
1317
  return schema.TableMd(
1318
+ tbl_id=str(self.id),
1309
1319
  name=self.name,
1310
1320
  user=None,
1311
1321
  current_version=self.version,
@@ -1322,7 +1332,11 @@ class TableVersion:
1322
1332
 
1323
1333
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1324
1334
  return schema.TableVersionMd(
1325
- created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
1335
+ tbl_id=str(self.id),
1336
+ created_at=timestamp,
1337
+ version=self.version,
1338
+ schema_version=self.schema_version,
1339
+ additional_md={},
1326
1340
  )
1327
1341
 
1328
1342
  def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
@@ -1335,6 +1349,7 @@ class TableVersion:
1335
1349
  )
1336
1350
  # preceding_schema_version to be set by the caller
1337
1351
  return schema.TableSchemaVersionMd(
1352
+ tbl_id=str(self.id),
1338
1353
  schema_version=self.schema_version,
1339
1354
  preceding_schema_version=preceding_schema_version,
1340
1355
  columns=column_md,
@@ -251,13 +251,20 @@ class View(Table):
251
251
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
252
252
  return md
253
253
 
254
+ if TYPE_CHECKING:
255
+ import datasets # type: ignore[import-untyped]
256
+
257
+ from pixeltable.globals import RowData, TableDataSource
258
+
254
259
  def insert(
255
260
  self,
256
- rows: Optional[Iterable[dict[str, Any]]] = None,
261
+ source: Optional[TableDataSource] = None,
257
262
  /,
258
263
  *,
259
- print_stats: bool = False,
264
+ source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
265
+ schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
260
266
  on_error: Literal['abort', 'ignore'] = 'abort',
267
+ print_stats: bool = False,
261
268
  **kwargs: Any,
262
269
  ) -> UpdateStatus:
263
270
  raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
@@ -567,7 +567,7 @@ class Env:
567
567
  self.__register_package('transformers')
568
568
  self.__register_package('whisper', library_name='openai-whisper')
569
569
  self.__register_package('whisperx')
570
- self.__register_package('yolox', library_name='git+https://github.com/Megvii-BaseDetection/YOLOX@ac58e0a')
570
+ self.__register_package('yolox', library_name='pixeltable-yolox')
571
571
 
572
572
  def __register_package(self, package_name: str, library_name: Optional[str] = None) -> None:
573
573
  is_installed: bool
@@ -75,7 +75,7 @@ class ExecNode(abc.ABC):
75
75
  loop = asyncio.new_event_loop()
76
76
  asyncio.set_event_loop(loop)
77
77
 
78
- if 'pytest' in sys.modules:
78
+ if _logger.isEnabledFor(logging.DEBUG):
79
79
  loop.set_debug(True)
80
80
 
81
81
  aiter = self.__aiter__()
@@ -16,7 +16,7 @@ from .in_predicate import InPredicate
16
16
  from .inline_expr import InlineArray, InlineDict, InlineList
17
17
  from .is_null import IsNull
18
18
  from .json_mapper import JsonMapper
19
- from .json_path import RELATIVE_PATH_ROOT, JsonPath
19
+ from .json_path import JsonPath
20
20
  from .literal import Literal
21
21
  from .method_ref import MethodRef
22
22
  from .object_ref import ObjectRef
@@ -24,5 +24,6 @@ from .row_builder import ColumnSlotIdx, ExecProfile, RowBuilder
24
24
  from .rowid_ref import RowidRef
25
25
  from .similarity_expr import SimilarityExpr
26
26
  from .sql_element_cache import SqlElementCache
27
+ from .string_op import StringOp
27
28
  from .type_cast import TypeCast
28
29
  from .variable import Variable
@@ -19,6 +19,8 @@ class ArithmeticExpr(Expr):
19
19
  Allows arithmetic exprs on json paths
20
20
  """
21
21
 
22
+ operator: ArithmeticOperator
23
+
22
24
  def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
23
25
  if op1.col_type.is_json_type() or op2.col_type.is_json_type() or operator == ArithmeticOperator.DIV:
24
26
  # we assume it's a float