pixeltable 0.4.0rc1__tar.gz → 0.4.0rc3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (188)
  1. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/PKG-INFO +2 -1
  2. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/__version__.py +2 -2
  3. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/catalog.py +4 -0
  4. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/table.py +16 -0
  5. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/table_version.py +17 -2
  6. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/view.py +24 -1
  7. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/dataframe.py +185 -9
  8. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/env.py +2 -0
  9. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/__init__.py +1 -1
  10. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/evaluators.py +4 -1
  11. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/sql_node.py +152 -12
  12. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/data_row.py +5 -3
  13. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/expr.py +7 -0
  14. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/literal.py +2 -0
  15. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/tools.py +1 -1
  16. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/anthropic.py +19 -45
  17. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/deepseek.py +19 -38
  18. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/fireworks.py +9 -18
  19. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/gemini.py +2 -3
  20. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/llama_cpp.py +6 -6
  21. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/mistralai.py +15 -41
  22. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/ollama.py +1 -1
  23. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/openai.py +82 -165
  24. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/together.py +22 -80
  25. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/globals.py +5 -0
  26. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/__init__.py +11 -2
  27. pixeltable-0.4.0rc3/pixeltable/metadata/converters/convert_36.py +38 -0
  28. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/notes.py +1 -0
  29. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/schema.py +3 -0
  30. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/plan.py +217 -10
  31. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/share/packager.py +115 -6
  32. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/formatter.py +64 -42
  33. pixeltable-0.4.0rc3/pixeltable/utils/sample.py +25 -0
  34. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pyproject.toml +2 -1
  35. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/LICENSE +0 -0
  36. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/README.md +0 -0
  37. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/__init__.py +0 -0
  38. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/__init__.py +0 -0
  39. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/column.py +0 -0
  40. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/dir.py +0 -0
  41. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/globals.py +0 -0
  42. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/insertable_table.py +0 -0
  43. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/named_function.py +0 -0
  44. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/path.py +0 -0
  45. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/schema_object.py +0 -0
  46. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/table_version_handle.py +0 -0
  47. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/catalog/table_version_path.py +0 -0
  48. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/config.py +0 -0
  49. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exceptions.py +0 -0
  50. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/aggregation_node.py +0 -0
  51. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/cache_prefetch_node.py +0 -0
  52. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/component_iteration_node.py +0 -0
  53. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/data_row_batch.py +0 -0
  54. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/exec_context.py +0 -0
  55. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/exec_node.py +0 -0
  56. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/__init__.py +0 -0
  57. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/expr_eval_node.py +0 -0
  58. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/globals.py +0 -0
  59. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/row_buffer.py +0 -0
  60. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/expr_eval/schedulers.py +0 -0
  61. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/in_memory_data_node.py +0 -0
  62. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exec/row_update_node.py +0 -0
  63. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/__init__.py +0 -0
  64. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/arithmetic_expr.py +0 -0
  65. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/array_slice.py +0 -0
  66. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/column_property_ref.py +0 -0
  67. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/column_ref.py +0 -0
  68. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/comparison.py +0 -0
  69. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/compound_predicate.py +0 -0
  70. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/expr_dict.py +0 -0
  71. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/expr_set.py +0 -0
  72. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/function_call.py +0 -0
  73. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/globals.py +0 -0
  74. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/in_predicate.py +0 -0
  75. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/inline_expr.py +0 -0
  76. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/is_null.py +0 -0
  77. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/json_mapper.py +0 -0
  78. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/json_path.py +0 -0
  79. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/method_ref.py +0 -0
  80. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/object_ref.py +0 -0
  81. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/row_builder.py +0 -0
  82. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/rowid_ref.py +0 -0
  83. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/similarity_expr.py +0 -0
  84. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/sql_element_cache.py +0 -0
  85. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/string_op.py +0 -0
  86. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/type_cast.py +0 -0
  87. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/exprs/variable.py +0 -0
  88. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/ext/__init__.py +0 -0
  89. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/ext/functions/__init__.py +0 -0
  90. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/ext/functions/whisperx.py +0 -0
  91. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/ext/functions/yolox.py +0 -0
  92. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/__init__.py +0 -0
  93. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/aggregate_function.py +0 -0
  94. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/callable_function.py +0 -0
  95. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/expr_template_function.py +0 -0
  96. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/function.py +0 -0
  97. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/function_registry.py +0 -0
  98. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/globals.py +0 -0
  99. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/query_template_function.py +0 -0
  100. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/signature.py +0 -0
  101. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/func/udf.py +0 -0
  102. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/__init__.py +0 -0
  103. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/audio.py +0 -0
  104. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/bedrock.py +0 -0
  105. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/date.py +0 -0
  106. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/globals.py +0 -0
  107. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/huggingface.py +0 -0
  108. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/image.py +0 -0
  109. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/json.py +0 -0
  110. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/math.py +0 -0
  111. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/replicate.py +0 -0
  112. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/string.py +0 -0
  113. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/timestamp.py +0 -0
  114. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/util.py +0 -0
  115. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/video.py +0 -0
  116. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/vision.py +0 -0
  117. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/functions/whisper.py +0 -0
  118. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/index/__init__.py +0 -0
  119. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/index/base.py +0 -0
  120. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/index/btree.py +0 -0
  121. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/index/embedding_index.py +0 -0
  122. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/__init__.py +0 -0
  123. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/datarows.py +0 -0
  124. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/external_store.py +0 -0
  125. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/fiftyone.py +0 -0
  126. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/globals.py +0 -0
  127. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/hf_datasets.py +0 -0
  128. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/label_studio.py +0 -0
  129. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/pandas.py +0 -0
  130. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/parquet.py +0 -0
  131. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/table_data_conduit.py +0 -0
  132. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/io/utils.py +0 -0
  133. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/__init__.py +0 -0
  134. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/audio.py +0 -0
  135. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/base.py +0 -0
  136. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/document.py +0 -0
  137. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/image.py +0 -0
  138. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/string.py +0 -0
  139. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/iterators/video.py +0 -0
  140. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_10.py +0 -0
  141. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_12.py +0 -0
  142. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_13.py +0 -0
  143. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_14.py +0 -0
  144. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_15.py +0 -0
  145. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_16.py +0 -0
  146. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_17.py +0 -0
  147. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_18.py +0 -0
  148. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_19.py +0 -0
  149. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_20.py +0 -0
  150. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_21.py +0 -0
  151. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_22.py +0 -0
  152. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_23.py +0 -0
  153. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_24.py +0 -0
  154. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_25.py +0 -0
  155. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_26.py +0 -0
  156. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_27.py +0 -0
  157. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_28.py +0 -0
  158. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_29.py +0 -0
  159. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_30.py +0 -0
  160. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_31.py +0 -0
  161. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_32.py +0 -0
  162. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_33.py +0 -0
  163. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_34.py +0 -0
  164. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/convert_35.py +0 -0
  165. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/metadata/converters/util.py +0 -0
  166. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/py.typed +0 -0
  167. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/share/__init__.py +0 -0
  168. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/share/publish.py +0 -0
  169. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/store.py +0 -0
  170. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/type_system.py +0 -0
  171. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/__init__.py +0 -0
  172. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/arrow.py +0 -0
  173. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/coco.py +0 -0
  174. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/code.py +0 -0
  175. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/console_output.py +0 -0
  176. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/coroutine.py +0 -0
  177. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/dbms.py +0 -0
  178. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/description_helper.py +0 -0
  179. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/documents.py +0 -0
  180. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/exception_handler.py +0 -0
  181. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/filecache.py +0 -0
  182. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/http_server.py +0 -0
  183. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/iceberg.py +0 -0
  184. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/media_store.py +0 -0
  185. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/pytorch.py +0 -0
  186. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/s3.py +0 -0
  187. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/sql.py +0 -0
  188. {pixeltable-0.4.0rc1 → pixeltable-0.4.0rc3}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pixeltable
3
- Version: 0.4.0rc1
3
+ Version: 0.4.0rc3
4
4
  Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
5
5
  License: Apache-2.0
6
6
  Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
@@ -36,6 +36,7 @@ Requires-Dist: numpy (>=1.25)
36
36
  Requires-Dist: pandas (>=2.0,<3.0)
37
37
  Requires-Dist: pgvector (>=0.2.1)
38
38
  Requires-Dist: pillow (>=9.3.0)
39
+ Requires-Dist: pillow-heif (>=0.15.0)
39
40
  Requires-Dist: pixeltable-pgserver (==0.3.1)
40
41
  Requires-Dist: psutil (>=5.9.5)
41
42
  Requires-Dist: psycopg[binary] (>=3.1.18)
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = '0.4.0rc1'
3
- __version_tuple__ = (0, 4, "0rc1")
2
+ __version__ = '0.4.0rc3'
3
+ __version_tuple__ = (0, 4, "0rc3")
@@ -17,6 +17,8 @@ from pixeltable.env import Env
17
17
  from pixeltable.iterators import ComponentIterator
18
18
  from pixeltable.metadata import schema
19
19
 
20
+ if TYPE_CHECKING:
21
+ from pixeltable.plan import SampleClause
20
22
  from .dir import Dir
21
23
  from .globals import IfExistsParam, IfNotExistsParam, MediaValidation
22
24
  from .insertable_table import InsertableTable
@@ -526,6 +528,7 @@ class Catalog:
526
528
  base: TableVersionPath,
527
529
  select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
528
530
  where: Optional[exprs.Expr],
531
+ sample_clause: Optional['SampleClause'],
529
532
  additional_columns: Optional[dict[str, Any]],
530
533
  is_snapshot: bool,
531
534
  iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]],
@@ -554,6 +557,7 @@ class Catalog:
554
557
  select_list=select_list,
555
558
  additional_columns=additional_columns,
556
559
  predicate=where,
560
+ sample_clause=sample_clause,
557
561
  is_snapshot=is_snapshot,
558
562
  iterator_cls=iterator_class,
559
563
  iterator_args=iterator_args,
@@ -240,6 +240,22 @@ class Table(SchemaObject):
240
240
  def limit(self, n: int) -> 'pxt.DataFrame':
241
241
  return self._df().limit(n)
242
242
 
243
+ def sample(
244
+ self,
245
+ n: Optional[int] = None,
246
+ n_per_stratum: Optional[int] = None,
247
+ fraction: Optional[float] = None,
248
+ seed: Optional[int] = None,
249
+ stratify_by: Any = None,
250
+ ) -> pxt.DataFrame:
251
+ """Choose a shuffled sample of rows
252
+
253
+ See [`DataFrame.sample`][pixeltable.DataFrame.sample] for more details.
254
+ """
255
+ return self._df().sample(
256
+ n=n, n_per_stratum=n_per_stratum, fraction=fraction, seed=seed, stratify_by=stratify_by
257
+ )
258
+
243
259
  def collect(self) -> 'pxt.dataframe.DataFrameResultSet':
244
260
  """Return rows from this table."""
245
261
  return self._df().collect()
@@ -23,6 +23,10 @@ from pixeltable.utils.exception_handler import run_cleanup_on_exception
23
23
  from pixeltable.utils.filecache import FileCache
24
24
  from pixeltable.utils.media_store import MediaStore
25
25
 
26
+ if TYPE_CHECKING:
27
+ from pixeltable.plan import SampleClause
28
+
29
+
26
30
  from ..func.globals import resolve_symbol
27
31
  from .column import Column
28
32
  from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_valid_identifier
@@ -66,6 +70,8 @@ class TableVersion:
66
70
  path: Optional[pxt.catalog.TableVersionPath] # only set for live tables; needed to resolve computed cols
67
71
  base: Optional[TableVersionHandle] # only set for views
68
72
  predicate: Optional[exprs.Expr]
73
+ sample_clause: Optional['SampleClause']
74
+
69
75
  iterator_cls: Optional[type[ComponentIterator]]
70
76
  iterator_args: Optional[exprs.InlineDict]
71
77
  num_iterator_cols: int
@@ -132,9 +138,12 @@ class TableVersion:
132
138
 
133
139
  # view-specific initialization
134
140
  from pixeltable import exprs
141
+ from pixeltable.plan import SampleClause
135
142
 
136
143
  predicate_dict = None if self.view_md is None or self.view_md.predicate is None else self.view_md.predicate
137
144
  self.predicate = exprs.Expr.from_dict(predicate_dict) if predicate_dict is not None else None
145
+ sample_dict = None if self.view_md is None or self.view_md.sample_clause is None else self.view_md.sample_clause
146
+ self.sample_clause = SampleClause.from_dict(sample_dict) if sample_dict is not None else None
138
147
 
139
148
  # component view-specific initialization
140
149
  self.iterator_cls = None
@@ -269,7 +278,13 @@ class TableVersion:
269
278
 
270
279
  # if this is purely a snapshot (it doesn't require any additional storage for columns and it doesn't have a
271
280
  # predicate to apply at runtime), we don't create a physical table and simply use the base's table version path
272
- if view_md is not None and view_md.is_snapshot and view_md.predicate is None and len(cols) == 0:
281
+ if (
282
+ view_md is not None
283
+ and view_md.is_snapshot
284
+ and view_md.predicate is None
285
+ and view_md.sample_clause is None
286
+ and len(cols) == 0
287
+ ):
273
288
  session.add(tbl_record)
274
289
  session.add(tbl_version_record)
275
290
  session.add(schema_version_record)
@@ -906,7 +921,7 @@ class TableVersion:
906
921
  result.num_excs = num_excs
907
922
  result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
908
923
  result.cols_with_excs = [f'{self.name}.{self.cols_by_id[cid].name}' for cid in cols_with_excs]
909
- self._write_md(new_version=True, new_version_ts=time.time(), new_schema_version=False)
924
+ self._write_md(new_version=True, new_version_ts=timestamp, new_schema_version=False)
910
925
 
911
926
  # update views
912
927
  for view in self.mutable_views:
@@ -12,6 +12,10 @@ from pixeltable import catalog, exprs, func
12
12
  from pixeltable.env import Env
13
13
  from pixeltable.iterators import ComponentIterator
14
14
 
15
+ if TYPE_CHECKING:
16
+ from pixeltable.plan import SampleClause
17
+
18
+
15
19
  from .column import Column
16
20
  from .globals import _POS_COLUMN_NAME, MediaValidation, UpdateStatus
17
21
  from .table import Table
@@ -66,6 +70,7 @@ class View(Table):
66
70
  select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
67
71
  additional_columns: dict[str, Any],
68
72
  predicate: Optional['exprs.Expr'],
73
+ sample_clause: Optional['SampleClause'],
69
74
  is_snapshot: bool,
70
75
  num_retained_versions: int,
71
76
  comment: str,
@@ -73,6 +78,8 @@ class View(Table):
73
78
  iterator_cls: Optional[type[ComponentIterator]],
74
79
  iterator_args: Optional[dict],
75
80
  ) -> View:
81
+ from pixeltable.plan import SampleClause
82
+
76
83
  # Convert select_list to more additional_columns if present
77
84
  include_base_columns: bool = select_list is None
78
85
  select_list_columns: List[Column] = []
@@ -84,12 +91,23 @@ class View(Table):
84
91
  columns = select_list_columns + columns_from_additional_columns
85
92
  cls._verify_schema(columns)
86
93
 
87
- # verify that filter can be evaluated in the context of the base
94
+ # verify that filters can be evaluated in the context of the base
88
95
  if predicate is not None:
89
96
  if not predicate.is_bound_by([base]):
90
97
  raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
91
98
  # create a copy that we can modify and store
92
99
  predicate = predicate.copy()
100
+ if sample_clause is not None:
101
+ # make sure that the sample clause can be computed in the context of the base
102
+ if sample_clause.stratify_exprs is not None and not all(
103
+ stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
104
+ ):
105
+ raise excs.Error(f'Sample clause cannot be computed in the context of the base {base.tbl_name()}')
106
+ # create a copy that we can modify and store
107
+ sc = sample_clause
108
+ sample_clause = SampleClause(
109
+ sc.version, sc.n, sc.n_per_stratum, sc.fraction, sc.seed, sc.stratify_exprs.copy()
110
+ )
93
111
 
94
112
  # same for value exprs
95
113
  for col in columns:
@@ -160,6 +178,8 @@ class View(Table):
160
178
  # if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
161
179
  if is_snapshot:
162
180
  predicate = predicate.retarget(base_version_path) if predicate is not None else None
181
+ if sample_clause is not None:
182
+ exprs.Expr.retarget_list(sample_clause.stratify_exprs, base_version_path)
163
183
  iterator_args_expr = (
164
184
  iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
165
185
  )
@@ -171,6 +191,7 @@ class View(Table):
171
191
  is_snapshot=is_snapshot,
172
192
  include_base_columns=include_base_columns,
173
193
  predicate=predicate.as_dict() if predicate is not None else None,
194
+ sample_clause=sample_clause.as_dict() if sample_clause is not None else None,
174
195
  base_versions=base_version_path.as_md(),
175
196
  iterator_class_fqn=iterator_class_fqn,
176
197
  iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
@@ -306,4 +327,6 @@ class View(Table):
306
327
 
307
328
  if self._tbl_version.get().predicate is not None:
308
329
  result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
330
+ if self._tbl_version.get().sample_clause is not None:
331
+ result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
309
332
  return ''.join(result)
@@ -17,6 +17,7 @@ from pixeltable import catalog, exceptions as excs, exec, exprs, plan, type_syst
17
17
  from pixeltable.catalog import Catalog, is_valid_identifier
18
18
  from pixeltable.catalog.globals import UpdateStatus
19
19
  from pixeltable.env import Env
20
+ from pixeltable.plan import Planner, SampleClause
20
21
  from pixeltable.type_system import ColumnType
21
22
  from pixeltable.utils.description_helper import DescriptionHelper
22
23
  from pixeltable.utils.formatter import Formatter
@@ -139,6 +140,7 @@ class DataFrame:
139
140
  grouping_tbl: Optional[catalog.TableVersion]
140
141
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]]
141
142
  limit_val: Optional[exprs.Expr]
143
+ sample_clause: Optional[SampleClause]
142
144
 
143
145
  def __init__(
144
146
  self,
@@ -149,6 +151,7 @@ class DataFrame:
149
151
  grouping_tbl: Optional[catalog.TableVersion] = None,
150
152
  order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None, # list[(expr, asc)]
151
153
  limit: Optional[exprs.Expr] = None,
154
+ sample_clause: Optional[SampleClause] = None,
152
155
  ):
153
156
  self._from_clause = from_clause
154
157
 
@@ -168,6 +171,7 @@ class DataFrame:
168
171
  self.grouping_tbl = grouping_tbl
169
172
  self.order_by_clause = copy.deepcopy(order_by_clause)
170
173
  self.limit_val = limit
174
+ self.sample_clause = sample_clause
171
175
 
172
176
  @classmethod
173
177
  def _normalize_select_list(
@@ -210,8 +214,7 @@ class DataFrame:
210
214
 
211
215
  @property
212
216
  def _first_tbl(self) -> catalog.TableVersionPath:
213
- assert len(self._from_clause.tbls) == 1
214
- return self._from_clause.tbls[0]
217
+ return self._from_clause._first_tbl
215
218
 
216
219
  def _vars(self) -> dict[str, exprs.Variable]:
217
220
  """
@@ -236,6 +239,36 @@ class DataFrame:
236
239
  raise excs.Error(f'Multiple definitions of parameter {var.name}')
237
240
  return unique_vars
238
241
 
242
+ @classmethod
243
+ def _convert_param_to_typed_expr(
244
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
245
+ ) -> Optional[exprs.Expr]:
246
+ if v is None:
247
+ if required:
248
+ raise excs.Error(f'{name!r} parameter must be present')
249
+ return v
250
+ v_expr = exprs.Expr.from_object(v)
251
+ if not v_expr.col_type.matches(required_type):
252
+ raise excs.Error(f'{name!r} parameter must be of type {required_type!r}, instead of {v_expr.col_type}')
253
+ if range is not None:
254
+ if not isinstance(v_expr, exprs.Literal):
255
+ raise excs.Error(f'{name!r} parameter must be a constant, not {v_expr}')
256
+ if range[0] is not None and not (v_expr.val >= range[0]):
257
+ raise excs.Error(f'{name!r} parameter must be >= {range[0]}')
258
+ if range[1] is not None and not (v_expr.val <= range[1]):
259
+ raise excs.Error(f'{name!r} parameter must be <= {range[1]}')
260
+ return v_expr
261
+
262
+ @classmethod
263
+ def validate_constant_type_range(
264
+ cls, v: Any, required_type: ts.ColumnType, required: bool, name: str, range: Optional[tuple[Any, Any]] = None
265
+ ) -> Any:
266
+ """Validate that the given named parameter is a constant of the required type and within the specified range."""
267
+ v_expr = cls._convert_param_to_typed_expr(v, required_type, required, name, range)
268
+ if v_expr is None:
269
+ return None
270
+ return v_expr.val
271
+
239
272
  def parameters(self) -> dict[str, ColumnType]:
240
273
  """Return a dict mapping parameter name to parameter type.
241
274
 
@@ -280,7 +313,7 @@ class DataFrame:
280
313
  num_rowid_cols = len(self.grouping_tbl.store_tbl.rowid_columns())
281
314
  # the grouping table must be a base of self.tbl
282
315
  assert num_rowid_cols <= len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
283
- group_by_clause = [exprs.RowidRef(self._first_tbl.tbl_version, idx) for idx in range(num_rowid_cols)]
316
+ group_by_clause = self.__rowid_columns(num_rowid_cols)
284
317
  elif self.group_by_clause is not None:
285
318
  group_by_clause = self.group_by_clause
286
319
 
@@ -292,14 +325,21 @@ class DataFrame:
292
325
  self._select_list_exprs,
293
326
  where_clause=self.where_clause,
294
327
  group_by_clause=group_by_clause,
295
- order_by_clause=self.order_by_clause if self.order_by_clause is not None else [],
328
+ order_by_clause=self.order_by_clause,
296
329
  limit=self.limit_val,
330
+ sample_clause=self.sample_clause,
297
331
  )
298
332
 
333
+ def __rowid_columns(self, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
334
+ """Return list of RowidRef for the given number of associated rowids"""
335
+ return Planner.rowid_columns(self._first_tbl.tbl_version, num_rowid_cols)
336
+
299
337
  def _has_joins(self) -> bool:
300
338
  return len(self._from_clause.join_clauses) > 0
301
339
 
302
340
  def show(self, n: int = 20) -> DataFrameResultSet:
341
+ if self.sample_clause is not None:
342
+ raise excs.Error('show() cannot be used with sample()')
303
343
  assert n is not None
304
344
  return self.limit(n).collect()
305
345
 
@@ -322,6 +362,8 @@ class DataFrame:
322
362
  raise excs.Error('head() cannot be used with order_by()')
323
363
  if self._has_joins():
324
364
  raise excs.Error('head() not supported for joins')
365
+ if self.sample_clause is not None:
366
+ raise excs.Error('head() cannot be used with sample()')
325
367
  if self.group_by_clause is not None:
326
368
  raise excs.Error('head() cannot be used with group_by()')
327
369
  num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
@@ -347,6 +389,8 @@ class DataFrame:
347
389
  raise excs.Error('tail() cannot be used with order_by()')
348
390
  if self._has_joins():
349
391
  raise excs.Error('tail() not supported for joins')
392
+ if self.sample_clause is not None:
393
+ raise excs.Error('tail() cannot be used with sample()')
350
394
  if self.group_by_clause is not None:
351
395
  raise excs.Error('tail() cannot be used with group_by()')
352
396
  num_rowid_cols = len(self._first_tbl.tbl_version.get().store_tbl.rowid_columns())
@@ -510,6 +554,9 @@ class DataFrame:
510
554
  if self.limit_val is not None:
511
555
  heading_vals.append('Limit')
512
556
  info_vals.append(self.limit_val.display_str(inline=False))
557
+ if self.sample_clause is not None:
558
+ heading_vals.append('Sample')
559
+ info_vals.append(self.sample_clause.display_str(inline=False))
513
560
  assert len(heading_vals) == len(info_vals)
514
561
  return pd.DataFrame(info_vals, index=heading_vals)
515
562
 
@@ -644,6 +691,8 @@ class DataFrame:
644
691
  """
645
692
  if self.where_clause is not None:
646
693
  raise excs.Error('Where clause already specified')
694
+ if self.sample_clause is not None:
695
+ raise excs.Error('where cannot be used after sample()')
647
696
  if not isinstance(pred, exprs.Expr):
648
697
  raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
649
698
  if not pred.col_type.is_bool_type():
@@ -771,6 +820,8 @@ class DataFrame:
771
820
 
772
821
  >>> df = t.join(d, on=(t.d1 == d.pk1) & (t.d2 == d.pk2), how='left')
773
822
  """
823
+ if self.sample_clause is not None:
824
+ raise excs.Error('join() cannot be used with sample()')
774
825
  join_pred: Optional[exprs.Expr]
775
826
  if how == 'cross':
776
827
  if on is not None:
@@ -838,6 +889,9 @@ class DataFrame:
838
889
  """
839
890
  if self.group_by_clause is not None:
840
891
  raise excs.Error('Group-by already specified')
892
+ if self.sample_clause is not None:
893
+ raise excs.Error('group_by() cannot be used with sample()')
894
+
841
895
  grouping_tbl: Optional[catalog.TableVersion] = None
842
896
  group_by_clause: Optional[list[exprs.Expr]] = None
843
897
  for item in grouping_items:
@@ -921,6 +975,8 @@ class DataFrame:
921
975
 
922
976
  >>> df = book.order_by(t.price, asc=False).order_by(t.pages)
923
977
  """
978
+ if self.sample_clause is not None:
979
+ raise excs.Error('group_by() cannot be used with sample()')
924
980
  for e in expr_list:
925
981
  if not isinstance(e, exprs.Expr):
926
982
  raise excs.Error(f'Invalid expression in order_by(): {e}')
@@ -945,10 +1001,10 @@ class DataFrame:
945
1001
  Returns:
946
1002
  A new DataFrame with the specified limited rows.
947
1003
  """
948
- assert n is not None
949
- n = exprs.Expr.from_object(n)
950
- if not n.col_type.is_int_type():
951
- raise excs.Error(f'limit(): parameter must be of type int, instead of {n.col_type}')
1004
+ if self.sample_clause is not None:
1005
+ raise excs.Error('limit() cannot be used with sample()')
1006
+
1007
+ limit_expr = self._convert_param_to_typed_expr(n, ts.IntType(nullable=False), True, 'limit()')
952
1008
  return DataFrame(
953
1009
  from_clause=self._from_clause,
954
1010
  select_list=self.select_list,
@@ -956,7 +1012,124 @@ class DataFrame:
956
1012
  group_by_clause=self.group_by_clause,
957
1013
  grouping_tbl=self.grouping_tbl,
958
1014
  order_by_clause=self.order_by_clause,
959
- limit=n,
1015
+ limit=limit_expr,
1016
+ )
1017
+
1018
def sample(
    self,
    n: Optional[int] = None,
    n_per_stratum: Optional[int] = None,
    fraction: Optional[float] = None,
    seed: Optional[int] = None,
    stratify_by: Any = None,
) -> DataFrame:
    """
    Return a new DataFrame specifying a sample of rows from the DataFrame, considered in a shuffled order.

    The size of the sample can be specified in three ways:

    - `n`: the total number of rows to produce as a sample
    - `n_per_stratum`: the number of rows to produce per stratum as a sample
    - `fraction`: the fraction of available rows to produce as a sample

    Exactly one of these three must be given.

    The sample can be stratified by one or more columns, which means that the sample will
    be selected from each stratum separately.

    The data is shuffled before creating the sample.

    Args:
        n: Total number of rows to produce as a sample.
        n_per_stratum: Number of rows to produce per stratum as a sample. This parameter is only valid if
            `stratify_by` is specified (and non-empty). Only one of `n` or `n_per_stratum` can be specified.
        fraction: Fraction of available rows to produce as a sample. This parameter is not usable with `n` or
            `n_per_stratum`. The fraction must be between 0.0 and 1.0.
        seed: Random seed for reproducible shuffling
        stratify_by: If specified, the sample will be stratified by these values. Accepts a single
            expression or a list/tuple of scalar-typed expressions bound to this query's tables.

    Returns:
        A new DataFrame which specifies the sampled rows

    Raises:
        excs.Error: if the DataFrame already has a sample, group_by, order_by, limit or join; if the
            size parameters are not given in a valid combination; or if `stratify_by` is not a valid
            expression or list of expressions.

    Examples:
        Given the Table `person` containing the field 'age', we can create samples of the table in various ways:

        Sample 100 rows from the above Table:

        >>> df = person.sample(n=100)

        Sample 10% of the rows from the above Table:

        >>> df = person.sample(fraction=0.1)

        Sample 10% of the rows from the above Table, stratified by the column 'age':

        >>> df = person.sample(fraction=0.1, stratify_by=person.age)

        Equal allocation sampling: Sample 2 rows from each age present in the above Table:

        >>> df = person.sample(n_per_stratum=2, stratify_by=person.age)

        Sampling is compatible with the where clause, so we can also sample from a filtered DataFrame:

        >>> df = person.where(person.age > 30).sample(n=100)
    """
    # Check context of usage
    if self.sample_clause is not None:
        raise excs.Error('sample() cannot be used with sample()')
    if self.group_by_clause is not None:
        raise excs.Error('sample() cannot be used with group_by()')
    if self.order_by_clause is not None:
        raise excs.Error('sample() cannot be used with order_by()')
    if self.limit_val is not None:
        raise excs.Error('sample() cannot be used with limit()')
    if self._has_joins():
        raise excs.Error('sample() cannot be used with join()')

    # Check parameter combinations
    num_size_specs = sum(arg is not None for arg in (n, n_per_stratum, fraction))
    if num_size_specs != 1:
        raise excs.Error('Exactly one of `n`, `n_per_stratum`, or `fraction` must be specified.')
    if n_per_stratum is not None and stratify_by is None:
        raise excs.Error('Must specify `stratify_by` to use `n_per_stratum`')

    # Check parameter types and values
    n = self.validate_constant_type_range(n, ts.IntType(nullable=False), False, 'n', (1, None))
    n_per_stratum = self.validate_constant_type_range(
        n_per_stratum, ts.IntType(nullable=False), False, 'n_per_stratum', (1, None)
    )
    fraction = self.validate_constant_type_range(
        fraction, ts.FloatType(nullable=False), False, 'fraction', (0.0, 1.0)
    )
    seed = self.validate_constant_type_range(seed, ts.IntType(nullable=False), False, 'seed')

    # analyze stratify list
    stratify_exprs: list[exprs.Expr] = []
    if stratify_by is not None:
        if isinstance(stratify_by, exprs.Expr):
            stratify_by = [stratify_by]
        if not isinstance(stratify_by, (list, tuple)):
            raise excs.Error('`stratify_by` must be a list of scalar expressions')
        for expr in stratify_by:
            # isinstance() already rejects None, so no separate None test is needed
            if not isinstance(expr, exprs.Expr):
                raise excs.Error(f'Invalid expression: {expr}')
            if not expr.col_type.is_scalar_type():
                raise excs.Error(f'Invalid type: expression must be a scalar type (not {expr.col_type})')
            if not expr.is_bound_by(self._from_clause.tbls):
                raise excs.Error(
                    f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
                    f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
                )
            stratify_exprs.append(expr)

    # An empty list/tuple passes the `stratify_by is None` check above but defines no strata,
    # which would leave a per-stratum sample size with nothing to apply it to.
    if n_per_stratum is not None and not stratify_exprs:
        raise excs.Error('Must specify `stratify_by` to use `n_per_stratum`')

    sample_clause = SampleClause(None, n, n_per_stratum, fraction, seed, stratify_exprs)

    return DataFrame(
        from_clause=self._from_clause,
        select_list=self.select_list,
        where_clause=self.where_clause,
        group_by_clause=self.group_by_clause,
        grouping_tbl=self.grouping_tbl,
        order_by_clause=self.order_by_clause,
        limit=self.limit_val,
        sample_clause=sample_clause,
    )
961
1134
 
962
1135
  def update(self, value_spec: dict[str, Any], cascade: bool = True) -> UpdateStatus:
@@ -1055,6 +1228,7 @@ class DataFrame:
1055
1228
  if self.order_by_clause is not None
1056
1229
  else None,
1057
1230
  'limit_val': self.limit_val.as_dict() if self.limit_val is not None else None,
1231
+ 'sample_clause': self.sample_clause.as_dict() if self.sample_clause is not None else None,
1058
1232
  }
1059
1233
  return d
1060
1234
 
@@ -1081,6 +1255,7 @@ class DataFrame:
1081
1255
  else None
1082
1256
  )
1083
1257
  limit_val = exprs.Expr.from_dict(d['limit_val']) if d['limit_val'] is not None else None
1258
+ sample_clause = SampleClause.from_dict(d['sample_clause']) if d['sample_clause'] is not None else None
1084
1259
 
1085
1260
  return DataFrame(
1086
1261
  from_clause=from_clause,
@@ -1090,6 +1265,7 @@ class DataFrame:
1090
1265
  grouping_tbl=grouping_tbl,
1091
1266
  order_by_clause=order_by_clause,
1092
1267
  limit=limit_val,
1268
+ sample_clause=sample_clause,
1093
1269
  )
1094
1270
 
1095
1271
  def _hash_result_set(self) -> str:
@@ -25,6 +25,7 @@ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
25
25
 
26
26
  import pixeltable_pgserver
27
27
  import sqlalchemy as sql
28
+ from pillow_heif import register_heif_opener # type: ignore[import-untyped]
28
29
  from tqdm import TqdmWarning
29
30
 
30
31
  from pixeltable import exceptions as excs
@@ -598,6 +599,7 @@ class Env:
598
599
 
599
600
  def _set_up_runtime(self) -> None:
600
601
  """Check for and start runtime services"""
602
+ register_heif_opener()
601
603
  self._start_web_server()
602
604
  self.__register_packages()
603
605
 
@@ -9,4 +9,4 @@ from .exec_node import ExecNode
9
9
  from .expr_eval import ExprEvalNode
10
10
  from .in_memory_data_node import InMemoryDataNode
11
11
  from .row_update_node import RowUpdateNode
12
- from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlScanNode
12
+ from .sql_node import SqlAggregationNode, SqlJoinNode, SqlLookupNode, SqlNode, SqlSampleNode, SqlScanNode
@@ -317,7 +317,10 @@ class JsonMapperDispatcher(Evaluator):
317
317
  for _ in src
318
318
  ]
319
319
  for nested_row, anchor_val in zip(nested_rows, src):
320
- nested_row[self.scope_anchor.slot_idx] = anchor_val
320
+ # It's possible that self.scope_anchor.slot_idx is None; this corresponds to the case where the
321
+ # mapper expression doesn't actually contain references to RELATIVE_PATH_ROOT.
322
+ if self.scope_anchor.slot_idx is not None:
323
+ nested_row[self.scope_anchor.slot_idx] = anchor_val
321
324
  for slot_idx_, nested_slot_idx in self.external_slot_map.items():
322
325
  nested_row[nested_slot_idx] = row[slot_idx_]
323
326
  self.nested_exec_ctx.init_rows(nested_rows)