pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (119) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +2 -1
  4. pixeltable/catalog/catalog.py +370 -93
  5. pixeltable/catalog/column.py +6 -4
  6. pixeltable/catalog/dir.py +5 -5
  7. pixeltable/catalog/globals.py +14 -16
  8. pixeltable/catalog/insertable_table.py +6 -8
  9. pixeltable/catalog/path.py +14 -7
  10. pixeltable/catalog/table.py +72 -62
  11. pixeltable/catalog/table_version.py +137 -107
  12. pixeltable/catalog/table_version_handle.py +3 -0
  13. pixeltable/catalog/table_version_path.py +1 -1
  14. pixeltable/catalog/view.py +10 -14
  15. pixeltable/dataframe.py +5 -3
  16. pixeltable/env.py +108 -42
  17. pixeltable/exec/__init__.py +2 -0
  18. pixeltable/exec/aggregation_node.py +6 -8
  19. pixeltable/exec/cache_prefetch_node.py +4 -7
  20. pixeltable/exec/component_iteration_node.py +1 -3
  21. pixeltable/exec/data_row_batch.py +1 -2
  22. pixeltable/exec/exec_context.py +1 -1
  23. pixeltable/exec/exec_node.py +1 -2
  24. pixeltable/exec/expr_eval/__init__.py +2 -0
  25. pixeltable/exec/expr_eval/evaluators.py +137 -20
  26. pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
  27. pixeltable/exec/expr_eval/globals.py +68 -7
  28. pixeltable/exec/expr_eval/schedulers.py +25 -23
  29. pixeltable/exec/in_memory_data_node.py +8 -6
  30. pixeltable/exec/row_update_node.py +3 -4
  31. pixeltable/exec/sql_node.py +16 -18
  32. pixeltable/exprs/__init__.py +1 -1
  33. pixeltable/exprs/column_property_ref.py +1 -1
  34. pixeltable/exprs/column_ref.py +3 -3
  35. pixeltable/exprs/compound_predicate.py +1 -1
  36. pixeltable/exprs/data_row.py +17 -1
  37. pixeltable/exprs/expr.py +12 -12
  38. pixeltable/exprs/function_call.py +34 -2
  39. pixeltable/exprs/json_mapper.py +95 -48
  40. pixeltable/exprs/json_path.py +4 -9
  41. pixeltable/exprs/method_ref.py +2 -2
  42. pixeltable/exprs/object_ref.py +2 -2
  43. pixeltable/exprs/row_builder.py +33 -6
  44. pixeltable/exprs/similarity_expr.py +1 -1
  45. pixeltable/exprs/sql_element_cache.py +1 -1
  46. pixeltable/exprs/string_op.py +2 -2
  47. pixeltable/ext/__init__.py +1 -1
  48. pixeltable/ext/functions/__init__.py +1 -1
  49. pixeltable/ext/functions/whisperx.py +1 -1
  50. pixeltable/ext/functions/yolox.py +1 -1
  51. pixeltable/func/__init__.py +1 -1
  52. pixeltable/func/aggregate_function.py +2 -2
  53. pixeltable/func/callable_function.py +3 -6
  54. pixeltable/func/expr_template_function.py +24 -4
  55. pixeltable/func/function.py +7 -9
  56. pixeltable/func/function_registry.py +1 -1
  57. pixeltable/func/query_template_function.py +87 -4
  58. pixeltable/func/signature.py +1 -1
  59. pixeltable/func/tools.py +1 -1
  60. pixeltable/func/udf.py +2 -2
  61. pixeltable/functions/__init__.py +1 -1
  62. pixeltable/functions/anthropic.py +2 -2
  63. pixeltable/functions/audio.py +1 -1
  64. pixeltable/functions/deepseek.py +1 -1
  65. pixeltable/functions/fireworks.py +1 -1
  66. pixeltable/functions/globals.py +6 -6
  67. pixeltable/functions/huggingface.py +1 -1
  68. pixeltable/functions/image.py +1 -1
  69. pixeltable/functions/json.py +1 -1
  70. pixeltable/functions/llama_cpp.py +1 -1
  71. pixeltable/functions/math.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +1 -1
  74. pixeltable/functions/openai.py +2 -2
  75. pixeltable/functions/replicate.py +1 -1
  76. pixeltable/functions/string.py +1 -1
  77. pixeltable/functions/timestamp.py +1 -1
  78. pixeltable/functions/together.py +1 -1
  79. pixeltable/functions/util.py +1 -1
  80. pixeltable/functions/video.py +2 -2
  81. pixeltable/functions/vision.py +2 -2
  82. pixeltable/globals.py +7 -2
  83. pixeltable/index/embedding_index.py +12 -1
  84. pixeltable/io/__init__.py +5 -3
  85. pixeltable/io/fiftyone.py +6 -7
  86. pixeltable/io/label_studio.py +21 -20
  87. pixeltable/io/pandas.py +6 -5
  88. pixeltable/iterators/__init__.py +1 -1
  89. pixeltable/metadata/__init__.py +6 -4
  90. pixeltable/metadata/converters/convert_24.py +3 -3
  91. pixeltable/metadata/converters/convert_25.py +1 -1
  92. pixeltable/metadata/converters/convert_29.py +1 -1
  93. pixeltable/metadata/converters/convert_31.py +11 -0
  94. pixeltable/metadata/converters/convert_32.py +15 -0
  95. pixeltable/metadata/converters/convert_33.py +17 -0
  96. pixeltable/metadata/notes.py +3 -0
  97. pixeltable/metadata/schema.py +26 -1
  98. pixeltable/plan.py +2 -3
  99. pixeltable/share/packager.py +8 -24
  100. pixeltable/share/publish.py +20 -9
  101. pixeltable/store.py +9 -6
  102. pixeltable/type_system.py +19 -7
  103. pixeltable/utils/console_output.py +3 -2
  104. pixeltable/utils/coroutine.py +3 -3
  105. pixeltable/utils/dbms.py +66 -0
  106. pixeltable/utils/documents.py +61 -67
  107. pixeltable/utils/exception_handler.py +59 -0
  108. pixeltable/utils/filecache.py +1 -1
  109. pixeltable/utils/http_server.py +3 -2
  110. pixeltable/utils/pytorch.py +1 -1
  111. pixeltable/utils/sql.py +1 -1
  112. pixeltable-0.3.12.dist-info/METADATA +436 -0
  113. pixeltable-0.3.12.dist-info/RECORD +183 -0
  114. pixeltable/catalog/path_dict.py +0 -169
  115. pixeltable-0.3.10.dist-info/METADATA +0 -382
  116. pixeltable-0.3.10.dist-info/RECORD +0 -179
  117. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
  118. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
  119. {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
@@ -5,16 +5,14 @@ import os
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
7
  from typing import Any, Iterator, Literal, Optional, cast
8
- from xml.etree import ElementTree
8
+ from xml.etree import ElementTree as ET
9
9
 
10
10
  import label_studio_sdk # type: ignore[import-untyped]
11
11
  import PIL.Image
12
12
  from requests.exceptions import HTTPError
13
13
 
14
14
  import pixeltable as pxt
15
- import pixeltable.env as env
16
- import pixeltable.exceptions as excs
17
- from pixeltable import Column, Table
15
+ from pixeltable import Column, Table, env, exceptions as excs
18
16
  from pixeltable.config import Config
19
17
  from pixeltable.exprs import ColumnRef, DataRow, Expr
20
18
  from pixeltable.io.external_store import Project, SyncStatus
@@ -140,7 +138,8 @@ class LabelStudioProject(Project):
140
138
  page += 1
141
139
  if unknown_task_count > 0:
142
140
  _logger.warning(
143
- f'Skipped {unknown_task_count} unrecognized task(s) when syncing Label Studio project "{self.project_title}".'
141
+ f'Skipped {unknown_task_count} unrecognized task(s) when syncing '
142
+ f'Label Studio project {self.project_title!r}.'
144
143
  )
145
144
 
146
145
  def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> SyncStatus:
@@ -174,11 +173,11 @@ class LabelStudioProject(Project):
174
173
  # Send media to Label Studio by HTTP post.
175
174
  assert len(t_data_cols) == 1 # This was verified when the project was set up
176
175
  return self.__update_tasks_by_post(t, existing_tasks, t_data_cols[0], t_rl_cols, rl_info)
177
- elif self.media_import_method == 'file' or self.media_import_method == 'url':
176
+ elif self.media_import_method in ('file', 'url'):
178
177
  # Send media to Label Studio by file reference (local file or URL).
179
178
  return self.__update_tasks_by_files(t, existing_tasks, t_data_cols, t_rl_cols, rl_info)
180
179
  else:
181
- assert False
180
+ raise AssertionError()
182
181
 
183
182
  def __update_tasks_by_post(
184
183
  self,
@@ -227,7 +226,7 @@ class LabelStudioProject(Project):
227
226
  )
228
227
  for i in range(len(coco_annotations))
229
228
  ]
230
- _logger.debug(f'`predictions`: %s', predictions)
229
+ _logger.debug('`predictions`: {%s}', predictions)
231
230
  self.project.create_predictions(predictions)
232
231
  tasks_created += 1
233
232
 
@@ -358,7 +357,7 @@ class LabelStudioProject(Project):
358
357
  def __localpath_to_lspath(cls, localpath: str) -> str:
359
358
  # Transform the local path into Label Studio's bespoke path format.
360
359
  relpath = Path(localpath).relative_to(Config.get().home)
361
- return f'/data/local-files/?d={str(relpath)}'
360
+ return f'/data/local-files/?d={relpath}'
362
361
 
363
362
  def __delete_stale_tasks(
364
363
  self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
@@ -405,7 +404,8 @@ class LabelStudioProject(Project):
405
404
  updates = [{'_rowid': rowid, local_annotations_col.name: ann} for rowid, ann in annotations.items()]
406
405
  if len(updates) > 0:
407
406
  _logger.info(
408
- f'Updating table `{t._name}`, column `{local_annotations_col.name}` with {len(updates)} total annotations.'
407
+ f'Updating table {t._name!r}, column {local_annotations_col.name!r} '
408
+ f'with {len(updates)} total annotations.'
409
409
  )
410
410
  # batch_update currently doesn't propagate from views to base tables. As a workaround, we call
411
411
  # batch_update on the actual ancestor table that holds the annotations column.
@@ -451,7 +451,7 @@ class LabelStudioProject(Project):
451
451
  Parses a Label Studio XML config, extracting the names and Pixeltable types of
452
452
  all input variables.
453
453
  """
454
- root: ElementTree.Element = ElementTree.fromstring(xml_config)
454
+ root: ET.Element = ET.fromstring(xml_config)
455
455
  if root.tag.lower() != 'view':
456
456
  raise excs.Error('Root of Label Studio config must be a `View`')
457
457
  config = _LabelStudioConfig(
@@ -461,7 +461,7 @@ class LabelStudioProject(Project):
461
461
  return config
462
462
 
463
463
  @classmethod
464
- def __parse_data_keys_config(cls, root: ElementTree.Element) -> dict[str, '_DataKey']:
464
+ def __parse_data_keys_config(cls, root: ET.Element) -> dict[str, '_DataKey']:
465
465
  """Parses the data keys from a Label Studio XML config."""
466
466
  config: dict[str, '_DataKey'] = {}
467
467
  for element in root:
@@ -477,7 +477,7 @@ class LabelStudioProject(Project):
477
477
  return config
478
478
 
479
479
  @classmethod
480
- def __parse_rectangle_labels_config(cls, root: ElementTree.Element) -> dict[str, '_RectangleLabel']:
480
+ def __parse_rectangle_labels_config(cls, root: ET.Element) -> dict[str, '_RectangleLabel']:
481
481
  """Parses the RectangleLabels from a Label Studio XML config."""
482
482
  config: dict[str, '_RectangleLabel'] = {}
483
483
  for element in root:
@@ -534,7 +534,7 @@ class LabelStudioProject(Project):
534
534
  _label_studio_client().delete_project(self.project_id)
535
535
  env.Env.get().console_logger.info(f'Deleted Label Studio project: {title}')
536
536
 
537
- def __eq__(self, other) -> bool:
537
+ def __eq__(self, other: object) -> bool:
538
538
  return isinstance(other, LabelStudioProject) and self.project_id == other.project_id
539
539
 
540
540
  def __hash__(self) -> int:
@@ -576,7 +576,7 @@ class LabelStudioProject(Project):
576
576
  local_annotations_column = ANNOTATIONS_COLUMN
577
577
  else:
578
578
  local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
579
- if local_annotations_column not in t._schema.keys():
579
+ if local_annotations_column not in t._schema:
580
580
  t.add_columns({local_annotations_column: pxt.JsonType(nullable=True)})
581
581
 
582
582
  resolved_col_mapping = cls.validate_columns(
@@ -591,9 +591,9 @@ class LabelStudioProject(Project):
591
591
  if media_import_method != 'url':
592
592
  raise excs.Error("`s3_configuration` is only valid when `media_import_method == 'url'`")
593
593
  s3_configuration = copy.copy(s3_configuration)
594
- if not 'bucket' in s3_configuration:
594
+ if 'bucket' not in s3_configuration:
595
595
  raise excs.Error('`s3_configuration` must contain a `bucket` field')
596
- if not 'title' in s3_configuration:
596
+ if 'title' not in s3_configuration:
597
597
  s3_configuration['title'] = 'Pixeltable-S3-Import-Storage'
598
598
  if (
599
599
  'aws_access_key_id' not in s3_configuration
@@ -633,7 +633,8 @@ class LabelStudioProject(Project):
633
633
  raise excs.Error(
634
634
  '`media_import_method` is set to `file`, but your Label Studio server is not configured '
635
635
  'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
636
- 'environment variable to `true` in the environment where your Label Studio server is running.'
636
+ 'environment variable to `true` in the environment where your Label Studio server '
637
+ 'is running.'
637
638
  ) from exc
638
639
  raise # Handle any other exception type normally
639
640
 
@@ -663,7 +664,7 @@ class _LabelStudioConfig:
663
664
  rectangle_labels: dict[str, _RectangleLabel]
664
665
 
665
666
  def validate(self) -> None:
666
- data_key_names = set(key.name for key in self.data_keys.values() if key.name is not None)
667
+ data_key_names = {key.name for key in self.data_keys.values() if key.name is not None}
667
668
  for name, rl in self.rectangle_labels.items():
668
669
  if rl.to_name not in data_key_names:
669
670
  raise excs.Error(
@@ -674,7 +675,7 @@ class _LabelStudioConfig:
674
675
  @property
675
676
  def export_columns(self) -> dict[str, pxt.ColumnType]:
676
677
  data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
677
- rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels.keys()}
678
+ rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels}
678
679
  return {**data_key_cols, **rl_cols}
679
680
 
680
681
 
pixeltable/io/pandas.py CHANGED
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from typing import Any, Optional, Union
2
3
 
3
4
  import numpy as np
@@ -52,12 +53,12 @@ def import_pandas(
52
53
 
53
54
  def import_csv(
54
55
  tbl_name: str,
55
- filepath_or_buffer,
56
+ filepath_or_buffer: Union[str, os.PathLike],
56
57
  schema_overrides: Optional[dict[str, Any]] = None,
57
58
  primary_key: Optional[Union[str, list[str]]] = None,
58
59
  num_retained_versions: int = 10,
59
60
  comment: str = '',
60
- **kwargs,
61
+ **kwargs: Any,
61
62
  ) -> pxt.Table:
62
63
  """
63
64
  Creates a new base table from a csv file. This is a convenience method and is equivalent
@@ -81,13 +82,13 @@ def import_csv(
81
82
 
82
83
  def import_excel(
83
84
  tbl_name: str,
84
- io,
85
- *args,
85
+ io: Union[str, os.PathLike],
86
+ *,
86
87
  schema_overrides: Optional[dict[str, Any]] = None,
87
88
  primary_key: Optional[Union[str, list[str]]] = None,
88
89
  num_retained_versions: int = 10,
89
90
  comment: str = '',
90
- **kwargs,
91
+ **kwargs: Any,
91
92
  ) -> pxt.Table:
92
93
  """
93
94
  Creates a new base table from an Excel (.xlsx) file. This is a convenience method and is
@@ -12,5 +12,5 @@ __removed_symbols = {'base', 'document', 'video'}
12
12
  __all__ = sorted(__default_dir - __removed_symbols)
13
13
 
14
14
 
15
- def __dir__():
15
+ def __dir__() -> list[str]:
16
16
  return __all__
@@ -16,7 +16,7 @@ _console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
16
16
 
17
17
 
18
18
  # current version of the metadata; this is incremented whenever the metadata schema changes
19
- VERSION = 31
19
+ VERSION = 34
20
20
 
21
21
 
22
22
  def create_system_info(engine: sql.engine.Engine) -> None:
@@ -24,9 +24,11 @@ def create_system_info(engine: sql.engine.Engine) -> None:
24
24
  system_md = SystemInfoMd(schema_version=VERSION)
25
25
  record = SystemInfo(md=dataclasses.asdict(system_md))
26
26
  with orm.Session(engine, future=True) as session:
27
- session.add(record)
28
- session.flush()
29
- session.commit()
27
+ # Write system metadata only once for idempotency
28
+ if session.query(SystemInfo).count() == 0:
29
+ session.add(record)
30
+ session.flush()
31
+ session.commit()
30
32
 
31
33
 
32
34
  # conversion functions for upgrading the metadata schema from one version to the following
@@ -19,11 +19,11 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
19
19
  isinstance(v, dict)
20
20
  and '_classpath' in v
21
21
  and v['_classpath']
22
- in {
22
+ in (
23
23
  'pixeltable.func.callable_function.CallableFunction',
24
24
  'pixeltable.func.aggregate_function.AggregateFunction',
25
25
  'pixeltable.func.expr_template_function.ExprTemplateFunction',
26
- }
26
+ )
27
27
  ):
28
28
  if 'path' in v:
29
29
  assert 'signature' not in v
@@ -50,6 +50,6 @@ def __substitute_path(path: str) -> str:
50
50
  # versions, it's necessary to resolve the function symbol to get the signature. The following
51
51
  # adjustment is necessary for function names that are stored in db artifacts of version < 25, but
52
52
  # have changed in some version > 25.
53
- if path in {'pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'}:
53
+ if path in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'):
54
54
  return 'pixeltable.functions.huggingface.clip'
55
55
  return path
@@ -13,7 +13,7 @@ def _(engine: sql.engine.Engine) -> None:
13
13
 
14
14
  def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
15
15
  if k == 'path' and (
16
- v in {'pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image'}
16
+ v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
17
17
  ):
18
18
  return 'path', 'pixeltable.functions.huggingface.clip'
19
19
  return None
@@ -80,7 +80,7 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
80
80
  rolled_kwargs = kwargs.pop(param['name'])
81
81
 
82
82
  if rolled_args is not None:
83
- assert rolled_args['_classname'] in {'InlineArray', 'InlineList'}
83
+ assert rolled_args['_classname'] in ('InlineArray', 'InlineList')
84
84
  new_args.extend(rolled_args['components'])
85
85
  if rolled_kwargs is not None:
86
86
  assert rolled_kwargs['_classname'] == 'InlineDict'
@@ -0,0 +1,11 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata import register_converter
4
+
5
+
6
+ @register_converter(version=31)
7
+ def _(engine: sql.engine.Engine) -> None:
8
+ # Add a column "lock_dummy: int8" to the dirs table in the store
9
+ # This column is the target of an UPDATE operation to synchronize directory operations
10
+ with engine.begin() as conn:
11
+ conn.execute(sql.text('ALTER TABLE dirs ADD COLUMN lock_dummy int8'))
@@ -0,0 +1,15 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=32)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
12
+
13
+
14
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
15
+ table_md['is_replica'] = False
@@ -0,0 +1,17 @@
1
+ from uuid import UUID
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
+ @register_converter(version=33)
10
+ def _(engine: sql.engine.Engine) -> None:
11
+ convert_table_md(engine, table_md_updater=__update_table_md)
12
+
13
+
14
+ def __update_table_md(table_md: dict, table_id: UUID) -> None:
15
+ """Set default value of 'is_pk' field in column metadata to False"""
16
+ for col_md in table_md['column_md'].values():
17
+ col_md['is_pk'] = False if col_md['is_pk'] is None else col_md['is_pk']
@@ -2,6 +2,9 @@
2
2
  # rather than as a comment, so that the existence of a description can be enforced by
3
3
  # the unit tests when new versions are added.
4
4
  VERSION_NOTES = {
5
+ 34: 'Set default value for is_pk field in column metadata to False',
6
+ 33: 'Add is_replica field to table metadata',
7
+ 32: 'Add the lock_dummy BIGINT column to the dirs table',
5
8
  31: 'Add table ids to metadata structs',
6
9
  30: 'Store default values and constant arguments as literals',
7
10
  29: 'Add user and additional_md fields to metadata structs',
@@ -1,7 +1,7 @@
1
1
  import dataclasses
2
2
  import typing
3
3
  import uuid
4
- from typing import Any, Optional, TypeVar, Union, get_type_hints
4
+ from typing import Any, NamedTuple, Optional, TypeVar, Union, get_type_hints
5
5
 
6
6
  import sqlalchemy as sql
7
7
  from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
@@ -84,6 +84,8 @@ class Dir(Base):
84
84
  )
85
85
  parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
86
86
  md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
87
+ # This field is updated to synchronize database operations across multiple sessions
88
+ lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
87
89
 
88
90
 
89
91
  @dataclasses.dataclass
@@ -155,6 +157,7 @@ class ViewMd:
155
157
  class TableMd:
156
158
  tbl_id: str # uuid.UUID
157
159
  name: str
160
+ is_replica: bool
158
161
 
159
162
  user: Optional[str]
160
163
 
@@ -286,3 +289,25 @@ class Function(Base):
286
289
  dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
287
290
  md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # FunctionMd
288
291
  binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
292
+
293
+
294
+ class FullTableMd(NamedTuple):
295
+ tbl_md: TableMd
296
+ version_md: TableVersionMd
297
+ schema_version_md: TableSchemaVersionMd
298
+
299
+ def as_dict(self) -> dict[str, Any]:
300
+ return {
301
+ 'table_id': self.tbl_md.tbl_id,
302
+ 'table_md': dataclasses.asdict(self.tbl_md),
303
+ 'table_version_md': dataclasses.asdict(self.version_md),
304
+ 'table_schema_version_md': dataclasses.asdict(self.schema_version_md),
305
+ }
306
+
307
+ @classmethod
308
+ def from_dict(cls, data_dict: dict[str, Any]) -> 'FullTableMd':
309
+ return FullTableMd(
310
+ tbl_md=md_from_dict(TableMd, data_dict['table_md']),
311
+ version_md=md_from_dict(TableVersionMd, data_dict['table_version_md']),
312
+ schema_version_md=md_from_dict(TableSchemaVersionMd, data_dict['table_schema_version_md']),
313
+ )
pixeltable/plan.py CHANGED
@@ -768,8 +768,7 @@ class Planner:
768
768
  # - select list subexprs that aren't aggregates
769
769
  # - join clause subexprs
770
770
  # - subexprs of Where clause conjuncts that can't be run in SQL
771
- # - all grouping exprs, if any aggregate function call can't be run in SQL (in that case, they all have to be
772
- # run in Python)
771
+ # - all grouping exprs
773
772
  candidates = list(
774
773
  exprs.Expr.list_subexprs(
775
774
  analyzer.select_list,
@@ -784,7 +783,7 @@ class Planner:
784
783
  candidates.extend(
785
784
  exprs.Expr.subexprs(analyzer.filter, filter=sql_elements.contains, traverse_matches=False)
786
785
  )
787
- if is_python_agg and analyzer.group_by_clause is not None:
786
+ if analyzer.group_by_clause is not None:
788
787
  candidates.extend(
789
788
  exprs.Expr.list_subexprs(analyzer.group_by_clause, filter=sql_elements.contains, traverse_matches=False)
790
789
  )
@@ -1,4 +1,3 @@
1
- import dataclasses
2
1
  import io
3
2
  import json
4
3
  import logging
@@ -6,7 +5,6 @@ import tarfile
6
5
  import urllib.parse
7
6
  import urllib.request
8
7
  import uuid
9
- from datetime import datetime
10
8
  from pathlib import Path
11
9
  from typing import Any, Iterator, Optional
12
10
 
@@ -58,28 +56,14 @@ class TablePackager:
58
56
  self.tmp_dir = Path(Env.get().create_tmp_path())
59
57
  self.media_files = {}
60
58
 
61
- # Generate metadata
62
- self.md = {
63
- 'pxt_version': pxt.__version__,
64
- 'pxt_md_version': metadata.VERSION,
65
- 'md': {
66
- 'tables': [
67
- {
68
- 'table_id': str(t._tbl_version.id),
69
- # These are temporary; will replace with a better solution once the concurrency
70
- # changes to catalog have been merged
71
- 'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
72
- 'table_version_md': dataclasses.asdict(
73
- t._tbl_version.get()._create_version_md(datetime.now().timestamp())
74
- ),
75
- 'table_schema_version_md': dataclasses.asdict(
76
- t._tbl_version.get()._create_schema_version_md(0)
77
- ),
78
- }
79
- for t in (table, *table._bases)
80
- ]
81
- },
82
- }
59
+ # Load metadata
60
+ with Env.get().begin_xact():
61
+ tbl_md = catalog.Catalog.get().load_replica_md(table)
62
+ self.md = {
63
+ 'pxt_version': pxt.__version__,
64
+ 'pxt_md_version': metadata.VERSION,
65
+ 'md': {'tables': [md.as_dict() for md in tbl_md]},
66
+ }
83
67
  if additional_md is not None:
84
68
  self.md.update(additional_md)
85
69
 
@@ -1,4 +1,3 @@
1
- import os
2
1
  import sys
3
2
  import urllib.parse
4
3
  import urllib.request
@@ -10,22 +9,22 @@ from tqdm import tqdm
10
9
  import pixeltable as pxt
11
10
  from pixeltable import exceptions as excs
12
11
  from pixeltable.env import Env
12
+ from pixeltable.metadata.schema import FullTableMd
13
13
  from pixeltable.utils import sha256sum
14
14
 
15
15
  from .packager import TablePackager
16
16
 
17
17
  # These URLs are abstracted out for now, but will be replaced with actual (hard-coded) URLs once the
18
18
  # pixeltable.com URLs are available.
19
- _PUBLISH_URL = os.environ.get('PIXELTABLE_PUBLISH_URL')
20
- _FINALIZE_URL = os.environ.get('PIXELTABLE_FINALIZE_URL')
19
+
20
+ PIXELTABLE_API_URL = 'https://internal-api.pixeltable.com'
21
21
 
22
22
 
23
23
  def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
24
24
  packager = TablePackager(src_tbl, additional_md={'table_uri': dest_tbl_uri})
25
- request_json = packager.md
26
- headers_json = {'X-api-key': Env.get().pxt_api_key}
27
-
28
- response = requests.post(_PUBLISH_URL, json=request_json, headers=headers_json)
25
+ request_json = packager.md | {'operation_type': 'publish_snapshot'}
26
+ headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
27
+ response = requests.post(PIXELTABLE_API_URL, json=request_json, headers=headers_json)
29
28
  if response.status_code != 200:
30
29
  raise excs.Error(f'Error publishing snapshot: {response.text}')
31
30
  response_json = response.json()
@@ -47,14 +46,14 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
47
46
  Env.get().console_logger.info('Finalizing snapshot ...')
48
47
 
49
48
  finalize_request_json = {
49
+ 'operation_type': 'finalize_snapshot',
50
50
  'upload_id': upload_id,
51
51
  'datafile': bundle.name,
52
52
  'size': bundle.stat().st_size,
53
53
  'sha256': sha256sum(bundle), # Generate our own SHA for independent verification
54
54
  }
55
-
56
55
  # TODO: Use Pydantic for validation
57
- finalize_response = requests.post(_FINALIZE_URL, json=finalize_request_json, headers=headers_json)
56
+ finalize_response = requests.post(PIXELTABLE_API_URL, json=finalize_request_json, headers=headers_json)
58
57
  if finalize_response.status_code != 200:
59
58
  raise excs.Error(f'Error finalizing snapshot: {finalize_response.text}')
60
59
  finalize_response_json = finalize_response.json()
@@ -66,6 +65,18 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
66
65
  return confirmed_tbl_uri
67
66
 
68
67
 
68
+ def clone_snapshot(dest_tbl_uri: str) -> list[FullTableMd]:
69
+ headers_json = {'X-api-key': Env.get().pxt_api_key, 'Content-Type': 'application/json'}
70
+ clone_request_json = {'operation_type': 'clone_snapshot', 'table_uri': dest_tbl_uri}
71
+ response = requests.post(PIXELTABLE_API_URL, json=clone_request_json, headers=headers_json)
72
+ if response.status_code != 200:
73
+ raise excs.Error(f'Error cloning snapshot: {response.text}')
74
+ response_json = response.json()
75
+ if not isinstance(response_json, dict) or 'table_uri' not in response_json:
76
+ raise excs.Error(f'Unexpected response from server.\n{response_json}')
77
+ return [FullTableMd.from_dict(t) for t in response_json['md']['tables']]
78
+
79
+
69
80
  def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult) -> None:
70
81
  from pixeltable.utils.s3 import get_client
71
82
 
pixeltable/store.py CHANGED
@@ -16,6 +16,7 @@ from pixeltable import catalog, exceptions as excs, exprs
16
16
  from pixeltable.env import Env
17
17
  from pixeltable.exec import ExecNode
18
18
  from pixeltable.metadata import schema
19
+ from pixeltable.utils.exception_handler import run_cleanup
19
20
  from pixeltable.utils.media_store import MediaStore
20
21
  from pixeltable.utils.sql import log_explain, log_stmt
21
22
 
@@ -99,9 +100,9 @@ class StoreBase:
99
100
 
100
101
  # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
101
102
  idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
102
- idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using='brin'))
103
+ idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using=Env.get().dbms.version_index_type))
103
104
  idx_name = f'vmax_idx_{self.tbl_version.id.hex}'
104
- idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using='brin'))
105
+ idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
105
106
 
106
107
  self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
107
108
 
@@ -232,7 +233,6 @@ class StoreBase:
232
233
  assert col.tbl.id == self.tbl_version.id
233
234
  num_excs = 0
234
235
  num_rows = 0
235
-
236
236
  # create temp table to store output of exec_plan, with the same primary key as the store table
237
237
  tmp_name = f'temp_{self._storage_name()}'
238
238
  tmp_pk_cols = [sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns()]
@@ -301,10 +301,13 @@ class StoreBase:
301
301
  )
302
302
  log_explain(_logger, update_stmt, conn)
303
303
  conn.execute(update_stmt)
304
-
305
304
  finally:
306
- tmp_tbl.drop(bind=conn)
307
- self.sa_md.remove(tmp_tbl)
305
+
306
+ def remove_tmp_tbl() -> None:
307
+ self.sa_md.remove(tmp_tbl)
308
+ tmp_tbl.drop(bind=conn)
309
+
310
+ run_cleanup(remove_tmp_tbl, raise_error=True)
308
311
  return num_excs
309
312
 
310
313
  def insert_rows(
pixeltable/type_system.py CHANGED
@@ -512,7 +512,7 @@ class StringType(ColumnType):
512
512
  def __init__(self, nullable: bool = False):
513
513
  super().__init__(self.Type.STRING, nullable=nullable)
514
514
 
515
- def has_supertype(self):
515
+ def has_supertype(self) -> bool:
516
516
  return not self.nullable
517
517
 
518
518
  @classmethod
@@ -602,7 +602,7 @@ class TimestampType(ColumnType):
602
602
  def __init__(self, nullable: bool = False):
603
603
  super().__init__(self.Type.TIMESTAMP, nullable=nullable)
604
604
 
605
- def has_supertype(self):
605
+ def has_supertype(self) -> bool:
606
606
  return not self.nullable
607
607
 
608
608
  @classmethod
@@ -768,7 +768,7 @@ class JsonType(ColumnType):
768
768
  a_type = a.get('type')
769
769
  b_type = b.get('type')
770
770
 
771
- if a_type in {'string', 'integer', 'number', 'boolean', 'object', 'array'} and a_type == b_type:
771
+ if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
772
772
  # a and b both have the same type designation, but are not identical. This can happen if
773
773
  # (for example) they have validators or other attributes that differ. In this case, we
774
774
  # generalize to {'type': t}, where t is their shared type, with no other qualifications.
@@ -1170,6 +1170,20 @@ class DocumentType(ColumnType):
1170
1170
  XML = 3
1171
1171
  TXT = 4
1172
1172
 
1173
+ @classmethod
1174
+ def from_extension(cls, ext: str) -> Optional['DocumentType.DocumentFormat']:
1175
+ if ext in ('.htm', '.html'):
1176
+ return cls.HTML
1177
+ if ext == '.md':
1178
+ return cls.MD
1179
+ if ext == '.pdf':
1180
+ return cls.PDF
1181
+ if ext == '.xml':
1182
+ return cls.XML
1183
+ if ext == '.txt':
1184
+ return cls.TXT
1185
+ return None
1186
+
1173
1187
  def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
1174
1188
  super().__init__(self.Type.DOCUMENT, nullable=nullable)
1175
1189
  self.doc_formats = doc_formats
@@ -1203,9 +1217,7 @@ class DocumentType(ColumnType):
1203
1217
  assert isinstance(val, str)
1204
1218
  from pixeltable.utils.documents import get_document_handle
1205
1219
 
1206
- dh = get_document_handle(val)
1207
- if dh is None:
1208
- raise excs.Error(f'Not a recognized document format: {val}')
1220
+ _ = get_document_handle(val)
1209
1221
 
1210
1222
 
1211
1223
  T = typing.TypeVar('T')
@@ -1240,7 +1252,7 @@ class _PxtType:
1240
1252
  `ColumnType`.
1241
1253
  """
1242
1254
 
1243
- def __init__(self):
1255
+ def __init__(self) -> None:
1244
1256
  raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
1245
1257
 
1246
1258
  @classmethod
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import TextIO
2
3
 
3
4
 
4
5
  def map_level(verbosity: int) -> int:
@@ -22,10 +23,10 @@ def map_level(verbosity: int) -> int:
22
23
 
23
24
 
24
25
  class ConsoleOutputHandler(logging.StreamHandler):
25
- def __init__(self, stream):
26
+ def __init__(self, stream: TextIO):
26
27
  super().__init__(stream)
27
28
 
28
- def emit(self, record):
29
+ def emit(self, record: logging.LogRecord) -> None:
29
30
  if record.msg.endswith('\n'):
30
31
  self.stream.write(record.msg)
31
32
  else: