pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (140)
  1. pixeltable/__init__.py +21 -4
  2. pixeltable/catalog/__init__.py +13 -0
  3. pixeltable/catalog/catalog.py +159 -0
  4. pixeltable/catalog/column.py +200 -0
  5. pixeltable/catalog/dir.py +32 -0
  6. pixeltable/catalog/globals.py +33 -0
  7. pixeltable/catalog/insertable_table.py +191 -0
  8. pixeltable/catalog/named_function.py +36 -0
  9. pixeltable/catalog/path.py +58 -0
  10. pixeltable/catalog/path_dict.py +139 -0
  11. pixeltable/catalog/schema_object.py +39 -0
  12. pixeltable/catalog/table.py +581 -0
  13. pixeltable/catalog/table_version.py +749 -0
  14. pixeltable/catalog/table_version_path.py +133 -0
  15. pixeltable/catalog/view.py +203 -0
  16. pixeltable/client.py +520 -31
  17. pixeltable/dataframe.py +540 -349
  18. pixeltable/env.py +373 -48
  19. pixeltable/exceptions.py +12 -21
  20. pixeltable/exec/__init__.py +9 -0
  21. pixeltable/exec/aggregation_node.py +78 -0
  22. pixeltable/exec/cache_prefetch_node.py +113 -0
  23. pixeltable/exec/component_iteration_node.py +79 -0
  24. pixeltable/exec/data_row_batch.py +95 -0
  25. pixeltable/exec/exec_context.py +22 -0
  26. pixeltable/exec/exec_node.py +61 -0
  27. pixeltable/exec/expr_eval_node.py +217 -0
  28. pixeltable/exec/in_memory_data_node.py +69 -0
  29. pixeltable/exec/media_validation_node.py +43 -0
  30. pixeltable/exec/sql_scan_node.py +225 -0
  31. pixeltable/exprs/__init__.py +24 -0
  32. pixeltable/exprs/arithmetic_expr.py +102 -0
  33. pixeltable/exprs/array_slice.py +71 -0
  34. pixeltable/exprs/column_property_ref.py +77 -0
  35. pixeltable/exprs/column_ref.py +105 -0
  36. pixeltable/exprs/comparison.py +77 -0
  37. pixeltable/exprs/compound_predicate.py +98 -0
  38. pixeltable/exprs/data_row.py +187 -0
  39. pixeltable/exprs/expr.py +586 -0
  40. pixeltable/exprs/expr_set.py +39 -0
  41. pixeltable/exprs/function_call.py +380 -0
  42. pixeltable/exprs/globals.py +69 -0
  43. pixeltable/exprs/image_member_access.py +115 -0
  44. pixeltable/exprs/image_similarity_predicate.py +58 -0
  45. pixeltable/exprs/inline_array.py +107 -0
  46. pixeltable/exprs/inline_dict.py +101 -0
  47. pixeltable/exprs/is_null.py +38 -0
  48. pixeltable/exprs/json_mapper.py +121 -0
  49. pixeltable/exprs/json_path.py +159 -0
  50. pixeltable/exprs/literal.py +54 -0
  51. pixeltable/exprs/object_ref.py +41 -0
  52. pixeltable/exprs/predicate.py +44 -0
  53. pixeltable/exprs/row_builder.py +355 -0
  54. pixeltable/exprs/rowid_ref.py +94 -0
  55. pixeltable/exprs/type_cast.py +53 -0
  56. pixeltable/exprs/variable.py +45 -0
  57. pixeltable/func/__init__.py +9 -0
  58. pixeltable/func/aggregate_function.py +194 -0
  59. pixeltable/func/batched_function.py +53 -0
  60. pixeltable/func/callable_function.py +69 -0
  61. pixeltable/func/expr_template_function.py +82 -0
  62. pixeltable/func/function.py +110 -0
  63. pixeltable/func/function_registry.py +227 -0
  64. pixeltable/func/globals.py +36 -0
  65. pixeltable/func/nos_function.py +202 -0
  66. pixeltable/func/signature.py +166 -0
  67. pixeltable/func/udf.py +163 -0
  68. pixeltable/functions/__init__.py +52 -103
  69. pixeltable/functions/eval.py +216 -0
  70. pixeltable/functions/fireworks.py +61 -0
  71. pixeltable/functions/huggingface.py +120 -0
  72. pixeltable/functions/image.py +16 -0
  73. pixeltable/functions/openai.py +88 -0
  74. pixeltable/functions/pil/image.py +148 -7
  75. pixeltable/functions/string.py +13 -0
  76. pixeltable/functions/together.py +27 -0
  77. pixeltable/functions/util.py +41 -0
  78. pixeltable/functions/video.py +62 -0
  79. pixeltable/iterators/__init__.py +3 -0
  80. pixeltable/iterators/base.py +48 -0
  81. pixeltable/iterators/document.py +311 -0
  82. pixeltable/iterators/video.py +89 -0
  83. pixeltable/metadata/__init__.py +54 -0
  84. pixeltable/metadata/converters/convert_10.py +18 -0
  85. pixeltable/metadata/schema.py +211 -0
  86. pixeltable/plan.py +656 -0
  87. pixeltable/store.py +413 -182
  88. pixeltable/tests/conftest.py +143 -86
  89. pixeltable/tests/test_audio.py +65 -0
  90. pixeltable/tests/test_catalog.py +27 -0
  91. pixeltable/tests/test_client.py +14 -14
  92. pixeltable/tests/test_component_view.py +372 -0
  93. pixeltable/tests/test_dataframe.py +433 -0
  94. pixeltable/tests/test_dirs.py +78 -62
  95. pixeltable/tests/test_document.py +117 -0
  96. pixeltable/tests/test_exprs.py +591 -135
  97. pixeltable/tests/test_function.py +297 -67
  98. pixeltable/tests/test_functions.py +283 -1
  99. pixeltable/tests/test_migration.py +43 -0
  100. pixeltable/tests/test_nos.py +54 -0
  101. pixeltable/tests/test_snapshot.py +208 -0
  102. pixeltable/tests/test_table.py +1086 -258
  103. pixeltable/tests/test_transactional_directory.py +42 -0
  104. pixeltable/tests/test_types.py +5 -11
  105. pixeltable/tests/test_video.py +149 -34
  106. pixeltable/tests/test_view.py +530 -0
  107. pixeltable/tests/utils.py +186 -45
  108. pixeltable/tool/create_test_db_dump.py +149 -0
  109. pixeltable/type_system.py +490 -133
  110. pixeltable/utils/__init__.py +17 -46
  111. pixeltable/utils/clip.py +12 -15
  112. pixeltable/utils/coco.py +136 -0
  113. pixeltable/utils/documents.py +39 -0
  114. pixeltable/utils/filecache.py +195 -0
  115. pixeltable/utils/help.py +11 -0
  116. pixeltable/utils/media_store.py +76 -0
  117. pixeltable/utils/parquet.py +126 -0
  118. pixeltable/utils/pytorch.py +172 -0
  119. pixeltable/utils/s3.py +13 -0
  120. pixeltable/utils/sql.py +17 -0
  121. pixeltable/utils/transactional_directory.py +35 -0
  122. pixeltable-0.2.1.dist-info/LICENSE +18 -0
  123. pixeltable-0.2.1.dist-info/METADATA +119 -0
  124. pixeltable-0.2.1.dist-info/RECORD +125 -0
  125. {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
  126. pixeltable/catalog.py +0 -1421
  127. pixeltable/exprs.py +0 -1745
  128. pixeltable/function.py +0 -269
  129. pixeltable/functions/clip.py +0 -10
  130. pixeltable/functions/pil/__init__.py +0 -23
  131. pixeltable/functions/tf.py +0 -21
  132. pixeltable/index.py +0 -57
  133. pixeltable/tests/test_dict.py +0 -24
  134. pixeltable/tests/test_tf.py +0 -69
  135. pixeltable/tf.py +0 -33
  136. pixeltable/utils/tf.py +0 -33
  137. pixeltable/utils/video.py +0 -32
  138. pixeltable-0.1.2.dist-info/LICENSE +0 -201
  139. pixeltable-0.1.2.dist-info/METADATA +0 -89
  140. pixeltable-0.1.2.dist-info/RECORD +0 -37
pixeltable/__init__.py CHANGED
@@ -1,15 +1,23 @@
1
+ from .catalog import Column, Table, InsertableTable, View
1
2
  from .client import Client
2
3
  from .dataframe import DataFrame
3
- from .catalog import Column
4
- from .exceptions import UnknownEntityError, Error
4
+ from .exceptions import Error, Error
5
+ from .exprs import RELATIVE_PATH_ROOT
6
+ from .func import Function, udf, uda, Aggregator, expr_udf
5
7
  from .type_system import \
6
- ColumnType, StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ArrayType, ImageType, VideoType
8
+ ColumnType, StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ArrayType, ImageType, VideoType, \
9
+ AudioType, DocumentType
10
+ from .utils.help import help
11
+ # noinspection PyUnresolvedReferences
12
+ from . import functions
7
13
 
8
14
  __all__ = [
9
15
  'Client',
10
16
  'DataFrame',
11
17
  'Column',
12
- 'UnknownEntityError',
18
+ 'Table',
19
+ 'InsertableTable',
20
+ 'View',
13
21
  'Error',
14
22
  'ColumnType',
15
23
  'StringType',
@@ -18,9 +26,18 @@ __all__ = [
18
26
  'BoolType',
19
27
  'TimestampType',
20
28
  'JsonType',
29
+ 'RELATIVE_PATH_ROOT',
21
30
  'ArrayType',
22
31
  'ImageType',
23
32
  'VideoType',
33
+ 'AudioType',
34
+ 'DocumentType',
35
+ 'Function',
36
+ 'help',
37
+ 'udf',
38
+ 'Aggregator',
39
+ 'uda',
40
+ 'expr_udf',
24
41
  ]
25
42
 
26
43
 
@@ -0,0 +1,13 @@
1
+ from .catalog import Catalog
2
+ from .column import Column
3
+ from .table_version_path import TableVersionPath
4
+ from .table_version import TableVersion
5
+ from .schema_object import SchemaObject
6
+ from .named_function import NamedFunction
7
+ from .dir import Dir
8
+ from .table import Table
9
+ from .insertable_table import InsertableTable
10
+ from .view import View
11
+ from .path import Path
12
+ from .path_dict import PathDict
13
+ from .globals import is_valid_identifier, is_valid_path
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+ from typing import Optional, List, Any, Dict, Tuple
3
+ from uuid import UUID
4
+ import dataclasses
5
+ import logging
6
+
7
+ import sqlalchemy as sql
8
+ import sqlalchemy.orm as orm
9
+
10
+ from .table_version import TableVersion
11
+ from .table_version_path import TableVersionPath
12
+ from .table import Table
13
+ from .named_function import NamedFunction
14
+ from .path_dict import PathDict
15
+ import pixeltable.env as env
16
+ import pixeltable.metadata.schema as schema
17
+
18
+ _logger = logging.getLogger('pixeltable')
19
+
20
class Catalog:
    """Process-wide registry of catalog objects (tables, views and their versions).

    The shared instance is obtained through ``Catalog.get()``; ``Catalog.clear()`` discards it.
    """
    _instance: Optional[Catalog] = None

    @classmethod
    def get(cls) -> Catalog:
        """Return the shared Catalog, creating it and loading all tables/views on first use."""
        if cls._instance is not None:
            return cls._instance

        # NOTE(review): the instance is published *before* the tables are loaded. Presumably code
        # reached from _load_table_versions() calls Catalog.get() itself; confirm before reordering.
        cls._instance = cls()
        engine = env.Env.get().engine
        with orm.Session(engine, future=True) as session:
            cls._instance._load_table_versions(session)
            # loading of stored function metadata (_load_functions) is currently disabled
        return cls._instance

    @classmethod
    def clear(cls) -> None:
        """Drop the shared instance (used by tests); the next get() builds a new one."""
        cls._instance = None

    def __init__(self) -> None:
        # keyed by (table id, version):
        # - mutable version of a table: version is None (even though TableVersion.version is set correctly)
        # - snapshot versions: the version of the snapshot
        # Nothing in this class writes to this dict; presumably TableVersion registers itself.
        self.tbl_versions: Dict[Tuple[UUID, int], TableVersion] = {}

        # plain dicts rather than defaultdicts: those don't cooperate with the debugger
        self.tbls: Dict[UUID, Table] = {}
        self.tbl_dependents: Dict[UUID, List[Table]] = {}

        self._init_store()
        # must come after _init_store()
        self.paths = PathDict()

    def _init_store(self) -> None:
        """Create the root directory record if the store has no directories yet. Idempotent."""
        with orm.Session(env.Env.get().engine, future=True) as session:
            num_dirs = session.query(sql.func.count(schema.Dir.id)).scalar()
            if num_dirs > 0:
                # already initialized
                return
            # every schema object needs a directory, so we start out with a top-level one
            root_md = schema.DirMd(name='')
            root_record = schema.Dir(parent_id=None, md=dataclasses.asdict(root_md))
            session.add(root_record)
            session.flush()
            session.commit()
            _logger.info('Initialized catalog')

    def _load_snapshot_version(
            self, tbl_id: UUID, version: int, base: Optional[TableVersion], session: orm.Session
    ) -> TableVersion:
        """Build the snapshot TableVersion for version ``version`` of table ``tbl_id`` from the store.

        Args:
            tbl_id: id of the table
            version: version number to load
            base: passed through as the new TableVersion's ``base``
            session: session used for the metadata query

        Returns:
            a TableVersion constructed with ``is_snapshot=True``
        """
        tv_table = schema.TableVersion.__table__
        sv_table = schema.TableSchemaVersion.__table__
        sv_col_name = schema.TableSchemaVersion.schema_version.name
        # the requested version of the table ...
        version_clause = sql.text(f"({tv_table}.md->>'version')::int = {version}")
        # ... paired with the schema version that was in effect for it
        schema_version_clause = sql.text(f"({tv_table}.md->>'schema_version')::int = {sv_table}.{sv_col_name}")

        query = session.query(schema.Table, schema.TableSchemaVersion)
        query = query.select_from(schema.Table)
        query = query.join(schema.TableVersion)
        query = query.join(schema.TableSchemaVersion)
        query = query.where(schema.Table.id == tbl_id)
        query = query.where(version_clause)
        query = query.where(schema_version_clause)
        tbl_record, schema_version_record = query.one()

        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
        # tbl_record.base_tbl_id/base_snapshot_id are ignored in favor of 'base': if the base is a snapshot,
        # we'd have to look that up first
        return TableVersion(tbl_record.id, tbl_md, version, schema_version_md, is_snapshot=True, base=base)

    def _load_base_path(self, view_md, session: orm.Session) -> TableVersionPath:
        """Construct the TableVersionPath of a view's bases from ``view_md.base_versions``."""
        assert len(view_md.base_versions) > 0
        refd_versions = [(UUID(tbl_id), version) for tbl_id, version in view_md.base_versions]
        path: Optional[TableVersionPath] = None
        parent: Optional[TableVersion] = None
        # walk the versions back to front, so that each path element can wrap the one built before it
        for base_id, version in reversed(refd_versions):
            base_version = self.tbl_versions.get((base_id, version))
            if base_version is None:
                # a reference to a mutable table (version is None) should have been loaded already
                assert version is not None
                base_version = self._load_snapshot_version(base_id, version, parent, session)
            path = TableVersionPath(base_version, base=path)
            parent = base_version
        assert path is not None
        return path

    def _load_table_versions(self, session: orm.Session) -> None:
        """Load all tables and views from the store and register them in tbls/tbl_dependents/paths."""
        from .insertable_table import InsertableTable
        from .view import View

        tbl_table = schema.Table.__table__
        tv_table = schema.TableVersion.__table__
        sv_table = schema.TableSchemaVersion.__table__
        sv_col_name = schema.TableSchemaVersion.schema_version.name

        # tables/views are loaded in ascending order of creation ts, so that base references
        # can be resolved in a single pass
        query = session.query(schema.Table, schema.TableSchemaVersion)
        query = query.select_from(schema.Table)
        query = query.join(schema.TableVersion)
        query = query.join(schema.TableSchemaVersion)
        query = query.where(sql.text(f"({tv_table}.md->>'version')::int = 0"))
        query = query.where(
            sql.text(f"({tbl_table}.md->>'current_schema_version')::int = {sv_table}.{sv_col_name}"))
        query = query.order_by(sql.text(f"({tv_table}.md->>'created_at')::float"))

        for tbl_record, schema_version_record in query.all():
            tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
            schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
            view_md = tbl_md.view_md

            if view_md is None:
                # a regular (insertable) table
                tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
                tbl = InsertableTable(tbl_record.dir_id, tbl_version)
            else:
                base_path = self._load_base_path(view_md, session)
                base_tbl = self.tbls[base_path.tbl_version.id]
                is_snapshot = view_md.is_snapshot
                snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
                if snapshot_only:
                    # a pure snapshot has no physical table backing it
                    view_path = base_path
                else:
                    tbl_version = TableVersion(
                        tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md, is_snapshot=is_snapshot,
                        base=base_path.tbl_version if is_snapshot else None,
                        base_path=base_path if not is_snapshot else None)
                    view_path = TableVersionPath(tbl_version, base=base_path)

                tbl = View(
                    tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl,
                    snapshot_only=snapshot_only)
                self.tbl_dependents[base_tbl._id].append(tbl)

            self.tbls[tbl._id] = tbl
            self.tbl_dependents[tbl._id] = []
            self.paths.add_schema_obj(tbl._dir_id, tbl_md.name, tbl)
148
+
149
+ # def _load_functions(self, session: orm.Session) -> None:
150
+ # # load Function metadata; doesn't load the actual callable, which can be large and is only done on-demand by the
151
+ # # FunctionRegistry
152
+ # q = session.query(schema.Function.id, schema.Function.dir_id, schema.Function.md) \
153
+ # .where(sql.text(f"({schema.Function.__table__}.md->>'name')::text IS NOT NULL"))
154
+ # for id, dir_id, md in q.all():
155
+ # assert 'name' in md
156
+ # name = md['name']
157
+ # assert name is not None
158
+ # named_fn = NamedFunction(id, dir_id, name)
159
+ # self.paths.add_schema_obj(dir_id, name, named_fn)
@@ -0,0 +1,200 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Optional, Union, Callable, Set
5
+
6
+ import sqlalchemy as sql
7
+ from pgvector.sqlalchemy import Vector
8
+
9
+ from pixeltable import exceptions as excs
10
+ from pixeltable.metadata import schema
11
+ from pixeltable.type_system import ColumnType, StringType
12
+ from .globals import is_valid_identifier
13
+
14
+ _logger = logging.getLogger('pixeltable')
15
+
16
class Column:
    """A column in the schema of a Table/DataFrame.

    Carries the metadata needed to execute queries and updates against one particular version
    of a table/view.
    """
    def __init__(
            self, name: str, col_type: Optional[ColumnType] = None,
            computed_with: Optional[Union['Expr', Callable]] = None,
            primary_key: bool = False, stored: Optional[bool] = None,
            indexed: bool = False,
            # these parameters aren't set by users
            col_id: Optional[int] = None):
        """Column constructor.

        Args:
            name: column name
            col_type: column type; may be None if the type can be derived from ``computed_with``
            computed_with: a callable or an Expr object that computes the column value
            primary_key: if True, this column is part of the primary key
            stored: whether a computed column is present in the stored table or recomputed on demand
            indexed: if True, this column has a nearest neighbor index (only valid for image columns)
            col_id: column ID (only used internally)

        Raises:
            Error: if ``name`` is not a valid identifier, if neither ``col_type`` nor ``computed_with``
                is given, if ``computed_with`` is neither convertible to an Expr nor a Callable, if
                ``computed_with`` is a Callable and ``col_type`` is missing, or if ``indexed`` is set
                for a non-image column

        Computed columns are those with a non-None ``computed_with``:

        - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
          col_type is None
        - when loaded from the md store: both ``computed_with`` and col_type are set

        If ``computed_with`` is a Callable:

        - its parameter names must correspond to existing columns of the table this Column is used for
        - ``col_type`` needs to be set to the callable's return type

        ``stored`` (only valid for computed image columns):

        - True: the column is present in the stored table
        - False: the column is not present in the stored table and gets recomputed during a query
        - None: the system chooses (at present always False, but this may change in the future)
        """
        if not is_valid_identifier(name):
            raise excs.Error(f"Invalid column name: '{name}'")
        self.name = name
        if col_type is None and computed_with is None:
            raise excs.Error(f'Column {name}: col_type is required if computed_with is not specified')

        self.value_expr: Optional['Expr'] = None
        self.compute_func: Optional[Callable] = None
        from pixeltable import exprs
        if computed_with is not None:
            expr = exprs.Expr.from_object(computed_with)
            if expr is not None:
                self.value_expr = expr.copy()
                self.col_type = self.value_expr.col_type
            else:
                # not convertible to an Expr: the only other thing we accept is a Callable
                if not isinstance(computed_with, Callable):
                    raise excs.Error(
                        f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
                        f'but it is a {type(computed_with)}')
                if col_type is None:
                    raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
                # turning the function into an Expr requires resolving column name references,
                # which has to wait until we're assigned to a Table
                self.compute_func = computed_with

        # an explicitly supplied type takes precedence over the one derived from the expression
        if col_type is not None:
            self.col_type = col_type
        assert self.col_type is not None

        self.stored = stored
        # cols with value_exprs that reference us; set by TableVersion
        self.dependent_cols: Set[Column] = set()
        self.id = col_id
        self.primary_key = primary_key

        # storage column for the values of this Column
        self.sa_col: Optional[sql.schema.Column] = None
        # storage columns for the exception string and type (computed cols only)
        self.sa_errormsg_col: Optional[sql.schema.Column] = None
        self.sa_errortype_col: Optional[sql.schema.Column] = None
        # storage column for the embeddings (indexed cols only)
        self.sa_idx_col: Optional[sql.schema.Column] = None

        from .table_version import TableVersion
        # set by the owning TableVersion
        self.tbl: Optional[TableVersion] = None

        if indexed and not self.col_type.is_image_type():
            raise excs.Error(f'Column {name}: indexed=True requires ImageType')
        self.is_indexed = indexed

    @classmethod
    def from_md(cls, col_id: int, md: schema.SchemaColumn, tbl: 'TableVersion') -> Column:
        """Construct a Column from stored metadata and attach it to ``tbl``.

        value_expr is left unset, because setting it requires TableVersion.cols to be complete.
        """
        result = cls(
            md.name, col_type=ColumnType.from_dict(md.col_type), primary_key=md.is_pk,
            stored=md.stored, indexed=md.is_indexed, col_id=col_id)
        result.tbl = tbl
        return result

    def __hash__(self) -> int:
        # identity is (owning table id, column id), consistent with __eq__
        assert self.tbl is not None
        return hash((self.tbl.id, self.id))

    def check_value_expr(self) -> None:
        """Raise Error if this is a non-stored computed column that uses a window function."""
        assert self.value_expr is not None
        is_unsupported = self.stored == False and self.is_computed and self.has_window_fn_call()
        if is_unsupported:
            raise excs.Error(
                f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
                f'\n{self.value_expr}')

    def has_window_fn_call(self) -> bool:
        """Return True if value_expr contains a FunctionCall with ``is_window_fn_call`` set."""
        if self.value_expr is None:
            return False
        from pixeltable import exprs

        def is_window_call(e) -> bool:
            return isinstance(e, exprs.FunctionCall) and e.is_window_fn_call

        window_calls = list(self.value_expr.subexprs(filter=is_window_call))
        return len(window_calls) > 0

    @property
    def is_computed(self) -> bool:
        """True if the column has either a compute function or a value expression."""
        return self.compute_func is not None or self.value_expr is not None

    @property
    def is_stored(self) -> bool:
        """True if the column is materialized in the stored table; ``stored`` must have been set."""
        assert self.stored is not None
        return self.stored

    @property
    def records_errors(self) -> bool:
        """True if this column also stores error information."""
        return self.is_stored and (self.is_computed or self.col_type.is_media_type())

    def source(self) -> None:
        """Print the source of the top-level function call of a computed column, if possible.

        Does nothing unless value_expr is a FunctionCall.
        """
        from pixeltable import exprs
        if self.value_expr is None or not isinstance(self.value_expr, exprs.FunctionCall):
            return
        self.value_expr.fn.source()

    def create_sa_cols(self) -> None:
        """(Re)create the SQLAlchemy storage columns; needed for every new table schema version."""
        assert self.is_stored
        # storage columns are always nullable (null errors are dealt with in Pixeltable directly)
        self.sa_col = sql.Column(self.storage_name(), self.col_type.to_sa_type(), nullable=True)
        needs_error_cols = self.is_computed or self.col_type.is_media_type()
        if needs_error_cols:
            self.sa_errormsg_col = sql.Column(self.errormsg_storage_name(), StringType().to_sa_type(), nullable=True)
            self.sa_errortype_col = sql.Column(self.errortype_storage_name(), StringType().to_sa_type(), nullable=True)
        if self.is_indexed:
            self.sa_idx_col = sql.Column(self.index_storage_name(), Vector(512), nullable=True)

    def storage_name(self) -> str:
        """Name of the value column in the stored table (``col_<id>``)."""
        assert self.id is not None
        assert self.is_stored
        return f'col_{self.id}'

    def errormsg_storage_name(self) -> str:
        """Name of the storage column for the error message."""
        return f'{self.storage_name()}_errormsg'

    def errortype_storage_name(self) -> str:
        """Name of the storage column for the error type."""
        return f'{self.storage_name()}_errortype'

    def index_storage_name(self) -> str:
        """Name of the storage column for the index embeddings."""
        return f'{self.storage_name()}_idx_0'

    def __str__(self) -> str:
        return f'{self.name}: {self.col_type}'

    def __eq__(self, other: object) -> bool:
        # two Columns are equal if they belong to the same table and have the same column id
        if not isinstance(other, Column):
            return False
        assert self.tbl is not None
        assert other.tbl is not None
        return self.tbl.id == other.tbl.id and self.id == other.id
200
+
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import dataclasses
4
+ import logging
5
+ from uuid import UUID
6
+
7
+ import sqlalchemy as sql
8
+
9
+ from .schema_object import SchemaObject
10
+ from pixeltable.env import Env
11
+ from pixeltable.metadata import schema
12
+
13
+
14
+ _logger = logging.getLogger('pixeltable')
15
+
16
class Dir(SchemaObject):
    """Schema object representing a directory."""
    def __init__(self, id: UUID, parent_id: UUID, name: str):
        # note the argument order expected by SchemaObject: (id, name, parent id)
        super().__init__(id, name, parent_id)

    @classmethod
    def display_name(cls) -> str:
        """Return the user-facing name of this kind of schema object."""
        return 'directory'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename/re-parent this directory and write the change to the stored Dir record.

        Args:
            new_name: the directory's new name
            new_dir_id: id of the new parent directory
        """
        super().move(new_name, new_dir_id)
        with Env.get().engine.begin() as conn:
            md_dict = dataclasses.asdict(schema.DirMd(name=new_name))
            # self._dir_id is written as the parent id; presumably super().move() set it to new_dir_id
            new_values = {schema.Dir.parent_id: self._dir_id, schema.Dir.md: md_dict}
            stmt = sql.update(schema.Dir.__table__).values(new_values).where(schema.Dir.id == self._id)
            conn.execute(stmt)
32
+
@@ -0,0 +1,33 @@
1
+ from typing import List
2
+ import dataclasses
3
+ import logging
4
+
5
+
6
+ _logger = logging.getLogger('pixeltable')
7
+
8
+ # name of the position column in a component view
9
+ POS_COLUMN_NAME = 'pos'
10
+
11
@dataclasses.dataclass
class UpdateStatus:
    """Row/exception counters and affected column names reported by a table modification."""
    # number of rows
    num_rows: int = 0
    # TODO: disambiguate what this means: # of slots computed or # of columns computed?
    num_computed_values: int = 0
    # number of exceptions
    num_excs: int = 0
    # names of the updated columns; every instance gets its own list
    updated_cols: List[str] = dataclasses.field(default_factory=list)
    # names of the columns with exceptions; every instance gets its own list
    cols_with_excs: List[str] = dataclasses.field(default_factory=list)
19
+
20
def is_valid_identifier(name: str) -> bool:
    """Return True if ``name`` is a Python identifier that doesn't start with an underscore."""
    if name.startswith('_'):
        # a leading underscore is never accepted
        return False
    return name.isidentifier()
22
+
23
def is_valid_path(path: str, empty_is_valid : bool) -> bool:
    """Return True if every '.'-separated component of ``path`` is a valid identifier.

    Args:
        path: the path to check
        empty_is_valid: the result to return for the empty path
    """
    if path == '':
        return empty_is_valid
    return all(is_valid_identifier(component) for component in path.split('.'))
31
+
32
def is_system_column_name(name: str) -> bool:
    """Return True if ``name`` is the position column name (POS_COLUMN_NAME)."""
    return POS_COLUMN_NAME == name
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Optional, List, Any, Dict, overload, Iterable
5
+ from uuid import UUID
6
+
7
+ import sqlalchemy.orm as orm
8
+
9
+ import pixeltable
10
+ import pixeltable.type_system as ts
11
+ from pixeltable import exceptions as excs
12
+ from pixeltable.env import Env
13
+ from .catalog import Catalog
14
+ from .table import Table
15
+ from .table_version import TableVersion
16
+ from .table_version_path import TableVersionPath
17
+
18
+ _logger = logging.getLogger('pixeltable')
19
+
20
class InsertableTable(Table):
    """A `Table` that allows inserting and deleting rows."""
    def __init__(self, dir_id: UUID, tbl_version: TableVersion):
        tbl_version_path = TableVersionPath(tbl_version)
        super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)

    @classmethod
    def display_name(cls) -> str:
        """Return the user-facing name of this kind of schema object."""
        return 'table'

    # MODULE-LOCAL, NOT PUBLIC
    @classmethod
    def create(
            cls, dir_id: UUID, name: str, schema: Dict[str, ts.ColumnType], primary_key: List[str],
            num_retained_versions: int, comment: str
    ) -> InsertableTable:
        """Create a new table in the store and register it with the Catalog.

        Args:
            dir_id: id of the directory the table is created in
            name: table name
            schema: maps column names to column types
            primary_key: names of the primary key columns; each must be a non-nullable schema column
            num_retained_versions: passed through to ``TableVersion.create()``
            comment: passed through to ``TableVersion.create()``

        Returns:
            the new table

        Raises:
            Error: if a primary key column is not part of the schema or is nullable
        """
        columns = cls._create_columns(schema)
        cls._verify_schema(columns)
        column_names = [col.name for col in columns]
        for pk_col in primary_key:
            if pk_col not in column_names:
                raise excs.Error(f'Primary key column {pk_col} not found in table schema')
            col = columns[column_names.index(pk_col)]
            if col.col_type.nullable:
                raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
            col.primary_key = True

        with orm.Session(Env.get().engine, future=True) as session:
            _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
            tbl = cls(dir_id, tbl_version)
            session.commit()
            # register the table only after the commit succeeded
            cat = Catalog.get()
            cat.tbl_dependents[tbl._id] = []
            cat.tbls[tbl._id] = tbl

            _logger.info(f'Created table `{name}`, id={tbl_version.id}')
            print(f'Created table `{name}`.')
            return tbl

    @overload
    def insert(self, rows: Iterable[Dict[str, Any]], /, print_stats: bool = False, fail_on_exception: bool = True): ...

    @overload
    def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...

    def insert(self, *args, **kwargs) -> Table.UpdateStatus:
        """Insert rows into table.

        To insert multiple rows at a time:

        ``insert(rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception: bool = True)``

        To insert just a single row, you can use the more convenient syntax:
        ``insert(print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``

        Args:
            rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
                names to values.
            kwargs: (if inserting a single row) keyword-argument pairs representing column names and values.
            print_stats: If ``True``, print statistics about the cost of computed columns.
            fail_on_exception:
                Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
                are handled.
                If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
                for those cases, but continue inserting rows.
                If ``True``, raise an exception that aborts the insert.

        Returns:
            execution status

        Raises:
            Error: if ``rows`` is not an iterable of dictionaries or is empty, or if a row does not match the
                table schema or contains values for computed columns

        Examples:
            Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.

            >>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])

            Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.

            >>> tbl.insert(a=1, b=1, c=1)
        """
        print_stats = kwargs.pop('print_stats', False)
        fail_on_exception = kwargs.pop('fail_on_exception', True)
        if len(args) > 0:
            # There's a positional argument; this means `rows` is expressed as a
            # list of dicts (multi-insert)
            try:
                # only iter() is guarded, so that a TypeError raised while producing the rows isn't masked
                row_iter = iter(args[0])
            except TypeError as exc:
                # report a non-iterable argument the same way as any other malformed `rows`
                raise excs.Error('rows must be a list of dictionaries') from exc
            rows = list(row_iter)
        else:
            # No positional argument; this means we're inserting a single row
            # using kwargs syntax
            rows = [kwargs]

        if len(rows) == 0:
            raise excs.Error('rows must not be empty')
        for row in rows:
            if not isinstance(row, dict):
                raise excs.Error('rows must be a list of dictionaries')
        self._validate_input_rows(rows)
        result = self.tbl_version.insert(rows, print_stats=print_stats, fail_on_exception=fail_on_exception)

        if result.num_excs == 0:
            cols_with_excs_str = ''
        else:
            cols_with_excs_str = \
                f' across {len(result.cols_with_excs)} column{"" if len(result.cols_with_excs) == 1 else "s"}'
            cols_with_excs_str += f' ({", ".join(result.cols_with_excs)})'
        msg = (
            f'Inserted {result.num_rows} row{"" if result.num_rows == 1 else "s"} '
            f'with {result.num_excs} error{"" if result.num_excs == 1 else "s"}{cols_with_excs_str}.'
        )
        print(msg)
        _logger.info(f'InsertableTable {self._name}: {msg}')
        return result

    def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
        """Verify that the input rows match the table schema.

        Each value is replaced in its row dict by the result of the column type's ``create_literal()``,
        i.e., the row dicts are modified in place.

        Raises:
            Error: if a row lacks a required column, names an unknown or computed column, or contains
                a value that ``create_literal()`` rejects with a TypeError
        """
        tbl_version = self.tbl_version_path.tbl_version
        valid_col_names = set(self.column_names())
        reqd_col_names = set(tbl_version.get_required_col_names())
        computed_col_names = set(tbl_version.get_computed_col_names())
        for row in rows:
            assert isinstance(row, dict)
            missing_col_names = reqd_col_names - set(row.keys())
            if len(missing_col_names) > 0:
                # sorted: keeps the message deterministic
                raise excs.Error(
                    f'Missing required column(s) ({", ".join(sorted(missing_col_names))}) in row {row}')

            for col_name, val in row.items():
                if col_name not in valid_col_names:
                    raise excs.Error(f'Unknown column name {col_name} in row {row}')
                if col_name in computed_col_names:
                    raise excs.Error(f'Value for computed column {col_name} in row {row}')

                # validate data
                col = self.tbl_version_path.get_column(col_name)
                try:
                    # basic sanity checks here
                    checked_val = col.col_type.create_literal(val)
                except TypeError as exc:
                    msg = str(exc)
                    if len(msg) > 0:
                        # lower-case the first character so the text reads as a continuation;
                        # the guard avoids an IndexError on an empty exception message
                        msg = msg[0].lower() + msg[1:]
                    raise excs.Error(f'Error in column {col.name}: {msg}\nRow: {row}') from exc
                # overwriting the value of an existing key is safe while iterating over items()
                row[col_name] = checked_val

    def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> Table.UpdateStatus:
        """Delete rows in this table.

        Args:
            where: a Predicate to filter rows to delete.

        Returns:
            execution status

        Raises:
            Error: if ``where`` is not a Predicate, uses nearest(), or cannot be expressed in SQL

        Examples:
            Delete all rows in a table:

            >>> tbl.delete()

            Delete all rows in a table where column `a` is greater than 5:

            >>> tbl.delete(tbl.a > 5)
        """
        from pixeltable.exprs import Predicate
        from pixeltable.plan import Planner
        if where is not None:
            if not isinstance(where, Predicate):
                raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
            analysis_info = Planner.analyze(self.tbl_version, where)
            if analysis_info.similarity_clause is not None:
                raise excs.Error('nearest() cannot be used with delete()')
            # for now we require that the updated rows can be identified via SQL, rather than via a Python filter
            if analysis_info.filter is not None:
                raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')

        return self.tbl_version.delete(where)