pixeltable 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147):
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +25 -15
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +123 -103
  14. pixeltable/catalog/table_version.py +292 -143
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +68 -27
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +39 -23
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +18 -17
  27. pixeltable/exec/expr_eval/expr_eval_node.py +29 -16
  28. pixeltable/exec/expr_eval/globals.py +33 -11
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +170 -42
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +31 -16
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +21 -15
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +214 -109
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +61 -28
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +3 -2
  101. pixeltable/io/label_studio.py +80 -71
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +10 -13
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +9 -2
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +130 -85
  129. pixeltable/utils/arrow.py +1 -7
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +44 -0
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +13 -8
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/METADATA +7 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.1.dist-info/RECORD +0 -160
  145. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.1.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/store.py CHANGED
@@ -32,6 +32,7 @@ class StoreBase:
32
32
  - v_min: version at which the row was created
33
33
  - v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
34
34
  """
35
+
35
36
  tbl_version: catalog.TableVersion
36
37
  sa_md: sql.MetaData
37
38
  sa_tbl: Optional[sql.Table]
@@ -65,8 +66,9 @@ class StoreBase:
65
66
  """Create and return system columns"""
66
67
  rowid_cols = self._create_rowid_columns()
67
68
  self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
68
- self.v_max_col = \
69
- sql.Column('v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION))
69
+ self.v_max_col = sql.Column(
70
+ 'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
71
+ )
70
72
  self._pk_cols = [*rowid_cols, self.v_min_col]
71
73
  return [*rowid_cols, self.v_min_col, self.v_max_col]
72
74
 
@@ -134,7 +136,7 @@ class StoreBase:
134
136
  return new_file_url
135
137
 
136
138
  def _move_tmp_media_files(
137
- self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
139
+ self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
138
140
  ) -> None:
139
141
  """Move tmp media files that we generated to a permanent location"""
140
142
  for c in media_cols:
@@ -143,7 +145,7 @@ class StoreBase:
143
145
  table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
144
146
 
145
147
  def _create_table_row(
146
- self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
148
+ self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
147
149
  ) -> tuple[dict[str, Any], int]:
148
150
  """Return Tuple[complete table row, # of exceptions] for insert()
149
151
  Creates a row that includes the PK columns, with the values from input_row.pk.
@@ -193,11 +195,13 @@ class StoreBase:
193
195
  added_storage_cols = [col.store_name()]
194
196
  if col.records_errors:
195
197
  # we also need to create the errormsg and errortype storage cols
196
- stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
197
- f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
198
+ stmt = sql.text(
199
+ f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
200
+ )
198
201
  conn.execute(stmt)
199
- stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
200
- f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
202
+ stmt = sql.text(
203
+ f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
204
+ )
201
205
  conn.execute(stmt)
202
206
  added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
203
207
  self.create_sa_tbl()
@@ -219,7 +223,7 @@ class StoreBase:
219
223
  exec_plan: ExecNode,
220
224
  value_expr_slot_idx: int,
221
225
  conn: sql.engine.Connection,
222
- on_error: Literal['abort', 'ignore']
226
+ on_error: Literal['abort', 'ignore'],
223
227
  ) -> int:
224
228
  """Update store column of a computed column with values produced by an execution plan
225
229
 
@@ -295,10 +299,9 @@ class StoreBase:
295
299
  update_stmt = update_stmt.where(pk_col == tmp_pk_col)
296
300
  update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
297
301
  if col.records_errors:
298
- update_stmt = update_stmt.values({
299
- col.sa_errortype_col: tmp_errortype_col,
300
- col.sa_errormsg_col: tmp_errormsg_col
301
- })
302
+ update_stmt = update_stmt.values(
303
+ {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
304
+ )
302
305
  log_explain(_logger, update_stmt, conn)
303
306
  conn.execute(update_stmt)
304
307
 
@@ -308,8 +311,13 @@ class StoreBase:
308
311
  return num_excs
309
312
 
310
313
  def insert_rows(
311
- self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None,
312
- show_progress: bool = True, rowids: Optional[Iterator[int]] = None, abort_on_exc: bool = False
314
+ self,
315
+ exec_plan: ExecNode,
316
+ conn: sql.engine.Connection,
317
+ v_min: Optional[int] = None,
318
+ show_progress: bool = True,
319
+ rowids: Optional[Iterator[int]] = None,
320
+ abort_on_exc: bool = False,
313
321
  ) -> tuple[int, int, set[int]]:
314
322
  """Insert rows into the store table and update the catalog table's md
315
323
  Returns:
@@ -347,12 +355,12 @@ class StoreBase:
347
355
 
348
356
  if show_progress:
349
357
  if progress_bar is None:
350
- warnings.simplefilter("ignore", category=TqdmWarning)
358
+ warnings.simplefilter('ignore', category=TqdmWarning)
351
359
  progress_bar = tqdm(
352
360
  desc=f'Inserting rows into `{self.tbl_version.name}`',
353
361
  unit=' rows',
354
362
  ncols=100,
355
- file=sys.stdout
363
+ file=sys.stdout,
356
364
  )
357
365
  progress_bar.update(1)
358
366
 
@@ -379,8 +387,13 @@ class StoreBase:
379
387
  return sql.and_(clause, self.base._versions_clause(versions[1:], match_on_vmin))
380
388
 
381
389
  def delete_rows(
382
- self, current_version: int, base_versions: list[Optional[int]], match_on_vmin: bool,
383
- where_clause: Optional[sql.ColumnElement[bool]], conn: sql.engine.Connection) -> int:
390
+ self,
391
+ current_version: int,
392
+ base_versions: list[Optional[int]],
393
+ match_on_vmin: bool,
394
+ where_clause: Optional[sql.ColumnElement[bool]],
395
+ conn: sql.engine.Connection,
396
+ ) -> int:
384
397
  """Mark rows as deleted that are live and were created prior to current_version.
385
398
  Also: populate the undo columns
386
399
  Args:
@@ -394,12 +407,12 @@ class StoreBase:
394
407
  """
395
408
  where_clause = sql.true() if where_clause is None else where_clause
396
409
  where_clause = sql.and_(
397
- self.v_min_col < current_version,
398
- self.v_max_col == schema.Table.MAX_VERSION,
399
- where_clause)
410
+ self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
411
+ )
400
412
  rowid_join_clause = self._rowid_join_predicate()
401
- base_versions_clause = sql.true() if len(base_versions) == 0 \
402
- else self.base._versions_clause(base_versions, match_on_vmin)
413
+ base_versions_clause = (
414
+ sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
415
+ )
403
416
  set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
404
417
  for index_info in self.tbl_version.idxs_by_name.values():
405
418
  # copy value column to undo column
@@ -450,7 +463,9 @@ class StoreView(StoreBase):
450
463
  def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
451
464
  return sql.and_(
452
465
  self.base._rowid_join_predicate(),
453
- *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())])
466
+ *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())],
467
+ )
468
+
454
469
 
455
470
  class StoreComponentView(StoreView):
456
471
  """A view that stores components of its base, as produced by a ComponentIterator
@@ -482,4 +497,5 @@ class StoreComponentView(StoreView):
482
497
  def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
483
498
  return sql.and_(
484
499
  self.base._rowid_join_predicate(),
485
- *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())])
500
+ *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())],
501
+ )
pixeltable/type_system.py CHANGED
@@ -9,17 +9,18 @@ import typing
9
9
  import urllib.parse
10
10
  import urllib.request
11
11
  from pathlib import Path
12
+
13
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
14
  from typing import Any, Iterable, Mapping, Optional, Sequence, Union
13
15
 
14
- import PIL.Image
15
16
  import av # type: ignore
16
17
  import jsonschema
17
18
  import jsonschema.protocols
18
19
  import jsonschema.validators
19
20
  import numpy as np
21
+ import PIL.Image
20
22
  import pydantic
21
23
  import sqlalchemy as sql
22
- from typing import _GenericAlias # type: ignore[attr-defined]
23
24
  from typing_extensions import _AnnotatedAlias
24
25
 
25
26
  import pixeltable.exceptions as excs
@@ -45,9 +46,11 @@ class ColumnType:
45
46
 
46
47
  @classmethod
47
48
  def supertype(
48
- cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
49
- # we need to pass this in because we can't easily append it as a class member
50
- common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
49
+ cls,
50
+ type1: 'ColumnType.Type',
51
+ type2: 'ColumnType.Type',
52
+ # we need to pass this in because we can't easily append it as a class member
53
+ common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
51
54
  ) -> Optional['ColumnType.Type']:
52
55
  if type1 == type2:
53
56
  return type1
@@ -59,23 +62,23 @@ class ColumnType:
59
62
  return t
60
63
  return None
61
64
 
62
-
63
65
  @enum.unique
64
66
  class DType(enum.Enum):
65
67
  """
66
68
  Base type used in images and arrays
67
69
  """
68
- BOOL = 0,
69
- INT8 = 1,
70
- INT16 = 2,
71
- INT32 = 3,
72
- INT64 = 4,
73
- UINT8 = 5,
74
- UINT16 = 6,
75
- UINT32 = 7,
76
- UINT64 = 8,
77
- FLOAT16 = 9,
78
- FLOAT32 = 10,
70
+
71
+ BOOL = (0,)
72
+ INT8 = (1,)
73
+ INT16 = (2,)
74
+ INT32 = (3,)
75
+ INT64 = (4,)
76
+ UINT8 = (5,)
77
+ UINT16 = (6,)
78
+ UINT32 = (7,)
79
+ UINT64 = (8,)
80
+ FLOAT16 = (9,)
81
+ FLOAT32 = (10,)
79
82
  FLOAT64 = 11
80
83
 
81
84
  scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
@@ -113,10 +116,7 @@ class ColumnType:
113
116
  return json.dumps([t.as_dict() for t in type_list])
114
117
 
115
118
  def as_dict(self) -> dict:
116
- return {
117
- '_classname': self.__class__.__name__,
118
- **self._as_dict(),
119
- }
119
+ return {'_classname': self.__class__.__name__, **self._as_dict()}
120
120
 
121
121
  def _as_dict(self) -> dict:
122
122
  return {'nullable': self.nullable}
@@ -277,10 +277,7 @@ class ColumnType:
277
277
 
278
278
  @classmethod
279
279
  def from_python_type(
280
- cls,
281
- t: Union[type, _GenericAlias],
282
- nullable_default: bool = False,
283
- allow_builtin_types: bool = True
280
+ cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
284
281
  ) -> Optional[ColumnType]:
285
282
  """
286
283
  Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -309,9 +306,7 @@ class ColumnType:
309
306
  required_args = typing.get_args(t)
310
307
  assert len(required_args) == 1
311
308
  return cls.from_python_type(
312
- required_args[0],
313
- nullable_default=False,
314
- allow_builtin_types=allow_builtin_types
309
+ required_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
315
310
  )
316
311
  elif origin is typing.Annotated:
317
312
  annotated_args = typing.get_args(t)
@@ -349,7 +344,7 @@ class ColumnType:
349
344
  cls,
350
345
  t: Union[ColumnType, type, _AnnotatedAlias],
351
346
  nullable_default: bool = False,
352
- allow_builtin_types: bool = True
347
+ allow_builtin_types: bool = True,
353
348
  ) -> ColumnType:
354
349
  """
355
350
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -415,7 +410,7 @@ class ColumnType:
415
410
 
416
411
  def _create_literal(self, val: Any) -> Any:
417
412
  """Create a literal of this type from val, including any needed conversions.
418
- val is guaranteed to be non-None"""
413
+ val is guaranteed to be non-None"""
419
414
  return val
420
415
 
421
416
  def create_literal(self, val: Any) -> Any:
@@ -484,12 +479,7 @@ class ColumnType:
484
479
 
485
480
  def to_json_schema(self) -> dict[str, Any]:
486
481
  if self.nullable:
487
- return {
488
- 'anyOf': [
489
- self._to_json_schema(),
490
- {'type': 'null'},
491
- ]
492
- }
482
+ return {'anyOf': [self._to_json_schema(), {'type': 'null'}]}
493
483
  else:
494
484
  return self._to_json_schema()
495
485
 
@@ -612,7 +602,6 @@ class TimestampType(ColumnType):
612
602
 
613
603
 
614
604
  class JsonType(ColumnType):
615
-
616
605
  json_schema: Optional[dict[str, Any]]
617
606
  __validator: Optional[jsonschema.protocols.Validator]
618
607
 
@@ -699,8 +688,7 @@ class JsonType(ColumnType):
699
688
  superschema = self.__superschema(self.json_schema, other.json_schema)
700
689
 
701
690
  return JsonType(
702
- json_schema=(None if len(superschema) == 0 else superschema),
703
- nullable=(self.nullable or other.nullable)
691
+ json_schema=(None if len(superschema) == 0 else superschema), nullable=(self.nullable or other.nullable)
704
692
  )
705
693
 
706
694
  @classmethod
@@ -755,7 +743,7 @@ class JsonType(ColumnType):
755
743
  a_type = a.get('type')
756
744
  b_type = b.get('type')
757
745
 
758
- if (a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type):
746
+ if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
759
747
  # a and b both have the same type designation, but are not identical. This can happen if
760
748
  # (for example) they have validators or other attributes that differ. In this case, we
761
749
  # generalize to {'type': t}, where t is their shared type, with no other qualifications.
@@ -793,12 +781,29 @@ class JsonType(ColumnType):
793
781
 
794
782
 
795
783
  class ArrayType(ColumnType):
796
- def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
784
+ shape: Optional[tuple[Optional[int], ...]]
785
+ pxt_dtype: Optional[ColumnType]
786
+ dtype: Optional[ColumnType.Type]
787
+
788
+ def __init__(
789
+ self,
790
+ shape: Optional[tuple[Optional[int], ...]] = None,
791
+ dtype: Optional[ColumnType] = None,
792
+ nullable: bool = False,
793
+ ):
797
794
  super().__init__(self.Type.ARRAY, nullable=nullable)
795
+ assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
796
+ assert (
797
+ dtype is None
798
+ or dtype.is_int_type()
799
+ or dtype.is_float_type()
800
+ or dtype.is_bool_type()
801
+ or dtype.is_string_type()
802
+ )
803
+
798
804
  self.shape = shape
799
- assert dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
800
- self.pxt_dtype = dtype
801
- self.dtype = dtype._type
805
+ self.pxt_dtype = dtype # we need this for copy() and __str__()
806
+ self.dtype = None if dtype is None else dtype._type
802
807
 
803
808
  def copy(self, nullable: bool) -> ColumnType:
804
809
  return ArrayType(self.shape, self.pxt_dtype, nullable=nullable)
@@ -812,41 +817,53 @@ class ArrayType(ColumnType):
812
817
  def supertype(self, other: ColumnType) -> Optional[ArrayType]:
813
818
  if not isinstance(other, ArrayType):
814
819
  return None
820
+ super_dtype = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
821
+ if super_dtype is None:
822
+ # if the dtypes are incompatible, then the supertype is a fully general array
823
+ return ArrayType(nullable=(self.nullable or other.nullable))
824
+ super_shape: Optional[tuple[Optional[int], ...]]
815
825
  if len(self.shape) != len(other.shape):
816
- return None
817
- base_type = self.Type.supertype(self.dtype, other.dtype, self.common_supertypes)
818
- if base_type is None:
819
- return None
820
- shape = [n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape)]
821
- return ArrayType(tuple(shape), self.make_type(base_type), nullable=(self.nullable or other.nullable))
826
+ super_shape = None
827
+ else:
828
+ super_shape = tuple(n1 if n1 == n2 else None for n1, n2 in zip(self.shape, other.shape))
829
+ return ArrayType(super_shape, self.make_type(super_dtype), nullable=(self.nullable or other.nullable))
822
830
 
823
831
  def _as_dict(self) -> dict:
824
832
  result = super()._as_dict()
825
- result.update(shape=list(self.shape), dtype=self.dtype.value)
833
+ shape_as_list = None if self.shape is None else list(self.shape)
834
+ dtype_value = None if self.dtype is None else self.dtype.value
835
+ result.update(shape=shape_as_list, dtype=dtype_value)
826
836
  return result
827
837
 
828
838
  def _to_base_str(self) -> str:
839
+ if self.shape is None and self.dtype is None:
840
+ return 'Array'
841
+ if self.shape is None:
842
+ return f'Array[{self.pxt_dtype}]'
843
+ assert self.dtype is not None
829
844
  return f'Array[{self.shape}, {self.pxt_dtype}]'
830
845
 
831
846
  @classmethod
832
847
  def _from_dict(cls, d: dict) -> ColumnType:
833
848
  assert 'shape' in d
834
849
  assert 'dtype' in d
835
- shape = tuple(d['shape'])
836
- dtype = cls.make_type(cls.Type(d['dtype']))
850
+ shape = None if d['shape'] is None else tuple(d['shape'])
851
+ dtype = None if d['dtype'] is None else cls.make_type(cls.Type(d['dtype']))
837
852
  return cls(shape, dtype, nullable=d['nullable'])
838
853
 
839
854
  @classmethod
840
855
  def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
841
856
  # determine our dtype
842
857
  assert isinstance(val, np.ndarray)
858
+ dtype: ColumnType
843
859
  if np.issubdtype(val.dtype, np.integer):
844
- dtype: ColumnType = IntType()
860
+ dtype = IntType()
845
861
  elif np.issubdtype(val.dtype, np.floating):
846
862
  dtype = FloatType()
847
863
  elif val.dtype == np.bool_:
848
864
  dtype = BoolType()
849
- elif val.dtype == np.str_:
865
+ elif np.issubdtype(val.dtype, np.str_):
866
+ # Note that this includes NumPy types like '<U1' -- arrays of single Unicode characters
850
867
  dtype = StringType()
851
868
  else:
852
869
  return None
@@ -855,32 +872,49 @@ class ArrayType(ColumnType):
855
872
  def is_valid_literal(self, val: np.ndarray) -> bool:
856
873
  if not isinstance(val, np.ndarray):
857
874
  return False
858
- if len(val.shape) != len(self.shape):
875
+
876
+ # If a dtype is specified, check that there's a match
877
+ if self.dtype is not None and not np.issubdtype(val.dtype, self.numpy_dtype()):
859
878
  return False
860
- # check that the shapes are compatible
861
- for n1, n2 in zip(val.shape, self.shape):
862
- if n1 is None:
863
- return False
864
- if n2 is None:
865
- # wildcard
866
- continue
867
- if n1 != n2:
879
+
880
+ # If no dtype is specified, we still need to check that the dtype is one of the supported types
881
+ if self.dtype is None and not any(
882
+ np.issubdtype(val.dtype, ndtype) for ndtype in [np.int64, np.float32, np.bool_, np.str_]
883
+ ):
884
+ return False
885
+
886
+ # If a shape is specified, check that there's a match
887
+ if self.shape is not None:
888
+ if len(val.shape) != len(self.shape):
868
889
  return False
869
- return np.issubdtype(val.dtype, self.numpy_dtype())
890
+ # check that the shapes are compatible
891
+ for n1, n2 in zip(val.shape, self.shape):
892
+ assert n1 is not None # `val` must have a concrete shape
893
+ if n2 is None:
894
+ continue # wildcard
895
+ if n1 != n2:
896
+ return False
897
+
898
+ return True
870
899
 
871
900
  def _to_json_schema(self) -> dict[str, Any]:
872
- return {
873
- 'type': 'array',
874
- 'items': self.pxt_dtype._to_json_schema(),
875
- }
901
+ return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
876
902
 
877
903
  def _validate_literal(self, val: Any) -> None:
878
904
  if not isinstance(val, np.ndarray):
879
905
  raise TypeError(f'Expected numpy.ndarray, got {val.__class__.__name__}')
880
906
  if not self.is_valid_literal(val):
881
- raise TypeError((
882
- f'Expected ndarray({self.shape}, dtype={self.numpy_dtype()}), '
883
- f'got ndarray({val.shape}, dtype={val.dtype})'))
907
+ if self.shape is not None:
908
+ raise TypeError(
909
+ f'Expected numpy.ndarray({self.shape}, dtype={self.numpy_dtype()}), '
910
+ f'got numpy.ndarray({val.shape}, dtype={val.dtype})'
911
+ )
912
+ elif self.dtype is not None:
913
+ raise TypeError(
914
+ f'Expected numpy.ndarray of dtype {self.numpy_dtype()}, got numpy.ndarray of dtype {val.dtype}'
915
+ )
916
+ else:
917
+ raise TypeError(f'Unsupported dtype for numpy.ndarray: {val.dtype}')
884
918
 
885
919
  def _create_literal(self, val: Any) -> Any:
886
920
  if isinstance(val, (list, tuple)):
@@ -892,7 +926,9 @@ class ArrayType(ColumnType):
892
926
  def to_sa_type(self) -> sql.types.TypeEngine:
893
927
  return sql.LargeBinary()
894
928
 
895
- def numpy_dtype(self) -> np.dtype:
929
+ def numpy_dtype(self) -> Optional[np.dtype]:
930
+ if self.dtype is None:
931
+ return None
896
932
  if self.dtype == self.Type.INT:
897
933
  return np.dtype(np.int64)
898
934
  if self.dtype == self.Type.FLOAT:
@@ -901,20 +937,24 @@ class ArrayType(ColumnType):
901
937
  return np.dtype(np.bool_)
902
938
  if self.dtype == self.Type.STRING:
903
939
  return np.dtype(np.str_)
904
- assert False
940
+ assert False, self.dtype
905
941
 
906
942
 
907
943
  class ImageType(ColumnType):
908
944
  def __init__(
909
- self, width: Optional[int] = None, height: Optional[int] = None, size: Optional[tuple[int, int]] = None,
910
- mode: Optional[str] = None, nullable: bool = False
945
+ self,
946
+ width: Optional[int] = None,
947
+ height: Optional[int] = None,
948
+ size: Optional[tuple[int, int]] = None,
949
+ mode: Optional[str] = None,
950
+ nullable: bool = False,
911
951
  ):
912
952
  """
913
953
  TODO: does it make sense to specify only width or height?
914
954
  """
915
955
  super().__init__(self.Type.IMAGE, nullable=nullable)
916
- assert not(width is not None and size is not None)
917
- assert not(height is not None and size is not None)
956
+ assert not (width is not None and size is not None)
957
+ assert not (height is not None and size is not None)
918
958
  if size is not None:
919
959
  self.width = size[0]
920
960
  self.height = size[1]
@@ -1104,6 +1144,7 @@ class DocumentType(ColumnType):
1104
1144
  def validate_media(self, val: Any) -> None:
1105
1145
  assert isinstance(val, str)
1106
1146
  from pixeltable.utils.documents import get_document_handle
1147
+
1107
1148
  dh = get_document_handle(val)
1108
1149
  if dh is None:
1109
1150
  raise excs.Error(f'Not a recognized document format: {val}')
@@ -1117,6 +1158,7 @@ class Required(typing.Generic[T]):
1117
1158
  Marker class to indicate that a column is non-nullable in a schema definition. This has no meaning as a type hint,
1118
1159
  and is intended only for schema declarations.
1119
1160
  """
1161
+
1120
1162
  pass
1121
1163
 
1122
1164
 
@@ -1139,6 +1181,7 @@ class _PxtType:
1139
1181
  `Image[(300, 300), 'RGB']`. The specialized forms resolve to `typing.Annotated` instances whose annotation is a
1140
1182
  `ColumnType`.
1141
1183
  """
1184
+
1142
1185
  def __init__(self):
1143
1186
  raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
1144
1187
 
@@ -1174,6 +1217,8 @@ class Array(np.ndarray, _PxtType):
1174
1217
  params = item if isinstance(item, tuple) else (item,)
1175
1218
  shape: Optional[tuple] = None
1176
1219
  dtype: Optional[ColumnType] = None
1220
+ if not any(isinstance(param, (type, _AnnotatedAlias)) for param in params):
1221
+ raise TypeError('Array type parameter must include a dtype.')
1177
1222
  for param in params:
1178
1223
  if isinstance(param, tuple):
1179
1224
  if not all(n is None or (isinstance(n, int) and n >= 1) for n in param):
@@ -1181,21 +1226,17 @@ class Array(np.ndarray, _PxtType):
1181
1226
  if shape is not None:
1182
1227
  raise TypeError(f'Duplicate Array type parameter: {param}')
1183
1228
  shape = param
1184
- elif isinstance(param, type) or isinstance(param, _AnnotatedAlias):
1229
+ elif isinstance(param, (type, _AnnotatedAlias)):
1185
1230
  if dtype is not None:
1186
1231
  raise TypeError(f'Duplicate Array type parameter: {param}')
1187
1232
  dtype = ColumnType.normalize_type(param, allow_builtin_types=False)
1188
1233
  else:
1189
1234
  raise TypeError(f'Invalid Array type parameter: {param}')
1190
- if shape is None:
1191
- raise TypeError('Array type is missing parameter: shape')
1192
- if dtype is None:
1193
- raise TypeError('Array type is missing parameter: dtype')
1194
1235
  return typing.Annotated[np.ndarray, ArrayType(shape=shape, dtype=dtype, nullable=False)]
1195
1236
 
1196
1237
  @classmethod
1197
1238
  def as_col_type(cls, nullable: bool) -> ColumnType:
1198
- raise TypeError('Array type cannot be used without specifying shape and dtype')
1239
+ return ArrayType(nullable=nullable)
1199
1240
 
1200
1241
 
1201
1242
  class Image(PIL.Image.Image, _PxtType):
@@ -1219,7 +1260,11 @@ class Image(PIL.Image.Image, _PxtType):
1219
1260
  mode: Optional[str] = None
1220
1261
  for param in params:
1221
1262
  if isinstance(param, tuple):
1222
- if len(param) != 2 or not isinstance(param[0], (int, type(None))) or not isinstance(param[1], (int, type(None))):
1263
+ if (
1264
+ len(param) != 2
1265
+ or not isinstance(param[0], (int, type(None)))
1266
+ or not isinstance(param[1], (int, type(None)))
1267
+ ):
1223
1268
  raise TypeError(f'Invalid Image type parameter: {param}')
1224
1269
  if size is not None:
1225
1270
  raise TypeError(f'Duplicate Image type parameter: {param}')
pixeltable/utils/arrow.py CHANGED
@@ -1,16 +1,10 @@
1
- import logging
1
+ import datetime
2
2
  from typing import Any, Iterator, Optional, Union
3
3
 
4
4
  import numpy as np
5
5
  import pyarrow as pa
6
- import datetime
7
6
 
8
7
  import pixeltable.type_system as ts
9
- from pixeltable.env import Env
10
-
11
- _tz_def = Env().get().default_time_zone
12
-
13
- _logger = logging.getLogger(__name__)
14
8
 
15
9
  _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
16
10
  pa.string(): ts.StringType(nullable=True),