pixeltable 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (147) hide show
  1. pixeltable/__init__.py +64 -11
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +50 -27
  5. pixeltable/catalog/column.py +27 -11
  6. pixeltable/catalog/dir.py +6 -4
  7. pixeltable/catalog/globals.py +8 -1
  8. pixeltable/catalog/insertable_table.py +22 -12
  9. pixeltable/catalog/named_function.py +10 -6
  10. pixeltable/catalog/path.py +3 -2
  11. pixeltable/catalog/path_dict.py +8 -6
  12. pixeltable/catalog/schema_object.py +2 -1
  13. pixeltable/catalog/table.py +121 -101
  14. pixeltable/catalog/table_version.py +291 -142
  15. pixeltable/catalog/table_version_path.py +8 -5
  16. pixeltable/catalog/view.py +67 -26
  17. pixeltable/dataframe.py +102 -72
  18. pixeltable/env.py +20 -21
  19. pixeltable/exec/__init__.py +2 -2
  20. pixeltable/exec/aggregation_node.py +10 -4
  21. pixeltable/exec/cache_prefetch_node.py +5 -3
  22. pixeltable/exec/component_iteration_node.py +9 -8
  23. pixeltable/exec/data_row_batch.py +21 -10
  24. pixeltable/exec/exec_context.py +10 -3
  25. pixeltable/exec/exec_node.py +23 -12
  26. pixeltable/exec/expr_eval/evaluators.py +13 -7
  27. pixeltable/exec/expr_eval/expr_eval_node.py +24 -15
  28. pixeltable/exec/expr_eval/globals.py +30 -7
  29. pixeltable/exec/expr_eval/row_buffer.py +5 -6
  30. pixeltable/exec/expr_eval/schedulers.py +151 -31
  31. pixeltable/exec/in_memory_data_node.py +8 -7
  32. pixeltable/exec/row_update_node.py +15 -5
  33. pixeltable/exec/sql_node.py +56 -27
  34. pixeltable/exprs/__init__.py +2 -2
  35. pixeltable/exprs/arithmetic_expr.py +57 -26
  36. pixeltable/exprs/array_slice.py +1 -1
  37. pixeltable/exprs/column_property_ref.py +2 -1
  38. pixeltable/exprs/column_ref.py +20 -15
  39. pixeltable/exprs/comparison.py +6 -2
  40. pixeltable/exprs/compound_predicate.py +1 -3
  41. pixeltable/exprs/data_row.py +2 -2
  42. pixeltable/exprs/expr.py +101 -72
  43. pixeltable/exprs/expr_dict.py +2 -1
  44. pixeltable/exprs/expr_set.py +3 -1
  45. pixeltable/exprs/function_call.py +39 -41
  46. pixeltable/exprs/globals.py +1 -0
  47. pixeltable/exprs/in_predicate.py +2 -2
  48. pixeltable/exprs/inline_expr.py +20 -17
  49. pixeltable/exprs/json_mapper.py +4 -2
  50. pixeltable/exprs/json_path.py +12 -18
  51. pixeltable/exprs/literal.py +5 -9
  52. pixeltable/exprs/method_ref.py +1 -0
  53. pixeltable/exprs/object_ref.py +1 -1
  54. pixeltable/exprs/row_builder.py +32 -17
  55. pixeltable/exprs/rowid_ref.py +14 -5
  56. pixeltable/exprs/similarity_expr.py +11 -6
  57. pixeltable/exprs/sql_element_cache.py +1 -1
  58. pixeltable/exprs/type_cast.py +24 -9
  59. pixeltable/ext/__init__.py +1 -0
  60. pixeltable/ext/functions/__init__.py +1 -0
  61. pixeltable/ext/functions/whisperx.py +2 -2
  62. pixeltable/ext/functions/yolox.py +11 -11
  63. pixeltable/func/aggregate_function.py +17 -13
  64. pixeltable/func/callable_function.py +6 -6
  65. pixeltable/func/expr_template_function.py +15 -14
  66. pixeltable/func/function.py +16 -16
  67. pixeltable/func/function_registry.py +11 -8
  68. pixeltable/func/globals.py +4 -2
  69. pixeltable/func/query_template_function.py +12 -13
  70. pixeltable/func/signature.py +18 -9
  71. pixeltable/func/tools.py +10 -17
  72. pixeltable/func/udf.py +106 -11
  73. pixeltable/functions/__init__.py +21 -2
  74. pixeltable/functions/anthropic.py +16 -12
  75. pixeltable/functions/fireworks.py +63 -5
  76. pixeltable/functions/gemini.py +13 -3
  77. pixeltable/functions/globals.py +18 -6
  78. pixeltable/functions/huggingface.py +20 -38
  79. pixeltable/functions/image.py +7 -3
  80. pixeltable/functions/json.py +1 -0
  81. pixeltable/functions/llama_cpp.py +1 -4
  82. pixeltable/functions/mistralai.py +31 -20
  83. pixeltable/functions/ollama.py +4 -18
  84. pixeltable/functions/openai.py +201 -108
  85. pixeltable/functions/replicate.py +11 -10
  86. pixeltable/functions/string.py +70 -7
  87. pixeltable/functions/timestamp.py +21 -8
  88. pixeltable/functions/together.py +66 -52
  89. pixeltable/functions/video.py +1 -0
  90. pixeltable/functions/vision.py +14 -11
  91. pixeltable/functions/whisper.py +2 -1
  92. pixeltable/globals.py +60 -26
  93. pixeltable/index/__init__.py +1 -1
  94. pixeltable/index/btree.py +5 -3
  95. pixeltable/index/embedding_index.py +15 -14
  96. pixeltable/io/__init__.py +1 -1
  97. pixeltable/io/external_store.py +30 -25
  98. pixeltable/io/fiftyone.py +6 -14
  99. pixeltable/io/globals.py +33 -27
  100. pixeltable/io/hf_datasets.py +2 -1
  101. pixeltable/io/label_studio.py +77 -68
  102. pixeltable/io/pandas.py +33 -9
  103. pixeltable/io/parquet.py +9 -12
  104. pixeltable/iterators/__init__.py +1 -0
  105. pixeltable/iterators/audio.py +205 -0
  106. pixeltable/iterators/document.py +19 -8
  107. pixeltable/iterators/image.py +6 -24
  108. pixeltable/iterators/string.py +3 -6
  109. pixeltable/iterators/video.py +1 -7
  110. pixeltable/metadata/__init__.py +7 -1
  111. pixeltable/metadata/converters/convert_10.py +2 -2
  112. pixeltable/metadata/converters/convert_15.py +1 -5
  113. pixeltable/metadata/converters/convert_16.py +2 -4
  114. pixeltable/metadata/converters/convert_17.py +2 -4
  115. pixeltable/metadata/converters/convert_18.py +2 -4
  116. pixeltable/metadata/converters/convert_19.py +2 -5
  117. pixeltable/metadata/converters/convert_20.py +1 -4
  118. pixeltable/metadata/converters/convert_21.py +4 -6
  119. pixeltable/metadata/converters/convert_22.py +1 -0
  120. pixeltable/metadata/converters/convert_23.py +5 -5
  121. pixeltable/metadata/converters/convert_24.py +12 -13
  122. pixeltable/metadata/converters/convert_26.py +23 -0
  123. pixeltable/metadata/converters/util.py +3 -4
  124. pixeltable/metadata/notes.py +1 -0
  125. pixeltable/metadata/schema.py +13 -2
  126. pixeltable/plan.py +173 -98
  127. pixeltable/store.py +42 -26
  128. pixeltable/type_system.py +62 -54
  129. pixeltable/utils/arrow.py +1 -2
  130. pixeltable/utils/coco.py +16 -17
  131. pixeltable/utils/code.py +1 -1
  132. pixeltable/utils/console_output.py +6 -3
  133. pixeltable/utils/description_helper.py +7 -7
  134. pixeltable/utils/documents.py +3 -1
  135. pixeltable/utils/filecache.py +12 -7
  136. pixeltable/utils/http_server.py +9 -8
  137. pixeltable/utils/media_store.py +2 -1
  138. pixeltable/utils/pytorch.py +11 -14
  139. pixeltable/utils/s3.py +1 -0
  140. pixeltable/utils/sql.py +1 -0
  141. pixeltable/utils/transactional_directory.py +2 -2
  142. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/METADATA +6 -8
  143. pixeltable-0.3.3.dist-info/RECORD +163 -0
  144. pixeltable-0.3.2.dist-info/RECORD +0 -161
  145. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/LICENSE +0 -0
  146. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/WHEEL +0 -0
  147. {pixeltable-0.3.2.dist-info → pixeltable-0.3.3.dist-info}/entry_points.txt +0 -0
pixeltable/store.py CHANGED
@@ -32,6 +32,7 @@ class StoreBase:
32
32
  - v_min: version at which the row was created
33
33
  - v_max: version at which the row was deleted (or MAX_VERSION if it's still live)
34
34
  """
35
+
35
36
  tbl_version: catalog.TableVersion
36
37
  sa_md: sql.MetaData
37
38
  sa_tbl: Optional[sql.Table]
@@ -65,8 +66,9 @@ class StoreBase:
65
66
  """Create and return system columns"""
66
67
  rowid_cols = self._create_rowid_columns()
67
68
  self.v_min_col = sql.Column('v_min', sql.BigInteger, nullable=False)
68
- self.v_max_col = \
69
- sql.Column('v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION))
69
+ self.v_max_col = sql.Column(
70
+ 'v_max', sql.BigInteger, nullable=False, server_default=str(schema.Table.MAX_VERSION)
71
+ )
70
72
  self._pk_cols = [*rowid_cols, self.v_min_col]
71
73
  return [*rowid_cols, self.v_min_col, self.v_max_col]
72
74
 
@@ -134,7 +136,7 @@ class StoreBase:
134
136
  return new_file_url
135
137
 
136
138
  def _move_tmp_media_files(
137
- self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
139
+ self, table_rows: list[dict[str, Any]], media_cols: list[catalog.Column], v_min: int
138
140
  ) -> None:
139
141
  """Move tmp media files that we generated to a permanent location"""
140
142
  for c in media_cols:
@@ -143,7 +145,7 @@ class StoreBase:
143
145
  table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
144
146
 
145
147
  def _create_table_row(
146
- self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
148
+ self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, exc_col_ids: set[int], pk: tuple[int, ...]
147
149
  ) -> tuple[dict[str, Any], int]:
148
150
  """Return Tuple[complete table row, # of exceptions] for insert()
149
151
  Creates a row that includes the PK columns, with the values from input_row.pk.
@@ -193,11 +195,13 @@ class StoreBase:
193
195
  added_storage_cols = [col.store_name()]
194
196
  if col.records_errors:
195
197
  # we also need to create the errormsg and errortype storage cols
196
- stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
197
- f'ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL')
198
+ stmt = sql.text(
199
+ f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errormsg_store_name()} VARCHAR DEFAULT NULL'
200
+ )
198
201
  conn.execute(stmt)
199
- stmt = sql.text(f'ALTER TABLE {self._storage_name()} '
200
- f'ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL')
202
+ stmt = sql.text(
203
+ f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.errortype_store_name()} VARCHAR DEFAULT NULL'
204
+ )
201
205
  conn.execute(stmt)
202
206
  added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
203
207
  self.create_sa_tbl()
@@ -219,7 +223,7 @@ class StoreBase:
219
223
  exec_plan: ExecNode,
220
224
  value_expr_slot_idx: int,
221
225
  conn: sql.engine.Connection,
222
- on_error: Literal['abort', 'ignore']
226
+ on_error: Literal['abort', 'ignore'],
223
227
  ) -> int:
224
228
  """Update store column of a computed column with values produced by an execution plan
225
229
 
@@ -295,10 +299,9 @@ class StoreBase:
295
299
  update_stmt = update_stmt.where(pk_col == tmp_pk_col)
296
300
  update_stmt = update_stmt.values({col.sa_col: tmp_val_col})
297
301
  if col.records_errors:
298
- update_stmt = update_stmt.values({
299
- col.sa_errortype_col: tmp_errortype_col,
300
- col.sa_errormsg_col: tmp_errormsg_col
301
- })
302
+ update_stmt = update_stmt.values(
303
+ {col.sa_errortype_col: tmp_errortype_col, col.sa_errormsg_col: tmp_errormsg_col}
304
+ )
302
305
  log_explain(_logger, update_stmt, conn)
303
306
  conn.execute(update_stmt)
304
307
 
@@ -308,8 +311,13 @@ class StoreBase:
308
311
  return num_excs
309
312
 
310
313
  def insert_rows(
311
- self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None,
312
- show_progress: bool = True, rowids: Optional[Iterator[int]] = None, abort_on_exc: bool = False
314
+ self,
315
+ exec_plan: ExecNode,
316
+ conn: sql.engine.Connection,
317
+ v_min: Optional[int] = None,
318
+ show_progress: bool = True,
319
+ rowids: Optional[Iterator[int]] = None,
320
+ abort_on_exc: bool = False,
313
321
  ) -> tuple[int, int, set[int]]:
314
322
  """Insert rows into the store table and update the catalog table's md
315
323
  Returns:
@@ -347,12 +355,12 @@ class StoreBase:
347
355
 
348
356
  if show_progress:
349
357
  if progress_bar is None:
350
- warnings.simplefilter("ignore", category=TqdmWarning)
358
+ warnings.simplefilter('ignore', category=TqdmWarning)
351
359
  progress_bar = tqdm(
352
360
  desc=f'Inserting rows into `{self.tbl_version.name}`',
353
361
  unit=' rows',
354
362
  ncols=100,
355
- file=sys.stdout
363
+ file=sys.stdout,
356
364
  )
357
365
  progress_bar.update(1)
358
366
 
@@ -379,8 +387,13 @@ class StoreBase:
379
387
  return sql.and_(clause, self.base._versions_clause(versions[1:], match_on_vmin))
380
388
 
381
389
  def delete_rows(
382
- self, current_version: int, base_versions: list[Optional[int]], match_on_vmin: bool,
383
- where_clause: Optional[sql.ColumnElement[bool]], conn: sql.engine.Connection) -> int:
390
+ self,
391
+ current_version: int,
392
+ base_versions: list[Optional[int]],
393
+ match_on_vmin: bool,
394
+ where_clause: Optional[sql.ColumnElement[bool]],
395
+ conn: sql.engine.Connection,
396
+ ) -> int:
384
397
  """Mark rows as deleted that are live and were created prior to current_version.
385
398
  Also: populate the undo columns
386
399
  Args:
@@ -394,12 +407,12 @@ class StoreBase:
394
407
  """
395
408
  where_clause = sql.true() if where_clause is None else where_clause
396
409
  where_clause = sql.and_(
397
- self.v_min_col < current_version,
398
- self.v_max_col == schema.Table.MAX_VERSION,
399
- where_clause)
410
+ self.v_min_col < current_version, self.v_max_col == schema.Table.MAX_VERSION, where_clause
411
+ )
400
412
  rowid_join_clause = self._rowid_join_predicate()
401
- base_versions_clause = sql.true() if len(base_versions) == 0 \
402
- else self.base._versions_clause(base_versions, match_on_vmin)
413
+ base_versions_clause = (
414
+ sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
415
+ )
403
416
  set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
404
417
  for index_info in self.tbl_version.idxs_by_name.values():
405
418
  # copy value column to undo column
@@ -450,7 +463,9 @@ class StoreView(StoreBase):
450
463
  def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
451
464
  return sql.and_(
452
465
  self.base._rowid_join_predicate(),
453
- *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())])
466
+ *[c1 == c2 for c1, c2 in zip(self.rowid_columns(), self.base.rowid_columns())],
467
+ )
468
+
454
469
 
455
470
  class StoreComponentView(StoreView):
456
471
  """A view that stores components of its base, as produced by a ComponentIterator
@@ -482,4 +497,5 @@ class StoreComponentView(StoreView):
482
497
  def _rowid_join_predicate(self) -> sql.ColumnElement[bool]:
483
498
  return sql.and_(
484
499
  self.base._rowid_join_predicate(),
485
- *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())])
500
+ *[c1 == c2 for c1, c2 in zip(self.rowid_columns()[:-1], self.base.rowid_columns())],
501
+ )
pixeltable/type_system.py CHANGED
@@ -9,17 +9,18 @@ import typing
9
9
  import urllib.parse
10
10
  import urllib.request
11
11
  from pathlib import Path
12
+
13
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
14
  from typing import Any, Iterable, Mapping, Optional, Sequence, Union
13
15
 
14
- import PIL.Image
15
16
  import av # type: ignore
16
17
  import jsonschema
17
18
  import jsonschema.protocols
18
19
  import jsonschema.validators
19
20
  import numpy as np
21
+ import PIL.Image
20
22
  import pydantic
21
23
  import sqlalchemy as sql
22
- from typing import _GenericAlias # type: ignore[attr-defined]
23
24
  from typing_extensions import _AnnotatedAlias
24
25
 
25
26
  import pixeltable.exceptions as excs
@@ -45,9 +46,11 @@ class ColumnType:
45
46
 
46
47
  @classmethod
47
48
  def supertype(
48
- cls, type1: 'ColumnType.Type', type2: 'ColumnType.Type',
49
- # we need to pass this in because we can't easily append it as a class member
50
- common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type']
49
+ cls,
50
+ type1: 'ColumnType.Type',
51
+ type2: 'ColumnType.Type',
52
+ # we need to pass this in because we can't easily append it as a class member
53
+ common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
51
54
  ) -> Optional['ColumnType.Type']:
52
55
  if type1 == type2:
53
56
  return type1
@@ -59,23 +62,23 @@ class ColumnType:
59
62
  return t
60
63
  return None
61
64
 
62
-
63
65
  @enum.unique
64
66
  class DType(enum.Enum):
65
67
  """
66
68
  Base type used in images and arrays
67
69
  """
68
- BOOL = 0,
69
- INT8 = 1,
70
- INT16 = 2,
71
- INT32 = 3,
72
- INT64 = 4,
73
- UINT8 = 5,
74
- UINT16 = 6,
75
- UINT32 = 7,
76
- UINT64 = 8,
77
- FLOAT16 = 9,
78
- FLOAT32 = 10,
70
+
71
+ BOOL = (0,)
72
+ INT8 = (1,)
73
+ INT16 = (2,)
74
+ INT32 = (3,)
75
+ INT64 = (4,)
76
+ UINT8 = (5,)
77
+ UINT16 = (6,)
78
+ UINT32 = (7,)
79
+ UINT64 = (8,)
80
+ FLOAT16 = (9,)
81
+ FLOAT32 = (10,)
79
82
  FLOAT64 = 11
80
83
 
81
84
  scalar_types = {Type.STRING, Type.INT, Type.FLOAT, Type.BOOL, Type.TIMESTAMP}
@@ -113,10 +116,7 @@ class ColumnType:
113
116
  return json.dumps([t.as_dict() for t in type_list])
114
117
 
115
118
  def as_dict(self) -> dict:
116
- return {
117
- '_classname': self.__class__.__name__,
118
- **self._as_dict(),
119
- }
119
+ return {'_classname': self.__class__.__name__, **self._as_dict()}
120
120
 
121
121
  def _as_dict(self) -> dict:
122
122
  return {'nullable': self.nullable}
@@ -277,10 +277,7 @@ class ColumnType:
277
277
 
278
278
  @classmethod
279
279
  def from_python_type(
280
- cls,
281
- t: Union[type, _GenericAlias],
282
- nullable_default: bool = False,
283
- allow_builtin_types: bool = True
280
+ cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
284
281
  ) -> Optional[ColumnType]:
285
282
  """
286
283
  Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -309,9 +306,7 @@ class ColumnType:
309
306
  required_args = typing.get_args(t)
310
307
  assert len(required_args) == 1
311
308
  return cls.from_python_type(
312
- required_args[0],
313
- nullable_default=False,
314
- allow_builtin_types=allow_builtin_types
309
+ required_args[0], nullable_default=False, allow_builtin_types=allow_builtin_types
315
310
  )
316
311
  elif origin is typing.Annotated:
317
312
  annotated_args = typing.get_args(t)
@@ -349,7 +344,7 @@ class ColumnType:
349
344
  cls,
350
345
  t: Union[ColumnType, type, _AnnotatedAlias],
351
346
  nullable_default: bool = False,
352
- allow_builtin_types: bool = True
347
+ allow_builtin_types: bool = True,
353
348
  ) -> ColumnType:
354
349
  """
355
350
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -415,7 +410,7 @@ class ColumnType:
415
410
 
416
411
  def _create_literal(self, val: Any) -> Any:
417
412
  """Create a literal of this type from val, including any needed conversions.
418
- val is guaranteed to be non-None"""
413
+ val is guaranteed to be non-None"""
419
414
  return val
420
415
 
421
416
  def create_literal(self, val: Any) -> Any:
@@ -484,12 +479,7 @@ class ColumnType:
484
479
 
485
480
  def to_json_schema(self) -> dict[str, Any]:
486
481
  if self.nullable:
487
- return {
488
- 'anyOf': [
489
- self._to_json_schema(),
490
- {'type': 'null'},
491
- ]
492
- }
482
+ return {'anyOf': [self._to_json_schema(), {'type': 'null'}]}
493
483
  else:
494
484
  return self._to_json_schema()
495
485
 
@@ -612,7 +602,6 @@ class TimestampType(ColumnType):
612
602
 
613
603
 
614
604
  class JsonType(ColumnType):
615
-
616
605
  json_schema: Optional[dict[str, Any]]
617
606
  __validator: Optional[jsonschema.protocols.Validator]
618
607
 
@@ -699,8 +688,7 @@ class JsonType(ColumnType):
699
688
  superschema = self.__superschema(self.json_schema, other.json_schema)
700
689
 
701
690
  return JsonType(
702
- json_schema=(None if len(superschema) == 0 else superschema),
703
- nullable=(self.nullable or other.nullable)
691
+ json_schema=(None if len(superschema) == 0 else superschema), nullable=(self.nullable or other.nullable)
704
692
  )
705
693
 
706
694
  @classmethod
@@ -755,7 +743,7 @@ class JsonType(ColumnType):
755
743
  a_type = a.get('type')
756
744
  b_type = b.get('type')
757
745
 
758
- if (a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type):
746
+ if a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type:
759
747
  # a and b both have the same type designation, but are not identical. This can happen if
760
748
  # (for example) they have validators or other attributes that differ. In this case, we
761
749
  # generalize to {'type': t}, where t is their shared type, with no other qualifications.
@@ -793,15 +781,25 @@ class JsonType(ColumnType):
793
781
 
794
782
 
795
783
  class ArrayType(ColumnType):
796
-
797
784
  shape: Optional[tuple[Optional[int], ...]]
798
785
  pxt_dtype: Optional[ColumnType]
799
786
  dtype: Optional[ColumnType.Type]
800
787
 
801
- def __init__(self, shape: Optional[tuple[Optional[int], ...]] = None, dtype: Optional[ColumnType] = None, nullable: bool = False):
788
+ def __init__(
789
+ self,
790
+ shape: Optional[tuple[Optional[int], ...]] = None,
791
+ dtype: Optional[ColumnType] = None,
792
+ nullable: bool = False,
793
+ ):
802
794
  super().__init__(self.Type.ARRAY, nullable=nullable)
803
795
  assert shape is None or dtype is not None, (shape, dtype) # cannot specify a shape without a dtype
804
- assert dtype is None or dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type()
796
+ assert (
797
+ dtype is None
798
+ or dtype.is_int_type()
799
+ or dtype.is_float_type()
800
+ or dtype.is_bool_type()
801
+ or dtype.is_string_type()
802
+ )
805
803
 
806
804
  self.shape = shape
807
805
  self.pxt_dtype = dtype # we need this for copy() and __str__()
@@ -857,13 +855,15 @@ class ArrayType(ColumnType):
857
855
  def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
858
856
  # determine our dtype
859
857
  assert isinstance(val, np.ndarray)
858
+ dtype: ColumnType
860
859
  if np.issubdtype(val.dtype, np.integer):
861
- dtype: ColumnType = IntType()
860
+ dtype = IntType()
862
861
  elif np.issubdtype(val.dtype, np.floating):
863
862
  dtype = FloatType()
864
863
  elif val.dtype == np.bool_:
865
864
  dtype = BoolType()
866
- elif val.dtype == np.str_:
865
+ elif np.issubdtype(val.dtype, np.str_):
866
+ # Note that this includes NumPy types like '<U1' -- arrays of single Unicode characters
867
867
  dtype = StringType()
868
868
  else:
869
869
  return None
@@ -898,10 +898,7 @@ class ArrayType(ColumnType):
898
898
  return True
899
899
 
900
900
  def _to_json_schema(self) -> dict[str, Any]:
901
- return {
902
- 'type': 'array',
903
- 'items': self.pxt_dtype._to_json_schema(),
904
- }
901
+ return {'type': 'array', 'items': self.pxt_dtype._to_json_schema()}
905
902
 
906
903
  def _validate_literal(self, val: Any) -> None:
907
904
  if not isinstance(val, np.ndarray):
@@ -945,15 +942,19 @@ class ArrayType(ColumnType):
945
942
 
946
943
  class ImageType(ColumnType):
947
944
  def __init__(
948
- self, width: Optional[int] = None, height: Optional[int] = None, size: Optional[tuple[int, int]] = None,
949
- mode: Optional[str] = None, nullable: bool = False
945
+ self,
946
+ width: Optional[int] = None,
947
+ height: Optional[int] = None,
948
+ size: Optional[tuple[int, int]] = None,
949
+ mode: Optional[str] = None,
950
+ nullable: bool = False,
950
951
  ):
951
952
  """
952
953
  TODO: does it make sense to specify only width or height?
953
954
  """
954
955
  super().__init__(self.Type.IMAGE, nullable=nullable)
955
- assert not(width is not None and size is not None)
956
- assert not(height is not None and size is not None)
956
+ assert not (width is not None and size is not None)
957
+ assert not (height is not None and size is not None)
957
958
  if size is not None:
958
959
  self.width = size[0]
959
960
  self.height = size[1]
@@ -1143,6 +1144,7 @@ class DocumentType(ColumnType):
1143
1144
  def validate_media(self, val: Any) -> None:
1144
1145
  assert isinstance(val, str)
1145
1146
  from pixeltable.utils.documents import get_document_handle
1147
+
1146
1148
  dh = get_document_handle(val)
1147
1149
  if dh is None:
1148
1150
  raise excs.Error(f'Not a recognized document format: {val}')
@@ -1156,6 +1158,7 @@ class Required(typing.Generic[T]):
1156
1158
  Marker class to indicate that a column is non-nullable in a schema definition. This has no meaning as a type hint,
1157
1159
  and is intended only for schema declarations.
1158
1160
  """
1161
+
1159
1162
  pass
1160
1163
 
1161
1164
 
@@ -1178,6 +1181,7 @@ class _PxtType:
1178
1181
  `Image[(300, 300), 'RGB']`. The specialized forms resolve to `typing.Annotated` instances whose annotation is a
1179
1182
  `ColumnType`.
1180
1183
  """
1184
+
1181
1185
  def __init__(self):
1182
1186
  raise TypeError(f'Type `{type(self)}` cannot be instantiated.')
1183
1187
 
@@ -1256,7 +1260,11 @@ class Image(PIL.Image.Image, _PxtType):
1256
1260
  mode: Optional[str] = None
1257
1261
  for param in params:
1258
1262
  if isinstance(param, tuple):
1259
- if len(param) != 2 or not isinstance(param[0], (int, type(None))) or not isinstance(param[1], (int, type(None))):
1263
+ if (
1264
+ len(param) != 2
1265
+ or not isinstance(param[0], (int, type(None)))
1266
+ or not isinstance(param[1], (int, type(None)))
1267
+ ):
1260
1268
  raise TypeError(f'Invalid Image type parameter: {param}')
1261
1269
  if size is not None:
1262
1270
  raise TypeError(f'Duplicate Image type parameter: {param}')
pixeltable/utils/arrow.py CHANGED
@@ -1,12 +1,11 @@
1
+ import datetime
1
2
  from typing import Any, Iterator, Optional, Union
2
3
 
3
4
  import numpy as np
4
5
  import pyarrow as pa
5
- import datetime
6
6
 
7
7
  import pixeltable.type_system as ts
8
8
 
9
-
10
9
  _pa_to_pt: dict[pa.DataType, ts.ColumnType] = {
11
10
  pa.string(): ts.StringType(nullable=True),
12
11
  pa.bool_(): ts.BoolType(nullable=True),
pixeltable/utils/coco.py CHANGED
@@ -22,6 +22,7 @@ Required format:
22
22
  }
23
23
  """
24
24
 
25
+
25
26
  def _verify_input_dict(input_dict: dict[str, Any]) -> None:
26
27
  """Verify that input_dict is a valid input dict for write_coco_dataset()"""
27
28
  if not isinstance(input_dict, dict):
@@ -30,7 +31,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
30
31
  raise excs.Error(f'Missing key "image" in input dict: {input_dict}{format_msg}')
31
32
  if not isinstance(input_dict['image'], PIL.Image.Image):
32
33
  raise excs.Error(f'Value for "image" is not a PIL.Image.Image: {input_dict}{format_msg}')
33
- if 'annotations' not in input_dict:
34
+ if 'annotations' not in input_dict:
34
35
  raise excs.Error(f'Missing key "annotations" in input dict: {input_dict}{format_msg}')
35
36
  if not isinstance(input_dict['annotations'], list):
36
37
  raise excs.Error(f'Value for "annotations" is not a list: {input_dict}{format_msg}')
@@ -48,6 +49,7 @@ def _verify_input_dict(input_dict: dict[str, Any]) -> None:
48
49
  if not isinstance(annotation['category'], (str, int)):
49
50
  raise excs.Error(f'Value for "category" is not a str or int: {annotation}{format_msg}')
50
51
 
52
+
51
53
  def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
52
54
  """Export a DataFrame result set as a COCO dataset in dest_path and return the path of the data.json file."""
53
55
  # TODO: validate schema
@@ -96,12 +98,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
96
98
  img_path = images_dir / f'{img_id}.jpg'
97
99
  img.save(img_path)
98
100
 
99
- images.append({
100
- 'id': img_id,
101
- 'file_name': str(img_path),
102
- 'width': img.width,
103
- 'height': img.height,
104
- })
101
+ images.append({'id': img_id, 'file_name': str(img_path), 'width': img.width, 'height': img.height})
105
102
 
106
103
  # create annotation records for this image
107
104
  for annotation in input_dict['annotations']:
@@ -109,15 +106,17 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
109
106
  x, y, w, h = annotation['bbox']
110
107
  category = annotation['category']
111
108
  categories.add(category)
112
- annotations.append({
113
- 'id': ann_id,
114
- 'image_id': img_id,
115
- # we use the category name here and fix it up at the end, when we have assigned category ids
116
- 'category_id': category,
117
- 'bbox': annotation['bbox'],
118
- 'area': w * h,
119
- 'iscrowd': 0,
120
- })
109
+ annotations.append(
110
+ {
111
+ 'id': ann_id,
112
+ 'image_id': img_id,
113
+ # we use the category name here and fix it up at the end, when we have assigned category ids
114
+ 'category_id': category,
115
+ 'bbox': annotation['bbox'],
116
+ 'area': w * h,
117
+ 'iscrowd': 0,
118
+ }
119
+ )
121
120
 
122
121
  # replace category names with ids
123
122
  category_ids = {category: id for id, category in enumerate(sorted(list(categories)))}
@@ -226,5 +225,5 @@ COCO_2017_CATEGORIES = {
226
225
  87: 'scissors',
227
226
  88: 'teddy bear',
228
227
  89: 'hair drier',
229
- 90: 'toothbrush'
228
+ 90: 'toothbrush',
230
229
  }
pixeltable/utils/code.py CHANGED
@@ -3,9 +3,9 @@ from typing import Optional
3
3
 
4
4
  from pixeltable.func import Function
5
5
 
6
-
7
6
  # Utilities related to the organization of the Pixeltable codebase.
8
7
 
8
+
9
9
  def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> list[str]:
10
10
  """
11
11
  Returns a list of all functions and submodules that are local to the specified module and are
@@ -1,5 +1,6 @@
1
1
  import logging
2
2
 
3
+
3
4
  def map_level(verbosity: int) -> int:
4
5
  """
5
6
  Map verbosity level to logging level.
@@ -19,6 +20,7 @@ def map_level(verbosity: int) -> int:
19
20
  return logging.DEBUG
20
21
  return logging.INFO
21
22
 
23
+
22
24
  class ConsoleOutputHandler(logging.StreamHandler):
23
25
  def __init__(self, stream):
24
26
  super().__init__(stream)
@@ -29,13 +31,14 @@ class ConsoleOutputHandler(logging.StreamHandler):
29
31
  else:
30
32
  self.stream.write(record.msg + '\n')
31
33
 
34
+
32
35
  class ConsoleMessageFilter(logging.Filter):
33
36
  def filter(self, record: logging.LogRecord) -> bool:
34
37
  if hasattr(record, 'user_visible') and record.user_visible:
35
38
  return True
36
39
  return False
37
40
 
38
- class ConsoleLogger(logging.LoggerAdapter):
39
- def __init__(self, logger:logging.Logger):
40
- super().__init__(logger, extra={'user_visible' : True})
41
41
 
42
+ class ConsoleLogger(logging.LoggerAdapter):
43
+ def __init__(self, logger: logging.Logger):
44
+ super().__init__(logger, extra={'user_visible': True})
@@ -25,6 +25,7 @@ class DescriptionHelper:
25
25
  DescriptionHelper can convert a list of descriptors into either HTML or plaintext and do something reasonable
26
26
  in each case.
27
27
  """
28
+
28
29
  __descriptors: list[_Descriptor]
29
30
 
30
31
  def __init__(self) -> None:
@@ -69,18 +70,17 @@ class DescriptionHelper:
69
70
  return (
70
71
  # Render the string as a single-cell DataFrame. This will ensure a consistent style of output in
71
72
  # cases where strings appear alongside DataFrames in the same DescriptionHelper.
72
- pd.DataFrame([descriptor.body]).style
73
- .set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
74
- .hide(axis='index').hide(axis='columns')
73
+ pd.DataFrame([descriptor.body])
74
+ .style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
75
+ .hide(axis='index')
76
+ .hide(axis='columns')
75
77
  )
76
78
  else:
77
79
  styler = descriptor.styler
78
80
  if styler is None:
79
81
  styler = descriptor.body.style
80
- styler = (
81
- styler
82
- .set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
83
- .set_table_styles([dict(selector='th', props=[('text-align', 'left')])])
82
+ styler = styler.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'}).set_table_styles(
83
+ [dict(selector='th', props=[('text-align', 'left')])]
84
84
  )
85
85
  if not descriptor.show_header:
86
86
  styler = styler.hide(axis='columns')
@@ -83,6 +83,7 @@ def get_xml_handle(path: str) -> Optional[bs4.BeautifulSoup]:
83
83
  def get_markdown_handle(path: str) -> Optional[dict]:
84
84
  Env.get().require_package('mistune', [3, 0])
85
85
  import mistune
86
+
86
87
  try:
87
88
  with open(path, encoding='utf8') as file:
88
89
  text = file.read()
@@ -91,9 +92,10 @@ def get_markdown_handle(path: str) -> Optional[dict]:
91
92
  except Exception:
92
93
  return None
93
94
 
95
+
94
96
  def get_txt(path: str) -> Optional[str]:
95
97
  try:
96
- with open(path, "r") as f:
98
+ with open(path, 'r') as f:
97
99
  doc = f.read()
98
100
  return doc if doc != '' else None
99
101
  except Exception: