pixeltable 0.2.28__py3-none-any.whl → 0.2.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/dir.py +6 -0
- pixeltable/catalog/globals.py +13 -0
- pixeltable/catalog/named_function.py +4 -0
- pixeltable/catalog/path_dict.py +37 -11
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +22 -5
- pixeltable/catalog/table_version.py +22 -8
- pixeltable/dataframe.py +201 -3
- pixeltable/env.py +9 -3
- pixeltable/exec/expr_eval_node.py +1 -1
- pixeltable/exec/sql_node.py +2 -2
- pixeltable/exprs/function_call.py +134 -24
- pixeltable/exprs/inline_expr.py +22 -2
- pixeltable/exprs/row_builder.py +1 -1
- pixeltable/exprs/similarity_expr.py +9 -2
- pixeltable/func/aggregate_function.py +148 -68
- pixeltable/func/callable_function.py +49 -13
- pixeltable/func/expr_template_function.py +55 -24
- pixeltable/func/function.py +183 -22
- pixeltable/func/function_registry.py +2 -1
- pixeltable/func/query_template_function.py +11 -6
- pixeltable/func/signature.py +64 -7
- pixeltable/func/udf.py +57 -35
- pixeltable/functions/globals.py +54 -34
- pixeltable/functions/json.py +3 -8
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/video.py +2 -8
- pixeltable/functions/vision.py +1 -1
- pixeltable/globals.py +218 -59
- pixeltable/index/embedding_index.py +44 -24
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_16.py +2 -1
- pixeltable/metadata/converters/convert_17.py +2 -1
- pixeltable/metadata/converters/convert_23.py +35 -0
- pixeltable/metadata/converters/convert_24.py +47 -0
- pixeltable/metadata/converters/util.py +4 -2
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +1 -0
- pixeltable/tool/create_test_db_dump.py +11 -0
- pixeltable/tool/doc_plugins/griffe.py +4 -3
- pixeltable/type_system.py +180 -45
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.29.dist-info}/METADATA +3 -2
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.29.dist-info}/RECORD +49 -47
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.29.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.29.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.29.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from pixeltable.metadata import register_converter
|
|
7
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@register_converter(version=24)
|
|
11
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
12
|
+
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
16
|
+
from pixeltable import func
|
|
17
|
+
from pixeltable.func.globals import resolve_symbol
|
|
18
|
+
|
|
19
|
+
if (isinstance(v, dict) and
|
|
20
|
+
'_classpath' in v and
|
|
21
|
+
v['_classpath'] in ['pixeltable.func.callable_function.CallableFunction',
|
|
22
|
+
'pixeltable.func.aggregate_function.AggregateFunction',
|
|
23
|
+
'pixeltable.func.expr_template_function.ExprTemplateFunction']):
|
|
24
|
+
if 'path' in v:
|
|
25
|
+
assert 'signature' not in v
|
|
26
|
+
f = resolve_symbol(v['path'])
|
|
27
|
+
assert isinstance(f, func.Function)
|
|
28
|
+
v['signature'] = f.signatures[0].as_dict()
|
|
29
|
+
return k, v
|
|
30
|
+
|
|
31
|
+
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'FunctionCall':
|
|
32
|
+
# Correct an older serialization mechanism where Expr elements of FunctionCall args and
|
|
33
|
+
# kwargs were indicated with idx == -1 rather than None. This was fixed for InlineList
|
|
34
|
+
# and InlineDict back in convert_20, but not for FunctionCall.
|
|
35
|
+
assert 'args' in v and isinstance(v['args'], list)
|
|
36
|
+
assert 'kwargs' in v and isinstance(v['kwargs'], dict)
|
|
37
|
+
v['args'] = [
|
|
38
|
+
(None, arg) if idx == -1 else (idx, arg)
|
|
39
|
+
for idx, arg in v['args']
|
|
40
|
+
]
|
|
41
|
+
v['kwargs'] = {
|
|
42
|
+
k: (None, arg) if idx == -1 else (idx, arg)
|
|
43
|
+
for k, (idx, arg) in v['kwargs'].items()
|
|
44
|
+
}
|
|
45
|
+
return k, v
|
|
46
|
+
|
|
47
|
+
return None
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import logging
|
|
3
3
|
from typing import Any, Callable, Optional
|
|
4
|
+
from uuid import UUID
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
@@ -11,7 +12,7 @@ __logger = logging.getLogger('pixeltable')
|
|
|
11
12
|
|
|
12
13
|
def convert_table_md(
|
|
13
14
|
engine: sql.engine.Engine,
|
|
14
|
-
table_md_updater: Optional[Callable[[dict], None]] = None,
|
|
15
|
+
table_md_updater: Optional[Callable[[dict, UUID], None]] = None,
|
|
15
16
|
column_md_updater: Optional[Callable[[dict], None]] = None,
|
|
16
17
|
external_store_md_updater: Optional[Callable[[dict], None]] = None,
|
|
17
18
|
substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None
|
|
@@ -22,6 +23,7 @@ def convert_table_md(
|
|
|
22
23
|
Args:
|
|
23
24
|
engine: The SQLAlchemy engine.
|
|
24
25
|
table_md_updater: A function that updates schema.TableMd dicts in place.
|
|
26
|
+
It takes two arguments: the metadata dict (new values) and the table id.
|
|
25
27
|
column_md_updater: A function that updates schema.ColumnMd dicts in place.
|
|
26
28
|
external_store_md_updater: A function that updates the external store metadata in place.
|
|
27
29
|
substitution_fn: A function that substitutes metadata values. If specified, all metadata will be traversed
|
|
@@ -37,7 +39,7 @@ def convert_table_md(
|
|
|
37
39
|
assert isinstance(table_md, dict)
|
|
38
40
|
updated_table_md = copy.deepcopy(table_md)
|
|
39
41
|
if table_md_updater is not None:
|
|
40
|
-
table_md_updater(updated_table_md)
|
|
42
|
+
table_md_updater(updated_table_md, id)
|
|
41
43
|
if column_md_updater is not None:
|
|
42
44
|
__update_column_md(updated_table_md, column_md_updater)
|
|
43
45
|
if external_store_md_updater is not None:
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
25: 'Functions with multiple signatures',
|
|
6
|
+
24: 'Added TableMd/IndexMd.indexed_col_tbl_id',
|
|
5
7
|
23: 'DataFrame.from_clause',
|
|
6
8
|
22: 'TableMd/ColumnMd.media_validation',
|
|
7
9
|
21: 'Separate InlineArray and InlineList',
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -112,6 +112,7 @@ class IndexMd:
|
|
|
112
112
|
"""
|
|
113
113
|
id: int
|
|
114
114
|
name: str
|
|
115
|
+
indexed_col_tbl_id: str # UUID of the table (as string) that contains column being indexed
|
|
115
116
|
indexed_col_id: int # column being indexed
|
|
116
117
|
index_val_col_id: int # column holding the values to be indexed
|
|
117
118
|
index_val_undo_col_id: int # column holding index values for deleted rows
|
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
import pathlib
|
|
6
6
|
import subprocess
|
|
7
7
|
import sys
|
|
8
|
+
import time
|
|
8
9
|
from typing import Any
|
|
9
10
|
from zoneinfo import ZoneInfo
|
|
10
11
|
|
|
@@ -65,6 +66,9 @@ class Dumper:
|
|
|
65
66
|
stdout=dump,
|
|
66
67
|
check=True
|
|
67
68
|
)
|
|
69
|
+
if pg_dump_process.poll() != 0:
|
|
70
|
+
# wait for a 2s before checking again & raising error
|
|
71
|
+
time.sleep(2)
|
|
68
72
|
if pg_dump_process.poll() != 0:
|
|
69
73
|
raise RuntimeError(f'pg_dump failed with return code {pg_dump_process.returncode}')
|
|
70
74
|
info_file = self.output_dir / f'pixeltable-v{md_version:03d}-test-info.toml'
|
|
@@ -275,6 +279,13 @@ class Dumper:
|
|
|
275
279
|
string_embed=pxt.functions.huggingface.clip_text.using(model_id='openai/clip-vit-base-patch32')
|
|
276
280
|
)
|
|
277
281
|
|
|
282
|
+
if t.get_metadata()['is_view']:
|
|
283
|
+
# Add an embedding index to the view that is on a column in the base table
|
|
284
|
+
t.add_embedding_index(
|
|
285
|
+
'base_table_function_call',
|
|
286
|
+
string_embed=pxt.functions.huggingface.clip_text.using(model_id='openai/clip-vit-base-patch32')
|
|
287
|
+
)
|
|
288
|
+
|
|
278
289
|
# query()
|
|
279
290
|
@t.query
|
|
280
291
|
def q1(i: int):
|
|
@@ -38,13 +38,14 @@ class PxtGriffeExtension(Extension):
|
|
|
38
38
|
warnings.simplefilter("ignore")
|
|
39
39
|
udf = griffe.dynamic_import(func.path)
|
|
40
40
|
assert isinstance(udf, pxt.Function)
|
|
41
|
+
# TODO: Find a way to support multiple signatures?
|
|
41
42
|
# Convert the return type to a Pixeltable type reference
|
|
42
|
-
func.returns = str(udf.
|
|
43
|
+
func.returns = str(udf.signatures[0].get_return_type())
|
|
43
44
|
# Convert the parameter types to Pixeltable type references
|
|
44
45
|
for griffe_param in func.parameters:
|
|
45
46
|
assert isinstance(griffe_param.annotation, griffe.expressions.Expr)
|
|
46
|
-
if griffe_param.name not in udf.
|
|
47
|
+
if griffe_param.name not in udf.signatures[0].parameters:
|
|
47
48
|
logger.warning(f'Parameter `{griffe_param.name}` not found in signature for UDF: {udf.display_name}')
|
|
48
49
|
continue
|
|
49
|
-
pxt_param = udf.
|
|
50
|
+
pxt_param = udf.signatures[0].parameters[griffe_param.name]
|
|
50
51
|
griffe_param.annotation = str(pxt_param.col_type)
|
pixeltable/type_system.py
CHANGED
|
@@ -5,7 +5,6 @@ import datetime
|
|
|
5
5
|
import enum
|
|
6
6
|
import io
|
|
7
7
|
import json
|
|
8
|
-
import types
|
|
9
8
|
import typing
|
|
10
9
|
import urllib.parse
|
|
11
10
|
import urllib.request
|
|
@@ -14,6 +13,9 @@ from typing import Any, Iterable, Mapping, Optional, Sequence, Union
|
|
|
14
13
|
|
|
15
14
|
import PIL.Image
|
|
16
15
|
import av # type: ignore
|
|
16
|
+
import jsonschema
|
|
17
|
+
import jsonschema.protocols
|
|
18
|
+
import jsonschema.validators
|
|
17
19
|
import numpy as np
|
|
18
20
|
import sqlalchemy as sql
|
|
19
21
|
from typing import _GenericAlias # type: ignore[attr-defined]
|
|
@@ -479,6 +481,20 @@ class ColumnType:
|
|
|
479
481
|
"""
|
|
480
482
|
pass
|
|
481
483
|
|
|
484
|
+
def to_json_schema(self) -> dict[str, Any]:
|
|
485
|
+
if self.nullable:
|
|
486
|
+
return {
|
|
487
|
+
'anyOf': [
|
|
488
|
+
self._to_json_schema(),
|
|
489
|
+
{'type': 'null'},
|
|
490
|
+
]
|
|
491
|
+
}
|
|
492
|
+
else:
|
|
493
|
+
return self._to_json_schema()
|
|
494
|
+
|
|
495
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
496
|
+
raise excs.Error(f'Pixeltable type {self} is not a valid JSON type')
|
|
497
|
+
|
|
482
498
|
|
|
483
499
|
class InvalidType(ColumnType):
|
|
484
500
|
def __init__(self, nullable: bool = False):
|
|
@@ -501,6 +517,9 @@ class StringType(ColumnType):
|
|
|
501
517
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
502
518
|
return sql.String()
|
|
503
519
|
|
|
520
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
521
|
+
return {'type': 'string'}
|
|
522
|
+
|
|
504
523
|
def print_value(self, val: Any) -> str:
|
|
505
524
|
return f"'{val}'"
|
|
506
525
|
|
|
@@ -524,8 +543,13 @@ class IntType(ColumnType):
|
|
|
524
543
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
525
544
|
return sql.BigInteger()
|
|
526
545
|
|
|
546
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
547
|
+
return {'type': 'integer'}
|
|
548
|
+
|
|
527
549
|
def _validate_literal(self, val: Any) -> None:
|
|
528
|
-
|
|
550
|
+
# bool is a subclass of int, so we need to check for it
|
|
551
|
+
# explicitly first
|
|
552
|
+
if isinstance(val, bool) or not isinstance(val, int):
|
|
529
553
|
raise TypeError(f'Expected int, got {val.__class__.__name__}')
|
|
530
554
|
|
|
531
555
|
|
|
@@ -536,6 +560,9 @@ class FloatType(ColumnType):
|
|
|
536
560
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
537
561
|
return sql.Float()
|
|
538
562
|
|
|
563
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
564
|
+
return {'type': 'number'}
|
|
565
|
+
|
|
539
566
|
def _validate_literal(self, val: Any) -> None:
|
|
540
567
|
if not isinstance(val, float):
|
|
541
568
|
raise TypeError(f'Expected float, got {val.__class__.__name__}')
|
|
@@ -553,6 +580,9 @@ class BoolType(ColumnType):
|
|
|
553
580
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
554
581
|
return sql.Boolean()
|
|
555
582
|
|
|
583
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
584
|
+
return {'type': 'boolean'}
|
|
585
|
+
|
|
556
586
|
def _validate_literal(self, val: Any) -> None:
|
|
557
587
|
if not isinstance(val, bool):
|
|
558
588
|
raise TypeError(f'Expected bool, got {val.__class__.__name__}')
|
|
@@ -581,61 +611,44 @@ class TimestampType(ColumnType):
|
|
|
581
611
|
|
|
582
612
|
|
|
583
613
|
class JsonType(ColumnType):
|
|
584
|
-
|
|
585
|
-
|
|
614
|
+
|
|
615
|
+
json_schema: Optional[dict[str, Any]]
|
|
616
|
+
__validator: Optional[jsonschema.protocols.Validator]
|
|
617
|
+
|
|
618
|
+
def __init__(self, json_schema: Optional[dict[str, Any]] = None, nullable: bool = False):
|
|
586
619
|
super().__init__(self.Type.JSON, nullable=nullable)
|
|
587
|
-
self.
|
|
620
|
+
self.json_schema = json_schema
|
|
621
|
+
if json_schema is None:
|
|
622
|
+
self.__validator = None
|
|
623
|
+
else:
|
|
624
|
+
validator_cls = jsonschema.validators.validator_for(json_schema)
|
|
625
|
+
validator_cls.check_schema(json_schema)
|
|
626
|
+
self.__validator = validator_cls(json_schema)
|
|
588
627
|
|
|
589
628
|
def copy(self, nullable: bool) -> ColumnType:
|
|
590
|
-
return JsonType(self.
|
|
629
|
+
return JsonType(json_schema=self.json_schema, nullable=nullable)
|
|
591
630
|
|
|
592
631
|
def matches(self, other: ColumnType) -> bool:
|
|
593
|
-
return isinstance(other, JsonType) and self.
|
|
594
|
-
|
|
595
|
-
def supertype(self, other: ColumnType) -> Optional[JsonType]:
|
|
596
|
-
if not isinstance(other, JsonType):
|
|
597
|
-
return None
|
|
598
|
-
if self.type_spec is None:
|
|
599
|
-
# we don't have a type spec and can accept anything accepted by other
|
|
600
|
-
return JsonType(nullable=(self.nullable or other.nullable))
|
|
601
|
-
if other.type_spec is None:
|
|
602
|
-
# we have a type spec but other doesn't
|
|
603
|
-
return JsonType(nullable=(self.nullable or other.nullable))
|
|
604
|
-
|
|
605
|
-
# we both have type specs; the supertype's type spec is the union of the two
|
|
606
|
-
type_spec: dict[str, ColumnType] = {}
|
|
607
|
-
type_spec.update(self.type_spec)
|
|
608
|
-
for other_field_name, other_field_type in other.type_spec.items():
|
|
609
|
-
if other_field_name not in type_spec:
|
|
610
|
-
type_spec[other_field_name] = other_field_type
|
|
611
|
-
else:
|
|
612
|
-
# both type specs have this field
|
|
613
|
-
field_type = type_spec[other_field_name].supertype(other_field_type)
|
|
614
|
-
if field_type is None:
|
|
615
|
-
# conflicting types
|
|
616
|
-
return JsonType(nullable=(self.nullable or other.nullable))
|
|
617
|
-
type_spec[other_field_name] = field_type
|
|
618
|
-
return JsonType(type_spec, nullable=(self.nullable or other.nullable))
|
|
632
|
+
return isinstance(other, JsonType) and self.json_schema == other.json_schema
|
|
619
633
|
|
|
620
634
|
def _as_dict(self) -> dict:
|
|
621
635
|
result = super()._as_dict()
|
|
622
|
-
if self.
|
|
623
|
-
|
|
624
|
-
result.update({'type_spec': type_spec_dict})
|
|
636
|
+
if self.json_schema is not None:
|
|
637
|
+
result.update({'json_schema': self.json_schema})
|
|
625
638
|
return result
|
|
626
639
|
|
|
627
640
|
@classmethod
|
|
628
641
|
def _from_dict(cls, d: dict) -> ColumnType:
|
|
629
|
-
|
|
630
|
-
if 'type_spec' in d:
|
|
631
|
-
type_spec = {
|
|
632
|
-
field_name: cls.deserialize(field_type_dict) for field_name, field_type_dict in d['type_spec'].items()
|
|
633
|
-
}
|
|
634
|
-
return cls(type_spec, nullable=d['nullable'])
|
|
642
|
+
return cls(json_schema=d.get('json_schema'), nullable=d['nullable'])
|
|
635
643
|
|
|
636
644
|
def to_sa_type(self) -> sql.types.TypeEngine:
|
|
637
645
|
return sql.dialects.postgresql.JSONB()
|
|
638
646
|
|
|
647
|
+
def _to_json_schema(self) -> dict[str, Any]:
|
|
648
|
+
if self.json_schema is None:
|
|
649
|
+
return {}
|
|
650
|
+
return self.json_schema
|
|
651
|
+
|
|
639
652
|
def print_value(self, val: Any) -> str:
|
|
640
653
|
val_type = self.infer_literal_type(val)
|
|
641
654
|
if val_type is None:
|
|
@@ -648,17 +661,19 @@ class JsonType(ColumnType):
|
|
|
648
661
|
if not isinstance(val, dict) and not isinstance(val, list):
|
|
649
662
|
# TODO In the future we should accept scalars too, which would enable us to remove this top-level check
|
|
650
663
|
raise TypeError(f'Expected dict or list, got {val.__class__.__name__}')
|
|
651
|
-
if not self.
|
|
664
|
+
if not self.__is_valid_json(val):
|
|
652
665
|
raise TypeError(f'That literal is not a valid Pixeltable JSON object: {val}')
|
|
666
|
+
if self.__validator is not None:
|
|
667
|
+
self.__validator.validate(val)
|
|
653
668
|
|
|
654
669
|
@classmethod
|
|
655
|
-
def
|
|
670
|
+
def __is_valid_json(cls, val: Any) -> bool:
|
|
656
671
|
if val is None or isinstance(val, (str, int, float, bool)):
|
|
657
672
|
return True
|
|
658
673
|
if isinstance(val, (list, tuple)):
|
|
659
|
-
return all(cls.
|
|
674
|
+
return all(cls.__is_valid_json(v) for v in val)
|
|
660
675
|
if isinstance(val, dict):
|
|
661
|
-
return all(isinstance(k, str) and cls.
|
|
676
|
+
return all(isinstance(k, str) and cls.__is_valid_json(v) for k, v in val.items())
|
|
662
677
|
return False
|
|
663
678
|
|
|
664
679
|
def _create_literal(self, val: Any) -> Any:
|
|
@@ -666,6 +681,116 @@ class JsonType(ColumnType):
|
|
|
666
681
|
val = list(val)
|
|
667
682
|
return val
|
|
668
683
|
|
|
684
|
+
def supertype(self, other: ColumnType) -> Optional[JsonType]:
|
|
685
|
+
# Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
|
|
686
|
+
# are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
|
|
687
|
+
# merge their schemas.
|
|
688
|
+
basic_supertype = super().supertype(other)
|
|
689
|
+
if basic_supertype is not None:
|
|
690
|
+
assert isinstance(basic_supertype, JsonType)
|
|
691
|
+
return basic_supertype
|
|
692
|
+
|
|
693
|
+
if not isinstance(other, JsonType):
|
|
694
|
+
return None
|
|
695
|
+
|
|
696
|
+
if self.json_schema is None or other.json_schema is None:
|
|
697
|
+
return JsonType(nullable=(self.nullable or other.nullable))
|
|
698
|
+
|
|
699
|
+
superschema = self.__superschema(self.json_schema, other.json_schema)
|
|
700
|
+
|
|
701
|
+
return JsonType(
|
|
702
|
+
json_schema=(None if len(superschema) == 0 else superschema),
|
|
703
|
+
nullable=(self.nullable or other.nullable)
|
|
704
|
+
)
|
|
705
|
+
|
|
706
|
+
@classmethod
|
|
707
|
+
def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
|
|
708
|
+
# Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
|
|
709
|
+
# things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
|
|
710
|
+
# for the most important use cases (unions for type inference, validation of inline exprs). A schema is
|
|
711
|
+
# considered to be conforming if either:
|
|
712
|
+
# (i) it is a scalar (string, integer, number, boolean) or dictionary (object) type; or
|
|
713
|
+
# (ii) it is an "anyOf" schema of one of the above types and the exact schema {'type': 'null'}.
|
|
714
|
+
# Conforming schemas are organized into a type hierarchy in an internally consistent way. Nonconforming
|
|
715
|
+
# schemas are allowed, but they are isolates in the type hierarchy: a nonconforming schema has no proper
|
|
716
|
+
# subtypes, and its only proper supertype is an unconstrained JsonType().
|
|
717
|
+
#
|
|
718
|
+
# There is some subtlety in the handling of nullable fields. Nullable fields are represented in JSON
|
|
719
|
+
# schemas as (for example) {'anyOf': [{'type': 'string'}, {'type': 'null'}]}. When finding the supertype
|
|
720
|
+
# of schemas that might be nullable, we first unpack the 'anyOf's, find the supertype of the underlyings,
|
|
721
|
+
# then reapply the 'anyOf' if appropriate. The top-level schema (i.e., JsonType.json_schema) is presumed
|
|
722
|
+
# to NOT be in this form (since nullability is indicated by the `nullable` field of the JsonType object),
|
|
723
|
+
# so this subtlety is applicable only to types that occur in subfields.
|
|
724
|
+
#
|
|
725
|
+
# There is currently no special handling of lists; distinct schemas with type 'array' will union to the
|
|
726
|
+
# generic {'type': 'array'} schema. This could be a TODO item if there is a need for it in the future.
|
|
727
|
+
|
|
728
|
+
if a == b:
|
|
729
|
+
return a
|
|
730
|
+
|
|
731
|
+
if 'properties' in a and 'properties' in b:
|
|
732
|
+
a_props = a['properties']
|
|
733
|
+
b_props = b['properties']
|
|
734
|
+
a_req = a.get('required', [])
|
|
735
|
+
b_req = b.get('required', [])
|
|
736
|
+
super_props = {}
|
|
737
|
+
super_req = []
|
|
738
|
+
for key, a_prop_schema in a_props.items():
|
|
739
|
+
if key in b_props: # in both a and b
|
|
740
|
+
prop_schema = cls.__superschema_with_nulls(a_prop_schema, b_props[key])
|
|
741
|
+
super_props[key] = prop_schema
|
|
742
|
+
if key in a_req and key in b_req:
|
|
743
|
+
super_req.append(key)
|
|
744
|
+
else: # in a but not b
|
|
745
|
+
# Add it to the supertype schema as optional (regardless of its status in a)
|
|
746
|
+
super_props[key] = a_prop_schema
|
|
747
|
+
for key, b_prop_schema in b_props.items():
|
|
748
|
+
if key not in a_props: # in b but not a
|
|
749
|
+
super_props[key] = b_prop_schema
|
|
750
|
+
schema = {'type': 'object', 'properties': super_props}
|
|
751
|
+
if len(super_req) > 0:
|
|
752
|
+
schema['required'] = super_req
|
|
753
|
+
return schema
|
|
754
|
+
|
|
755
|
+
a_type = a.get('type')
|
|
756
|
+
b_type = b.get('type')
|
|
757
|
+
|
|
758
|
+
if (a_type in ('string', 'integer', 'number', 'boolean', 'object', 'array') and a_type == b_type):
|
|
759
|
+
# a and b both have the same type designation, but are not identical. This can happen if
|
|
760
|
+
# (for example) they have validators or other attributes that differ. In this case, we
|
|
761
|
+
# generalize to {'type': t}, where t is their shared type, with no other qualifications.
|
|
762
|
+
return {'type': a_type}
|
|
763
|
+
|
|
764
|
+
return {} # Unresolvable type conflict; the supertype is an unrestricted JsonType.
|
|
765
|
+
|
|
766
|
+
@classmethod
|
|
767
|
+
def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
|
|
768
|
+
a, a_nullable = cls.__unpack_null_from_schema(a)
|
|
769
|
+
b, b_nullable = cls.__unpack_null_from_schema(b)
|
|
770
|
+
|
|
771
|
+
result = cls.__superschema(a, b)
|
|
772
|
+
if len(result) > 0 and (a_nullable or b_nullable):
|
|
773
|
+
# if len(result) == 0, then null is implicitly accepted; otherwise, we need to explicitly allow it
|
|
774
|
+
return {'anyOf': [result, {'type': 'null'}]}
|
|
775
|
+
return result
|
|
776
|
+
|
|
777
|
+
@classmethod
|
|
778
|
+
def __unpack_null_from_schema(cls, s: dict[str, Any]) -> tuple[dict[str, Any], bool]:
|
|
779
|
+
if 'anyOf' in s and len(s['anyOf']) == 2 and {'type': 'null'} in s['anyOf']:
|
|
780
|
+
try:
|
|
781
|
+
return next(s for s in s['anyOf'] if s != {'type': 'null'}), True
|
|
782
|
+
except StopIteration:
|
|
783
|
+
pass
|
|
784
|
+
return s, False
|
|
785
|
+
|
|
786
|
+
def _to_base_str(self) -> str:
|
|
787
|
+
if self.json_schema is None:
|
|
788
|
+
return 'Json'
|
|
789
|
+
elif 'title' in self.json_schema:
|
|
790
|
+
return f'Json[{self.json_schema["title"]}]'
|
|
791
|
+
else:
|
|
792
|
+
return f'Json[{self.json_schema}]'
|
|
793
|
+
|
|
669
794
|
|
|
670
795
|
class ArrayType(ColumnType):
|
|
671
796
|
def __init__(self, shape: tuple[Union[int, None], ...], dtype: ColumnType, nullable: bool = False):
|
|
@@ -1017,6 +1142,16 @@ class _PxtType:
|
|
|
1017
1142
|
|
|
1018
1143
|
|
|
1019
1144
|
class Json(_PxtType):
|
|
1145
|
+
def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
|
|
1146
|
+
"""
|
|
1147
|
+
`item` (the type subscript) must be a `dict` representing a valid JSON Schema.
|
|
1148
|
+
"""
|
|
1149
|
+
if not isinstance(item, dict):
|
|
1150
|
+
raise TypeError('Json type parameter must be a dict')
|
|
1151
|
+
|
|
1152
|
+
# The JsonType initializer will validate the JSON Schema.
|
|
1153
|
+
return typing.Annotated[Any, JsonType(json_schema=item, nullable=False)]
|
|
1154
|
+
|
|
1020
1155
|
@classmethod
|
|
1021
1156
|
def as_col_type(cls, nullable: bool) -> ColumnType:
|
|
1022
1157
|
return JsonType(nullable=nullable)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.28
|
|
3
|
+
Version: 0.2.29
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Home-page: https://pixeltable.com/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -29,13 +29,14 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
|
29
29
|
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
30
30
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
31
31
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
32
|
+
Requires-Dist: jsonschema (>=4.1.0)
|
|
32
33
|
Requires-Dist: lxml (>=5.0)
|
|
33
34
|
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
34
35
|
Requires-Dist: numpy (>=1.25,<2.0)
|
|
35
36
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
36
37
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
37
38
|
Requires-Dist: pillow (>=9.3.0)
|
|
38
|
-
Requires-Dist: pixeltable-pgserver (==0.2.
|
|
39
|
+
Requires-Dist: pixeltable-pgserver (==0.2.9)
|
|
39
40
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
40
41
|
Requires-Dist: psycopg[binary] (>=3.1.18)
|
|
41
42
|
Requires-Dist: puremagic (>=1.20)
|