protodantic-py 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protodantic/__init__.py +25 -0
- protodantic/_version.py +1 -0
- protodantic/cli.py +42 -0
- protodantic/codegen.py +310 -0
- protodantic/compiler.py +39 -0
- protodantic/py.typed +0 -0
- protodantic/runtime.py +345 -0
- protodantic/types.py +59 -0
- protodantic_py-0.1.0.dist-info/METADATA +155 -0
- protodantic_py-0.1.0.dist-info/RECORD +13 -0
- protodantic_py-0.1.0.dist-info/WHEEL +4 -0
- protodantic_py-0.1.0.dist-info/entry_points.txt +2 -0
- protodantic_py-0.1.0.dist-info/licenses/LICENSE +21 -0
protodantic/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Bidirectional bridge between Protocol Buffers and Pydantic."""
|
|
2
|
+
|
|
3
|
+
from ._version import __version__
|
|
4
|
+
from .codegen import generate_source
|
|
5
|
+
from .compiler import compile_fdset
|
|
6
|
+
from .runtime import OpenEnum, ProtoModel, load_pool, model_for
|
|
7
|
+
from .types import NULL, Int32, Int64, ListValue, Struct, UInt32, UInt64, Value
|
|
8
|
+
|
|
9
|
+
# public API of the package: every name here is re-exported from one of the
# submodules imported above (_version, codegen, compiler, runtime, types)
__all__ = [
    "Int32",
    "Int64",
    "ListValue",
    "NULL",
    "OpenEnum",
    "ProtoModel",
    "Struct",
    "UInt32",
    "UInt64",
    "Value",
    "__version__",
    "compile_fdset",
    "generate_source",
    "load_pool",
    "model_for",
]
|
protodantic/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# package version string; imported by the package root, the CLI and the code generator
__version__ = "0.1.0"
|
protodantic/cli.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from ._version import __version__
|
|
8
|
+
from .codegen import generate_source
|
|
9
|
+
from .compiler import compile_fdset
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# root command group: subcommands register themselves with @main.command();
# help is reachable through both -h and --help, and the version option reports
# the package __version__ under the program name "protodantic"
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.version_option(__version__, prog_name="protodantic")
def main() -> None:
    """Bidirectional bridge between Protocol Buffers and pydantic models."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@main.command()
@click.argument("protos", nargs=-1, required=True)
@click.option(
    "-I",
    "--include",
    "includes",
    multiple=True,
    metavar="DIR",
    help="Additional import search path (repeatable).",
)
@click.option(
    "-o",
    "--out",
    "out",
    required=True,
    metavar="FILE",
    help="Output python module path (e.g. models.py).",
)
def generate(protos: tuple[str, ...], includes: tuple[str, ...], out: str) -> None:
    """Generate pydantic models from .proto files."""
    try:
        destination = Path(out)
        # compile the inputs to a descriptor set, then render it as python source
        descriptor_set = compile_fdset(protos=protos, includes=includes)
        rendered = generate_source(fdset_bytes=descriptor_set)
        # create missing parent directories before writing the module
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(rendered, encoding="utf-8")
    except (RuntimeError, NotImplementedError, ValueError, OSError) as exc:
        # surface expected failures as a click error message instead of a traceback
        raise click.ClickException(str(exc)) from exc
    else:
        click.echo(f"wrote {destination}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# run the click group when this file is executed directly as a script
if __name__ == "__main__":
    main()
|
protodantic/codegen.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import keyword
|
|
5
|
+
import textwrap
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from typing import NamedTuple
|
|
8
|
+
|
|
9
|
+
from google.protobuf import any_pb2, descriptor_pb2, duration_pb2, struct_pb2, timestamp_pb2, wrappers_pb2
|
|
10
|
+
from google.protobuf.descriptor import Descriptor, EnumDescriptor, FieldDescriptor, OneofDescriptor
|
|
11
|
+
|
|
12
|
+
from ._version import __version__
|
|
13
|
+
from .runtime import load_pool, python_field_name
|
|
14
|
+
|
|
15
|
+
# proto scalar type -> (python annotation, default expression), both as source
# text for the generated module; protodantic names go through the _pd module
# alias so user message names can never shadow them inside the generated module
_SCALARS: dict[int, tuple[str, str]] = {
    FieldDescriptor.TYPE_DOUBLE: ("float", "0.0"),
    FieldDescriptor.TYPE_FLOAT: ("float", "0.0"),
    FieldDescriptor.TYPE_INT64: ("_pd.Int64", "0"),
    FieldDescriptor.TYPE_UINT64: ("_pd.UInt64", "0"),
    FieldDescriptor.TYPE_INT32: ("_pd.Int32", "0"),
    FieldDescriptor.TYPE_FIXED64: ("_pd.UInt64", "0"),
    FieldDescriptor.TYPE_FIXED32: ("_pd.UInt32", "0"),
    FieldDescriptor.TYPE_BOOL: ("bool", "False"),
    FieldDescriptor.TYPE_STRING: ("str", '""'),
    FieldDescriptor.TYPE_BYTES: ("bytes", 'b""'),
    FieldDescriptor.TYPE_UINT32: ("_pd.UInt32", "0"),
    FieldDescriptor.TYPE_SFIXED32: ("_pd.Int32", "0"),
    FieldDescriptor.TYPE_SFIXED64: ("_pd.Int64", "0"),
    FieldDescriptor.TYPE_SINT32: ("_pd.Int32", "0"),
    FieldDescriptor.TYPE_SINT64: ("_pd.Int64", "0"),
}
|
|
35
|
+
|
|
36
|
+
# well-known types are handled by the runtime, not generated as classes; all
# imported names in generated code are underscore-aliased so user message
# names (e.g. `message Any`) can never shadow them
# maps: well-known type full name -> python annotation text used for fields of that type
_WKT_PY: dict[str, str] = {
    timestamp_pb2.Timestamp.DESCRIPTOR.full_name: "_datetime.datetime",
    duration_pb2.Duration.DESCRIPTOR.full_name: "_datetime.timedelta",
    any_pb2.Any.DESCRIPTOR.full_name: "_typing.Any",
    struct_pb2.Struct.DESCRIPTOR.full_name: "_pd.Struct",
    struct_pb2.Value.DESCRIPTOR.full_name: "_pd.Value",
    struct_pb2.ListValue.DESCRIPTOR.full_name: "_pd.ListValue",
    wrappers_pb2.DoubleValue.DESCRIPTOR.full_name: "float",
    wrappers_pb2.FloatValue.DESCRIPTOR.full_name: "float",
    wrappers_pb2.Int64Value.DESCRIPTOR.full_name: "_pd.Int64",
    wrappers_pb2.UInt64Value.DESCRIPTOR.full_name: "_pd.UInt64",
    wrappers_pb2.Int32Value.DESCRIPTOR.full_name: "_pd.Int32",
    wrappers_pb2.UInt32Value.DESCRIPTOR.full_name: "_pd.UInt32",
    wrappers_pb2.BoolValue.DESCRIPTOR.full_name: "bool",
    wrappers_pb2.StringValue.DESCRIPTOR.full_name: "str",
    wrappers_pb2.BytesValue.DESCRIPTOR.full_name: "bytes",
}
|
|
56
|
+
|
|
57
|
+
_HEADER = f'''\
|
|
58
|
+
"""Generated by protodantic {__version__}. DO NOT EDIT."""
|
|
59
|
+
|
|
60
|
+
from __future__ import annotations
|
|
61
|
+
|
|
62
|
+
import base64 as _base64
|
|
63
|
+
import datetime as _datetime
|
|
64
|
+
import typing as _typing
|
|
65
|
+
|
|
66
|
+
from pydantic import Field as _Field
|
|
67
|
+
|
|
68
|
+
import protodantic as _pd
|
|
69
|
+
|
|
70
|
+
'''
|
|
71
|
+
|
|
72
|
+
# width passed to textwrap.wrap when the base64 descriptor set is split into
# string-literal chunks
_B64_LINE_WIDTH = 84

# names a generated type may not use: builtins that appear in generated
# annotations/defaults, and the names the generated module itself emits
_REFERENCED_BUILTINS = frozenset({"bool", "bytes", "dict", "float", "int", "list", "str", "tuple"})
_EMITTED_MODULE_NAMES = frozenset({"_pd", "_typing", "_datetime", "_base64", "_Field", "_POOL", "_model"})
# names the enum machinery reserves on members
_RESERVED_ENUM_MEMBERS = frozenset({"mro"})
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _python_type_name(*, name: str) -> str:
    """Return *name* as a class name that is safe inside the generated module.

    Python keywords, builtins referenced by generated code and names the
    generated module emits itself get a trailing underscore (the same escape
    used for fields); any other name is returned unchanged."""
    must_escape = (
        keyword.iskeyword(name)
        or name in _REFERENCED_BUILTINS
        or name in _EMITTED_MODULE_NAMES
    )
    return f"{name}_" if must_escape else name
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _python_enum_member(*, name: str) -> str:
    """Return *name* as an enum member name, appending ``_`` when it is a
    python keyword or a name reserved by the enum machinery."""
    must_escape = keyword.iskeyword(name) or name in _RESERVED_ENUM_MEMBERS
    return f"{name}_" if must_escape else name
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class _Entry(NamedTuple):
    """One message or enum collected for generation, together with the inputs
    used to derive its python class name."""

    kind: str  # "message" | "enum"
    # descriptor of the message or enum itself
    desc: Descriptor | EnumDescriptor
    # flattened name: the names of the enclosing messages and the type's own
    # name, joined with "_"
    base_name: str
    # package of the defining file; used to qualify base names that occur more than once
    package: str
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def generate_source(fdset_bytes: bytes) -> str:
    """Return the source text of a single python module that defines a
    pydantic model for each message found in the serialized
    FileDescriptorSet *fdset_bytes*."""
    generator = _ModuleGenerator(fdset_bytes=fdset_bytes)
    return generator.render()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _is_wkt_file(*, file_name: str) -> bool:
    """Tell whether *file_name* sits under the ``google/protobuf/`` directory,
    i.e. belongs to the files this generator skips."""
    wkt_dir = "google/protobuf/"
    return file_name[: len(wkt_dir)] == wkt_dir
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _is_synthetic_oneof(*, oneof: OneofDescriptor) -> bool:
    """Tell whether *oneof* looks like the implicit oneof behind a proto3
    ``optional`` field: exactly one member, and a oneof name equal to that
    member's name prefixed with an underscore."""
    members = list(oneof.fields)
    if len(members) != 1:
        return False
    (only_member,) = members
    return oneof.name == f"_{only_member.name}"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class _ModuleGenerator:
    """Builds the source of one python module from a serialized FileDescriptorSet.

    Every message and enum defined outside ``google/protobuf/`` becomes a
    top-level class; nested types are flattened by joining the nesting path
    with ``_``. Inputs that cannot be represented faithfully (non-proto3
    files, or class / enum-member / field names that collide after flattening
    or escaping) are rejected rather than silently generating a broken module.
    """

    def __init__(self, *, fdset_bytes: bytes) -> None:
        self._fdset_bytes = fdset_bytes
        self._fdset = descriptor_pb2.FileDescriptorSet.FromString(fdset_bytes)
        self._pool = load_pool(fdset_bytes)
        # messages and enums in registration order
        self._entries: list[_Entry] = []
        # proto full name -> python class name used in the generated module
        self._py_names: dict[str, str] = {}

    def render(self) -> str:
        """Return the complete generated module as a string.

        Raises:
            NotImplementedError: a non-well-known input file is not proto3.
            ValueError: two types, two members of one enum, or two fields of
                one message end up with the same python name.
        """
        self._reject_non_proto3()
        self._collect_entries()
        self._assign_python_names()

        # enums come first so message field defaults can reference them
        parts = [_HEADER, self._render_pool()]
        parts.extend(self._render_enum(desc=e.desc) for e in self._entries if e.kind == "enum")
        parts.extend(self._render_message(desc=e.desc) for e in self._entries if e.kind == "message")
        rebuild = self._render_rebuild()
        if rebuild:
            parts.append(rebuild)
        return "\n\n".join(parts)

    def _reject_non_proto3(self) -> None:
        """Raise NotImplementedError for the first user file whose syntax is not proto3."""
        for file_proto in self._fdset.file:
            if _is_wkt_file(file_name=file_proto.name):
                continue
            if file_proto.syntax != "proto3":
                # an empty syntax field is reported as proto2
                raise NotImplementedError(
                    f"{file_proto.name}: protodantic supports proto3 only, "
                    f"got {file_proto.syntax or 'proto2'}"
                )

    def _collect_entries(self) -> None:
        """Register every enum and message of every user file, nested types included."""
        for file_proto in self._fdset.file:
            if _is_wkt_file(file_name=file_proto.name):
                continue
            file_desc = self._pool.FindFileByName(file_proto.name)
            for enum_desc in file_desc.enum_types_by_name.values():
                self._register_enum(desc=enum_desc, prefix="", package=file_desc.package)
            for msg_desc in file_desc.message_types_by_name.values():
                self._register_message(desc=msg_desc, prefix="", package=file_desc.package)

    def _register_message(self, *, desc: Descriptor, prefix: str, package: str) -> None:
        """Record *desc* and, recursively, its nested enums and messages."""
        # map entry messages are rendered as dict fields, never as classes
        if desc.GetOptions().map_entry:
            return
        base_name = prefix + desc.name
        self._entries.append(_Entry(kind="message", desc=desc, base_name=base_name, package=package))
        for enum_desc in desc.enum_types:
            self._register_enum(desc=enum_desc, prefix=base_name + "_", package=package)
        for nested in desc.nested_types:
            self._register_message(desc=nested, prefix=base_name + "_", package=package)

    def _register_enum(self, *, desc: EnumDescriptor, prefix: str, package: str) -> None:
        """Record *desc* under its flattened base name."""
        self._entries.append(_Entry(kind="enum", desc=desc, base_name=prefix + desc.name, package=package))

    def _assign_python_names(self) -> None:
        """Choose the python class name for every collected entry."""
        # same base name in two packages -> qualify with the package path
        base_counts = Counter(entry.base_name for entry in self._entries)
        for entry in self._entries:
            if base_counts[entry.base_name] > 1 and entry.package:
                name = f"{entry.package.replace('.', '_')}_{entry.base_name}"
            else:
                name = entry.base_name
            self._py_names[entry.desc.full_name] = _python_type_name(name=name)
        self._reject_residual_collisions()

    def _reject_residual_collisions(self) -> None:
        """Raise ValueError when two proto types still share one python class name."""
        # flattening joins nesting with "_", so Outer.Inner and a literal
        # Outer_Inner collide even within one package — refuse, never overwrite
        by_python_name: dict[str, list[str]] = {}
        for full_name, py_name in self._py_names.items():
            by_python_name.setdefault(py_name, []).append(full_name)
        collisions = {name: fulls for name, fulls in by_python_name.items() if len(fulls) > 1}
        if collisions:
            details = "; ".join(
                f"{' and '.join(sorted(fulls))} both map to {name!r}"
                for name, fulls in sorted(collisions.items())
            )
            raise ValueError(
                f"python class name collision after flattening: {details}. "
                "Rename one of the conflicting proto types."
            )

    def _base_type(self, *, fd: FieldDescriptor) -> str:
        """Return the annotation text for a single value of field *fd*."""
        if fd.type == FieldDescriptor.TYPE_MESSAGE:
            full_name = fd.message_type.full_name
            if full_name in _WKT_PY:
                return _WKT_PY[full_name]
            return self._py_names[full_name]
        if fd.type == FieldDescriptor.TYPE_ENUM:
            return self._py_names[fd.enum_type.full_name]
        return _SCALARS[fd.type][0]

    def _annotation(self, *, fd: FieldDescriptor) -> str:
        """Return the full annotation text for *fd*: dict for maps, list for
        repeated fields, ``T | None`` for fields with presence, plain ``T`` otherwise."""
        if _is_map_field(fd=fd):
            key_fd = fd.message_type.fields_by_name["key"]
            value_fd = fd.message_type.fields_by_name["value"]
            return f"dict[{self._base_type(fd=key_fd)}, {self._base_type(fd=value_fd)}]"
        if fd.is_repeated:
            return f"list[{self._base_type(fd=fd)}]"
        if fd.has_presence:
            return f"{self._base_type(fd=fd)} | None"
        return self._base_type(fd=fd)

    def _default(self, *, fd: FieldDescriptor) -> str:
        """Return the default-value expression text for *fd*.

        When the python attribute name differs from the proto name, the
        default is wrapped in ``_Field(...)`` carrying the proto name as alias.
        """
        alias = f'"{fd.name}"' if python_field_name(proto_name=fd.name) != fd.name else None
        if _is_map_field(fd=fd):
            return f"_Field(default_factory=dict, alias={alias})" if alias else "_Field(default_factory=dict)"
        if fd.is_repeated:
            return f"_Field(default_factory=list, alias={alias})" if alias else "_Field(default_factory=list)"
        if fd.has_presence:
            plain = "None"
        elif fd.type == FieldDescriptor.TYPE_ENUM:
            plain = f"{self._py_names[fd.enum_type.full_name]}(0)"
        else:
            plain = _SCALARS[fd.type][1]
        return f"_Field(default={plain}, alias={alias})" if alias else plain

    def _render_pool(self) -> str:
        """Return the statement that rebuilds the descriptor pool from the
        base64-embedded descriptor set."""
        encoded = base64.b64encode(self._fdset_bytes).decode("ascii")
        # textwrap.wrap("") yields no lines; keep one empty literal so the
        # generated b64decode() call still receives its argument
        wrapped = textwrap.wrap(encoded, _B64_LINE_WIDTH) or [""]
        chunks = "\n".join(f' "{line}"' for line in wrapped)
        return f"_POOL = _pd.load_pool(_base64.b64decode(\n{chunks}\n))\n"

    def _render_enum(self, *, desc: EnumDescriptor) -> str:
        """Return the class definition text for enum *desc*."""
        py_name = self._py_names[desc.full_name]
        members = [(_python_enum_member(name=v.name), v.name, v.number) for v in desc.values]
        self._reject_member_collisions(enum_name=desc.full_name, members=members)
        lines = [f"class {py_name}(_pd.OpenEnum):"]
        lines.extend(f" {member} = {number}" for member, _, number in members)
        return "\n".join(lines) + "\n"

    def _reject_member_collisions(
        self, *, enum_name: str, members: list[tuple[str, str, int]]
    ) -> None:
        """Raise ValueError when two members of *enum_name* share one python name."""
        # escaping can map two proto member names to one python name; python
        # enums forbid redeclaring a name (even for same-value aliases), so
        # refuse rather than crash opaquely at import
        by_member: dict[str, list[str]] = {}
        for py_member, proto_member, _ in members:
            by_member.setdefault(py_member, []).append(proto_member)
        collisions = {py: names for py, names in by_member.items() if len(names) > 1}
        if collisions:
            details = "; ".join(
                f"{' and '.join(names)} both map to {py!r}"
                for py, names in sorted(collisions.items())
            )
            raise ValueError(
                f"enum {enum_name!r}: member name collision after escaping: {details}. "
                "Rename one of the conflicting members."
            )

    def _reject_field_collisions(self, *, desc: Descriptor) -> None:
        """Raise ValueError when two fields of *desc* share one python attribute name."""
        # escaping can map two proto field names to one attribute (e.g. `from`
        # and `from_`); the later declaration would silently replace the
        # earlier one and both proto fields would be written from the same
        # attribute, so refuse like the type and enum-member checks do
        by_attribute: dict[str, list[str]] = {}
        for fd in desc.fields:
            by_attribute.setdefault(python_field_name(proto_name=fd.name), []).append(fd.name)
        collisions = {py: names for py, names in by_attribute.items() if len(names) > 1}
        if collisions:
            details = "; ".join(
                f"{' and '.join(names)} both map to {py!r}"
                for py, names in sorted(collisions.items())
            )
            raise ValueError(
                f"message {desc.full_name!r}: field name collision after escaping: {details}. "
                "Rename one of the conflicting fields."
            )

    def _render_message(self, *, desc: Descriptor) -> str:
        """Return the class definition text for message *desc*.

        Raises:
            ValueError: two fields of the message map to the same attribute name.
        """
        self._reject_field_collisions(desc=desc)
        lines = [
            f"class {self._py_names[desc.full_name]}(_pd.ProtoModel):",
            f' __proto_full_name__: _typing.ClassVar[str] = "{desc.full_name}"',
            " __proto_pool__: _typing.ClassVar[_typing.Any] = _POOL",
        ]
        oneofs = self._render_oneofs(desc=desc)
        if oneofs:
            lines.append(oneofs)
        if desc.fields:
            # blank line between the class-level metadata and the fields
            lines.append("")
        lines.extend(
            f" {python_field_name(proto_name=fd.name)}: {self._annotation(fd=fd)} = {self._default(fd=fd)}"
            for fd in desc.fields
        )
        return "\n".join(lines) + "\n"

    def _render_oneofs(self, *, desc: Descriptor) -> str | None:
        """Return the ``__proto_oneofs__`` declaration for *desc*, or None when
        the message has no oneof besides the synthetic ones."""
        real_oneofs = [o for o in desc.oneofs if not _is_synthetic_oneof(oneof=o)]
        if not real_oneofs:
            return None
        groups = ", ".join(
            '"{}": ({})'.format(
                oneof.name,
                ", ".join(f'"{python_field_name(proto_name=f.name)}"' for f in oneof.fields)
                # a one-element tuple literal needs its trailing comma
                + ("," if len(oneof.fields) == 1 else ""),
            )
            for oneof in real_oneofs
        )
        return f" __proto_oneofs__: _typing.ClassVar[dict[str, tuple[str, ...]]] = {{{groups}}}"

    def _render_rebuild(self) -> str | None:
        """Return the trailing loop that calls ``model_rebuild()`` on every
        generated message class, or None when no message was generated."""
        names = [self._py_names[e.desc.full_name] for e in self._entries if e.kind == "message"]
        if not names:
            return None
        joined = ", ".join(names)
        # a one-element tuple literal needs its trailing comma
        trailing = "," if len(names) == 1 else ""
        return f"for _model in ({joined}{trailing}):\n _model.model_rebuild()\ndel _model\n"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _is_map_field(*, fd: FieldDescriptor) -> bool:
    """Tell whether *fd* is a map field, i.e. a message-typed field whose
    message type carries the ``map_entry`` option."""
    if fd.type != FieldDescriptor.TYPE_MESSAGE:
        return False
    return fd.message_type.GetOptions().map_entry
|
protodantic/compiler.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib.resources
|
|
4
|
+
import os
|
|
5
|
+
import tempfile
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
|
|
8
|
+
from grpc_tools import protoc as _protoc
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def compile_fdset(protos: Iterable[str], includes: Iterable[str] = ()) -> bytes:
    """Compile .proto files into the bytes of a serialized FileDescriptorSet.

    protoc is run with ``--include_imports`` so the result is self-contained.
    The search path consists of the given *includes*, the directory of every
    input file, and the ``_proto`` directory of the grpc_tools package.

    Raises:
        ValueError: *protos* is empty.
        RuntimeError: protoc exits with a non-zero status.
    """
    sources = [os.path.abspath(proto) for proto in protos]
    if not sources:
        raise ValueError("at least one .proto file is required")

    search_dirs = [os.path.abspath(include) for include in includes]
    for source in sources:
        source_dir = os.path.dirname(source)
        if source_dir in search_dirs:
            continue
        search_dirs.append(source_dir)
    # the protos bundled with grpc_tools go last on the search path
    search_dirs.append(str(importlib.resources.files("grpc_tools") / "_proto"))

    with tempfile.TemporaryDirectory() as scratch_dir:
        descriptor_file = os.path.join(scratch_dir, "fdset.bin")
        include_flags = [f"-I{directory}" for directory in search_dirs]
        argv = [
            "protoc",
            f"--descriptor_set_out={descriptor_file}",
            "--include_imports",
            *include_flags,
            *sources,
        ]
        status = _protoc.main(argv)
        if status != 0:
            raise RuntimeError(f"protoc failed with exit code {status} (args: {argv[1:]})")
        # read the result before the temporary directory is removed
        with open(descriptor_file, "rb") as handle:
            return handle.read()
|
protodantic/py.typed
ADDED
|
File without changes
|
protodantic/runtime.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import enum
|
|
5
|
+
import keyword
|
|
6
|
+
from typing import Any, ClassVar, Self
|
|
7
|
+
|
|
8
|
+
from google.protobuf import (
|
|
9
|
+
any_pb2,
|
|
10
|
+
descriptor_pb2,
|
|
11
|
+
descriptor_pool,
|
|
12
|
+
duration_pb2,
|
|
13
|
+
json_format,
|
|
14
|
+
message_factory,
|
|
15
|
+
struct_pb2,
|
|
16
|
+
timestamp_pb2,
|
|
17
|
+
wrappers_pb2,
|
|
18
|
+
)
|
|
19
|
+
from google.protobuf.descriptor import FieldDescriptor
|
|
20
|
+
from google.protobuf.message import Message
|
|
21
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
22
|
+
|
|
23
|
+
from .types import NULL, _strip_null_sentinel
|
|
24
|
+
|
|
25
|
+
# full names of the well-known types that the conversion helpers below treat
# specially instead of resolving them to a generated ProtoModel class
_TIMESTAMP = timestamp_pb2.Timestamp.DESCRIPTOR.full_name
_DURATION = duration_pb2.Duration.DESCRIPTOR.full_name
_ANY = any_pb2.Any.DESCRIPTOR.full_name
_VALUE = struct_pb2.Value.DESCRIPTOR.full_name
# Struct, Value and ListValue are converted through json_format
_STRUCT_TYPES = frozenset(
    m.DESCRIPTOR.full_name for m in (struct_pb2.Struct, struct_pb2.Value, struct_pb2.ListValue)
)
# wrapper messages are read and written through their `value` attribute
_WRAPPER_TYPES = frozenset(
    m.DESCRIPTOR.full_name
    for m in (
        wrappers_pb2.DoubleValue,
        wrappers_pb2.FloatValue,
        wrappers_pb2.Int64Value,
        wrappers_pb2.UInt64Value,
        wrappers_pb2.Int32Value,
        wrappers_pb2.UInt32Value,
        wrappers_pb2.BoolValue,
        wrappers_pb2.StringValue,
        wrappers_pb2.BytesValue,
    )
)
|
|
46
|
+
|
|
47
|
+
# nested-message resolution is scoped per descriptor pool (i.e. per generated
# module) so duplicate generated modules can coexist in one process; the
# by-name registry backs model_for() and Any fallback (last import wins)
# key: (descriptor pool, proto full name)
_MODELS_BY_POOL: dict[tuple[Any, str], type[ProtoModel]] = {}
# key: proto full name only
_MODELS_BY_NAME: dict[str, type[ProtoModel]] = {}
|
|
52
|
+
|
|
53
|
+
# ProtoModel API names; python_field_name() appends "_" to a proto field that
# uses one of them
_RESERVED_NAMES = frozenset({
    "proto_class",
    "to_proto",
    "to_proto_bytes",
    "to_proto_json",
    "from_proto",
    "from_proto_bytes",
    "from_proto_json",
})
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def python_field_name(*, proto_name: str) -> str:
    """Return the python attribute name for the proto field *proto_name*.

    Python keywords, names starting with ``model_`` and ProtoModel API names
    get a trailing underscore; every other name is returned unchanged."""
    needs_suffix = (
        proto_name in _RESERVED_NAMES
        or proto_name.startswith("model_")
        or keyword.iskeyword(proto_name)
    )
    return f"{proto_name}_" if needs_suffix else proto_name
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def model_for(full_name: str) -> type[ProtoModel]:
    """Return the generated model class registered for the proto full name
    *full_name* (e.g. "pkg.Msg").

    The module defining the model must have been imported already; when
    several modules define the same type, the last import wins.

    Raises:
        KeyError: no model is registered under *full_name*.
    """
    model_cls = _MODELS_BY_NAME.get(full_name)
    if model_cls is None:
        raise KeyError(f"no generated model imported for proto type {full_name!r}")
    return model_cls
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_pool(fdset_bytes: bytes) -> descriptor_pool.DescriptorPool:
    """Parse a serialized FileDescriptorSet and return a new DescriptorPool
    holding each of its files, added in the order they are listed."""
    parsed = descriptor_pb2.FileDescriptorSet.FromString(fdset_bytes)
    fresh_pool = descriptor_pool.DescriptorPool()
    for file_descriptor in parsed.file:
        fresh_pool.Add(file_descriptor)
    return fresh_pool
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class OpenEnum(enum.IntEnum):
    """IntEnum with proto3 open-enum behaviour: an integer that is not a
    declared member yields a nameless pseudo-member instead of an error."""

    @classmethod
    def _missing_(cls, value: object) -> OpenEnum | None:
        # only genuine integers qualify; bool is excluded although it subclasses int
        if isinstance(value, bool) or not isinstance(value, int):
            return None
        unknown = int.__new__(cls, value)
        unknown._value_ = value
        unknown._name_ = None
        return unknown
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _is_map(*, fd: FieldDescriptor) -> bool:
    """Tell whether *fd* is a map field, i.e. a message-typed field whose
    message type carries the ``map_entry`` option."""
    if fd.type != FieldDescriptor.TYPE_MESSAGE:
        return False
    return fd.message_type.GetOptions().map_entry
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _scalar_to_proto(*, value: Any) -> Any:
    """Return *value* ready for assignment to a proto scalar field.

    Integer subclasses such as enum members (including open-enum
    pseudo-members) are flattened to plain ``int``; bools and every
    non-integer value pass through untouched."""
    if isinstance(value, bool) or not isinstance(value, int):
        return value
    return int(value)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _resolve_model(*, full_name: str, pool: Any) -> type[ProtoModel] | None:
    """Look up the model class for *full_name*, preferring the one registered
    for *pool* and falling back to the by-name registry; None when neither
    registry has an entry."""
    scoped = _MODELS_BY_POOL.get((pool, full_name))
    return scoped if scoped is not None else _MODELS_BY_NAME.get(full_name)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _fill_message(*, target: Message, value: Any) -> None:
    """Populate the protobuf message *target* in place from the python *value*.

    The conversion is selected by the full name of *target*'s type; types
    without special handling are copied from ``value.to_proto()``.

    Raises:
        TypeError: *target* is a google.protobuf.Any and *value* is not a ProtoModel.
    """
    type_name = target.DESCRIPTOR.full_name
    if type_name == _TIMESTAMP:
        target.FromDatetime(value)
        return
    if type_name == _DURATION:
        target.FromTimedelta(value)
        return
    if type_name == _ANY:
        if isinstance(value, ProtoModel):
            target.Pack(value.to_proto())
            return
        raise TypeError(
            f"google.protobuf.Any fields accept ProtoModel instances, got {type(value).__name__}"
        )
    if type_name in _STRUCT_TYPES:
        # the NULL sentinel is stripped before the value is handed to json_format
        json_format.ParseDict(_strip_null_sentinel(value), target)
        return
    if type_name in _WRAPPER_TYPES:
        target.value = value
        return
    target.CopyFrom(value.to_proto())
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _message_to_python(*, msg: Message, pool: Any) -> Any:
    """Convert the protobuf message *msg* to its python-side value.

    Well-known types get dedicated conversions; any other message is turned
    into the model class resolved for its full name, preferring *pool*.

    Raises:
        LookupError: no model class is registered for the message type.
    """
    type_name = msg.DESCRIPTOR.full_name
    if type_name == _TIMESTAMP:
        return msg.ToDatetime(tzinfo=datetime.timezone.utc)
    if type_name == _DURATION:
        return msg.ToTimedelta()
    if type_name == _ANY:
        return _unpack_any(msg=msg, pool=pool)
    if type_name in _STRUCT_TYPES:
        as_python = json_format.MessageToDict(msg)
        # a set-but-null Value maps to NULL; None is reserved for "unset"
        is_null_value = type_name == _VALUE and as_python is None
        return NULL if is_null_value else as_python
    if type_name in _WRAPPER_TYPES:
        return msg.value

    model_cls = _resolve_model(full_name=type_name, pool=pool)
    if model_cls is not None:
        return model_cls.from_proto(msg)
    raise LookupError(
        f"no protodantic model registered for {type_name!r}; "
        "import the generated module that defines it first"
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _unpack_any(*, msg: Message, pool: Any) -> ProtoModel:
    """Unpack the google.protobuf.Any *msg* into an instance of the model
    registered for the type named in its type URL.

    Raises:
        LookupError: no model class is registered for the packed type.
        ValueError: the payload cannot be unpacked into that type.
    """
    # the type name is whatever follows the last "/" of the type URL
    _, _, type_name = msg.type_url.rpartition("/")
    model_cls = _resolve_model(full_name=type_name, pool=pool)
    if model_cls is None:
        raise LookupError(
            f"cannot unpack Any: no generated model imported for {type_name!r}"
        )
    payload = model_cls._new_message()
    unpacked = msg.Unpack(payload)
    if not unpacked:
        raise ValueError(f"failed to unpack Any containing {type_name!r}")
    return model_cls.from_proto(payload)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _set_proto_field(*, msg: Message, fd: FieldDescriptor, value: Any) -> None:
    """Write *value* into field *fd* of *msg*, dispatching on the field shape:
    map, repeated, singular message, or singular scalar."""
    if _is_map(fd=fd):
        _fill_map(target=getattr(msg, fd.name), fd=fd, value=value)
    elif fd.is_repeated:
        _fill_repeated(target=getattr(msg, fd.name), fd=fd, value=value)
    elif fd.type == FieldDescriptor.TYPE_MESSAGE:
        _fill_message(target=getattr(msg, fd.name), value=value)
    else:
        setattr(msg, fd.name, _scalar_to_proto(value=value))
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _fill_map(*, target: Any, fd: FieldDescriptor, value: dict) -> None:
    """Copy the entries of the python dict *value* into the proto map
    container *target* of map field *fd*."""
    entry_value_fd = fd.message_type.fields_by_name["value"]
    holds_messages = entry_value_fd.type == FieldDescriptor.TYPE_MESSAGE
    for key, item in value.items():
        if holds_messages:
            # message values are filled in place through target[key]
            _fill_message(target=target[key], value=item)
        else:
            target[key] = _scalar_to_proto(value=item)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _fill_repeated(*, target: Any, fd: FieldDescriptor, value: list) -> None:
    """Append the items of the python list *value* to the repeated proto
    container *target* of field *fd*."""
    if fd.type != FieldDescriptor.TYPE_MESSAGE:
        target.extend(_scalar_to_proto(value=item) for item in value)
        return
    for item in value:
        # message elements are created with add() and then filled in place
        _fill_message(target=target.add(), value=item)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _read_proto_field(*, msg: Message, fd: FieldDescriptor, pool: Any) -> Any:
    """Read field *fd* of *msg* as a python value.

    Maps become dicts and repeated fields become lists; a singular field
    that tracks presence and is not set yields None."""
    is_message = fd.type == FieldDescriptor.TYPE_MESSAGE
    if _is_map(fd=fd):
        return _read_map(target=getattr(msg, fd.name), fd=fd, pool=pool)
    if fd.is_repeated:
        items = getattr(msg, fd.name)
        if is_message:
            return [_message_to_python(msg=item, pool=pool) for item in items]
        return list(items)
    if fd.has_presence and not msg.HasField(fd.name):
        return None
    raw = getattr(msg, fd.name)
    return _message_to_python(msg=raw, pool=pool) if is_message else raw
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _read_map(*, target: Any, fd: FieldDescriptor, pool: Any) -> dict:
    """Return the proto map container *target* of map field *fd* as a python
    dict, converting message values to their python-side form."""
    entry_value_fd = fd.message_type.fields_by_name["value"]
    if entry_value_fd.type != FieldDescriptor.TYPE_MESSAGE:
        return dict(target)
    return {key: _message_to_python(msg=item, pool=pool) for key, item in target.items()}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class ProtoModel(BaseModel):
    """Pydantic base model bound to a protobuf message type.

    A subclass binds itself to a message by declaring ``__proto_full_name__``
    in its own class body (together with the descriptor pool stored in
    ``__proto_pool__``). Declaring the name is also what registers the class
    in the module-level model registries.
    """

    model_config = ConfigDict(
        populate_by_name=True,
        protected_namespaces=(),
        validate_assignment=True,
        extra="forbid",
    )

    # full proto name of the bound message; empty string means "not bound"
    __proto_full_name__: ClassVar[str] = ""
    # descriptor pool the bound message type is looked up in
    __proto_pool__: ClassVar[Any] = None
    # oneof group name -> names of the model fields that belong to the group
    __proto_oneofs__: ClassVar[dict[str, tuple[str, ...]]] = {}

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
        """Register the subclass if it declares its own proto name."""
        super().__pydantic_init_subclass__(**kwargs)
        # Only the class's own namespace counts: a plain subclass inherits the
        # name but must not take over resolution. Re-declaring
        # __proto_full_name__ in the class body is the explicit opt-in.
        if not cls.__dict__.get("__proto_full_name__"):
            return
        full_name = cls.__proto_full_name__
        _MODELS_BY_POOL[(cls.__proto_pool__, full_name)] = cls
        _MODELS_BY_NAME[full_name] = cls

    def __setattr__(self, name: str, value: Any) -> None:
        """Assign ``name`` and undo the change if the assignment is rejected."""
        # Snapshot first: pydantic mutates the instance before after-validators
        # run, so without a rollback a rejected assignment could leave the
        # model in an invalid state.
        previously_present = name in self.__dict__
        previous_value = self.__dict__.get(name)
        previously_marked = name in self.__pydantic_fields_set__
        try:
            super().__setattr__(name, value)
        except Exception:
            if not previously_present:
                self.__dict__.pop(name, None)
            else:
                self.__dict__[name] = previous_value
            if not previously_marked:
                self.__pydantic_fields_set__.discard(name)
            raise

    @model_validator(mode="after")
    def _validate_oneofs(self) -> Self:
        """Reject a model that sets more than one member of a oneof group.

        Raises:
            ValueError: If two or more fields of one group are not ``None``.
        """
        oneofs = type(self).__proto_oneofs__
        for group_name, members in oneofs.items():
            populated = []
            for member in members:
                if getattr(self, member) is not None:
                    populated.append(member)
            if len(populated) <= 1:
                continue
            raise ValueError(
                f"oneof {group_name!r} allows at most one field to be set, got {populated}"
            )
        return self

    @classmethod
    def proto_class(cls) -> type[Message]:
        """Return the dynamic protobuf message class this model is bound to."""
        pool = cls.__proto_pool__
        full_name = cls.__proto_full_name__
        descriptor = pool.FindMessageTypeByName(full_name)
        return message_factory.GetMessageClass(descriptor)

    @classmethod
    def _new_message(cls) -> Message:
        """Create an empty instance of the bound protobuf message class."""
        return cls.proto_class()()

    def to_proto(self) -> Message:
        """Convert this model to a protobuf message.

        Fields whose model value is ``None`` are left untouched on the
        message; every other field is written with ``_set_proto_field``.
        """
        message = self._new_message()
        for field_descriptor in message.DESCRIPTOR.fields:
            attr_name = python_field_name(proto_name=field_descriptor.name)
            current = getattr(self, attr_name)
            if current is None:
                continue
            _set_proto_field(msg=message, fd=field_descriptor, value=current)
        return message

    def to_proto_bytes(self) -> bytes:
        """Serialize this model to protobuf wire format."""
        message = self.to_proto()
        return message.SerializeToString()

    def to_proto_json(self, **kwargs: Any) -> str:
        """Serialize this model to canonical proto JSON.

        ``kwargs`` are forwarded to ``json_format.MessageToJson``.
        """
        message = self.to_proto()
        return json_format.MessageToJson(message, **kwargs)

    @classmethod
    def from_proto(cls, msg: Message) -> Self:
        """Build a model from a protobuf message of this model's proto type.

        The message is accepted when its descriptor's full name equals this
        model's ``__proto_full_name__``, which includes classic ``_pb2``
        instances of the same type.

        Raises:
            TypeError: If the message's full name differs from the model's.
        """
        expected_name = cls.__proto_full_name__
        actual_name = msg.DESCRIPTOR.full_name
        if actual_name != expected_name:
            raise TypeError(
                f"{cls.__name__}.from_proto expects {expected_name!r}, "
                f"got {actual_name!r}"
            )
        pool = cls.__proto_pool__
        values = {}
        for field_descriptor in msg.DESCRIPTOR.fields:
            attr_name = python_field_name(proto_name=field_descriptor.name)
            values[attr_name] = _read_proto_field(msg=msg, fd=field_descriptor, pool=pool)
        return cls(**values)

    @classmethod
    def from_proto_bytes(cls, data: bytes) -> Self:
        """Parse protobuf wire format into a model instance."""
        parsed = cls.proto_class().FromString(data)
        return cls.from_proto(parsed)

    @classmethod
    def from_proto_json(cls, data: str, **kwargs: Any) -> Self:
        """Parse canonical proto JSON into a model instance.

        ``kwargs`` pass through to ``json_format.Parse``
        (e.g. ``ignore_unknown_fields=True``).
        """
        blank = cls._new_message()
        return cls.from_proto(json_format.Parse(data, blank, **kwargs))
|
protodantic/types.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Annotated, Any
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, PlainSerializer
|
|
6
|
+
|
|
7
|
+
# Range-validated integer aliases: each bound is the inclusive limit of the
# signed or unsigned 32/64-bit range named by the alias.
Int32 = Annotated[int, Field(ge=-(1 << 31), le=(1 << 31) - 1)]
UInt32 = Annotated[int, Field(ge=0, le=(1 << 32) - 1)]
Int64 = Annotated[int, Field(ge=-(1 << 63), le=(1 << 63) - 1)]
UInt64 = Annotated[int, Field(ge=0, le=(1 << 64) - 1)]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _NullType:
    """Singleton sentinel for an explicit JSON null in a
    google.protobuf.Value field; plain ``None`` keeps meaning "field unset".

    The sentinel is falsy, and every construction or copy yields the same
    object, so ``x is NULL`` stays a reliable check.
    """

    # the one shared instance, created on first construction
    _instance: _NullType | None = None

    def __new__(cls) -> _NullType:
        if cls._instance is not None:
            return cls._instance
        created = super().__new__(cls)
        cls._instance = created
        return created

    def __repr__(self) -> str:
        return "protodantic.NULL"

    def __bool__(self) -> bool:
        return False

    # copies must preserve identity: `x is NULL` is the documented check
    def __copy__(self) -> _NullType:
        return self

    def __deepcopy__(self, memo: dict) -> _NullType:
        return self


NULL = _NullType()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# positional arg by contract: pydantic's PlainSerializer invokes this callback
|
|
42
|
+
# as f(value), so the signature is fixed by the framework
|
|
43
|
+
def _strip_null_sentinel(value: Any) -> Any:
    """Return ``value`` with every ``NULL`` sentinel replaced by ``None``.

    Dicts are rebuilt with their values converted recursively; lists and
    tuples are rebuilt as lists with their items converted recursively.
    Anything else is returned unchanged, except ``NULL`` itself, which
    becomes ``None``.
    """
    if isinstance(value, dict):
        return {name: _strip_null_sentinel(member) for name, member in value.items()}
    if isinstance(value, (list, tuple)):
        return [_strip_null_sentinel(member) for member in value]
    return None if value is NULL else value
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# JSON dumps turn the sentinel into a real null; python-mode dumps keep it so
|
|
54
|
+
# NULL-vs-unset survives a model_dump()/model_validate() round-trip
|
|
55
|
+
# Applied only for JSON serialization: the sentinel is stripped there and kept
# in python-mode dumps.
_NullSafe = PlainSerializer(func=_strip_null_sentinel, when_used="json")

# Python-side types for google.protobuf.Struct / Value / ListValue; each one
# carries the null-stripping serializer.
Struct = Annotated[dict[str, Any], _NullSafe]
Value = Annotated[Any, _NullSafe]
ListValue = Annotated[list[Any], _NullSafe]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: protodantic-py
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bidirectional bridge between Protocol Buffers and Pydantic: generate pydantic models from .proto files with lossless round-trips to proto messages and wire bytes
|
|
5
|
+
Project-URL: Repository, https://github.com/Koryto/protodantic
|
|
6
|
+
Project-URL: Issues, https://github.com/Koryto/protodantic/issues
|
|
7
|
+
Project-URL: Changelog, https://github.com/Koryto/protodantic/blob/master/CHANGELOG.md
|
|
8
|
+
Author-email: My Koryto <mykoryto93@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: codegen,grpc,proto3,protobuf,pydantic,serialization
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Framework :: Pydantic
|
|
14
|
+
Classifier: Framework :: Pydantic :: 2
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: click>=8.2
|
|
25
|
+
Requires-Dist: grpcio-tools>=1.66
|
|
26
|
+
Requires-Dist: protobuf>=6.30
|
|
27
|
+
Requires-Dist: pydantic>=2.7
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# protodantic
|
|
31
|
+
|
|
32
|
+
[CI](https://github.com/Koryto/protodantic/actions/workflows/ci.yml)
|
|
33
|
+
[PyPI](https://pypi.org/project/protodantic-py/)
|
|
34
|
+
[Python versions](https://pypi.org/project/protodantic-py/)
|
|
35
|
+
[License: MIT](LICENSE)
|
|
36
|
+
|
|
37
|
+
Bidirectional bridge between **Protocol Buffers** and **Pydantic**.
|
|
38
|
+
|
|
39
|
+
Point it at your `.proto` files and it generates plain pydantic v2 models — with full validation — where every model round-trips losslessly to and from real protobuf messages, wire bytes, and proto JSON. The pydantic → proto direction is a first-class citizen: `to_proto_bytes()` produces genuine wire-format output that any protobuf consumer in any language can parse.
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
uv add protodantic-py # or: pip install protodantic-py
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The distribution is named `protodantic-py` (the plain name is squatted on PyPI); the import stays `protodantic`:
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
import protodantic
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
Given `demo.proto`:
|
|
56
|
+
|
|
57
|
+
```proto
|
|
58
|
+
syntax = "proto3";
|
|
59
|
+
package demo;
|
|
60
|
+
|
|
61
|
+
message Address {
|
|
62
|
+
string street = 1;
|
|
63
|
+
string city = 2;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
message User {
|
|
67
|
+
int64 id = 1;
|
|
68
|
+
string name = 2;
|
|
69
|
+
Address address = 3;
|
|
70
|
+
repeated string tags = 4;
|
|
71
|
+
optional string nickname = 5;
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Generate models:
|
|
76
|
+
|
|
77
|
+
```sh
|
|
78
|
+
protodantic generate demo.proto -o models.py
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Then:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from models import User, Address
|
|
85
|
+
|
|
86
|
+
user = User(id=7, name="kory", address=Address(city="Warsaw"), tags=["a", "b"])
|
|
87
|
+
|
|
88
|
+
# pydantic -> proto: real wire format, readable by any protobuf runtime
|
|
89
|
+
data: bytes = user.to_proto_bytes()
|
|
90
|
+
msg = user.to_proto() # a live protobuf Message
|
|
91
|
+
json_str = user.to_proto_json() # canonical proto JSON
|
|
92
|
+
|
|
93
|
+
# proto -> pydantic: parse + validate in one step
|
|
94
|
+
restored = User.from_proto_bytes(data)
|
|
95
|
+
assert restored == user
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Or drive it from Python:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from protodantic import compile_fdset, generate_source
|
|
102
|
+
|
|
103
|
+
source = generate_source(compile_fdset(["demo.proto"]))
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Type mapping
|
|
107
|
+
|
|
108
|
+
| proto | pydantic |
|
|
109
|
+
| --- | --- |
|
|
110
|
+
| `int32/64`, `uint32/64`, `sint`, `fixed` | range-validated `int` (out-of-range fails at construction) |
|
|
111
|
+
| `float`, `double` | `float` |
|
|
112
|
+
| `string` / `bytes` / `bool` | `str` / `bytes` / `bool` |
|
|
113
|
+
| `enum` | generated `OpenEnum` (`IntEnum` that preserves unknown wire values — proto3 enums are open) |
|
|
114
|
+
| `message` | generated `ProtoModel` (nested types flattened as `Outer_Inner`) |
|
|
115
|
+
| `repeated T` / `map<K, V>` | `list[T]` / `dict[K, V]` |
|
|
116
|
+
| `optional`, oneof members, singular messages | `T \| None` (presence-aware: `None` ⇄ unset) |
|
|
117
|
+
| oneof groups | mutual exclusion enforced by a model validator |
|
|
118
|
+
| `google.protobuf.Timestamp` | `datetime.datetime` (UTC; naive input treated as UTC) |
|
|
119
|
+
| `google.protobuf.Duration` | `datetime.timedelta` |
|
|
120
|
+
| `google.protobuf.*Value` wrappers | `T \| None` |
|
|
121
|
+
| `google.protobuf.Struct` / `Value` / `ListValue` | `dict[str, Any]` / `Any` / `list[Any]` |
|
|
122
|
+
| `google.protobuf.Any` | `typing.Any` — accepts any `ProtoModel`; packed/unpacked via the model registry |
|
|
123
|
+
|
|
124
|
+
Field names that collide with python keywords or pydantic internals (`class`, `from`, `model_config`, ...) get a trailing underscore (`class_`) with the proto name kept as a populate alias. The same rule applies to message/enum type names and enum members that are python keywords or would shadow generated code (`message list` → `class list_`) — the proto full name stays the source of truth. Same-named messages in different packages get package-qualified class names; every model is also reachable via `protodantic.model_for("pkg.Message")`.
|
|
125
|
+
|
|
126
|
+
Semantics worth knowing:
|
|
127
|
+
|
|
128
|
+
- **Validation on mutation is on by default** (`validate_assignment=True`): assigning a second oneof member or an out-of-range int raises immediately. Opt out per-model with standard pydantic config on a subclass.
|
|
129
|
+
- **`protodantic.NULL`** expresses an explicit JSON null in a `google.protobuf.Value` field (`None` means *unset*). In `model_dump_json()` it serializes as real `null`; python-mode dumps keep the sentinel.
|
|
130
|
+
- **Subclassing a generated model does not affect parsing**: `from_proto`/`model_for` keep resolving to the generated class. To make your subclass the resolution target (e.g. to add custom validators applied on parse), re-declare `__proto_full_name__` in its body — explicit opt-in.
|
|
131
|
+
|
|
132
|
+
## Interop with existing `_pb2` code
|
|
133
|
+
|
|
134
|
+
Already consuming a centralized proto package as protoc-generated `_pb2` modules? Generated models interoperate directly:
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
user = User.from_proto(their_pb2_user_instance) # accepts _pb2 messages
|
|
138
|
+
their_msg = their_pb2.User.FromString(user.to_proto_bytes()) # canonical bytes
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## How it works
|
|
142
|
+
|
|
143
|
+
`protoc` (bundled via `grpcio-tools`) compiles your protos to a `FileDescriptorSet`, which codegen embeds in the generated module. At runtime, `ProtoModel` builds dynamic protobuf message classes from those descriptors — no `_pb2.py` files needed, and no protobuf internals leak into your models.
|
|
144
|
+
|
|
145
|
+
If several imported generated modules define the same proto type, the registry behind `model_for()` / nested-message resolution is last-import-wins.
|
|
146
|
+
|
|
147
|
+
## Status & roadmap
|
|
148
|
+
|
|
149
|
+
Requires Python ≥ 3.11. proto3 only by design (proto2 input is rejected with a clear error). The full supported-behavior spec lives in [tests/](tests/) — every test documents one use case. Documented policies: unknown fields are dropped when a model re-serializes (the model is the source of truth), and naive datetimes are interpreted as UTC.
|
|
150
|
+
|
|
151
|
+
- **0.1.0 (current)** — greenfield: `.proto` → pydantic codegen with lossless bidirectional round-trips, plus the semantics future drops build on.
|
|
152
|
+
- **0.2.0 — brownfield** — reverse schema codegen (pydantic models → `.proto`), generating from installed `_pb2` packages by descriptor reflection, `to_proto(into=TheirPb2Class)`.
|
|
153
|
+
- **0.3.0 — performance** — benchmark suite (vs `json.loads`+pydantic, raw `_pb2`, betterproto), then cached field plans and trusted-construction fast paths.
|
|
154
|
+
|
|
155
|
+
gRPC service stubs are out of scope: protodantic is a message layer.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
protodantic/__init__.py,sha256=emVIfT8_5hVoo_74Snyfq92ZnR2MuuB6Cfa6M3wI9t8,574
|
|
2
|
+
protodantic/_version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
3
|
+
protodantic/cli.py,sha256=Z5gR04_mR3jpPSSolPIWwUYNI6WV_Pl15D3BSlEWRZQ,1361
|
|
4
|
+
protodantic/codegen.py,sha256=bA0XQtqUzuWMdzRqzgKb36L3e27heZS3fQrjKe1EI40,13669
|
|
5
|
+
protodantic/compiler.py,sha256=O8-y2udmeGbS4yHtDgAEIAfNlvYrtf7A26SRfoJvli4,1455
|
|
6
|
+
protodantic/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
protodantic/runtime.py,sha256=131pfqlUv2TLGjrm812uE64tuHN2ZKRlJjFMeUu74pM,12566
|
|
8
|
+
protodantic/types.py,sha256=3IOhXkRN4fLxQRLw9ccSsMSrwln-JVrHxuKsKcRnjsg,1804
|
|
9
|
+
protodantic_py-0.1.0.dist-info/METADATA,sha256=murSwqAf8qfU5gqZ47xVaRTmB-aFGZ0moB5v8QZ6kvg,7477
|
|
10
|
+
protodantic_py-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
protodantic_py-0.1.0.dist-info/entry_points.txt,sha256=40KPnPZrJRvt_4-f9ds1eE957hFbR2DNsWtg8__0V84,53
|
|
12
|
+
protodantic_py-0.1.0.dist-info/licenses/LICENSE,sha256=AruywRaIPJhOHT3mzIonIdc92XVgdFbxBi2FGjYBsTc,1066
|
|
13
|
+
protodantic_py-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 My Koryto
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|