protowire-python 0.70.0__tar.gz → 0.75.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/ci.yml +5 -6
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/codeql.yml +2 -2
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/publish.yml +5 -7
- protowire_python-0.75.0/CHANGELOG.md +117 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/PKG-INFO +1 -1
- {protowire_python-0.70.0 → protowire_python-0.75.0}/pyproject.toml +1 -1
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/_protowire/module.cc +214 -7
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/__init__.py +1 -1
- protowire_python-0.75.0/src/protowire/pxf.py +433 -0
- protowire_python-0.75.0/tests/test_pxf_directives.py +189 -0
- protowire_python-0.75.0/tests/test_pxf_table_reader.py +235 -0
- protowire_python-0.70.0/CHANGELOG.md +0 -53
- protowire_python-0.70.0/src/protowire/pxf.py +0 -95
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.editorconfig +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/CODEOWNERS +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/dependabot.yml +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/.gitignore +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/CMakeLists.txt +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/CODE_OF_CONDUCT.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/CONTRIBUTING.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/GOVERNANCE.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/LICENSE +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/README.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/SECURITY.md +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/bench_pxf.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/bench_sbe.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/cibw_install_protobuf_linux.sh +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/dump_envelope.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/_schema.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/envelope.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/py.typed +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/sbe.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/testdata/order.proto +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/testdata/test.proto +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/conftest.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_envelope.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_pxf.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_pxf_full_roundtrip.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_sbe.py +0 -0
- {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_sbe_view_navigation.py +0 -0
|
@@ -16,12 +16,11 @@ concurrency:
|
|
|
16
16
|
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
|
17
17
|
|
|
18
18
|
env:
|
|
19
|
-
# Pin the sibling C++ checkout to a specific
|
|
20
|
-
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
|
|
24
|
-
PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0
|
|
19
|
+
# Pin the sibling C++ checkout to a specific tag. v0.75.0 carries the
|
|
20
|
+
# PXF v0.72-series feature set (@<name> / @entry / @table directive
|
|
21
|
+
# grammar, schema validator, Result accessors, TableReader streaming)
|
|
22
|
+
# the Python port wraps. Bump in lockstep with cpp release cuts.
|
|
23
|
+
PROTOWIRE_CPP_REF: v0.75.0
|
|
25
24
|
|
|
26
25
|
jobs:
|
|
27
26
|
# ---------------------------------------------------------------------
|
|
@@ -16,8 +16,8 @@ permissions:
|
|
|
16
16
|
security-events: write
|
|
17
17
|
|
|
18
18
|
env:
|
|
19
|
-
# See ci.yml for
|
|
20
|
-
PROTOWIRE_CPP_REF:
|
|
19
|
+
# See ci.yml for the rationale on this pin.
|
|
20
|
+
PROTOWIRE_CPP_REF: v0.75.0
|
|
21
21
|
|
|
22
22
|
jobs:
|
|
23
23
|
analyze:
|
|
@@ -24,13 +24,11 @@ env:
|
|
|
24
24
|
# frozen FFI surface, so it must be an immutable ref — never a
|
|
25
25
|
# branch.
|
|
26
26
|
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
|
|
32
|
-
# cuts a tag that includes them.
|
|
33
|
-
PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0
|
|
27
|
+
# Pinned to a tagged C++ release. v0.75.0 ships the PXF v0.72-series
|
|
28
|
+
# feature set (@<name> / @entry / @table grammar, schema validator,
|
|
29
|
+
# Result accessors, TableReader streaming) that this Python port
|
|
30
|
+
# wraps. Bump in lockstep with cpp release cuts.
|
|
31
|
+
PROTOWIRE_CPP_REF: v0.75.0
|
|
34
32
|
|
|
35
33
|
jobs:
|
|
36
34
|
# ---------------------------------------------------------------------
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `protowire-python` are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
The version number is kept aligned with the rest of the `protowire-*`
|
|
9
|
+
stack — releases bump in lockstep across language ports when the wire
|
|
10
|
+
format changes.
|
|
11
|
+
|
|
12
|
+
## [Unreleased]
|
|
13
|
+
|
|
14
|
+
## [0.75.0] — 2026-05-12
|
|
15
|
+
|
|
16
|
+
First release after the v0.70.0 baseline. Wraps the
|
|
17
|
+
[`protowire-cpp` v0.75.0](https://github.com/trendvidia/protowire-cpp/releases/tag/v0.75.0)
|
|
18
|
+
PXF v0.72-series feature set into the Python port, bringing the
|
|
19
|
+
`@<name>` / `@entry` / `@table` directive grammar, schema reserved-name
|
|
20
|
+
validator, `Result.directives` / `Result.tables` accessors, and the
|
|
21
|
+
`TableReader` streaming row reader through the nanobind FFI. The
|
|
22
|
+
Python port skips intermediate version numbers and lands the bundled
|
|
23
|
+
feature set directly on v0.75.0 to match the active wire revision
|
|
24
|
+
across the `protowire-*` stack.
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
|
|
28
|
+
- **`pxf.TableReader` and `pxf.bind_row`** (draft §3.4.4). Streaming
|
|
29
|
+
consumption for the `@table` directive, alternative to materializing
|
|
30
|
+
every row into `Result.tables` up front. Construct via
|
|
31
|
+
`pxf.TableReader.from_bytes(data)`; iterate with the standard `for`
|
|
32
|
+
loop or call `next_or_none()` until it returns `None`. The reader
|
|
33
|
+
exposes the header `type` / `columns` / `directives` properties and
|
|
34
|
+
a `tail()` method that returns the unconsumed buffer for chaining a
|
|
35
|
+
second reader on multi-`@table` documents. `bind_row(msg, columns,
|
|
36
|
+
row)` is the per-row binder used by `scan()` and exposed as a
|
|
37
|
+
free function for callers iterating `Result.tables[i].rows` from
|
|
38
|
+
the materializing path. Strategy is format-and-reparse, matching
|
|
39
|
+
the C++ port: cells are rendered as a synthetic PXF body and run
|
|
40
|
+
through `unmarshal`, reusing every branch of the existing decoder
|
|
41
|
+
(WKT timestamps / durations, wrapper-nullability, enum-by-name,
|
|
42
|
+
`pxf.required` / `pxf.default`, oneof). This release takes input as bytes;
|
|
43
|
+
a file-like / chunked-IO bridge is a possible follow-up.
|
|
44
|
+
|
|
45
|
+
### Changed
|
|
46
|
+
|
|
47
|
+
- **CI pin to protowire-cpp v0.75.0.** The cpp sibling now ships the
|
|
48
|
+
PXF v0.72-series feature set (directive grammar, schema validator,
|
|
49
|
+
Result accessors, TableReader streaming). The pin moves from the
|
|
50
|
+
pre-v0.72 commit `9af2ec0` to the `v0.75.0` tag so the Python
|
|
51
|
+
wrapper exposes the new surface.
|
|
52
|
+
|
|
53
|
+
### Added
|
|
54
|
+
|
|
55
|
+
- **`pxf.Result.directives` / `pxf.Result.tables`** — the document-root
|
|
56
|
+
directives the decoder saw at `unmarshal_full` time, exposed as
|
|
57
|
+
immutable dataclasses:
|
|
58
|
+
- `pxf.Directive(name, prefixes, type, body, has_body, line, column)`
|
|
59
|
+
for generic `@<name> *(prefix) [{ ... }]` blocks. `body` is the
|
|
60
|
+
raw bytes between `{` and `}` (verbatim), suitable for handing to
|
|
61
|
+
a follow-up `pxf.unmarshal` against the consumer's message type.
|
|
62
|
+
`type` keeps the v0.72.0 single-prefix back-compat shape.
|
|
63
|
+
- `pxf.TableDirective(type, columns, rows)` for `@table` directives,
|
|
64
|
+
with cells modeled as `None` (absent) or a `(kind, value)` 2-tuple
|
|
65
|
+
where kind ∈ {`"null"`, `"string"`, `"int"`, `"float"`, `"bool"`,
|
|
66
|
+
`"bytes"`, `"ident"`, `"timestamp"`, `"duration"`} — faithful to
|
|
67
|
+
the three-state cell grammar (absent / present-but-null /
|
|
68
|
+
present-with-value, draft §3.4.4).
|
|
69
|
+
- **`pxf.validate_descriptor(msg)` + `pxf.Violation`** — schema
|
|
70
|
+
reserved-name check (draft §3.13). Returns the list of fields,
|
|
71
|
+
oneofs, and enum values whose names case-sensitively match a PXF
|
|
72
|
+
value keyword (`null` / `true` / `false`). Sorted by element FQN.
|
|
73
|
+
- **`skip_validate` keyword** on `pxf.unmarshal` and
|
|
74
|
+
`pxf.unmarshal_full` (and the `_bytes` variants) — opt-out of the
|
|
75
|
+
per-call schema validator when the caller has already validated the
|
|
76
|
+
descriptor at registry-load time.
|
|
77
|
+
|
|
78
|
+
## [0.70.0]
|
|
79
|
+
|
|
80
|
+
Initial public release. The version number aligns this port with the rest
|
|
81
|
+
of the `protowire-*` stack, which targets the 0.70.x series for the first
|
|
82
|
+
coordinated public release. The wire codec is provided by
|
|
83
|
+
[`protowire-cpp`](https://github.com/trendvidia/protowire-cpp) and reaches
|
|
84
|
+
Python through a [nanobind](https://github.com/wjakob/nanobind) FFI; this
|
|
85
|
+
port's behaviour follows the C++ port's at every wire-level question.
|
|
86
|
+
|
|
87
|
+
### Added
|
|
88
|
+
|
|
89
|
+
- **PyPI distribution** as the `protowire-python` package (the bare
|
|
90
|
+
`protowire` was taken by an unrelated 2021 CLI; the import name stays
|
|
91
|
+
`import protowire`). Binary wheels are built by CI for CPython 3.10–3.13
|
|
92
|
+
on Linux × {x86_64, aarch64}, macOS × {x86_64, arm64}, and Windows ×
|
|
93
|
+
x86_64. Wheels are published through PyPI OIDC trusted publishing
|
|
94
|
+
with Sigstore provenance attestations.
|
|
95
|
+
- **Comprehensive CI matrix**: build + test on Python 3.10/3.11/3.12/3.13
|
|
96
|
+
across Linux/macOS/Windows, plus a `cibuildwheel` smoke build on every
|
|
97
|
+
PR to catch packaging regressions early. Weekly CodeQL SAST.
|
|
98
|
+
- **Governance scaffolding**: `LICENSE` (MIT), `CONTRIBUTING.md`,
|
|
99
|
+
`SECURITY.md` (security@trendvidia.com), `GOVERNANCE.md`,
|
|
100
|
+
`CODE_OF_CONDUCT.md`, `.github/CODEOWNERS`, issue + PR templates,
|
|
101
|
+
Dependabot for GitHub Actions and pip.
|
|
102
|
+
|
|
103
|
+
### Changed (breaking)
|
|
104
|
+
|
|
105
|
+
- **PXF parser stricter on key forms**, mirroring the upstream grammar
|
|
106
|
+
tightening in
|
|
107
|
+
[`trendvidia/protowire@8262bbb`](https://github.com/trendvidia/protowire/commit/8262bbb)
|
|
108
|
+
(`docs/grammar.ebnf`, `docs/draft-trendvidia-protowire-00.txt`):
|
|
109
|
+
- `=` (field assignment) and `{ … }` (submessage) now require an
|
|
110
|
+
identifier key. Inputs like `123 = 234` or `child { 123 = 123 }`
|
|
111
|
+
now raise `pxf.ParseError` with
|
|
112
|
+
`"field assignment with '=' requires an identifier key, got integer
|
|
113
|
+
(\"123\"); use ':' for map entries"`.
|
|
114
|
+
- `:` (map entry) is rejected at document top level — the document
|
|
115
|
+
represents a proto message, never a `map<K,V>`. Use `=` for
|
|
116
|
+
top-level field assignments. Map literals (`field = { 1: "x" }`)
|
|
117
|
+
still work because `:` remains valid inside `{ … }` blocks.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protowire-python
|
|
3
|
-
Version: 0.70.0
|
|
3
|
+
Version: 0.75.0
|
|
4
4
|
Summary: Python wrapper around protowire-cpp — PXF text, SBE binary, and envelope codecs.
|
|
5
5
|
Keywords: protobuf,pxf,sbe,wire-format,fix,trading
|
|
6
6
|
Author-Email: "TrendVidia, LLC" <open-source@trendvidia.com>
|
|
@@ -7,7 +7,7 @@ build-backend = "scikit_build_core.build"
|
|
|
7
7
|
# 2021 CLI tool). The import name stays `import protowire` — these two
|
|
8
8
|
# names are independent (cf. python-dateutil → import dateutil).
|
|
9
9
|
name = "protowire-python"
|
|
10
|
-
version = "0.70.0"
|
|
10
|
+
version = "0.75.0"
|
|
11
11
|
description = "Python wrapper around protowire-cpp — PXF text, SBE binary, and envelope codecs."
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
requires-python = ">=3.10"
|
|
@@ -17,10 +17,13 @@
|
|
|
17
17
|
|
|
18
18
|
#include <cstdint>
|
|
19
19
|
#include <memory>
|
|
20
|
+
#include <optional>
|
|
20
21
|
#include <span>
|
|
22
|
+
#include <sstream>
|
|
21
23
|
#include <string>
|
|
22
24
|
#include <string_view>
|
|
23
25
|
#include <utility>
|
|
26
|
+
#include <variant>
|
|
24
27
|
#include <vector>
|
|
25
28
|
|
|
26
29
|
#include <google/protobuf/descriptor.h>
|
|
@@ -77,9 +80,59 @@ const pbuf::Descriptor* FindDescriptor(const SchemaBundle& s,
|
|
|
77
80
|
|
|
78
81
|
// --- pxf bindings ---------------------------------------------------------
|
|
79
82
|
|
|
83
|
+
// CellToPyTuple converts a single AST cell value (or std::nullopt for an
|
|
84
|
+
// absent cell) into the FFI shape consumed by pxf.py — `None` for absent,
|
|
85
|
+
// `(kind, value)` otherwise. Used by PxfUnmarshalFull for @table rows.
|
|
86
|
+
//
|
|
87
|
+
// kind values mirror the AST variant tags:
|
|
88
|
+
// "null" → nb::none()
|
|
89
|
+
// "string" → str (already-unescaped UTF-8)
|
|
90
|
+
// "int" → str (raw integer text — Python wrapper decides parse)
|
|
91
|
+
// "float" → str (raw float text)
|
|
92
|
+
// "bool" → bool
|
|
93
|
+
// "bytes" → bytes
|
|
94
|
+
// "ident" → str
|
|
95
|
+
// "timestamp" → str (raw RFC3339)
|
|
96
|
+
// "duration" → str (raw duration)
|
|
97
|
+
nb::object CellToPyTuple(const std::optional<protowire::pxf::ValuePtr>& cell) {
|
|
98
|
+
if (!cell.has_value()) return nb::none();
|
|
99
|
+
using namespace protowire::pxf;
|
|
100
|
+
return std::visit(
|
|
101
|
+
[](const auto& p) -> nb::object {
|
|
102
|
+
using T = std::decay_t<decltype(*p)>;
|
|
103
|
+
if constexpr (std::is_same_v<T, NullVal>) {
|
|
104
|
+
return nb::make_tuple(std::string("null"), nb::none());
|
|
105
|
+
} else if constexpr (std::is_same_v<T, StringVal>) {
|
|
106
|
+
return nb::make_tuple(std::string("string"), p->value);
|
|
107
|
+
} else if constexpr (std::is_same_v<T, IntVal>) {
|
|
108
|
+
return nb::make_tuple(std::string("int"), p->raw);
|
|
109
|
+
} else if constexpr (std::is_same_v<T, FloatVal>) {
|
|
110
|
+
return nb::make_tuple(std::string("float"), p->raw);
|
|
111
|
+
} else if constexpr (std::is_same_v<T, BoolVal>) {
|
|
112
|
+
return nb::make_tuple(std::string("bool"), p->value);
|
|
113
|
+
} else if constexpr (std::is_same_v<T, BytesVal>) {
|
|
114
|
+
return nb::make_tuple(
|
|
115
|
+
std::string("bytes"),
|
|
116
|
+
nb::bytes(reinterpret_cast<const char*>(p->value.data()), p->value.size()));
|
|
117
|
+
} else if constexpr (std::is_same_v<T, IdentVal>) {
|
|
118
|
+
return nb::make_tuple(std::string("ident"), p->name);
|
|
119
|
+
} else if constexpr (std::is_same_v<T, TimestampVal>) {
|
|
120
|
+
return nb::make_tuple(std::string("timestamp"), p->raw);
|
|
121
|
+
} else if constexpr (std::is_same_v<T, DurationVal>) {
|
|
122
|
+
return nb::make_tuple(std::string("duration"), p->raw);
|
|
123
|
+
} else {
|
|
124
|
+
// List / Block are rejected at @table cell-parse time, so this
|
|
125
|
+
// branch is unreachable for cells. Fall back to an ("unknown", None) tuple.
|
|
126
|
+
return nb::make_tuple(std::string("unknown"), nb::none());
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
*cell);
|
|
130
|
+
}
|
|
131
|
+
|
|
80
132
|
// PXF text -> binary proto bytes.
|
|
81
133
|
nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
|
|
82
|
-
const std::string& full_name, bool discard_unknown
|
|
134
|
+
const std::string& full_name, bool discard_unknown,
|
|
135
|
+
bool skip_validate) {
|
|
83
136
|
auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
|
|
84
137
|
const auto* desc = FindDescriptor(schema, full_name);
|
|
85
138
|
std::unique_ptr<pbuf::Message> msg(
|
|
@@ -87,6 +140,7 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
|
|
|
87
140
|
|
|
88
141
|
protowire::pxf::UnmarshalOptions opts;
|
|
89
142
|
opts.discard_unknown = discard_unknown;
|
|
143
|
+
opts.skip_validate = skip_validate;
|
|
90
144
|
auto st = protowire::pxf::Unmarshal(
|
|
91
145
|
std::string_view(text.c_str(), text.size()), msg.get(), opts);
|
|
92
146
|
if (!st.ok()) {
|
|
@@ -99,10 +153,20 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
|
|
|
99
153
|
return nb::bytes(out.data(), out.size());
|
|
100
154
|
}
|
|
101
155
|
|
|
102
|
-
//
|
|
103
|
-
std::tuple<
|
|
156
|
+
// Directive FFI shape: (name, prefixes, type, body, has_body, line, column).
|
|
157
|
+
using PyDirective = std::tuple<std::string, std::vector<std::string>, std::string,
|
|
158
|
+
nb::bytes, bool, int, int>;
|
|
159
|
+
// TableDirective FFI shape: (type, columns, rows) where rows is a list of
|
|
160
|
+
// lists of cells (each cell None or (kind, value); see CellToPyTuple).
|
|
161
|
+
using PyTableDirective = std::tuple<std::string, std::vector<std::string>,
|
|
162
|
+
std::vector<std::vector<nb::object>>>;
|
|
163
|
+
|
|
164
|
+
// PXF text -> (binary proto bytes, set_paths, null_paths, directives, tables).
|
|
165
|
+
std::tuple<nb::bytes, std::vector<std::string>, std::vector<std::string>,
|
|
166
|
+
std::vector<PyDirective>, std::vector<PyTableDirective>>
|
|
104
167
|
PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
|
|
105
|
-
const std::string& full_name, bool discard_unknown
|
|
168
|
+
const std::string& full_name, bool discard_unknown,
|
|
169
|
+
bool skip_validate) {
|
|
106
170
|
auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
|
|
107
171
|
const auto* desc = FindDescriptor(schema, full_name);
|
|
108
172
|
std::unique_ptr<pbuf::Message> msg(
|
|
@@ -110,6 +174,7 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
|
|
|
110
174
|
|
|
111
175
|
protowire::pxf::UnmarshalOptions opts;
|
|
112
176
|
opts.discard_unknown = discard_unknown;
|
|
177
|
+
opts.skip_validate = skip_validate;
|
|
113
178
|
auto r = protowire::pxf::UnmarshalFull(
|
|
114
179
|
std::string_view(text.c_str(), text.size()), msg.get(), opts);
|
|
115
180
|
if (!r.ok()) {
|
|
@@ -119,11 +184,141 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
|
|
|
119
184
|
if (!msg->SerializeToString(&out)) {
|
|
120
185
|
throw nb::value_error("pxf.unmarshal_full: proto serialization failed");
|
|
121
186
|
}
|
|
187
|
+
// Marshal directives.
|
|
188
|
+
std::vector<PyDirective> py_dirs;
|
|
189
|
+
py_dirs.reserve(r->Directives().size());
|
|
190
|
+
for (const auto& d : r->Directives()) {
|
|
191
|
+
py_dirs.emplace_back(
|
|
192
|
+
d.name, d.prefixes, d.type,
|
|
193
|
+
nb::bytes(d.body.data(), d.body.size()),
|
|
194
|
+
d.has_body, d.pos.line, d.pos.column);
|
|
195
|
+
}
|
|
196
|
+
// Marshal tables.
|
|
197
|
+
std::vector<PyTableDirective> py_tables;
|
|
198
|
+
py_tables.reserve(r->Tables().size());
|
|
199
|
+
for (const auto& t : r->Tables()) {
|
|
200
|
+
std::vector<std::vector<nb::object>> py_rows;
|
|
201
|
+
py_rows.reserve(t.rows.size());
|
|
202
|
+
for (const auto& row : t.rows) {
|
|
203
|
+
std::vector<nb::object> py_cells;
|
|
204
|
+
py_cells.reserve(row.cells.size());
|
|
205
|
+
for (const auto& cell : row.cells) py_cells.push_back(CellToPyTuple(cell));
|
|
206
|
+
py_rows.push_back(std::move(py_cells));
|
|
207
|
+
}
|
|
208
|
+
py_tables.emplace_back(t.type, t.columns, std::move(py_rows));
|
|
209
|
+
}
|
|
122
210
|
return {nb::bytes(out.data(), out.size()),
|
|
123
211
|
r->SetFields(),
|
|
124
|
-
r->NullFields()
|
|
212
|
+
r->NullFields(),
|
|
213
|
+
std::move(py_dirs),
|
|
214
|
+
std::move(py_tables)};
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// PXF schema reserved-name check (draft §3.13). Returns a list of
|
|
218
|
+
// (kind, element, name, file) tuples. Empty list ⇒ conformant schema.
|
|
219
|
+
// kind values: "field" / "oneof" / "enum_value".
|
|
220
|
+
std::vector<std::tuple<std::string, std::string, std::string, std::string>>
|
|
221
|
+
PxfValidateDescriptor(nb::bytes fds_bytes, const std::string& full_name) {
|
|
222
|
+
auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
|
|
223
|
+
const auto* desc = FindDescriptor(schema, full_name);
|
|
224
|
+
auto vs = protowire::pxf::ValidateDescriptor(desc);
|
|
225
|
+
std::vector<std::tuple<std::string, std::string, std::string, std::string>> out;
|
|
226
|
+
out.reserve(vs.size());
|
|
227
|
+
for (const auto& v : vs) {
|
|
228
|
+
std::string kind;
|
|
229
|
+
switch (v.kind) {
|
|
230
|
+
case protowire::pxf::ViolationKind::kField: kind = "field"; break;
|
|
231
|
+
case protowire::pxf::ViolationKind::kOneof: kind = "oneof"; break;
|
|
232
|
+
case protowire::pxf::ViolationKind::kEnumValue: kind = "enum_value"; break;
|
|
233
|
+
}
|
|
234
|
+
out.emplace_back(std::move(kind), v.element, v.name, v.file);
|
|
235
|
+
}
|
|
236
|
+
return out;
|
|
125
237
|
}
|
|
126
238
|
|
|
239
|
+
// --- PyTableReader: streaming @table consumption -------------------------
|
|
240
|
+
//
|
|
241
|
+
// Wraps protowire::pxf::TableReader. The reader takes a std::istream*; we
|
|
242
|
+
// hold the istringstream alongside the reader so its lifetime is bound to
|
|
243
|
+
// the Python object. Input is provided as bytes (PR-2 scope); a file-like
|
|
244
|
+
// streambuf bridge is a possible follow-up.
|
|
245
|
+
class PyTableReader {
|
|
246
|
+
public:
|
|
247
|
+
static std::unique_ptr<PyTableReader> FromBytes(nb::bytes data) {
|
|
248
|
+
auto out = std::unique_ptr<PyTableReader>(new PyTableReader());
|
|
249
|
+
out->stream_ = std::make_unique<std::istringstream>(
|
|
250
|
+
std::string(data.c_str(), data.size()));
|
|
251
|
+
auto tr = protowire::pxf::TableReader::Create(out->stream_.get());
|
|
252
|
+
if (!tr.ok()) {
|
|
253
|
+
throw nb::value_error(("pxf.TableReader: " + tr.status().ToString()).c_str());
|
|
254
|
+
}
|
|
255
|
+
out->reader_ = std::move(*tr);
|
|
256
|
+
// Marshal the side-channel directives once at construction; they're
|
|
257
|
+
// fixed for the reader's lifetime.
|
|
258
|
+
for (const auto& d : out->reader_->Directives()) {
|
|
259
|
+
out->directives_.emplace_back(
|
|
260
|
+
d.name, d.prefixes, d.type,
|
|
261
|
+
nb::bytes(d.body.data(), d.body.size()),
|
|
262
|
+
d.has_body, d.pos.line, d.pos.column);
|
|
263
|
+
}
|
|
264
|
+
return out;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
const std::string& Type() const { return reader_->Type(); }
|
|
268
|
+
const std::vector<std::string>& Columns() const { return reader_->Columns(); }
|
|
269
|
+
const std::vector<PyDirective>& Directives() const { return directives_; }
|
|
270
|
+
bool Done() const { return reader_->Done(); }
|
|
271
|
+
|
|
272
|
+
// Returns the next row as a Python list of cells, or None at EOF.
|
|
273
|
+
// Raises ValueError on parse error.
|
|
274
|
+
nb::object NextOrNone() {
|
|
275
|
+
if (reader_->Done()) return nb::none();
|
|
276
|
+
protowire::pxf::TableRow row;
|
|
277
|
+
auto s = reader_->Next(&row);
|
|
278
|
+
if (!s.ok()) {
|
|
279
|
+
throw nb::value_error(("pxf.TableReader.next: " + s.ToString()).c_str());
|
|
280
|
+
}
|
|
281
|
+
if (reader_->Done()) return nb::none();
|
|
282
|
+
return RowToList(row);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
// Iterator protocol: __next__ raises StopIteration at EOF.
|
|
286
|
+
nb::object Next() {
|
|
287
|
+
if (reader_->Done()) throw nb::stop_iteration();
|
|
288
|
+
protowire::pxf::TableRow row;
|
|
289
|
+
auto s = reader_->Next(&row);
|
|
290
|
+
if (!s.ok()) {
|
|
291
|
+
throw nb::value_error(("pxf.TableReader.next: " + s.ToString()).c_str());
|
|
292
|
+
}
|
|
293
|
+
if (reader_->Done()) throw nb::stop_iteration();
|
|
294
|
+
return RowToList(row);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// Drains the remaining buffered + underlying bytes. Only meaningful
|
|
298
|
+
// after Done(); the Python wrapper exposes this as a method that
|
|
299
|
+
// returns bytes so callers can chain a second TableReader on
|
|
300
|
+
// multi-@table documents.
|
|
301
|
+
nb::bytes Tail() {
|
|
302
|
+
auto t = reader_->Tail();
|
|
303
|
+
std::ostringstream buf;
|
|
304
|
+
buf << t->rdbuf();
|
|
305
|
+
std::string s = buf.str();
|
|
306
|
+
return nb::bytes(s.data(), s.size());
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
private:
|
|
310
|
+
static nb::object RowToList(const protowire::pxf::TableRow& row) {
|
|
311
|
+
std::vector<nb::object> cells;
|
|
312
|
+
cells.reserve(row.cells.size());
|
|
313
|
+
for (const auto& cell : row.cells) cells.push_back(CellToPyTuple(cell));
|
|
314
|
+
return nb::cast(cells);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
std::unique_ptr<std::istringstream> stream_;
|
|
318
|
+
std::unique_ptr<protowire::pxf::TableReader> reader_;
|
|
319
|
+
std::vector<PyDirective> directives_;
|
|
320
|
+
};
|
|
321
|
+
|
|
127
322
|
// Binary proto bytes -> PXF text.
|
|
128
323
|
std::string PxfMarshal(nb::bytes msg_bytes, nb::bytes fds_bytes,
|
|
129
324
|
const std::string& full_name) {
|
|
@@ -301,10 +496,22 @@ NB_MODULE(_protowire, m) {
|
|
|
301
496
|
m.doc() = "protowire native extension (nanobind shim around protowire-cpp)";
|
|
302
497
|
|
|
303
498
|
m.def("pxf_unmarshal", &PxfUnmarshal, "text"_a, "fds"_a, "full_name"_a,
|
|
304
|
-
"discard_unknown"_a = false);
|
|
499
|
+
"discard_unknown"_a = false, "skip_validate"_a = false);
|
|
305
500
|
m.def("pxf_unmarshal_full", &PxfUnmarshalFull, "text"_a, "fds"_a,
|
|
306
|
-
"full_name"_a, "discard_unknown"_a = false);
|
|
501
|
+
"full_name"_a, "discard_unknown"_a = false, "skip_validate"_a = false);
|
|
307
502
|
m.def("pxf_marshal", &PxfMarshal, "msg_bytes"_a, "fds"_a, "full_name"_a);
|
|
503
|
+
m.def("pxf_validate_descriptor", &PxfValidateDescriptor, "fds"_a, "full_name"_a);
|
|
504
|
+
|
|
505
|
+
nb::class_<PyTableReader>(m, "PxfTableReader")
|
|
506
|
+
.def_static("from_bytes", &PyTableReader::FromBytes, "data"_a)
|
|
507
|
+
.def_prop_ro("type", &PyTableReader::Type)
|
|
508
|
+
.def_prop_ro("columns", &PyTableReader::Columns)
|
|
509
|
+
.def_prop_ro("directives", &PyTableReader::Directives)
|
|
510
|
+
.def_prop_ro("done", &PyTableReader::Done)
|
|
511
|
+
.def("next_or_none", &PyTableReader::NextOrNone)
|
|
512
|
+
.def("tail", &PyTableReader::Tail)
|
|
513
|
+
.def("__iter__", [](PyTableReader& self) -> PyTableReader& { return self; })
|
|
514
|
+
.def("__next__", &PyTableReader::Next);
|
|
308
515
|
|
|
309
516
|
nb::class_<SbeCodec>(m, "SbeCodec")
|
|
310
517
|
.def_static("create", &SbeCodec::Create, "fds"_a, "file_names"_a)
|