protowire-python 0.70.0__tar.gz → 0.75.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/ci.yml +5 -6
  2. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/codeql.yml +2 -2
  3. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/workflows/publish.yml +5 -7
  4. protowire_python-0.75.0/CHANGELOG.md +117 -0
  5. {protowire_python-0.70.0 → protowire_python-0.75.0}/PKG-INFO +1 -1
  6. {protowire_python-0.70.0 → protowire_python-0.75.0}/pyproject.toml +1 -1
  7. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/_protowire/module.cc +214 -7
  8. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/__init__.py +1 -1
  9. protowire_python-0.75.0/src/protowire/pxf.py +433 -0
  10. protowire_python-0.75.0/tests/test_pxf_directives.py +189 -0
  11. protowire_python-0.75.0/tests/test_pxf_table_reader.py +235 -0
  12. protowire_python-0.70.0/CHANGELOG.md +0 -53
  13. protowire_python-0.70.0/src/protowire/pxf.py +0 -95
  14. {protowire_python-0.70.0 → protowire_python-0.75.0}/.editorconfig +0 -0
  15. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/CODEOWNERS +0 -0
  16. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  17. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  18. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  19. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  20. {protowire_python-0.70.0 → protowire_python-0.75.0}/.github/dependabot.yml +0 -0
  21. {protowire_python-0.70.0 → protowire_python-0.75.0}/.gitignore +0 -0
  22. {protowire_python-0.70.0 → protowire_python-0.75.0}/CMakeLists.txt +0 -0
  23. {protowire_python-0.70.0 → protowire_python-0.75.0}/CODE_OF_CONDUCT.md +0 -0
  24. {protowire_python-0.70.0 → protowire_python-0.75.0}/CONTRIBUTING.md +0 -0
  25. {protowire_python-0.70.0 → protowire_python-0.75.0}/GOVERNANCE.md +0 -0
  26. {protowire_python-0.70.0 → protowire_python-0.75.0}/LICENSE +0 -0
  27. {protowire_python-0.70.0 → protowire_python-0.75.0}/README.md +0 -0
  28. {protowire_python-0.70.0 → protowire_python-0.75.0}/SECURITY.md +0 -0
  29. {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/bench_pxf.py +0 -0
  30. {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/bench_sbe.py +0 -0
  31. {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/cibw_install_protobuf_linux.sh +0 -0
  32. {protowire_python-0.70.0 → protowire_python-0.75.0}/scripts/dump_envelope.py +0 -0
  33. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/_schema.py +0 -0
  34. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/envelope.py +0 -0
  35. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/py.typed +0 -0
  36. {protowire_python-0.70.0 → protowire_python-0.75.0}/src/protowire/sbe.py +0 -0
  37. {protowire_python-0.70.0 → protowire_python-0.75.0}/testdata/order.proto +0 -0
  38. {protowire_python-0.70.0 → protowire_python-0.75.0}/testdata/test.proto +0 -0
  39. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/conftest.py +0 -0
  40. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_envelope.py +0 -0
  41. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_pxf.py +0 -0
  42. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_pxf_full_roundtrip.py +0 -0
  43. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_sbe.py +0 -0
  44. {protowire_python-0.70.0 → protowire_python-0.75.0}/tests/test_sbe_view_navigation.py +0 -0
@@ -16,12 +16,11 @@ concurrency:
16
16
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
17
17
 
18
18
  env:
19
- # Pin the sibling C++ checkout to a specific commit. cpp@v0.70.0
20
- # predates the __int128 checked_arith refactor (MSVC), the
21
- # protobuf-API-skew shim, and the MSVC source-charset fix; 9af2ec0
22
- # is the first commit with all of those. Bump to a v0.70.x tag
23
- # once cpp cuts one that includes them.
24
- PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0
19
+ # Pin the sibling C++ checkout to a specific tag. v0.75.0 carries the
20
+ # PXF v0.72-series feature set (@<name> / @entry / @table directive
21
+ # grammar, schema validator, Result accessors, TableReader streaming)
22
+ # the Python port wraps. Bump in lockstep with cpp release cuts.
23
+ PROTOWIRE_CPP_REF: v0.75.0
25
24
 
26
25
  jobs:
27
26
  # ---------------------------------------------------------------------
@@ -16,8 +16,8 @@ permissions:
16
16
  security-events: write
17
17
 
18
18
  env:
19
- # See ci.yml for why this is a SHA, not v0.70.0.
20
- PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0
19
+ # See ci.yml for the rationale on this pin.
20
+ PROTOWIRE_CPP_REF: v0.75.0
21
21
 
22
22
  jobs:
23
23
  analyze:
@@ -24,13 +24,11 @@ env:
24
24
  # frozen FFI surface, so it must be an immutable ref — never a
25
25
  # branch.
26
26
  #
27
- # Using a SHA (not the v0.70.0 tag) because cpp@v0.70.0 predates
28
- # the f1d3eb0 __int128 checked_arith refactor needed for MSVC,
29
- # plus the protobuf-API-skew shim and MSVC source-charset fixes.
30
- # 9af2ec0 (cpp main, ci: MSVC source-charset + skip pxf_escapes)
31
- # is the first commit with all of those. Bump to v0.70.x once cpp
32
- # cuts a tag that includes them.
33
- PROTOWIRE_CPP_REF: 9af2ec04918a417933848de1577cd61f83a710b0
27
+ # Pinned to a tagged C++ release. v0.75.0 ships the PXF v0.72-series
28
+ # feature set (@<name> / @entry / @table grammar, schema validator,
29
+ # Result accessors, TableReader streaming) that this Python port
30
+ # wraps. Bump in lockstep with cpp release cuts.
31
+ PROTOWIRE_CPP_REF: v0.75.0
34
32
 
35
33
  jobs:
36
34
  # ---------------------------------------------------------------------
@@ -0,0 +1,117 @@
1
+ # Changelog
2
+
3
+ All notable changes to `protowire-python` are documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ The version number is kept aligned with the rest of the `protowire-*`
9
+ stack — releases bump in lockstep across language ports when the wire
10
+ format changes.
11
+
12
+ ## [Unreleased]
13
+
14
+ ## [0.75.0] — 2026-05-12
15
+
16
+ First release after the v0.70.0 baseline. Wraps the
17
+ [`protowire-cpp` v0.75.0](https://github.com/trendvidia/protowire-cpp/releases/tag/v0.75.0)
18
+ PXF v0.72-series feature set into the Python port, bringing the
19
+ `@<name>` / `@entry` / `@table` directive grammar, schema reserved-name
20
+ validator, `Result.directives` / `Result.tables` accessors, and the
21
+ `TableReader` streaming row reader through the nanobind FFI. The
22
+ Python port skips intermediate version numbers and lands the bundled
23
+ feature set directly on v0.75.0 to match the active wire revision
24
+ across the `protowire-*` stack.
25
+
26
+ ### Added
27
+
28
+ - **`pxf.TableReader` and `pxf.bind_row`** (draft §3.4.4). Streaming
29
+ consumption for the `@table` directive, an alternative to materializing
30
+ every row into `Result.tables` up front. Construct via
31
+ `pxf.TableReader.from_bytes(data)`; iterate with the standard for
32
+ loop or call `next_or_none()` until it returns `None`. The reader
33
+ exposes the header `type` / `columns` / `directives` properties and
34
+ a `tail()` method that returns the unconsumed buffer for chaining a
35
+ second reader on multi-`@table` documents. `bind_row(msg, columns,
36
+ row)` is the per-row binder used by `scan()` and exposed as a
37
+ free function for callers iterating `Result.tables[i].rows` from
38
+ the materializing path. Strategy is format-and-reparse, matching
39
+ the C++ port: cells are rendered as a synthetic PXF body and run
40
+ through `unmarshal`, reusing every branch of the existing decoder
41
+ (WKT timestamps / durations, wrapper-nullability, enum-by-name,
42
+ `pxf.required` / `pxf.default`, oneof). The reader currently takes its input as bytes;
43
+ a file-like / chunked-IO bridge is a possible follow-up.
44
+
45
+ ### Changed
46
+
47
+ - **CI pin to protowire-cpp v0.75.0.** The cpp sibling now ships the
48
+ PXF v0.72-series feature set (directive grammar, schema validator,
49
+ Result accessors, TableReader streaming). The pin moves from the
50
+ pre-v0.72 commit `9af2ec0` to the `v0.75.0` tag so the Python
51
+ wrapper exposes the new surface.
52
+
53
+ ### Added
54
+
55
+ - **`pxf.Result.directives` / `pxf.Result.tables`** — the document-root
56
+ directives the decoder saw at `unmarshal_full` time, exposed as
57
+ immutable dataclasses:
58
+ - `pxf.Directive(name, prefixes, type, body, has_body, line, column)`
59
+ for generic `@<name> *(prefix) [{ ... }]` blocks. `body` is the
60
+ raw bytes between `{` and `}` (verbatim), suitable for handing to
61
+ a follow-up `pxf.unmarshal` against the consumer's message type.
62
+ `type` keeps the v0.72.0 single-prefix back-compat shape.
63
+ - `pxf.TableDirective(type, columns, rows)` for `@table` directives,
64
+ with cells modeled as `None` (absent) or a `(kind, value)` 2-tuple
65
+ where kind ∈ {`"null"`, `"string"`, `"int"`, `"float"`, `"bool"`,
66
+ `"bytes"`, `"ident"`, `"timestamp"`, `"duration"`} — faithful to
67
+ the three-state cell grammar (absent / present-but-null /
68
+ present-with-value, draft §3.4.4).
69
+ - **`pxf.validate_descriptor(msg)` + `pxf.Violation`** — schema
70
+ reserved-name check (draft §3.13). Returns the list of fields,
71
+ oneofs, and enum values whose names case-sensitively match a PXF
72
+ value keyword (`null` / `true` / `false`). Sorted by element FQN.
73
+ - **`skip_validate` keyword** on `pxf.unmarshal` and
74
+ `pxf.unmarshal_full` (and the `_bytes` variants) — opts out of the
75
+ per-call schema validator when the caller has already validated the
76
+ descriptor at registry-load time.
77
+
78
+ ## [0.70.0]
79
+
80
+ Initial public release. The version number aligns this port with the rest
81
+ of the `protowire-*` stack, which targets the 0.70.x series for the first
82
+ coordinated public release. The wire codec is provided by
83
+ [`protowire-cpp`](https://github.com/trendvidia/protowire-cpp) and reaches
84
+ Python through a [nanobind](https://github.com/wjakob/nanobind) FFI; this
85
+ port's behaviour follows the C++ port's at every wire-level question.
86
+
87
+ ### Added
88
+
89
+ - **PyPI distribution** as the `protowire-python` package (the bare
90
+ `protowire` was taken by an unrelated 2021 CLI; the import name stays
91
+ `import protowire`). Binary wheels built by CI for CPython 3.10–3.13
92
+ on Linux × {x86_64, aarch64}, macOS × {x86_64, arm64}, and Windows ×
93
+ x86_64. Wheels are published through PyPI OIDC trusted publishing
94
+ with Sigstore provenance attestations.
95
+ - **Comprehensive CI matrix**: build + test on Python 3.10/3.11/3.12/3.13
96
+ across Linux/macOS/Windows, plus a `cibuildwheel` smoke build on every
97
+ PR to catch packaging regressions early. Weekly CodeQL SAST.
98
+ - **Governance scaffolding**: `LICENSE` (MIT), `CONTRIBUTING.md`,
99
+ `SECURITY.md` (security@trendvidia.com), `GOVERNANCE.md`,
100
+ `CODE_OF_CONDUCT.md`, `.github/CODEOWNERS`, issue + PR templates,
101
+ Dependabot for GitHub Actions and pip.
102
+
103
+ ### Changed (breaking)
104
+
105
+ - **PXF parser stricter on key forms**, mirroring the upstream grammar
106
+ tightening in
107
+ [`trendvidia/protowire@8262bbb`](https://github.com/trendvidia/protowire/commit/8262bbb)
108
+ (`docs/grammar.ebnf`, `docs/draft-trendvidia-protowire-00.txt`):
109
+ - `=` (field assignment) and `{ … }` (submessage) now require an
110
+ identifier key. Inputs like `123 = 234` or `child { 123 = 123 }`
111
+ now raise `pxf.ParseError` with
112
+ `"field assignment with '=' requires an identifier key, got integer
113
+ (\"123\"); use ':' for map entries"`.
114
+ - `:` (map entry) is rejected at document top level — the document
115
+ represents a proto message, never a `map<K,V>`. Use `=` for
116
+ top-level field assignments. Map literals (`field = { 1: "x" }`)
117
+ still work because `:` remains valid inside `{ … }` blocks.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protowire-python
3
- Version: 0.70.0
3
+ Version: 0.75.0
4
4
  Summary: Python wrapper around protowire-cpp — PXF text, SBE binary, and envelope codecs.
5
5
  Keywords: protobuf,pxf,sbe,wire-format,fix,trading
6
6
  Author-Email: "TrendVidia, LLC" <open-source@trendvidia.com>
@@ -7,7 +7,7 @@ build-backend = "scikit_build_core.build"
7
7
  # 2021 CLI tool). The import name stays `import protowire` — these two
8
8
  # names are independent (cf. python-dateutil → import dateutil).
9
9
  name = "protowire-python"
10
- version = "0.70.0"
10
+ version = "0.75.0"
11
11
  description = "Python wrapper around protowire-cpp — PXF text, SBE binary, and envelope codecs."
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.10"
@@ -17,10 +17,13 @@
17
17
 
18
18
  #include <cstdint>
19
19
  #include <memory>
20
+ #include <optional>
20
21
  #include <span>
22
+ #include <sstream>
21
23
  #include <string>
22
24
  #include <string_view>
23
25
  #include <utility>
26
+ #include <variant>
24
27
  #include <vector>
25
28
 
26
29
  #include <google/protobuf/descriptor.h>
@@ -77,9 +80,59 @@ const pbuf::Descriptor* FindDescriptor(const SchemaBundle& s,
77
80
 
78
81
  // --- pxf bindings ---------------------------------------------------------
79
82
 
83
+ // CellToPyTuple converts a single AST cell value (or std::nullopt for an
84
+ // absent cell) into the FFI shape consumed by pxf.py — `None` for absent,
85
+ // `(kind, value)` otherwise. Used by PxfUnmarshalFull and PyTableReader for @table rows.
86
+ //
87
+ // kind values mirror the AST variant tags:
88
+ // "null" → nb::none()
89
+ // "string" → str (already-unescaped UTF-8)
90
+ // "int" → str (raw integer text — Python wrapper decides parse)
91
+ // "float" → str (raw float text)
92
+ // "bool" → bool
93
+ // "bytes" → bytes
94
+ // "ident" → str
95
+ // "timestamp" → str (raw RFC3339)
96
+ // "duration" → str (raw duration)
97
+ nb::object CellToPyTuple(const std::optional<protowire::pxf::ValuePtr>& cell) {
98
+ if (!cell.has_value()) return nb::none();
99
+ using namespace protowire::pxf;
100
+ return std::visit(
101
+ [](const auto& p) -> nb::object {
102
+ using T = std::decay_t<decltype(*p)>;
103
+ if constexpr (std::is_same_v<T, NullVal>) {
104
+ return nb::make_tuple(std::string("null"), nb::none());
105
+ } else if constexpr (std::is_same_v<T, StringVal>) {
106
+ return nb::make_tuple(std::string("string"), p->value);
107
+ } else if constexpr (std::is_same_v<T, IntVal>) {
108
+ return nb::make_tuple(std::string("int"), p->raw);
109
+ } else if constexpr (std::is_same_v<T, FloatVal>) {
110
+ return nb::make_tuple(std::string("float"), p->raw);
111
+ } else if constexpr (std::is_same_v<T, BoolVal>) {
112
+ return nb::make_tuple(std::string("bool"), p->value);
113
+ } else if constexpr (std::is_same_v<T, BytesVal>) {
114
+ return nb::make_tuple(
115
+ std::string("bytes"),
116
+ nb::bytes(reinterpret_cast<const char*>(p->value.data()), p->value.size()));
117
+ } else if constexpr (std::is_same_v<T, IdentVal>) {
118
+ return nb::make_tuple(std::string("ident"), p->name);
119
+ } else if constexpr (std::is_same_v<T, TimestampVal>) {
120
+ return nb::make_tuple(std::string("timestamp"), p->raw);
121
+ } else if constexpr (std::is_same_v<T, DurationVal>) {
122
+ return nb::make_tuple(std::string("duration"), p->raw);
123
+ } else {
124
+ // List / Block are rejected at @table cell-parse time, so this
125
+ // branch is unreachable for cells. Fall back to an ("unknown", None) tuple.
126
+ return nb::make_tuple(std::string("unknown"), nb::none());
127
+ }
128
+ },
129
+ *cell);
130
+ }
131
+
80
132
  // PXF text -> binary proto bytes.
81
133
  nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
82
- const std::string& full_name, bool discard_unknown) {
134
+ const std::string& full_name, bool discard_unknown,
135
+ bool skip_validate) {
83
136
  auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
84
137
  const auto* desc = FindDescriptor(schema, full_name);
85
138
  std::unique_ptr<pbuf::Message> msg(
@@ -87,6 +140,7 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
87
140
 
88
141
  protowire::pxf::UnmarshalOptions opts;
89
142
  opts.discard_unknown = discard_unknown;
143
+ opts.skip_validate = skip_validate;
90
144
  auto st = protowire::pxf::Unmarshal(
91
145
  std::string_view(text.c_str(), text.size()), msg.get(), opts);
92
146
  if (!st.ok()) {
@@ -99,10 +153,20 @@ nb::bytes PxfUnmarshal(nb::bytes text, nb::bytes fds_bytes,
99
153
  return nb::bytes(out.data(), out.size());
100
154
  }
101
155
 
102
- // PXF text -> (binary proto bytes, set_paths, null_paths).
103
- std::tuple<nb::bytes, std::vector<std::string>, std::vector<std::string>>
156
+ // Directive FFI shape: (name, prefixes, type, body, has_body, line, column).
157
+ using PyDirective = std::tuple<std::string, std::vector<std::string>, std::string,
158
+ nb::bytes, bool, int, int>;
159
+ // TableDirective FFI shape: (type, columns, rows) where rows is a list of
160
+ // lists of cells (each cell None or (kind, value); see CellToPyTuple).
161
+ using PyTableDirective = std::tuple<std::string, std::vector<std::string>,
162
+ std::vector<std::vector<nb::object>>>;
163
+
164
+ // PXF text -> (binary proto bytes, set_paths, null_paths, directives, tables).
165
+ std::tuple<nb::bytes, std::vector<std::string>, std::vector<std::string>,
166
+ std::vector<PyDirective>, std::vector<PyTableDirective>>
104
167
  PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
105
- const std::string& full_name, bool discard_unknown) {
168
+ const std::string& full_name, bool discard_unknown,
169
+ bool skip_validate) {
106
170
  auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
107
171
  const auto* desc = FindDescriptor(schema, full_name);
108
172
  std::unique_ptr<pbuf::Message> msg(
@@ -110,6 +174,7 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
110
174
 
111
175
  protowire::pxf::UnmarshalOptions opts;
112
176
  opts.discard_unknown = discard_unknown;
177
+ opts.skip_validate = skip_validate;
113
178
  auto r = protowire::pxf::UnmarshalFull(
114
179
  std::string_view(text.c_str(), text.size()), msg.get(), opts);
115
180
  if (!r.ok()) {
@@ -119,11 +184,141 @@ PxfUnmarshalFull(nb::bytes text, nb::bytes fds_bytes,
119
184
  if (!msg->SerializeToString(&out)) {
120
185
  throw nb::value_error("pxf.unmarshal_full: proto serialization failed");
121
186
  }
187
+ // Marshal directives.
188
+ std::vector<PyDirective> py_dirs;
189
+ py_dirs.reserve(r->Directives().size());
190
+ for (const auto& d : r->Directives()) {
191
+ py_dirs.emplace_back(
192
+ d.name, d.prefixes, d.type,
193
+ nb::bytes(d.body.data(), d.body.size()),
194
+ d.has_body, d.pos.line, d.pos.column);
195
+ }
196
+ // Marshal tables.
197
+ std::vector<PyTableDirective> py_tables;
198
+ py_tables.reserve(r->Tables().size());
199
+ for (const auto& t : r->Tables()) {
200
+ std::vector<std::vector<nb::object>> py_rows;
201
+ py_rows.reserve(t.rows.size());
202
+ for (const auto& row : t.rows) {
203
+ std::vector<nb::object> py_cells;
204
+ py_cells.reserve(row.cells.size());
205
+ for (const auto& cell : row.cells) py_cells.push_back(CellToPyTuple(cell));
206
+ py_rows.push_back(std::move(py_cells));
207
+ }
208
+ py_tables.emplace_back(t.type, t.columns, std::move(py_rows));
209
+ }
122
210
  return {nb::bytes(out.data(), out.size()),
123
211
  r->SetFields(),
124
- r->NullFields()};
212
+ r->NullFields(),
213
+ std::move(py_dirs),
214
+ std::move(py_tables)};
215
+ }
216
+
217
+ // PXF schema reserved-name check (draft §3.13). Returns a list of
218
+ // (kind, element, name, file) tuples. Empty list ⇒ conformant schema.
219
+ // kind values: "field" / "oneof" / "enum_value".
220
+ std::vector<std::tuple<std::string, std::string, std::string, std::string>>
221
+ PxfValidateDescriptor(nb::bytes fds_bytes, const std::string& full_name) {
222
+ auto schema = BuildSchema(std::string_view(fds_bytes.c_str(), fds_bytes.size()));
223
+ const auto* desc = FindDescriptor(schema, full_name);
224
+ auto vs = protowire::pxf::ValidateDescriptor(desc);
225
+ std::vector<std::tuple<std::string, std::string, std::string, std::string>> out;
226
+ out.reserve(vs.size());
227
+ for (const auto& v : vs) {
228
+ std::string kind;
229
+ switch (v.kind) {
230
+ case protowire::pxf::ViolationKind::kField: kind = "field"; break;
231
+ case protowire::pxf::ViolationKind::kOneof: kind = "oneof"; break;
232
+ case protowire::pxf::ViolationKind::kEnumValue: kind = "enum_value"; break;
233
+ }
234
+ out.emplace_back(std::move(kind), v.element, v.name, v.file);
235
+ }
236
+ return out;
125
237
  }
126
238
 
239
+ // --- PyTableReader: streaming @table consumption -------------------------
240
+ //
241
+ // Wraps protowire::pxf::TableReader. The reader takes a std::istream*; we
242
+ // hold the istringstream alongside the reader so its lifetime is bound to
243
+ // the Python object. Input is provided as bytes (PR-2 scope); a file-like
244
+ // streambuf bridge is a possible follow-up.
245
+ class PyTableReader {
246
+ public:
247
+ static std::unique_ptr<PyTableReader> FromBytes(nb::bytes data) {
248
+ auto out = std::unique_ptr<PyTableReader>(new PyTableReader());
249
+ out->stream_ = std::make_unique<std::istringstream>(
250
+ std::string(data.c_str(), data.size()));
251
+ auto tr = protowire::pxf::TableReader::Create(out->stream_.get());
252
+ if (!tr.ok()) {
253
+ throw nb::value_error(("pxf.TableReader: " + tr.status().ToString()).c_str());
254
+ }
255
+ out->reader_ = std::move(*tr);
256
+ // Marshal the side-channel directives once at construction; they're
257
+ // fixed for the reader's lifetime.
258
+ for (const auto& d : out->reader_->Directives()) {
259
+ out->directives_.emplace_back(
260
+ d.name, d.prefixes, d.type,
261
+ nb::bytes(d.body.data(), d.body.size()),
262
+ d.has_body, d.pos.line, d.pos.column);
263
+ }
264
+ return out;
265
+ }
266
+
267
+ const std::string& Type() const { return reader_->Type(); }
268
+ const std::vector<std::string>& Columns() const { return reader_->Columns(); }
269
+ const std::vector<PyDirective>& Directives() const { return directives_; }
270
+ bool Done() const { return reader_->Done(); }
271
+
272
+ // Returns the next row as a Python list of cells, or None at EOF.
273
+ // Raises ValueError on parse error.
274
+ nb::object NextOrNone() {
275
+ if (reader_->Done()) return nb::none();
276
+ protowire::pxf::TableRow row;
277
+ auto s = reader_->Next(&row);
278
+ if (!s.ok()) {
279
+ throw nb::value_error(("pxf.TableReader.next: " + s.ToString()).c_str());
280
+ }
281
+ if (reader_->Done()) return nb::none();
282
+ return RowToList(row);
283
+ }
284
+
285
+ // Iterator protocol: __next__ raises StopIteration at EOF.
286
+ nb::object Next() {
287
+ if (reader_->Done()) throw nb::stop_iteration();
288
+ protowire::pxf::TableRow row;
289
+ auto s = reader_->Next(&row);
290
+ if (!s.ok()) {
291
+ throw nb::value_error(("pxf.TableReader.next: " + s.ToString()).c_str());
292
+ }
293
+ if (reader_->Done()) throw nb::stop_iteration();
294
+ return RowToList(row);
295
+ }
296
+
297
+ // Drains the remaining buffered + underlying bytes. Only meaningful
298
+ // after Done(); the Python wrapper exposes this as a method that
299
+ // returns bytes so callers can chain a second TableReader on
300
+ // multi-@table documents.
301
+ nb::bytes Tail() {
302
+ auto t = reader_->Tail();
303
+ std::ostringstream buf;
304
+ buf << t->rdbuf();
305
+ std::string s = buf.str();
306
+ return nb::bytes(s.data(), s.size());
307
+ }
308
+
309
+ private:
310
+ static nb::object RowToList(const protowire::pxf::TableRow& row) {
311
+ std::vector<nb::object> cells;
312
+ cells.reserve(row.cells.size());
313
+ for (const auto& cell : row.cells) cells.push_back(CellToPyTuple(cell));
314
+ return nb::cast(cells);
315
+ }
316
+
317
+ std::unique_ptr<std::istringstream> stream_;
318
+ std::unique_ptr<protowire::pxf::TableReader> reader_;
319
+ std::vector<PyDirective> directives_;
320
+ };
321
+
127
322
  // Binary proto bytes -> PXF text.
128
323
  std::string PxfMarshal(nb::bytes msg_bytes, nb::bytes fds_bytes,
129
324
  const std::string& full_name) {
@@ -301,10 +496,22 @@ NB_MODULE(_protowire, m) {
301
496
  m.doc() = "protowire native extension (nanobind shim around protowire-cpp)";
302
497
 
303
498
  m.def("pxf_unmarshal", &PxfUnmarshal, "text"_a, "fds"_a, "full_name"_a,
304
- "discard_unknown"_a = false);
499
+ "discard_unknown"_a = false, "skip_validate"_a = false);
305
500
  m.def("pxf_unmarshal_full", &PxfUnmarshalFull, "text"_a, "fds"_a,
306
- "full_name"_a, "discard_unknown"_a = false);
501
+ "full_name"_a, "discard_unknown"_a = false, "skip_validate"_a = false);
307
502
  m.def("pxf_marshal", &PxfMarshal, "msg_bytes"_a, "fds"_a, "full_name"_a);
503
+ m.def("pxf_validate_descriptor", &PxfValidateDescriptor, "fds"_a, "full_name"_a);
504
+
505
+ nb::class_<PyTableReader>(m, "PxfTableReader")
506
+ .def_static("from_bytes", &PyTableReader::FromBytes, "data"_a)
507
+ .def_prop_ro("type", &PyTableReader::Type)
508
+ .def_prop_ro("columns", &PyTableReader::Columns)
509
+ .def_prop_ro("directives", &PyTableReader::Directives)
510
+ .def_prop_ro("done", &PyTableReader::Done)
511
+ .def("next_or_none", &PyTableReader::NextOrNone)
512
+ .def("tail", &PyTableReader::Tail)
513
+ .def("__iter__", [](PyTableReader& self) -> PyTableReader& { return self; })
514
+ .def("__next__", &PyTableReader::Next);
308
515
 
309
516
  nb::class_<SbeCodec>(m, "SbeCodec")
310
517
  .def_static("create", &SbeCodec::Create, "fds"_a, "file_names"_a)
@@ -5,4 +5,4 @@
5
5
  from . import envelope, pxf, sbe
6
6
 
7
7
  __all__ = ["pxf", "sbe", "envelope"]
8
- __version__ = "0.70.0"
8
+ __version__ = "0.75.0"