cesr-ts 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +36 -3
  2. package/esm/src/adapters/async-iterable.js +9 -9
  3. package/esm/src/annotate/annotator.js +18 -10
  4. package/esm/src/annotate/comments.js +3 -6
  5. package/esm/src/annotate/render.js +123 -24
  6. package/esm/src/bench/parser-benchmark.js +134 -0
  7. package/esm/src/core/bytes.js +6 -0
  8. package/esm/src/core/errors.js +24 -0
  9. package/esm/src/core/parser-attachment-collector.js +154 -0
  10. package/esm/src/core/parser-constants.js +74 -0
  11. package/esm/src/core/parser-deferred-frames.js +73 -0
  12. package/esm/src/core/parser-engine.js +128 -505
  13. package/esm/src/core/parser-frame-parser.js +643 -0
  14. package/esm/src/core/parser-policy.js +137 -0
  15. package/esm/src/core/parser-stream-state.js +62 -0
  16. package/esm/src/core/recovery-diagnostics.js +25 -0
  17. package/esm/src/index.js +4 -0
  18. package/esm/src/parser/attachment-fallback-policy.js +142 -0
  19. package/esm/src/parser/group-dispatch.js +547 -233
  20. package/esm/src/primitives/counter.js +4 -5
  21. package/esm/src/primitives/mapper.js +126 -45
  22. package/esm/src/primitives/matter.js +1 -1
  23. package/esm/src/router/router-stub.js +6 -6
  24. package/esm/src/serder/serder.js +44 -7
  25. package/esm/src/serder/smell.js +2 -1
  26. package/esm/src/tables/counter-version-registry.js +201 -0
  27. package/esm/src/version.js +2 -2
  28. package/package.json +3 -1
  29. package/types/_dnt.polyfills.d.ts +5 -0
  30. package/types/_dnt.polyfills.d.ts.map +1 -1
  31. package/types/src/adapters/async-iterable.d.ts +2 -2
  32. package/types/src/adapters/async-iterable.d.ts.map +1 -1
  33. package/types/src/adapters/effection.d.ts +2 -2
  34. package/types/src/adapters/effection.d.ts.map +1 -1
  35. package/types/src/annotate/annotator.d.ts.map +1 -1
  36. package/types/src/annotate/comments.d.ts.map +1 -1
  37. package/types/src/annotate/render.d.ts +8 -2
  38. package/types/src/annotate/render.d.ts.map +1 -1
  39. package/types/src/annotate/types.d.ts +2 -2
  40. package/types/src/annotate/types.d.ts.map +1 -1
  41. package/types/src/bench/parser-benchmark.d.ts +70 -0
  42. package/types/src/bench/parser-benchmark.d.ts.map +1 -0
  43. package/types/src/core/bytes.d.ts +6 -0
  44. package/types/src/core/bytes.d.ts.map +1 -1
  45. package/types/src/core/errors.d.ts +10 -0
  46. package/types/src/core/errors.d.ts.map +1 -1
  47. package/types/src/core/parser-attachment-collector.d.ts +51 -0
  48. package/types/src/core/parser-attachment-collector.d.ts.map +1 -0
  49. package/types/src/core/parser-constants.d.ts +30 -0
  50. package/types/src/core/parser-constants.d.ts.map +1 -0
  51. package/types/src/core/parser-deferred-frames.d.ts +38 -0
  52. package/types/src/core/parser-deferred-frames.d.ts.map +1 -0
  53. package/types/src/core/parser-engine.d.ts +53 -44
  54. package/types/src/core/parser-engine.d.ts.map +1 -1
  55. package/types/src/core/parser-frame-parser.d.ts +89 -0
  56. package/types/src/core/parser-frame-parser.d.ts.map +1 -0
  57. package/types/src/core/parser-policy.d.ts +27 -0
  58. package/types/src/core/parser-policy.d.ts.map +1 -0
  59. package/types/src/core/parser-stream-state.d.ts +30 -0
  60. package/types/src/core/parser-stream-state.d.ts.map +1 -0
  61. package/types/src/core/recovery-diagnostics.d.ts +59 -0
  62. package/types/src/core/recovery-diagnostics.d.ts.map +1 -0
  63. package/types/src/core/types.d.ts +61 -7
  64. package/types/src/core/types.d.ts.map +1 -1
  65. package/types/src/index.d.ts +4 -0
  66. package/types/src/index.d.ts.map +1 -1
  67. package/types/src/parser/attachment-fallback-policy.d.ts +78 -0
  68. package/types/src/parser/attachment-fallback-policy.d.ts.map +1 -0
  69. package/types/src/parser/group-dispatch.d.ts +85 -15
  70. package/types/src/parser/group-dispatch.d.ts.map +1 -1
  71. package/types/src/primitives/aggor.d.ts +2 -2
  72. package/types/src/primitives/aggor.d.ts.map +1 -1
  73. package/types/src/primitives/blinder.d.ts +2 -2
  74. package/types/src/primitives/blinder.d.ts.map +1 -1
  75. package/types/src/primitives/counter.d.ts.map +1 -1
  76. package/types/src/primitives/mapper.d.ts +44 -1
  77. package/types/src/primitives/mapper.d.ts.map +1 -1
  78. package/types/src/primitives/mediar.d.ts +2 -2
  79. package/types/src/primitives/mediar.d.ts.map +1 -1
  80. package/types/src/primitives/sealer.d.ts +2 -2
  81. package/types/src/primitives/sealer.d.ts.map +1 -1
  82. package/types/src/router/router-stub.d.ts +5 -5
  83. package/types/src/router/router-stub.d.ts.map +1 -1
  84. package/types/src/serder/serder.d.ts +2 -2
  85. package/types/src/serder/serder.d.ts.map +1 -1
  86. package/types/src/serder/serdery.d.ts +2 -2
  87. package/types/src/serder/serdery.d.ts.map +1 -1
  88. package/types/src/serder/smell.d.ts.map +1 -1
  89. package/types/src/tables/counter-version-registry.d.ts +90 -0
  90. package/types/src/tables/counter-version-registry.d.ts.map +1 -0
  91. package/types/src/version.d.ts +2 -2
package/README.md CHANGED
@@ -9,14 +9,17 @@ CESR annotation.
9
9
  npm install cesr-ts
10
10
  ```
11
11
 
12
- ## Library usage
12
+ ## Library quick start
13
13
 
14
14
  ```ts
15
- import { createParser } from "cesr-ts";
15
+ import { annotate, createParser } from "cesr-ts";
16
16
 
17
17
  const parser = createParser();
18
- const out = parser.feed(new TextEncoder().encode("..."));
18
+ const out = parser.feed(new TextEncoder().encode("...CESR..."));
19
19
  const last = parser.flush();
20
+
21
+ const text = '{"v":"KERI10JSON00002e_","t":"rpy","d":"Eabc"}';
22
+ const annotated = annotate(text, { domainHint: "txt", pretty: true });
20
23
  ```
21
24
 
22
25
  ## CLI usage
@@ -30,3 +33,33 @@ npx cesr-annotate --in mystream.cesr --pretty
30
33
  ```bash
31
34
  deno task cesr:annotate --in mystream.cesr --pretty
32
35
  ```
36
+
37
+ ## Benchmarking (from source)
38
+
39
+ Run standard parser benchmark baselines:
40
+
41
+ ```bash
42
+ deno task bench:cesr
43
+ ```
44
+
45
+ Run a benchmark on an arbitrary stream:
46
+
47
+ ```bash
48
+ deno task bench:cesr:parser --in ../../samples/cesr-streams/CESR_1_0-oor-auth-vc.cesr
49
+ cat ../../samples/cesr-streams/CESR_1_0-oor-auth-vc.cesr | deno task bench:cesr:parser --iterations 20 --warmup 3
50
+ ```
51
+
52
+ ## Using cesr-ts through tufa
53
+
54
+ `keri-ts` exposes CESR annotation through `tufa annotate`, which is often the
55
+ easiest on-ramp:
56
+
57
+ ```bash
58
+ tufa version
59
+ tufa annotate --in mystream.cesr --pretty
60
+ ```
61
+
62
+ ## License
63
+
64
+ Licensed under the Apache License, Version 2.0 (`Apache-2.0`). See the
65
+ top-level `LICENSE` file in this repository.
package/esm/src/adapters/async-iterable.js CHANGED
@@ -2,16 +2,16 @@ import { createParser } from "../core/parser-engine.js";
2
2
  export async function* toAsyncFrames(source, options = {}) {
3
3
  const parser = createParser(options);
4
4
  for await (const chunk of source) {
5
- const emissions = parser.feed(chunk);
6
- for (const emission of emissions) {
7
- if (emission.type === "error")
8
- throw emission.error;
9
- yield emission.frame;
5
+ const frames = parser.feed(chunk);
6
+ for (const frame of frames) {
7
+ if (frame.type === "error")
8
+ throw frame.error;
9
+ yield frame.frame;
10
10
  }
11
11
  }
12
- for (const emission of parser.flush()) {
13
- if (emission.type === "error")
14
- throw emission.error;
15
- yield emission.frame;
12
+ for (const frame of parser.flush()) {
13
+ if (frame.type === "error")
14
+ throw frame.error;
15
+ yield frame.frame;
16
16
  }
17
17
  }
package/esm/src/annotate/annotator.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { parseBytes } from "../core/parser-engine.js";
2
- import { renderAnnotatedFrames } from "./render.js";
2
+ import { renderAnnotatedFrames, renderWrapperAnnotatedStream, } from "./render.js";
3
3
  const DEFAULT_OPTIONS = Object.freeze({
4
4
  commentMode: "inline",
5
5
  indent: 2,
@@ -14,25 +14,33 @@ function resolveOptions(options) {
14
14
  ...options,
15
15
  };
16
16
  }
17
- function framesOrThrow(emissions) {
18
- const frames = [];
19
- for (const emission of emissions) {
20
- if (emission.type === "error") {
21
- throw emission.error;
17
+ function parsedFramesOrThrow(frames) {
18
+ const parsedFrames = [];
19
+ for (const frame of frames) {
20
+ if (frame.type === "error") {
21
+ throw frame.error;
22
22
  }
23
- frames.push(emission.frame);
23
+ parsedFrames.push(frame.frame);
24
24
  }
25
- return frames;
25
+ return parsedFrames;
26
26
  }
27
27
  export function annotateFrames(input, options) {
28
28
  const opts = resolveOptions(options);
29
29
  const bytes = typeof input === "string"
30
30
  ? new TextEncoder().encode(input)
31
31
  : input;
32
- const frames = framesOrThrow(parseBytes(bytes));
32
+ const frames = parsedFramesOrThrow(parseBytes(bytes));
33
33
  return renderAnnotatedFrames(frames, opts);
34
34
  }
35
35
  export function annotate(input, options) {
36
- const frames = annotateFrames(input, options);
36
+ const opts = resolveOptions(options);
37
+ const bytes = typeof input === "string"
38
+ ? new TextEncoder().encode(input)
39
+ : input;
40
+ const wrapperAnnotated = renderWrapperAnnotatedStream(bytes, opts);
41
+ if (wrapperAnnotated !== null) {
42
+ return wrapperAnnotated;
43
+ }
44
+ const frames = annotateFrames(bytes, opts);
37
45
  return frames.map((frame) => frame.lines.join("\n")).join("\n");
38
46
  }
package/esm/src/annotate/comments.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { COUNTER_CODE_NAMES_V1, COUNTER_CODE_NAMES_V2, } from "../tables/counter.tables.generated.js";
2
2
  import { MATTER_CODE_NAMES } from "../tables/matter.tables.generated.js";
3
+ import { resolveCounterCodeNameTable } from "../tables/counter-version-registry.js";
3
4
  const NATIVE_FIELD_LABELS = Object.freeze({
4
5
  v: "version string",
5
6
  t: "ilk",
@@ -26,12 +27,8 @@ export function counterCodeName(code) {
26
27
  return "Counter";
27
28
  }
28
29
  export function counterCodeNameForVersion(code, version) {
29
- if (version.major >= 2) {
30
- return COUNTER_CODE_NAMES_V2[code] ??
31
- "Counter";
32
- }
33
- return COUNTER_CODE_NAMES_V1[code] ??
34
- "Counter";
30
+ const table = resolveCounterCodeNameTable(version);
31
+ return table[code] ?? "Counter";
35
32
  }
36
33
  export function matterCodeName(code) {
37
34
  return MATTER_CODE_NAMES[code] ?? "Matter";
package/esm/src/annotate/render.js CHANGED
@@ -1,10 +1,12 @@
1
+ import { parseBytes } from "../core/parser-engine.js";
1
2
  import { DeserializeError, GroupSizeError, ShortageError, UnknownCodeError, } from "../core/errors.js";
2
3
  import { sniff } from "../parser/cold-start.js";
3
4
  import { parseCounter } from "../primitives/counter.js";
4
5
  import { parseMatter } from "../primitives/matter.js";
5
6
  import { parseIndexer } from "../primitives/indexer.js";
6
7
  import { parseAttachmentDispatchCompat } from "../parser/group-dispatch.js";
7
- import { counterCodeNameForVersion, matterCodeName, nativeLabelName, } from "./comments.js";
8
+ import { counterCodeName, counterCodeNameForVersion, matterCodeName, nativeLabelName, } from "./comments.js";
9
+ import { b64ToInt, intToB64 } from "../core/bytes.js";
8
10
  const TEXT_DECODER = new TextDecoder();
9
11
  const TEXT_ENCODER = new TextEncoder();
10
12
  const OPAQUE_TOKEN_COMMENT = "opaque token";
@@ -15,6 +17,12 @@ const WRAPPER_GROUP_NAMES = new Set([
15
17
  "BodyWithAttachmentGroup",
16
18
  "BigBodyWithAttachmentGroup",
17
19
  ]);
20
+ const TOP_LEVEL_WRAPPER_GROUP_NAMES = new Set([
21
+ "GenericGroup",
22
+ "BigGenericGroup",
23
+ "BodyWithAttachmentGroup",
24
+ "BigBodyWithAttachmentGroup",
25
+ ]);
18
26
  function toHex(bytes) {
19
27
  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
20
28
  }
@@ -109,27 +117,37 @@ function parseCounterCompat(input, version, domain) {
109
117
  return parseCounter(input, alternate, domain);
110
118
  }
111
119
  }
120
+ function decodeVersionCounter(counter) {
121
+ const triplet = counter.qb64.length >= 3
122
+ ? counter.qb64.slice(-3)
123
+ : intToB64(counter.count, 3);
124
+ const majorRaw = b64ToInt(triplet[0] ?? "A");
125
+ const minorRaw = b64ToInt(triplet[1] ?? "A");
126
+ return {
127
+ major: majorRaw === 1 ? 1 : 2,
128
+ minor: minorRaw,
129
+ };
130
+ }
112
131
  function renderGroupItems(lines, items, version, indent, options) {
113
132
  for (const item of items) {
114
- if (typeof item === "string") {
115
- const parsed = describeToken(item, "txt", version);
116
- emitLine(lines, item, parsed.comment, indent, options);
133
+ if (item.kind === "qb64") {
134
+ const parsed = describeToken(item.qb64, "txt", version);
135
+ const comment = item.opaque
136
+ ? OPAQUE_WRAPPER_PAYLOAD_COMMENT
137
+ : parsed.comment;
138
+ emitLine(lines, item.qb64, comment, indent, options);
117
139
  continue;
118
140
  }
119
- if (item instanceof Uint8Array) {
120
- emitLine(lines, `0x${toHex(item)}`, "raw qb2 quadlet fragment", indent, options);
141
+ if (item.kind === "qb2") {
142
+ emitLine(lines, `0x${toHex(item.qb2)}`, item.opaque ? OPAQUE_WRAPPER_PAYLOAD_COMMENT : "raw qb2 triplet token", indent, options);
121
143
  continue;
122
144
  }
123
- if (Array.isArray(item)) {
124
- renderGroupItems(lines, item, version, indent + options.indent, options);
145
+ if (item.kind === "tuple") {
146
+ renderGroupItems(lines, item.items, version, indent + options.indent, options);
125
147
  continue;
126
148
  }
127
- if (item && typeof item === "object") {
128
- const nested = item;
129
- if (typeof nested.code === "string" && typeof nested.name === "string") {
130
- emitLine(lines, `${nested.code}`, `${nested.name} nested group`, indent, options);
131
- }
132
- continue;
149
+ if (item.kind === "group") {
150
+ emitLine(lines, `${item.code}`, `${item.name} nested group`, indent, options);
133
151
  }
134
152
  }
135
153
  }
@@ -187,14 +205,14 @@ function renderAttachmentGroupRaw(lines, raw, version, indent, options) {
187
205
  return parsed.consumed;
188
206
  }
189
207
  function renderNativeBody(lines, frame, options, version) {
190
- const raw = frame.serder.raw;
208
+ const raw = frame.body.raw;
191
209
  const domain = asDomain(raw);
192
210
  if (domain !== "txt" && domain !== "bny") {
193
211
  return;
194
212
  }
195
213
  const counter = parseCounterCompat(raw, version, domain);
196
214
  emitLine(lines, counter.qb64, `${counterCodeNameForVersion(counter.code, version)} count=${counter.count}`, 0, options);
197
- for (const field of frame.serder.native?.fields ?? []) {
215
+ for (const field of frame.body.native?.fields ?? []) {
198
216
  const label = nativeLabelName(field.label);
199
217
  const comment = label
200
218
  ? `${label} (${matterCodeName(field.code)})`
@@ -203,9 +221,15 @@ function renderNativeBody(lines, frame, options, version) {
203
221
  }
204
222
  }
205
223
  function renderMessageBody(lines, frame, options) {
206
- const rawBody = TEXT_DECODER.decode(frame.serder.raw);
224
+ const rawBody = TEXT_DECODER.decode(frame.body.raw);
225
+ const isOpaqueCesrBody = frame.body.kind === "CESR" &&
226
+ frame.body.ked === null;
227
+ if (isOpaqueCesrBody) {
228
+ emitLine(lines, rawBody, `OPAQUE CESR body (non-serder fallback, hex=${toHex(frame.body.raw)})`, 0, options);
229
+ return;
230
+ }
207
231
  let body = rawBody;
208
- if (options.pretty && frame.serder.kind === "JSON") {
232
+ if (options.pretty && frame.body.kind === "JSON") {
209
233
  try {
210
234
  body = JSON.stringify(JSON.parse(rawBody), null, 2);
211
235
  }
@@ -218,17 +242,17 @@ function renderMessageBody(lines, frame, options) {
218
242
  }
219
243
  const info = [
220
244
  `SERDER`,
221
- frame.serder.proto,
222
- frame.serder.kind,
223
- frame.serder.ilk ? `ilk=${frame.serder.ilk}` : null,
224
- frame.serder.said ? `said=${frame.serder.said}` : null,
245
+ frame.body.proto,
246
+ frame.body.kind,
247
+ frame.body.ilk ? `ilk=${frame.body.ilk}` : null,
248
+ frame.body.said ? `said=${frame.body.said}` : null,
225
249
  ].filter(Boolean).join(" ");
226
250
  emitLine(lines, body, info, 0, options);
227
251
  }
228
252
  function renderFrame(frame, index, options) {
229
253
  const lines = [];
230
- const version = frame.serder.gvrsn ?? frame.serder.pvrsn;
231
- const domain = asDomain(frame.serder.raw);
254
+ const version = frame.body.gvrsn ?? frame.body.pvrsn;
255
+ const domain = asDomain(frame.body.raw);
232
256
  if (domain === "txt" || domain === "bny") {
233
257
  renderNativeBody(lines, frame, options, version);
234
258
  }
@@ -240,6 +264,81 @@ function renderFrame(frame, index, options) {
240
264
  }
241
265
  return { index, frame, lines };
242
266
  }
267
+ function renderFrameChunk(lines, input, indent, options) {
268
+ const parsed = parseBytes(input);
269
+ const frames = [];
270
+ for (const event of parsed) {
271
+ if (event.type === "error") {
272
+ throw event.error;
273
+ }
274
+ frames.push(event.frame);
275
+ }
276
+ const rendered = renderAnnotatedFrames(frames, options);
277
+ for (const frame of rendered) {
278
+ for (const line of frame.lines) {
279
+ lines.push(`${spaces(indent)}${line}`);
280
+ }
281
+ }
282
+ }
283
+ function renderWrapperAwareStream(lines, input, inheritedVersion, indent, options) {
284
+ let offset = 0;
285
+ let activeVersion = inheritedVersion;
286
+ let usedWrapper = false;
287
+ while (offset < input.length) {
288
+ const slice = input.slice(offset);
289
+ const domain = asDomain(slice);
290
+ if (domain !== "txt" && domain !== "bny") {
291
+ if (!usedWrapper)
292
+ return false;
293
+ renderFrameChunk(lines, slice, indent, options);
294
+ return true;
295
+ }
296
+ const counter = parseCounterCompat(slice, activeVersion, domain);
297
+ const headerSize = domain === "bny" ? counter.fullSizeB2 : counter.fullSize;
298
+ const name = counterCodeName(counter.code);
299
+ if (name === "KERIACDCGenusVersion") {
300
+ emitLine(lines, counter.qb64, `${name} count=${counter.count}`, indent, options);
301
+ activeVersion = decodeVersionCounter(counter);
302
+ offset += headerSize;
303
+ usedWrapper = true;
304
+ continue;
305
+ }
306
+ if (!TOP_LEVEL_WRAPPER_GROUP_NAMES.has(name)) {
307
+ if (!usedWrapper)
308
+ return false;
309
+ renderFrameChunk(lines, slice, indent, options);
310
+ return true;
311
+ }
312
+ const unit = domain === "bny" ? 3 : 4;
313
+ const payloadSize = counter.count * unit;
314
+ const total = headerSize + payloadSize;
315
+ if (slice.length < total) {
316
+ throw new ShortageError(total, slice.length);
317
+ }
318
+ emitLine(lines, counter.qb64, `${name} count=${counter.count}`, indent, options);
319
+ const payload = slice.slice(headerSize, total);
320
+ const nestedHandled = renderWrapperAwareStream(lines, payload, activeVersion, indent + options.indent, options);
321
+ if (!nestedHandled) {
322
+ renderFrameChunk(lines, payload, indent + options.indent, options);
323
+ }
324
+ offset += total;
325
+ usedWrapper = true;
326
+ }
327
+ return usedWrapper;
328
+ }
329
+ /**
330
+ * Render stream-level wrapper groups (GenericGroup/BodyWithAttachmentGroup)
331
+ * so denot round-trips preserve wrapper counters. Returns null when input does
332
+ * not start in a wrapper-oriented domain and caller should use frame rendering.
333
+ */
334
+ export function renderWrapperAnnotatedStream(input, options) {
335
+ const lines = [];
336
+ const rendered = renderWrapperAwareStream(lines, input, { major: 2, minor: 0 }, 0, options);
337
+ if (!rendered) {
338
+ return null;
339
+ }
340
+ return lines.join("\n");
341
+ }
243
342
  /** Render parsed CESR frames into line-oriented, human-annotated text blocks. */
244
343
  export function renderAnnotatedFrames(frames, options) {
245
344
  return frames.map((frame, index) => renderFrame(frame, index + 1, options));
package/esm/src/bench/parser-benchmark.js ADDED
@@ -0,0 +1,134 @@
1
+ import { createParser } from "../core/parser-engine.js";
2
+ function asPositiveInteger(value, fallback) {
3
+ if (typeof value !== "number" || !Number.isFinite(value)) {
4
+ return fallback;
5
+ }
6
+ return Math.max(0, Math.trunc(value));
7
+ }
8
+ /**
9
+ * Normalize chunking to a legal parser-feed size.
10
+ *
11
+ * Boundary rules:
12
+ * - `<= 0` means full-stream feed for each iteration.
13
+ * - values larger than stream size collapse to full-stream feed.
14
+ */
15
+ function normalizeChunkSize(chunkSize, totalBytes) {
16
+ const normalized = asPositiveInteger(chunkSize, 0);
17
+ if (normalized <= 0 || normalized >= totalBytes) {
18
+ return totalBytes;
19
+ }
20
+ return normalized;
21
+ }
22
+ /**
23
+ * Produce deterministic feed slices for one parse run.
24
+ *
25
+ * Invariant:
26
+ * - slices preserve original stream ordering.
27
+ * - slices are view-backed (`subarray`) to avoid benchmark-side copy noise.
28
+ */
29
+ function buildChunks(input, chunkSize) {
30
+ if (chunkSize >= input.length) {
31
+ return [input];
32
+ }
33
+ const chunks = [];
34
+ for (let offset = 0; offset < input.length; offset += chunkSize) {
35
+ chunks.push(input.subarray(offset, Math.min(offset + chunkSize, input.length)));
36
+ }
37
+ return chunks;
38
+ }
39
+ /**
40
+ * Count parser frame/error events for one parser emission batch.
41
+ */
42
+ function countFrames(events) {
43
+ let frameCount = 0;
44
+ let errorCount = 0;
45
+ for (const event of events) {
46
+ if (event.type === "frame") {
47
+ frameCount++;
48
+ continue;
49
+ }
50
+ errorCount++;
51
+ }
52
+ return { frameCount, errorCount };
53
+ }
54
+ /**
55
+ * Execute one complete parser pass over the provided stream.
56
+ *
57
+ * Boundary contract:
58
+ * - Always feeds configured chunks and then flushes once.
59
+ * - Returns event counts only; callers own timing and aggregation.
60
+ */
61
+ export function parseCesrStreamOnce(input, options = {}) {
62
+ const chunkSize = normalizeChunkSize(options.chunkSize, input.length);
63
+ const parser = createParser(options.parserOptions);
64
+ let frameCount = 0;
65
+ let errorCount = 0;
66
+ for (const chunk of buildChunks(input, chunkSize)) {
67
+ const summary = countFrames(parser.feed(chunk));
68
+ frameCount += summary.frameCount;
69
+ errorCount += summary.errorCount;
70
+ }
71
+ const flushSummary = countFrames(parser.flush());
72
+ frameCount += flushSummary.frameCount;
73
+ errorCount += flushSummary.errorCount;
74
+ return { frameCount, errorCount };
75
+ }
76
+ /**
77
+ * Benchmark parser throughput across warmup + measured iterations.
78
+ *
79
+ * Invariants:
80
+ * - warmup runs are excluded from final metrics.
81
+ * - when `failOnParseError` is true, any parse error fails fast to prevent
82
+ * silently benchmarking degraded correctness.
83
+ */
84
+ export function benchmarkCesrParser(input, options = {}) {
85
+ if (input.length === 0) {
86
+ throw new Error("Benchmark input stream must not be empty");
87
+ }
88
+ const iterations = asPositiveInteger(options.iterations, 50);
89
+ if (iterations <= 0) {
90
+ throw new Error("iterations must be greater than 0");
91
+ }
92
+ const warmupIterations = asPositiveInteger(options.warmupIterations, 5);
93
+ const chunkSize = normalizeChunkSize(options.chunkSize, input.length);
94
+ const failOnParseError = options.failOnParseError ?? true;
95
+ for (let i = 0; i < warmupIterations; i++) {
96
+ parseCesrStreamOnce(input, {
97
+ chunkSize,
98
+ parserOptions: options.parserOptions,
99
+ });
100
+ }
101
+ let totalFrames = 0;
102
+ let totalErrors = 0;
103
+ const startMs = performance.now();
104
+ for (let i = 0; i < iterations; i++) {
105
+ const run = parseCesrStreamOnce(input, {
106
+ chunkSize,
107
+ parserOptions: options.parserOptions,
108
+ });
109
+ totalFrames += run.frameCount;
110
+ totalErrors += run.errorCount;
111
+ if (failOnParseError && run.errorCount > 0) {
112
+ throw new Error(`Benchmark run produced parse errors (run=${i + 1}, errorCount=${run.errorCount})`);
113
+ }
114
+ }
115
+ const elapsedMs = Math.max(performance.now() - startMs, Number.EPSILON);
116
+ const totalBytes = input.length * iterations;
117
+ const throughputBytesPerSec = (totalBytes * 1000) / elapsedMs;
118
+ const throughputMiBPerSec = throughputBytesPerSec / (1024 * 1024);
119
+ const framesPerSec = (totalFrames * 1000) / elapsedMs;
120
+ return {
121
+ iterations,
122
+ warmupIterations,
123
+ chunkSize,
124
+ bytesPerIteration: input.length,
125
+ totalBytes,
126
+ totalFrames,
127
+ totalErrors,
128
+ elapsedMs,
129
+ avgIterationMs: elapsedMs / iterations,
130
+ throughputBytesPerSec,
131
+ throughputMiBPerSec,
132
+ framesPerSec,
133
+ };
134
+ }
package/esm/src/core/bytes.js CHANGED
@@ -1,6 +1,12 @@
1
1
  export const encoder = new TextEncoder();
2
2
  export const decoder = new TextDecoder();
3
3
  const B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
4
+ /**
5
+ * Concatenates Uint8Array byte arrays into one byte array
6
+ *
7
+ * @param chunks chunks to concatenate
8
+ * @returns concatenated bytearray
9
+ */
4
10
  export function concatBytes(...chunks) {
5
11
  const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
6
12
  const out = new Uint8Array(total);
package/esm/src/core/errors.js CHANGED
@@ -43,3 +43,27 @@ export class GroupSizeError extends ParserError {
43
43
  }
44
44
  export class DeserializeError extends ParserError {
45
45
  }
46
+ /** Failure while constructing syntax artifacts from token bytes. */
47
+ export class SyntaxParseError extends ParserError {
48
+ constructor(message, cause, offset, context) {
49
+ super(message, offset, context);
50
+ Object.defineProperty(this, "cause", {
51
+ enumerable: true,
52
+ configurable: true,
53
+ writable: true,
54
+ value: cause
55
+ });
56
+ }
57
+ }
58
+ /** Failure while interpreting syntax artifacts into semantic fields. */
59
+ export class SemanticInterpretationError extends ParserError {
60
+ constructor(message, cause, offset, context) {
61
+ super(message, offset, context);
62
+ Object.defineProperty(this, "cause", {
63
+ enumerable: true,
64
+ configurable: true,
65
+ writable: true,
66
+ value: cause
67
+ });
68
+ }
69
+ }