@botejs/core 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,94 +8,121 @@ npm install @botejs/core
8
8
 
9
9
  ```ts
10
10
  import { open, fromFile } from '@botejs/core'
11
+ import { publish } from './message-bus'
11
12
 
12
- import * as z from 'zod' // or bring your own Standard Schema validator
13
+ // e.g. { items: [...] }
14
+ await using cursor = await open(fromFile('./some-large.json'))
13
15
 
14
- const User = z.object({
15
- id: z.string(),
16
- name: z.string(),
17
- email: z.string(),
18
- details: z.object({
19
- lastLoggedIn: z.number(),
20
- }),
21
- })
16
+ // items[0]
17
+ const first = await cursor.get('items', 0)
18
+ console.log(`first item: ${first}`)
19
+ ```
20
+
21
+ given a **seekable** source (e.g. a file, an HTTP range) and a path, it retrieves values out of a JSON document quickly, without loading the whole thing into memory.
22
+
23
+ here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default settings):
24
+
25
+ | operation | approach | time | js heap peak Δ | rust heap peak |
26
+ | -------------- | ---------- | --------: | -------------: | -------------: |
27
+ | items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
28
+ | items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
29
+ | items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
30
+ | items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
31
+ | items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
32
+ | items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
22
33
 
23
- type User = z.infer<typeof User>
34
+ ## array access
24
35
 
25
- await using cursor = await open(fromFile('./your-big.json'))
36
+ `iter` streams the elements of an array at a path, **a batch at a time**, so you never hold the whole collection in memory, and you don't have to wait for the heat death of the universe as you would if elements were yielded one by one. each `for await` step yields an array of items (use `walk` to step over the members of an object):
26
37
 
27
- // users[1000].name
28
- const desc0: unknown = await cursor.get('users', 1000, 'name')
29
- // for .get and .iter, you can supply a validator as the last argument
30
- const desc1: string = await cursor.get('users', 1000, 'name', User.shape.name)
38
+ ```ts
39
+ // e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
40
+ await using cursor = await open(fromFile('./users.json'))
31
41
 
32
- // iterate an array in batches
33
- for await (const batch of cursor.iter('users', User)) {
34
- // batch: User[]
35
- for (const user of batch) {
42
+ // root is an array
43
+ for await (const users of cursor.iter()) {
44
+ for (const user of users) {
36
45
  console.log(user)
37
46
  }
38
47
  }
48
+ ```
39
49
 
40
- // pick several fields into a named object to avoid resolving big items
41
- for await (const batch of cursor.iter('users', {
42
- select: {
43
- id: 'id',
44
- logged: ['details', 'lastLoggedIn'],
45
- },
46
- schema: z.object({
47
- id: User.shape.id,
48
- logged: User.shape.details.lastLoggedIn,
49
- }),
50
- })) {
51
- // batch: { id: string, logged: number }[]
52
- for (const userLog of batch) {
53
- console.log(userLog)
54
- }
50
+ pass an options object as the last argument to tune what comes back: `batch`, `select`, `schema`, `onInvalid`, and `withIndex`. to learn more about each option, see [`arrays.js`](./examples/arrays.js).
51
+
52
+ ## object access
53
+
54
+ `walk` steps over the members of an object at a path, yielding a **`[key, cursor]`** pair per member. the key is the member name, the cursor is anchored at its value. each child cursor is first-class: it outlives the loop and can be `walk`ed again, which is what lets you descend a tree of unknown depth.
55
+
56
+ ```ts
57
+ // e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
58
+ await using cursor = await open(fromFile('./accounts.json'))
59
+
60
+ for await (const [name, account] of cursor.walk()) {
61
+ // name is the member name ('alice', 'bob', ...)
62
+ const role = await account.get('role')
63
+ console.log(`${name}: ${role}`)
55
64
  }
65
+ ```
66
+
67
+ see [`recursive.js`](./examples/recursive.js) for advanced use-cases.
56
68
 
57
- // or pick a single field
58
- for await (const batch of cursor.iter('users', {
59
- select: 'name',
60
- schema: User.shape.name,
61
- })) {
62
- // batch: string[]
63
- for (const name of batch) {
64
- console.log({ name })
69
+ ## hopping
70
+
71
+ `hop` resolves a path once and hands back a **cursor** anchored at that value (or `null` if the path isn't there):
72
+
73
+ ```ts
74
+ // e.g. { report: { sections: [{ rows: [...] }, ...] } }
75
+ await using cursor = await open(fromFile('./report.json'))
76
+
77
+ const section = await cursor.hop('report', 'sections', 0)
78
+ if (section) {
79
+ console.log(await section.count('rows'))
80
+ for await (const rows of section.iter('rows')) {
81
+ console.log(rows)
65
82
  }
66
83
  }
84
+ ```
67
85
 
68
- // for open-ended per-child work (e.g. conditional reads, recursive descent, nested
69
- // iters), `walk` yields a subcursor positioned at each child:
70
- for await (const metaCursor of cursor.walk('meta')) {
71
- if (metaCursor.key === 'details') {
72
- const detailsValue = await metaCursor.get()
73
- console.log(detailsValue)
74
- }
86
+ ## validation
87
+
88
+ `get` and `iter` take a [Standard Schema](https://standardschema.dev) validator as their last argument (for `iter`, it can also be passed in an `options` object). the value is validated and the return type is inferred from the schema, so reads come back typed instead of `unknown`:
89
+
90
+ ```ts
91
+ import { open, fromFile } from '@botejs/core'
92
+ import * as z from 'zod' // or any Standard Schema validator
93
+
94
+ // a downstream API that wants a typed list of recipients
95
+ declare function sendNewsletter(recipients: string[]): Promise<void>
96
+
97
+ const User = z.object({
98
+ id: z.string(),
99
+ name: z.string(),
100
+ email: z.string(),
101
+ })
102
+
103
+ const cursor = await open(fromFile('./users.json'))
104
+
105
+ // name: string
106
+ const name = await cursor.get('users', 1000, 'name', User.shape.name)
107
+
108
+ for await (const users of cursor.iter('users', User)) {
109
+ // users: User[]
110
+ const emails = users.map((user) => user.email)
111
+ await sendNewsletter(emails)
75
112
  }
76
113
 
77
- // 'await using' would normally clean up resources for you
78
- // when it goes out of lexical scope. if you hate that,
79
- // you can do it explicitly as well.
80
114
  await cursor.close()
81
115
  ```
82
116
 
83
- given a **seekable** source (e.g. a file, an HTTP range) and a path, it can retrieve values in a JSON quickly, without loading the whole thing in-memory.
117
+ ## memory
84
118
 
85
- here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default settings):
119
+ bote keeps a small **structural-index** cache: as scans walk containers (arrays and objects), it remembers where members live, so a later query that lands in an already-walked container resumes near the target instead of from the top. it caches structure, never source bytes, so it can't grow unbounded with document size.
86
120
 
87
- | operation | approach | time | js heap peak Δ | rust heap peak |
88
- | -------------- | ---------- | --------: | -------------: | -------------: |
89
- | items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
90
- | items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
91
- | items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
92
- | items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
93
- | items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
94
- | items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
121
+ the defaults are good, but `open` takes a few knobs (`indexCacheEntries`, `objectMemberCap`, and `arrayIndexInterval`) to bound memory more tightly or turn the cache off. see [`memory.js`](./examples/memory.js) for what each does.
95
122
 
96
123
  ## sources
97
124
 
98
- bote currently only has `fromFile` and `fromHttpRange` as pre-built sources. create your own by implementing the `Source` interface. see [./packages/core/src/sources.ts](./packages/core/src/sources.ts) on how it works.
125
+ bote ships `fromFile`, `fromHttpRange`, and `fromBuffer` as pre-built sources. create your own by implementing the `Source` interface. see [`sources-custom.ts`](./examples/sources-custom.ts) or [./packages/core/src/sources.ts](./packages/core/src/sources.ts) for how it works.
99
126
 
100
127
  ## status
101
128
 
package/dist/args.js CHANGED
@@ -1,11 +1,5 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.splitArgs = splitArgs;
4
- exports.isSchema = isSchema;
5
- exports.normalizeIterTail = normalizeIterTail;
6
- exports.serializeSelect = serializeSelect;
7
- const path_ts_1 = require("./path.js");
8
- function splitArgs(args) {
1
+ import { validatePath } from "./path.js";
2
+ export function splitArgs(args) {
9
3
  let pathArgs;
10
4
  let tail;
11
5
  if (args.length === 0) {
@@ -23,35 +17,41 @@ function splitArgs(args) {
23
17
  tail = undefined;
24
18
  }
25
19
  }
26
- (0, path_ts_1.validatePath)(pathArgs);
20
+ validatePath(pathArgs);
27
21
  return { path: pathArgs, tail };
28
22
  }
29
- function isSchema(value) {
23
+ export function isSchema(value) {
30
24
  return typeof value === 'object' && value !== null && '~standard' in value;
31
25
  }
32
- function normalizeIterTail(tail) {
26
+ export function normalizeIterTail(tail) {
33
27
  if (!tail)
34
28
  return {};
35
29
  if (isSchema(tail))
36
30
  return { schema: tail };
37
31
  return tail;
38
32
  }
39
- function serializeSelect(select) {
33
+ export function serializeSelect(select) {
40
34
  if (typeof select === 'string' || typeof select === 'number') {
41
35
  const one = [select];
42
- (0, path_ts_1.validatePath)(one);
36
+ validatePath(one);
43
37
  return JSON.stringify({ one });
44
38
  }
45
39
  if (Array.isArray(select)) {
46
- (0, path_ts_1.validatePath)(select);
40
+ validatePath(select);
47
41
  if (select.length === 0) {
48
42
  throw new RangeError('iter: select sub-path must have at least one segment');
49
43
  }
50
44
  return JSON.stringify({ one: select });
51
45
  }
46
+ if (select === null || typeof select !== 'object') {
47
+ throw new TypeError(`iter: select must be a segment, path, or field map, got ${describeSelect(select)}`);
48
+ }
52
49
  const entries = Object.entries(select).map(([k, sub]) => {
53
50
  const path = typeof sub === 'string' || typeof sub === 'number' ? [sub] : sub;
54
- (0, path_ts_1.validatePath)(path);
51
+ if (!Array.isArray(path)) {
52
+ throw new TypeError(`iter: select field ${JSON.stringify(k)} must be a segment or path, got ${describeSelect(sub)}`);
53
+ }
54
+ validatePath(path);
55
55
  if (path.length === 0) {
56
56
  throw new RangeError(`iter: select field ${JSON.stringify(k)} sub-path must have at least one segment`);
57
57
  }
@@ -62,3 +62,8 @@ function serializeSelect(select) {
62
62
  }
63
63
  return JSON.stringify({ map: entries });
64
64
  }
65
+ function describeSelect(value) {
66
+ if (value === null)
67
+ return 'null';
68
+ return Array.isArray(value) ? 'array' : typeof value;
69
+ }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  export { type IterOptions } from './args.ts';
2
- export { ValidationError, formatPath, type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
3
- export { open, DEFAULT_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type IterIndex as IterKey, } from './open.ts';
2
+ export { ValidationError, PathError, formatPath, type Path, type PathFaultCode, type Segment, type StandardSchemaV1, } from './validate.ts';
3
+ export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type WalkEntry, type IterIndex as IterKey, } from './open.ts';
4
4
  export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
package/dist/index.js CHANGED
@@ -1,19 +1,3 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.fromHttpRange = exports.fromFile = exports.fromBuffer = exports.DEFAULT_ITER_BATCH = exports.formatPath = exports.ValidationError = void 0;
4
- // Node 18 and Node 20.3 predate `Symbol.asyncDispose`; mirror what TS emits for
5
- // `await using` so the well-known symbol is available across our engine range.
6
- if (!Symbol.asyncDispose) {
7
- ;
8
- Symbol.asyncDispose = Symbol.for('Symbol.asyncDispose');
9
- }
10
- var validate_ts_1 = require("./validate.js");
11
- Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
12
- Object.defineProperty(exports, "formatPath", { enumerable: true, get: function () { return validate_ts_1.formatPath; } });
13
- var open_ts_1 = require("./open.js");
14
- Object.defineProperty(exports, "open", { enumerable: true, get: function () { return open_ts_1.open; } });
15
- Object.defineProperty(exports, "DEFAULT_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.DEFAULT_ITER_BATCH; } });
16
- var sources_ts_1 = require("./sources.js");
17
- Object.defineProperty(exports, "fromBuffer", { enumerable: true, get: function () { return sources_ts_1.fromBuffer; } });
18
- Object.defineProperty(exports, "fromFile", { enumerable: true, get: function () { return sources_ts_1.fromFile; } });
19
- Object.defineProperty(exports, "fromHttpRange", { enumerable: true, get: function () { return sources_ts_1.fromHttpRange; } });
1
+ export { ValidationError, PathError, formatPath, } from "./validate.js";
2
+ export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, } from "./open.js";
3
+ export { fromBuffer, fromFile, fromHttpRange, } from "./sources.js";
package/dist/open.d.ts CHANGED
@@ -7,8 +7,11 @@ type SelectMapShape<S> = {
7
7
  };
8
8
  /** Zero-based index of an array element. */
9
9
  export type IterIndex = number;
10
+ /** One `walk` step: the member's key paired with a cursor anchored at its value. */
11
+ export type WalkEntry = [key: string, cursor: Cursor];
10
12
  export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
11
13
  export declare const DEFAULT_ITER_BATCH = 1000;
14
+ export declare const MAX_ITER_BATCH = 1000000;
12
15
  export interface OpenOptions {
13
16
  /**
14
17
  * Slot budget for the structural-index cache: one slot per cached container
@@ -40,8 +43,7 @@ export interface OpenOptions {
40
43
  arrayIndexInterval?: number;
41
44
  }
42
45
  export interface Cursor {
43
- /** Object-member key or array-element index that this cursor was yielded under by `walk`. `null` on the root cursor. */
44
- readonly key: string | number | null;
46
+ hop(...path: Segment[]): Promise<Cursor | null>;
45
47
  has(...path: Segment[]): Promise<boolean>;
46
48
  has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
47
49
  get(...path: Segment[]): Promise<unknown>;
@@ -67,6 +69,7 @@ export interface Cursor {
67
69
  withIndex: true;
68
70
  }]): AsyncIterable<[IterIndex, unknown][]>;
69
71
  iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
72
+ walk(...path: Segment[]): AsyncIterable<WalkEntry>;
70
73
  walk(...path: Segment[]): AsyncIterable<Cursor>;
71
74
  }
72
75
  export interface RootCursor extends Cursor, AsyncDisposable {
package/dist/open.js CHANGED
@@ -1,35 +1,41 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.DEFAULT_ITER_BATCH = exports.DEFAULT_SOURCE_CHUNK_BYTES = void 0;
4
- exports.open = open;
5
- const native_1 = require("@botejs/native");
6
- const path_ts_1 = require("./path.js");
7
- const validate_ts_1 = require("./validate.js");
8
- const args_ts_1 = require("./args.js");
9
- exports.DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
10
- exports.DEFAULT_ITER_BATCH = 1000;
1
+ import { open as openNative } from '@botejs/native';
2
+ import { validatePath } from "./path.js";
3
+ import { runStandardSchema, validateItem, formatPath, PathError, } from "./validate.js";
4
+ import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
5
+ export const DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
6
+ export const DEFAULT_ITER_BATCH = 1000;
7
+ export const MAX_ITER_BATCH = 1_000_000;
11
8
  /**
12
9
  * Open a cursor over a seekable source.
13
10
  *
14
11
  * The returned `RootCursor` owns the reader: `close()` (or `await using`)
15
12
  * drives the reader's own `close()` exactly once.
16
13
  */
17
- async function open(source, options) {
14
+ export async function open(source, options) {
18
15
  const { indexCacheEntries, objectMemberCap, arrayIndexInterval } = options ?? {};
19
16
  for (const [name, value] of [
20
17
  ['indexCacheEntries', indexCacheEntries],
21
18
  ['objectMemberCap', objectMemberCap],
22
19
  ['arrayIndexInterval', arrayIndexInterval],
23
20
  ]) {
24
- if (value !== undefined && (!Number.isInteger(value) || value < 0)) {
21
+ if (value !== undefined && (!Number.isSafeInteger(value) || value < 0)) {
25
22
  throw new RangeError(`open: ${name} must be a non-negative integer (0 disables), got ${value}`);
26
23
  }
27
24
  }
28
25
  const reader = await source.open();
29
- const chunkBytes = reader.chunkBytes ?? exports.DEFAULT_SOURCE_CHUNK_BYTES;
26
+ const chunkBytes = reader.chunkBytes ?? DEFAULT_SOURCE_CHUNK_BYTES;
30
27
  let native;
31
28
  try {
32
- native = (0, native_1.open)({
29
+ if (!Number.isInteger(reader.size) || reader.size < 0) {
30
+ throw new RangeError(`open: source size must be a non-negative integer, got ${reader.size}`);
31
+ }
32
+ if (!Number.isSafeInteger(chunkBytes) || chunkBytes <= 0) {
33
+ throw new RangeError(`open: chunkBytes must be a positive integer, got ${chunkBytes}`);
34
+ }
35
+ if (chunkBytes % 64 !== 0) {
36
+ throw new RangeError(`open: chunkBytes must be a multiple of 64, got ${chunkBytes}`);
37
+ }
38
+ native = openNative({
33
39
  size: reader.size,
34
40
  chunkBytes,
35
41
  indexCacheEntries,
@@ -39,17 +45,24 @@ async function open(source, options) {
39
45
  });
40
46
  }
41
47
  catch (err) {
42
- await closeReader(reader);
48
+ // Don't let a failing cleanup mask the original open error; attach it as cause.
49
+ try {
50
+ await closeReader(reader);
51
+ }
52
+ catch (closeErr) {
53
+ if (err instanceof Error)
54
+ err.cause ??= closeErr;
55
+ }
43
56
  throw err;
44
57
  }
45
- let closed = false;
58
+ const state = { closed: false };
46
59
  const close = async () => {
47
- if (closed)
60
+ if (state.closed)
48
61
  return;
49
- closed = true;
62
+ state.closed = true;
50
63
  await closeReader(reader);
51
64
  };
52
- return Object.assign(wrap(native), {
65
+ return Object.assign(wrap(native, state), {
53
66
  close,
54
67
  [Symbol.asyncDispose]: close,
55
68
  });
@@ -58,66 +71,157 @@ async function closeReader(reader) {
58
71
  if (reader.close)
59
72
  await reader.close();
60
73
  }
61
- function wrap(native) {
74
+ const NATIVE_PATH_ERROR = /^bote:path:([a-z_]+)(?::(\d+))?$/;
75
+ function deserializeError(err, path) {
76
+ if (err instanceof Error && !(err instanceof PathError)) {
77
+ const match = NATIVE_PATH_ERROR.exec(err.message);
78
+ if (match) {
79
+ const segment = match[2] === undefined ? undefined : Number(match[2]);
80
+ return new PathError(path, match[1], segment);
81
+ }
82
+ }
83
+ return err;
84
+ }
85
+ /** Throw a uniform error for any operation on a closed cursor, so use-after-close
86
+ * is one defined contract regardless of source (some readers' reads keep working
87
+ * after close, others throw an opaque I/O error). */
88
+ function ensureOpen(state) {
89
+ if (state.closed)
90
+ throw new Error('bote: cursor is closed');
91
+ }
92
+ function wrap(native, state) {
62
93
  const cursor = {
63
- get key() {
64
- return native.key;
94
+ async hop(...path) {
95
+ ensureOpen(state);
96
+ validatePath(path);
97
+ let child;
98
+ try {
99
+ child = await native.hop(path);
100
+ }
101
+ catch (err) {
102
+ throw deserializeError(err, path);
103
+ }
104
+ return child ? wrap(child, state) : null;
65
105
  },
66
106
  async has(...args) {
67
- const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
107
+ ensureOpen(state);
108
+ const { path, tail: schema } = splitArgs(args);
109
+ if (schema !== undefined && !isSchema(schema)) {
110
+ throw new TypeError('has: expected a Standard Schema as the trailing argument');
111
+ }
68
112
  if (!schema)
69
113
  return native.has(path);
70
114
  if (!(await native.has(path)))
71
115
  return false;
72
- const result = await (0, validate_ts_1.validateItem)(schema, await native.get(path), path, 'skip');
116
+ const text = await native.get(path);
117
+ const value = text === undefined ? undefined : parseValue(text, path);
118
+ const result = await validateItem(schema, value, path, 'skip');
73
119
  return !('skip' in result);
74
120
  },
75
121
  async get(...args) {
76
- const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
77
- const value = await native.get(path);
78
- if (!schema || value === undefined)
122
+ ensureOpen(state);
123
+ const { path, tail: schema } = splitArgs(args);
124
+ if (schema !== undefined && !isSchema(schema)) {
125
+ throw new TypeError('get: expected a Standard Schema as the trailing argument');
126
+ }
127
+ let value;
128
+ try {
129
+ const text = await native.get(path);
130
+ value = text === undefined ? undefined : parseValue(text, path);
131
+ }
132
+ catch (err) {
133
+ throw deserializeError(err, path);
134
+ }
135
+ if (!schema)
79
136
  return value;
80
- return (0, validate_ts_1.runStandardSchema)(schema, value, path);
137
+ return runStandardSchema(schema, value, path);
81
138
  },
82
- count(...path) {
83
- (0, path_ts_1.validatePath)(path);
84
- return native.count(path);
139
+ async count(...path) {
140
+ ensureOpen(state);
141
+ validatePath(path);
142
+ try {
143
+ return await native.count(path);
144
+ }
145
+ catch (err) {
146
+ throw deserializeError(err, path);
147
+ }
85
148
  },
86
149
  iter(...args) {
87
- const { path, tail } = (0, args_ts_1.splitArgs)(args);
88
- const { schema, select, batch, onInvalid, withIndex } = (0, args_ts_1.normalizeIterTail)(tail);
89
- if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0)) {
90
- throw new RangeError(`iter: batch must be a positive integer, got ${batch}`);
150
+ ensureOpen(state);
151
+ const { path, tail } = splitArgs(args);
152
+ const { schema, select, batch, onInvalid, withIndex } = normalizeIterTail(tail);
153
+ if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > MAX_ITER_BATCH)) {
154
+ throw new RangeError(`iter: batch must be an integer in 1..=${MAX_ITER_BATCH}, got ${batch}`);
155
+ }
156
+ if (withIndex !== undefined && typeof withIndex !== 'boolean') {
157
+ throw new TypeError(`iter: withIndex must be a boolean, got ${typeof withIndex}`);
158
+ }
159
+ if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
160
+ throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
161
+ }
162
+ const resolvedBatch = batch ?? DEFAULT_ITER_BATCH;
163
+ const selectIr = select !== undefined ? serializeSelect(select) : undefined;
164
+ const inner = native.iter(path, { selectIr, batch: resolvedBatch });
165
+ if (!schema) {
166
+ return {
167
+ async *[Symbol.asyncIterator]() {
168
+ let i = 0;
169
+ try {
170
+ for await (const b of inner) {
171
+ const batch = parseValue(b, path);
172
+ if (!withIndex) {
173
+ yield batch;
174
+ continue;
175
+ }
176
+ const out = new Array(batch.length);
177
+ for (let j = 0; j < batch.length; j++) {
178
+ out[j] = [i++, batch[j]];
179
+ }
180
+ yield out;
181
+ }
182
+ }
183
+ catch (err) {
184
+ throw deserializeError(err, path);
185
+ }
186
+ },
187
+ };
91
188
  }
92
- const resolvedBatch = batch ?? exports.DEFAULT_ITER_BATCH;
93
- const selectIr = select !== undefined ? (0, args_ts_1.serializeSelect)(select) : undefined;
94
- const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: withIndex });
95
- if (!schema)
96
- return inner;
97
189
  const policy = onInvalid ?? 'throw';
98
190
  return {
99
191
  async *[Symbol.asyncIterator]() {
100
192
  let i = 0;
101
- for await (const b of inner) {
102
- const out = [];
103
- for (const v of b) {
104
- const value = withIndex ? v[1] : v;
105
- const result = await (0, validate_ts_1.validateItem)(schema, value, [...path, i++], policy);
106
- if ('skip' in result)
107
- continue;
108
- out.push(withIndex ? [v[0], result.value] : result.value);
193
+ try {
194
+ for await (const b of inner) {
195
+ const out = [];
196
+ for (const v of parseValue(b, path)) {
197
+ const index = i++;
198
+ const result = await validateItem(schema, v, [...path, index], policy);
199
+ if ('skip' in result) {
200
+ continue;
201
+ }
202
+ out.push(withIndex ? [index, result.value] : result.value);
203
+ }
204
+ yield out;
109
205
  }
110
- yield out;
206
+ }
207
+ catch (err) {
208
+ throw deserializeError(err, path);
111
209
  }
112
210
  },
113
211
  };
114
212
  },
115
213
  walk(...path) {
116
- (0, path_ts_1.validatePath)(path);
214
+ ensureOpen(state);
215
+ validatePath(path);
117
216
  return {
118
217
  async *[Symbol.asyncIterator]() {
119
- for await (const child of native.walk(path)) {
120
- yield wrap(child);
218
+ try {
219
+ for await (const [key, child] of native.walk(path)) {
220
+ yield [key, wrap(child, state)];
221
+ }
222
+ }
223
+ catch (err) {
224
+ throw deserializeError(err, path);
121
225
  }
122
226
  },
123
227
  };
@@ -125,3 +229,11 @@ function wrap(native) {
125
229
  };
126
230
  return cursor;
127
231
  }
232
+ function parseValue(text, path) {
233
+ try {
234
+ return JSON.parse(text);
235
+ }
236
+ catch {
237
+ throw new Error(`bote: malformed JSON value at ${formatPath(path)}`);
238
+ }
239
+ }
package/dist/path.js CHANGED
@@ -1,18 +1,14 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.MAX_ARRAY_INDEX = void 0;
4
- exports.validatePath = validatePath;
5
1
  /** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
6
2
  * comfortably covers any in-memory JSON array. */
7
- exports.MAX_ARRAY_INDEX = 0xffffffff;
8
- function validatePath(path) {
3
+ export const MAX_ARRAY_INDEX = 0xffffffff;
4
+ export function validatePath(path) {
9
5
  for (let i = 0; i < path.length; i++) {
10
6
  const s = path[i];
11
7
  if (typeof s === 'string')
12
8
  continue;
13
- if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <= exports.MAX_ARRAY_INDEX)
9
+ if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <= MAX_ARRAY_INDEX)
14
10
  continue;
15
- throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${exports.MAX_ARRAY_INDEX}), got ${describeBadSegment(s)}`);
11
+ throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${MAX_ARRAY_INDEX}), got ${describeBadSegment(s)}`);
16
12
  }
17
13
  }
18
14
  function describeBadSegment(s) {
package/dist/sources.js CHANGED
@@ -1,16 +1,11 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.fromBuffer = fromBuffer;
4
- exports.fromFile = fromFile;
5
- exports.fromHttpRange = fromHttpRange;
6
- const promises_1 = require("node:fs/promises");
1
+ import { open as fsOpen } from 'node:fs/promises';
7
2
  /** Default chunk size, in bytes, for in-memory sources. */
8
3
  const DEFAULT_BUFFER_CHUNK_BYTES = 4 * 1024;
9
4
  /** Default chunk size, in bytes, for local files: matches typical filesystem readahead. */
10
5
  const DEFAULT_FILE_CHUNK_BYTES = 64 * 1024;
11
6
  /** Default chunk size, in bytes, for HTTP range reads: amortizes RTT across more data. */
12
7
  const DEFAULT_URL_CHUNK_BYTES = 256 * 1024;
13
- function fromBuffer(buf, options) {
8
+ export function fromBuffer(buf, options) {
14
9
  const view = buf instanceof Uint8Array ? buf : new Uint8Array(buf);
15
10
  const chunkBytes = options?.chunkBytes ?? DEFAULT_BUFFER_CHUNK_BYTES;
16
11
  return {
@@ -21,11 +16,11 @@ function fromBuffer(buf, options) {
21
16
  }),
22
17
  };
23
18
  }
24
- function fromFile(path, options) {
19
+ export function fromFile(path, options) {
25
20
  const chunkBytes = options?.chunkBytes ?? DEFAULT_FILE_CHUNK_BYTES;
26
21
  return {
27
22
  open: async () => {
28
- const handle = await (0, promises_1.open)(path, 'r');
23
+ const handle = await fsOpen(path, 'r');
29
24
  const stat = await handle.stat();
30
25
  let closed = false;
31
26
  return {
@@ -52,7 +47,7 @@ function fromFile(path, options) {
52
47
  },
53
48
  };
54
49
  }
55
- function fromHttpRange(url, options) {
50
+ export function fromHttpRange(url, options) {
56
51
  const init = options?.init;
57
52
  const chunkBytes = options?.chunkBytes ?? DEFAULT_URL_CHUNK_BYTES;
58
53
  return {
@@ -1,5 +1,6 @@
1
1
  import type { StandardSchemaV1 } from '@standard-schema/spec';
2
- export type { StandardSchemaV1 };
2
+ import type { PathFaultCode } from '@botejs/native';
3
+ export type { StandardSchemaV1, PathFaultCode };
3
4
  export type Segment = string | number;
4
5
  export type Path = readonly Segment[];
5
6
  export declare class ValidationError extends Error {
@@ -7,6 +8,12 @@ export declare class ValidationError extends Error {
7
8
  readonly path: Path;
8
9
  constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
9
10
  }
11
+ export declare class PathError extends Error {
12
+ readonly path: Path;
13
+ /** The fault kind; stable across versions, safe to branch on. */
14
+ readonly code: PathFaultCode;
15
+ constructor(path: Path, code: PathFaultCode, segment?: number);
16
+ }
10
17
  export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path): Promise<O>;
11
18
  export declare function validateItem<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path, onInvalid: 'throw' | 'skip'): Promise<{
12
19
  skip: true;
package/dist/validate.js CHANGED
@@ -1,10 +1,4 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ValidationError = void 0;
4
- exports.runStandardSchema = runStandardSchema;
5
- exports.validateItem = validateItem;
6
- exports.formatPath = formatPath;
7
- class ValidationError extends Error {
1
+ export class ValidationError extends Error {
8
2
  issues;
9
3
  path;
10
4
  constructor(issues, path) {
@@ -14,14 +8,35 @@ class ValidationError extends Error {
14
8
  this.path = path;
15
9
  }
16
10
  }
17
- exports.ValidationError = ValidationError;
18
- async function runStandardSchema(schema, value, path) {
11
+ /** Human message per fault kind. The native layer ships only the code (and the
12
+ * offending `segment` where it matters), so this is the single source of the
13
+ * user-facing prose. Keyed by the Rust-generated [`PathFaultCode`]. */
14
+ const PATH_FAULT_MESSAGE = {
15
+ through_scalar: (segment) => `path traverses a non-container value at segment ${segment}`,
16
+ wrong_kind: (segment) => `path segment ${segment} does not match the container kind`,
17
+ scalar_target: () => 'target value is not a container',
18
+ iter_on_object: () => 'iter target is an object; use walk() to iterate object members',
19
+ walk_on_array: () => 'walk target is an array; use iter() to iterate array elements',
20
+ };
21
+ export class PathError extends Error {
22
+ path;
23
+ /** The fault kind; stable across versions, safe to branch on. */
24
+ code;
25
+ constructor(path, code, segment) {
26
+ const reason = (PATH_FAULT_MESSAGE[code] ?? (() => code))(segment);
27
+ super(`bote: cannot resolve ${formatPath(path)}: ${reason}`);
28
+ this.name = 'PathError';
29
+ this.path = path;
30
+ this.code = code;
31
+ }
32
+ }
33
+ export async function runStandardSchema(schema, value, path) {
19
34
  const result = await schema['~standard'].validate(value);
20
35
  if (result.issues)
21
36
  throw new ValidationError(result.issues, path);
22
37
  return result.value;
23
38
  }
24
- async function validateItem(schema, value, path, onInvalid) {
39
+ export async function validateItem(schema, value, path, onInvalid) {
25
40
  const result = await schema['~standard'].validate(value);
26
41
  if (result.issues) {
27
42
  if (onInvalid === 'skip')
@@ -30,7 +45,7 @@ async function validateItem(schema, value, path, onInvalid) {
30
45
  }
31
46
  return { value: result.value };
32
47
  }
33
- function formatPath(path) {
48
+ export function formatPath(path) {
34
49
  if (path.length === 0)
35
50
  return '(root)';
36
51
  let out = '';
package/package.json CHANGED
@@ -1,12 +1,19 @@
1
1
  {
2
2
  "name": "@botejs/core",
3
- "version": "0.2.0",
3
+ "version": "0.4.0",
4
+ "type": "module",
4
5
  "license": "MIT",
5
6
  "repository": {
6
7
  "type": "git",
7
8
  "url": "git+https://github.com/jankdc/bote.git",
8
9
  "directory": "packages/core"
9
10
  },
11
+ "exports": {
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "default": "./dist/index.js"
15
+ }
16
+ },
10
17
  "main": "dist/index.js",
11
18
  "types": "dist/index.d.ts",
12
19
  "files": [
@@ -14,7 +21,7 @@
14
21
  "README.md"
15
22
  ],
16
23
  "engines": {
17
- "node": ">= 18.17.0 < 19 || >= 20.3.0 < 21 || >= 21.1.0"
24
+ "node": ">= 22.18.0"
18
25
  },
19
26
  "publishConfig": {
20
27
  "registry": "https://registry.npmjs.org/",
@@ -23,12 +30,12 @@
23
30
  "scripts": {
24
31
  "build": "tsc",
25
32
  "build:debug": "tsc --sourceMap",
26
- "test": "node --test --experimental-strip-types --no-warnings=ExperimentalWarning __test__/*.spec.ts",
33
+ "test": "node --test __test__/*.spec.ts",
27
34
  "lint": "oxlint src",
28
35
  "prepublishOnly": "cp ../../README.md ./README.md && tsc"
29
36
  },
30
37
  "dependencies": {
31
- "@botejs/native": "workspace:*"
38
+ "@botejs/native": "^0.4.0"
32
39
  },
33
40
  "devDependencies": {
34
41
  "@types/node": "^22.0.0",