@botejs/core 0.1.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,57 +8,121 @@ npm install @botejs/core
8
8
 
9
9
  ```ts
10
10
  import { open, fromFile } from '@botejs/core'
11
+ import { publish } from './message-bus'
11
12
 
12
- import * as z from 'zod' // or bring your own Standard Schema validator
13
+ // e.g. { items: [...] }
14
+ await using cursor = await open(fromFile('./some-large.json'))
13
15
 
14
- const User = z.object({
15
- id: z.string(),
16
- email: z.string(),
17
- })
16
+ // items[0]
17
+ const first = await cursor.get('items', 0)
18
+ console.log(`first item: ${first}`)
19
+ ```
20
+
21
+ given a **seekable** source (e.g. a file, an HTTP range) and a path, it retrieves values from a JSON document quickly, without loading the whole thing into memory.
22
+
23
+ here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default settings):
24
+
25
+ | operation | approach | time | js heap peak Δ | rust heap peak |
26
+ | -------------- | ---------- | --------: | -------------: | -------------: |
27
+ | items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
28
+ | items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
29
+ | items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
30
+ | items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
31
+ | items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
32
+ | items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
33
+
34
+ ## array access
35
+
36
+ `iter` streams the elements of an array at a path, **a batch at a time**, so you never hold the whole collection in memory, and you don't wait for the heat death of the universe as you would if each element were yielded individually. each `for await` step yields an array of items (use `walk` to step over the members of an object):
37
+
38
+ ```ts
39
+ // e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
40
+ await using cursor = await open(fromFile('./users.json'))
41
+
42
+ // root is an array
43
+ for await (const users of cursor.iter()) {
44
+ for (const user of users) {
45
+ console.log(user)
46
+ }
47
+ }
48
+ ```
49
+
50
+ pass an options object as the last argument to tune what comes back: `batch`, `select`, `schema`, `onInvalid`, and `withIndex`. to learn more about these options, see [`arrays.js`](./examples/arrays.js).
51
+
52
+ ## object access
53
+
54
+ `walk` steps over the members of an object at a path, yielding a **`[key, cursor]`** pair per member. the key is the member name, the cursor is anchored at its value. each child cursor is first-class: it outlives the loop and can be `walk`ed again, which is what lets you descend a tree of unknown depth.
55
+
56
+ ```ts
57
+ // e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
58
+ await using cursor = await open(fromFile('./accounts.json'))
18
59
 
19
- type User = z.infer<typeof User>
60
+ for await (const [name, account] of cursor.walk()) {
61
+ // name is the member name ('alice', 'bob', ...)
62
+ const role = await account.get('role')
63
+ console.log(`${name}: ${role}`)
64
+ }
65
+ ```
20
66
 
21
- await using cursor = await open(fromFile('./your-big.json'))
67
+ see [`recursive.js`](./examples/recursive.js) for advanced use-cases.
22
68
 
23
- // if you want one value
24
- const user0: unknown = await cursor.get('/1234/users/0')
69
+ ## hopping
25
70
 
26
- // for .get and .iter, you can supply a validator
27
- const user1: User = await cursor.get('/1234/users/1', User)
71
+ `hop` resolves a path once and hands back a **cursor** anchored at that value (or `null` if the path isn't there):
28
72
 
29
- // if you want to iterate a list of values
30
- for await (const user of cursor.iter('/1234/users')) {
31
- console.log(user)
73
+ ```ts
74
+ // e.g. { report: { sections: [{ rows: [...] }, ...] } }
75
+ await using cursor = await open(fromFile('./report.json'))
76
+
77
+ const section = await cursor.hop('report', 'sections', 0)
78
+ if (section) {
79
+ console.log(await section.count('rows'))
80
+ for await (const rows of section.iter('rows')) {
81
+ console.log(rows)
82
+ }
32
83
  }
84
+ ```
85
+
86
+ ## validation
87
+
88
+ `get` and `iter` take a [Standard Schema](https://standardschema.dev) validator as their last argument (for `iter`, it can also be passed as `schema` in an options object). the value is validated and the return type is inferred from the schema, so reads come back typed instead of `unknown`:
89
+
90
+ ```ts
91
+ import { open, fromFile } from '@botejs/core'
92
+ import * as z from 'zod' // or any Standard Schema validator
93
+
94
+ // a downstream API that wants a typed list of recipients
95
+ declare function sendNewsletter(recipients: string[]): Promise<void>
96
+
97
+ const User = z.object({
98
+ id: z.string(),
99
+ name: z.string(),
100
+ email: z.string(),
101
+ })
102
+
103
+ const cursor = await open(fromFile('./users.json'))
104
+
105
+ // name: string
106
+ const name = await cursor.get('users', 1000, 'name', User.shape.name)
33
107
 
34
- // if you want to iterate but not fully resolve values
35
- for await (const userCursor of cursor.walk('/1234/users')) {
36
- const id = await userCursor.get('/id')
37
- console.log({ id })
108
+ for await (const users of cursor.iter('users', User)) {
109
+ // users: User[]
110
+ const emails = users.map((user) => user.email)
111
+ await sendNewsletter(emails)
38
112
  }
39
113
 
40
- // 'await using' would normally clean up resources for you
41
- // when it goes out of lexical scope. if you hate that,
42
- // you can do it explicitly as well.
43
114
  await cursor.close()
44
115
  ```
45
116
 
46
- given a **seekable** source (e.g. a file, an HTTP range) and a JSON pointer, it can retrieve values in a JSON quickly, without loading the whole thing in-memory.
117
+ ## memory
47
118
 
48
- here's a run (Apple M1 Pro 2021, 500MB JSON array file, cold-cache, default settings):
119
+ bote keeps a small **structural-index** cache: as scans walk containers (arrays and objects), it remembers where members live, so a later query that lands in an already-walked container resumes near the target instead of from the top. it caches structure, never source bytes, so it can't grow unbounded with document size.
49
120
 
50
- | operation | approach | time | js heap peak Δ | rust heap peak |
51
- | ------------ | ---------- | --------: | -------------: | -------------: |
52
- | items[0] | JSON.parse | 1.75 s | 1.21 GB | n/a |
53
- | items[len/2] | JSON.parse | 1.82 s | 1.21 GB | n/a |
54
- | items[len-1] | JSON.parse | 1.76 s | 1.21 GB | n/a |
55
- | items[0] | bote | 1.43 ms | 25.9 KB | 94.9 KB |
56
- | items[len/2] | bote | 328.81 ms | 1.3 MB | 56.6 MB |
57
- | items[len-1] | bote | 636.78 ms | 1.3 MB | 56.6 MB |
121
+ the defaults are good, but `open` takes a few knobs (`indexCacheEntries`, `objectMemberCap`, and `arrayIndexInterval`) to bound memory tighter or turn the cache off. see [`memory.js`](./examples/memory.js) for what each does.
58
122
 
59
123
  ## sources
60
124
 
61
- bote currently only has `fromFile` and `fromHttpRange` as pre-built sources. create your own by implementing the `Source` interface. see [./packages/core/src/sources.ts](./packages/core/src/sources.ts) on how it works.
125
+ bote ships `fromFile`, `fromHttpRange`, and `fromBuffer` as pre-built sources. create your own by implementing the `Source` interface. see [`sources-custom.ts`](./examples/sources-custom.ts) or [./packages/core/src/sources.ts](./packages/core/src/sources.ts) for how it works.
62
126
 
63
127
  ## status
64
128
 
package/dist/args.d.ts ADDED
@@ -0,0 +1,21 @@
1
+ import type { Path, Segment, StandardSchemaV1 } from './validate.ts';
2
+ export interface IterOptions {
3
+ select?: Segment | Path | Record<string, Segment | Path>;
4
+ /** How many items are yielded per batch. Higher is faster, but takes more memory to materialise those items. */
5
+ batch?: number;
6
+ /** Validate each yielded item against this schema (after `select`). */
7
+ schema?: StandardSchemaV1;
8
+ /** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
9
+ onInvalid?: 'throw' | 'skip';
10
+ /** Yield `[index, value]` tuples instead of bare values, where `index` is
11
+ * the zero-based position of the element in the source array. */
12
+ withIndex?: boolean;
13
+ }
14
+ export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
15
+ export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
16
+ path: Segment[];
17
+ tail: TTail | undefined;
18
+ };
19
+ export declare function isSchema(value: unknown): value is StandardSchemaV1;
20
+ export declare function normalizeIterTail(tail: StandardSchemaV1 | IterOptions | undefined): IterOptions;
21
+ export declare function serializeSelect(select: Segment | Path | Record<string, Segment | Path>): string;
package/dist/args.js ADDED
@@ -0,0 +1,75 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.splitArgs = splitArgs;
4
+ exports.isSchema = isSchema;
5
+ exports.normalizeIterTail = normalizeIterTail;
6
+ exports.serializeSelect = serializeSelect;
7
+ const path_ts_1 = require("./path.js");
8
+ function splitArgs(args) {
9
+ let pathArgs;
10
+ let tail;
11
+ if (args.length === 0) {
12
+ pathArgs = [];
13
+ tail = undefined;
14
+ }
15
+ else {
16
+ const last = args[args.length - 1];
17
+ if (last !== null && typeof last === 'object' && !Array.isArray(last)) {
18
+ pathArgs = args.slice(0, -1);
19
+ tail = last;
20
+ }
21
+ else {
22
+ pathArgs = args;
23
+ tail = undefined;
24
+ }
25
+ }
26
+ (0, path_ts_1.validatePath)(pathArgs);
27
+ return { path: pathArgs, tail };
28
+ }
29
+ function isSchema(value) {
30
+ return typeof value === 'object' && value !== null && '~standard' in value;
31
+ }
32
+ function normalizeIterTail(tail) {
33
+ if (!tail)
34
+ return {};
35
+ if (isSchema(tail))
36
+ return { schema: tail };
37
+ return tail;
38
+ }
39
+ function serializeSelect(select) {
40
+ if (typeof select === 'string' || typeof select === 'number') {
41
+ const one = [select];
42
+ (0, path_ts_1.validatePath)(one);
43
+ return JSON.stringify({ one });
44
+ }
45
+ if (Array.isArray(select)) {
46
+ (0, path_ts_1.validatePath)(select);
47
+ if (select.length === 0) {
48
+ throw new RangeError('iter: select sub-path must have at least one segment');
49
+ }
50
+ return JSON.stringify({ one: select });
51
+ }
52
+ if (select === null || typeof select !== 'object') {
53
+ throw new TypeError(`iter: select must be a segment, path, or field map, got ${describeSelect(select)}`);
54
+ }
55
+ const entries = Object.entries(select).map(([k, sub]) => {
56
+ const path = typeof sub === 'string' || typeof sub === 'number' ? [sub] : sub;
57
+ if (!Array.isArray(path)) {
58
+ throw new TypeError(`iter: select field ${JSON.stringify(k)} must be a segment or path, got ${describeSelect(sub)}`);
59
+ }
60
+ (0, path_ts_1.validatePath)(path);
61
+ if (path.length === 0) {
62
+ throw new RangeError(`iter: select field ${JSON.stringify(k)} sub-path must have at least one segment`);
63
+ }
64
+ return [k, path];
65
+ });
66
+ if (entries.length === 0) {
67
+ throw new RangeError('iter: select must have at least one field');
68
+ }
69
+ return JSON.stringify({ map: entries });
70
+ }
71
+ function describeSelect(value) {
72
+ if (value === null)
73
+ return 'null';
74
+ return Array.isArray(value) ? 'array' : typeof value;
75
+ }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export type { JsonPointer } from './pointer.ts';
2
- export { open, type Cursor, type RootCursor, type SessionOptions } from './open.ts';
1
+ export { type IterOptions } from './args.ts';
2
+ export { ValidationError, PathError, formatPath, type Path, type Segment, type StandardSchemaV1, } from './validate.ts';
3
+ export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type WalkEntry, type IterIndex as IterKey, } from './open.ts';
3
4
  export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
4
- export { ValidationError, type StandardSchemaV1 } from './validate.ts';
package/dist/index.js CHANGED
@@ -1,17 +1,21 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ValidationError = exports.fromHttpRange = exports.fromFile = exports.fromBuffer = void 0;
3
+ exports.fromHttpRange = exports.fromFile = exports.fromBuffer = exports.MAX_ITER_BATCH = exports.DEFAULT_ITER_BATCH = exports.formatPath = exports.PathError = exports.ValidationError = void 0;
4
4
  // Node 18 and Node 20.3 predate `Symbol.asyncDispose`; mirror what TS emits for
5
5
  // `await using` so the well-known symbol is available across our engine range.
6
6
  if (!Symbol.asyncDispose) {
7
7
  ;
8
8
  Symbol.asyncDispose = Symbol.for('Symbol.asyncDispose');
9
9
  }
10
+ var validate_ts_1 = require("./validate.js");
11
+ Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
12
+ Object.defineProperty(exports, "PathError", { enumerable: true, get: function () { return validate_ts_1.PathError; } });
13
+ Object.defineProperty(exports, "formatPath", { enumerable: true, get: function () { return validate_ts_1.formatPath; } });
10
14
  var open_ts_1 = require("./open.js");
11
15
  Object.defineProperty(exports, "open", { enumerable: true, get: function () { return open_ts_1.open; } });
16
+ Object.defineProperty(exports, "DEFAULT_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.DEFAULT_ITER_BATCH; } });
17
+ Object.defineProperty(exports, "MAX_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.MAX_ITER_BATCH; } });
12
18
  var sources_ts_1 = require("./sources.js");
13
19
  Object.defineProperty(exports, "fromBuffer", { enumerable: true, get: function () { return sources_ts_1.fromBuffer; } });
14
20
  Object.defineProperty(exports, "fromFile", { enumerable: true, get: function () { return sources_ts_1.fromFile; } });
15
21
  Object.defineProperty(exports, "fromHttpRange", { enumerable: true, get: function () { return sources_ts_1.fromHttpRange; } });
16
- var validate_ts_1 = require("./validate.js");
17
- Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
package/dist/open.d.ts CHANGED
@@ -1,34 +1,77 @@
1
- import type { JsonPointer } from './pointer.ts';
2
1
  import type { Source } from './sources.ts';
3
- import { type StandardSchemaV1 } from './validate.ts';
4
- export interface SessionOptions {
2
+ import { type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
3
+ import { type IterOptions } from './args.ts';
4
+ type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
5
+ type SelectMapShape<S> = {
6
+ -readonly [K in keyof S]: unknown;
7
+ };
8
+ /** Zero-based index of an array element. */
9
+ export type IterIndex = number;
10
+ /** One `walk` step: the member's key paired with a cursor anchored at its value. */
11
+ export type WalkEntry = [key: string, cursor: Cursor];
12
+ export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
13
+ export declare const DEFAULT_ITER_BATCH = 1000;
14
+ export declare const MAX_ITER_BATCH = 1000000;
15
+ export interface OpenOptions {
16
+ /**
17
+ * Slot budget for the structural-index cache: one slot per cached container
18
+ * plus one per tabled object member. When a scan tips the cache over this
19
+ * budget, the deepest (least navigationally useful) containers are evicted
20
+ * first, LRU-tiebroken, keeping the shallow backbone that resumes future
21
+ * scans. Bounds resident cache memory regardless of document size. `0`
22
+ * disables the cache entirely. Omit for the native default (1024).
23
+ */
24
+ indexCacheEntries?: number;
25
+ /**
26
+ * Max object members tabled per walked container in the structural-index
27
+ * cache. The table is a dense prefix; past the cap, lookups of later members
28
+ * resume-scan from the cap boundary. Lower trades cache memory for resume work
29
+ * on pathologically large objects. `0` disables object member indexing. Omit
30
+ * for the native default (unbounded).
31
+ */
32
+ objectMemberCap?: number;
5
33
  /**
6
- * Maximum number of source chunks held resident at once. Each slot
7
- * accounts for one chunk's bytes plus its bitmaps; the cache also
8
- * enforces a derived byte ceiling at roughly `maxResidentChunks x
9
- * source.chunkBytes x 2` to bound total native memory.
34
+ * Element-index stride between sampled array members in the structural-index
35
+ * cache. A later index resumes from the nearest array member at or before it, so
36
+ * a smaller stride means denser array members (more memory, shorter resume
37
+ * scans). `0` disables array-member indexing. Omit for the native default (16).
10
38
  *
11
- * Defaults to 512 chunks.
39
+ * Setting both `objectMemberCap` and `arrayIndexInterval` to `0` disables the
40
+ * cache entirely (no source bytes are ever cached either way), as does
41
+ * `indexCacheEntries: 0`.
12
42
  */
13
- maxResidentChunks?: number;
43
+ arrayIndexInterval?: number;
14
44
  }
15
- type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
16
45
  export interface Cursor {
17
- /** Object-member key or array-element index that this cursor was yielded under by `walk`. `null` on the root cursor. */
18
- readonly key: string | number | null;
19
- has<S extends string>(pointer: JsonPointer<S>): Promise<boolean>;
20
- has<S extends string>(pointer: JsonPointer<S>, schema: StandardSchemaV1): Promise<boolean>;
21
- get<S extends string>(pointer: JsonPointer<S>): Promise<unknown>;
22
- get<S extends string, Sch extends StandardSchemaV1>(pointer: JsonPointer<S>, schema: Sch): Promise<InferOutput<Sch>>;
23
- iter<S extends string>(pointer: JsonPointer<S>): AsyncIterable<unknown>;
24
- iter<S extends string, Sch extends StandardSchemaV1>(pointer: JsonPointer<S>, schema: Sch): AsyncIterable<InferOutput<Sch>>;
25
- walk<S extends string>(pointer: JsonPointer<S>): AsyncIterable<Cursor>;
46
+ hop(...path: Segment[]): Promise<Cursor | null>;
47
+ has(...path: Segment[]): Promise<boolean>;
48
+ has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
49
+ get(...path: Segment[]): Promise<unknown>;
50
+ get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
51
+ count(...path: Segment[]): Promise<number>;
52
+ iter(...path: Segment[]): AsyncIterable<unknown[]>;
53
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): AsyncIterable<InferOutput<Sch>[]>;
54
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
55
+ withIndex: true;
56
+ schema: Sch;
57
+ }]): AsyncIterable<[IterIndex, InferOutput<Sch>][]>;
58
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
59
+ schema: Sch;
60
+ }]): AsyncIterable<InferOutput<Sch>[]>;
61
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
62
+ withIndex: true;
63
+ select: S;
64
+ }]): AsyncIterable<[IterIndex, SelectMapShape<S>][]>;
65
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
66
+ select: S;
67
+ }]): AsyncIterable<SelectMapShape<S>[]>;
68
+ iter(...args: [...Segment[], IterOptions & {
69
+ withIndex: true;
70
+ }]): AsyncIterable<[IterIndex, unknown][]>;
71
+ iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
72
+ walk(...path: Segment[]): AsyncIterable<WalkEntry>;
73
+ walk(...path: Segment[]): AsyncIterable<Cursor>;
26
74
  }
27
- /**
28
- * The cursor returned by `open()`. Owns the underlying `Source` and exposes
29
- * both an explicit `close()` and `Symbol.asyncDispose` so callers can choose
30
- * between manual cleanup and `await using` scoping.
31
- */
32
75
  export interface RootCursor extends Cursor, AsyncDisposable {
33
76
  /** Close the underlying source. Idempotent. */
34
77
  close(): Promise<void>;
@@ -36,14 +79,8 @@ export interface RootCursor extends Cursor, AsyncDisposable {
36
79
  /**
37
80
  * Open a cursor over a seekable source.
38
81
  *
39
- * Calls `source.open()` to acquire a reader, then constructs the native cursor
40
- * over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
41
- * `offset` and a `buf` whose `byteLength` equals the configured chunk size;
42
- * the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
43
- * native-owned memory and **MUST** not be retained past the returned promise.
44
- *
45
82
  * The returned `RootCursor` owns the reader: `close()` (or `await using`)
46
83
  * drives the reader's own `close()` exactly once.
47
84
  */
48
- export declare function open(source: Source, options?: SessionOptions): Promise<RootCursor>;
85
+ export declare function open(source: Source, options?: OpenOptions): Promise<RootCursor>;
49
86
  export {};
package/dist/open.js CHANGED
@@ -1,44 +1,72 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MAX_ITER_BATCH = exports.DEFAULT_ITER_BATCH = exports.DEFAULT_SOURCE_CHUNK_BYTES = void 0;
3
4
  exports.open = open;
4
5
  const native_1 = require("@botejs/native");
6
+ const path_ts_1 = require("./path.js");
5
7
  const validate_ts_1 = require("./validate.js");
8
+ const args_ts_1 = require("./args.js");
9
+ exports.DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
10
+ exports.DEFAULT_ITER_BATCH = 1000;
11
+ exports.MAX_ITER_BATCH = 1_000_000;
6
12
  /**
7
13
  * Open a cursor over a seekable source.
8
14
  *
9
- * Calls `source.open()` to acquire a reader, then constructs the native cursor
10
- * over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
11
- * `offset` and a `buf` whose `byteLength` equals the configured chunk size;
12
- * the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
13
- * native-owned memory and **MUST** not be retained past the returned promise.
14
- *
15
15
  * The returned `RootCursor` owns the reader: `close()` (or `await using`)
16
16
  * drives the reader's own `close()` exactly once.
17
17
  */
18
18
  async function open(source, options) {
19
+ const { indexCacheEntries, objectMemberCap, arrayIndexInterval } = options ?? {};
20
+ for (const [name, value] of [
21
+ ['indexCacheEntries', indexCacheEntries],
22
+ ['objectMemberCap', objectMemberCap],
23
+ ['arrayIndexInterval', arrayIndexInterval],
24
+ ]) {
25
+ if (value !== undefined && (!Number.isSafeInteger(value) || value < 0)) {
26
+ throw new RangeError(`open: ${name} must be a non-negative integer (0 disables), got ${value}`);
27
+ }
28
+ }
19
29
  const reader = await source.open();
30
+ const chunkBytes = reader.chunkBytes ?? exports.DEFAULT_SOURCE_CHUNK_BYTES;
20
31
  let native;
21
32
  try {
33
+ if (!Number.isInteger(reader.size) || reader.size < 0) {
34
+ throw new RangeError(`open: source size must be a non-negative integer, got ${reader.size}`);
35
+ }
36
+ if (!Number.isSafeInteger(chunkBytes) || chunkBytes <= 0) {
37
+ throw new RangeError(`open: chunkBytes must be a positive integer, got ${chunkBytes}`);
38
+ }
39
+ if (chunkBytes % 64 !== 0) {
40
+ throw new RangeError(`open: chunkBytes must be a multiple of 64, got ${chunkBytes}`);
41
+ }
22
42
  native = (0, native_1.open)({
23
43
  size: reader.size,
24
- chunkBytes: reader.chunkBytes,
25
- read: async ({ offset, buf }) => reader.read(offset, buf),
26
- }, {
27
- maxResidentChunks: options?.maxResidentChunks,
44
+ chunkBytes,
45
+ indexCacheEntries,
46
+ objectMemberCap,
47
+ arrayIndexInterval,
48
+ read: async ({ offset, length }) => reader.read(offset, length),
28
49
  });
29
50
  }
30
51
  catch (err) {
31
- await closeReader(reader);
52
+ // Don't let a failing cleanup mask the original open error; attach it as cause.
53
+ try {
54
+ await closeReader(reader);
55
+ }
56
+ catch (closeErr) {
57
+ if (err instanceof Error)
58
+ err.cause ??= closeErr;
59
+ }
32
60
  throw err;
33
61
  }
34
- let closed = false;
62
+ const state = { closed: false };
35
63
  const close = async () => {
36
- if (closed)
64
+ if (state.closed)
37
65
  return;
38
- closed = true;
66
+ state.closed = true;
39
67
  await closeReader(reader);
40
68
  };
41
- return Object.assign(wrap(native), {
69
+ return Object.assign(wrap(native, state), {
42
70
  close,
43
71
  [Symbol.asyncDispose]: close,
44
72
  });
@@ -47,41 +75,142 @@ async function closeReader(reader) {
47
75
  if (reader.close)
48
76
  await reader.close();
49
77
  }
50
- function wrap(native) {
78
+ /** Sentinel the native layer prefixes onto shape-contradiction errors (see
79
+ * `session.rs` `SessionError::Path`). */
80
+ const NATIVE_PATH_ERROR_PREFIX = 'bote.PathError: ';
81
+ /** Rethrow a native shape-contradiction error as a `PathError` carrying the
82
+ * caller's path; pass anything else through unchanged. */
83
+ function asPathError(err, path) {
84
+ if (err instanceof Error && !(err instanceof validate_ts_1.PathError) && err.message.startsWith(NATIVE_PATH_ERROR_PREFIX)) {
85
+ return new validate_ts_1.PathError(err.message.slice(NATIVE_PATH_ERROR_PREFIX.length), path);
86
+ }
87
+ return err;
88
+ }
89
+ /** Throw a uniform error for any operation on a closed cursor, so use-after-close
90
+ * is one defined contract regardless of source (some readers' reads keep working
91
+ * after close, others throw an opaque I/O error). */
92
+ function ensureOpen(state) {
93
+ if (state.closed)
94
+ throw new Error('bote: cursor is closed');
95
+ }
96
+ function wrap(native, state) {
51
97
  const cursor = {
52
- get key() {
53
- return native.key;
98
+ async hop(...path) {
99
+ ensureOpen(state);
100
+ (0, path_ts_1.validatePath)(path);
101
+ let child;
102
+ try {
103
+ child = await native.hop(path);
104
+ }
105
+ catch (err) {
106
+ throw asPathError(err, path);
107
+ }
108
+ return child ? wrap(child, state) : null;
54
109
  },
55
- async has(pointer, schema) {
110
+ async has(...args) {
111
+ ensureOpen(state);
112
+ const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
113
+ if (schema !== undefined && !(0, args_ts_1.isSchema)(schema)) {
114
+ throw new TypeError('has: expected a Standard Schema as the trailing argument');
115
+ }
56
116
  if (!schema)
57
- return native.has(pointer);
58
- if (!(await native.has(pointer)))
117
+ return native.has(path);
118
+ if (!(await native.has(path)))
59
119
  return false;
60
- const result = await schema['~standard'].validate(await native.get(pointer));
61
- return result.issues === undefined;
120
+ const result = await (0, validate_ts_1.validateItem)(schema, await native.get(path), path, 'skip');
121
+ return !('skip' in result);
62
122
  },
63
- async get(pointer, schema) {
64
- const value = await native.get(pointer);
65
- return schema ? (0, validate_ts_1.runStandardSchema)(schema, value, pointer) : value;
66
- },
67
- iter(pointer, schema) {
68
- const inner = native.iter(pointer);
123
+ async get(...args) {
124
+ ensureOpen(state);
125
+ const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
126
+ if (schema !== undefined && !(0, args_ts_1.isSchema)(schema)) {
127
+ throw new TypeError('get: expected a Standard Schema as the trailing argument');
128
+ }
129
+ let value;
130
+ try {
131
+ value = await native.get(path);
132
+ }
133
+ catch (err) {
134
+ throw asPathError(err, path);
135
+ }
69
136
  if (!schema)
70
- return inner;
137
+ return value;
138
+ return (0, validate_ts_1.runStandardSchema)(schema, value, path);
139
+ },
140
+ async count(...path) {
141
+ ensureOpen(state);
142
+ (0, path_ts_1.validatePath)(path);
143
+ try {
144
+ return await native.count(path);
145
+ }
146
+ catch (err) {
147
+ throw asPathError(err, path);
148
+ }
149
+ },
150
+ iter(...args) {
151
+ ensureOpen(state);
152
+ const { path, tail } = (0, args_ts_1.splitArgs)(args);
153
+ const { schema, select, batch, onInvalid, withIndex } = (0, args_ts_1.normalizeIterTail)(tail);
154
+ if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > exports.MAX_ITER_BATCH)) {
155
+ throw new RangeError(`iter: batch must be an integer in 1..=${exports.MAX_ITER_BATCH}, got ${batch}`);
156
+ }
157
+ if (withIndex !== undefined && typeof withIndex !== 'boolean') {
158
+ throw new TypeError(`iter: withIndex must be a boolean, got ${typeof withIndex}`);
159
+ }
160
+ if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
161
+ throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
162
+ }
163
+ const resolvedBatch = batch ?? exports.DEFAULT_ITER_BATCH;
164
+ const selectIr = select !== undefined ? (0, args_ts_1.serializeSelect)(select) : undefined;
165
+ const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: withIndex });
166
+ if (!schema) {
167
+ return {
168
+ async *[Symbol.asyncIterator]() {
169
+ try {
170
+ for await (const b of inner)
171
+ yield b;
172
+ }
173
+ catch (err) {
174
+ throw asPathError(err, path);
175
+ }
176
+ },
177
+ };
178
+ }
179
+ const policy = onInvalid ?? 'throw';
71
180
  return {
72
181
  async *[Symbol.asyncIterator]() {
73
182
  let i = 0;
74
- for await (const v of inner) {
75
- yield await (0, validate_ts_1.runStandardSchema)(schema, v, `${pointer}/${i++}`);
183
+ try {
184
+ for await (const b of inner) {
185
+ const out = [];
186
+ for (const v of b) {
187
+ const value = withIndex ? v[1] : v;
188
+ const result = await (0, validate_ts_1.validateItem)(schema, value, [...path, i++], policy);
189
+ if ('skip' in result)
190
+ continue;
191
+ out.push(withIndex ? [v[0], result.value] : result.value);
192
+ }
193
+ yield out;
194
+ }
195
+ }
196
+ catch (err) {
197
+ throw asPathError(err, path);
76
198
  }
77
199
  },
78
200
  };
79
201
  },
80
- walk(pointer) {
202
+ walk(...path) {
203
+ ensureOpen(state);
204
+ (0, path_ts_1.validatePath)(path);
81
205
  return {
82
206
  async *[Symbol.asyncIterator]() {
83
- for await (const child of native.walk(pointer)) {
84
- yield wrap(child);
207
+ try {
208
+ for await (const [key, child] of native.walk(path)) {
209
+ yield [key, wrap(child, state)];
210
+ }
211
+ }
212
+ catch (err) {
213
+ throw asPathError(err, path);
85
214
  }
86
215
  },
87
216
  };
package/dist/path.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { Segment } from './validate.ts';
2
+ /** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
3
+ * comfortably covers any in-memory JSON array. */
4
+ export declare const MAX_ARRAY_INDEX = 4294967295;
5
+ export declare function validatePath(path: readonly unknown[]): asserts path is readonly Segment[];
package/dist/path.js ADDED
@@ -0,0 +1,24 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MAX_ARRAY_INDEX = void 0;
4
+ exports.validatePath = validatePath;
5
+ /** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
6
+ * comfortably covers any in-memory JSON array. */
7
+ exports.MAX_ARRAY_INDEX = 0xffffffff;
8
+ function validatePath(path) {
9
+ for (let i = 0; i < path.length; i++) {
10
+ const s = path[i];
11
+ if (typeof s === 'string')
12
+ continue;
13
+ if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <= exports.MAX_ARRAY_INDEX)
14
+ continue;
15
+ throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${exports.MAX_ARRAY_INDEX}), got ${describeBadSegment(s)}`);
16
+ }
17
+ }
18
+ function describeBadSegment(s) {
19
+ if (typeof s === 'number')
20
+ return `${s}`;
21
+ if (s === null)
22
+ return 'null';
23
+ return typeof s;
24
+ }
package/dist/sources.d.ts CHANGED
@@ -10,21 +10,17 @@ export interface SourceReader {
10
10
  /** Preferred read granularity in bytes. Must be a non-zero multiple of 64. */
11
11
  readonly chunkBytes?: number;
12
12
  /**
13
- * Fill `buf` with up to `buf.byteLength` bytes starting at `offset` and
14
- * resolve with the number of bytes written. The implementation must not
15
- * retain a reference to `buf` or read from it after the returned promise
16
- * resolves: `buf` is a view over native-owned memory whose lifetime ends
17
- * once the promise settles.
13
+ * Read up to `length` bytes starting at `offset` and resolve with the
14
+ * bytes read. The returned `Uint8Array`'s `.byteLength` is the actual
15
+ * count, which must be `<= length`.
18
16
  */
19
- read(offset: number, buf: Uint8Array): Promise<number>;
17
+ read(offset: number, length: number): Promise<Uint8Array>;
20
18
  /** Release resources held by the reader. Driven once by the `open()` lifecycle. */
21
19
  close?(): Promise<void> | void;
22
20
  }
23
21
  /**
24
- * Describes how to obtain a seekable byte stream. Construction is cheap and
25
- * synchronous - no I/O happens until `open()` runs, which the top-level
26
- * `open()` API drives. Provide your own object implementing this interface to
27
- * plug in custom backends.
22
+ * Describes how to obtain a seekable byte stream. Provide your own object implementing
23
+ * this interface to plug in custom backends.
28
24
  */
29
25
  export interface Source {
30
26
  /** Acquire the stream. Resolves to a `SourceReader` that owns any underlying resources. */
package/dist/sources.js CHANGED
@@ -17,13 +17,7 @@ function fromBuffer(buf, options) {
17
17
  open: () => Promise.resolve({
18
18
  size: view.byteLength,
19
19
  chunkBytes,
20
- read: async (offset, dst) => {
21
- const end = Math.min(offset + dst.byteLength, view.byteLength);
22
- const n = Math.max(0, end - offset);
23
- if (n > 0)
24
- dst.set(view.subarray(offset, end));
25
- return n;
26
- },
20
+ read: (offset, length) => Promise.resolve(view.subarray(offset, Math.min(offset + length, view.byteLength))),
27
21
  }),
28
22
  };
29
23
  }
@@ -37,9 +31,16 @@ function fromFile(path, options) {
37
31
  return {
38
32
  size: stat.size,
39
33
  chunkBytes,
40
- read: async (offset, dst) => {
41
- const { bytesRead } = await handle.read(dst, 0, dst.byteLength, offset);
42
- return bytesRead;
34
+ read: async (offset, length) => {
35
+ const buf = Buffer.allocUnsafe(length);
36
+ let filled = 0;
37
+ while (filled < length) {
38
+ const { bytesRead } = await handle.read(buf, filled, length - filled, offset + filled);
39
+ if (bytesRead === 0)
40
+ break;
41
+ filled += bytesRead;
42
+ }
43
+ return buf.subarray(0, filled);
43
44
  },
44
45
  close: async () => {
45
46
  if (closed)
@@ -85,17 +86,15 @@ function fromHttpRange(url, options) {
85
86
  return {
86
87
  size,
87
88
  chunkBytes,
88
- read: async (offset, dst) => {
89
+ read: async (offset, length) => {
89
90
  // HTTP ranges are inclusive on both ends.
90
- const end = Math.min(offset + dst.byteLength, size) - 1;
91
+ const end = Math.min(offset + length, size) - 1;
91
92
  const headers = new Headers(init?.headers);
92
93
  headers.set('Range', `bytes=${offset}-${end}`);
93
94
  headers.set('Accept-Encoding', 'identity');
94
95
  const res = await fetch(url, { ...init, headers, method: 'GET', signal: controller.signal });
95
96
  if (res.status === 206) {
96
- const body = new Uint8Array(await res.arrayBuffer());
97
- dst.set(body);
98
- return body.byteLength;
97
+ return new Uint8Array(await res.arrayBuffer());
99
98
  }
100
99
  // A 200 means the server ignored our Range request and returned the full
101
100
  // body. We throw here since the point of using ranges is to not have to
@@ -1,8 +1,20 @@
1
1
  import type { StandardSchemaV1 } from '@standard-schema/spec';
2
2
  export type { StandardSchemaV1 };
3
+ export type Segment = string | number;
4
+ export type Path = readonly Segment[];
3
5
  export declare class ValidationError extends Error {
4
6
  readonly issues: readonly StandardSchemaV1.Issue[];
5
- readonly pointer: string;
6
- constructor(issues: readonly StandardSchemaV1.Issue[], pointer: string);
7
+ readonly path: Path;
8
+ constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
7
9
  }
8
- export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, pointer: string): Promise<O>;
10
+ export declare class PathError extends Error {
11
+ readonly path: Path;
12
+ constructor(reason: string, path: Path);
13
+ }
14
+ export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path): Promise<O>;
15
+ export declare function validateItem<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path, onInvalid: 'throw' | 'skip'): Promise<{
16
+ skip: true;
17
+ } | {
18
+ value: O;
19
+ }>;
20
+ export declare function formatPath(path: Path): string;
package/dist/validate.js CHANGED
@@ -1,21 +1,60 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ValidationError = void 0;
3
+ exports.PathError = exports.ValidationError = void 0;
4
4
  exports.runStandardSchema = runStandardSchema;
5
+ exports.validateItem = validateItem;
6
+ exports.formatPath = formatPath;
5
7
  class ValidationError extends Error {
6
8
  issues;
7
- pointer;
8
- constructor(issues, pointer) {
9
- super(`bote: schema validation failed at ${pointer || '/'}: ${issues[0]?.message ?? 'unknown'}`);
9
+ path;
10
+ constructor(issues, path) {
11
+ super(`bote: schema validation failed at ${formatPath(path)}: ${issues[0]?.message ?? 'unknown'}`);
10
12
  this.name = 'ValidationError';
11
13
  this.issues = issues;
12
- this.pointer = pointer;
14
+ this.path = path;
13
15
  }
14
16
  }
15
17
  exports.ValidationError = ValidationError;
16
- async function runStandardSchema(schema, value, pointer) {
18
/**
 * Error raised when a path cannot be resolved. The message embeds the
 * formatted path and the supplied reason; the raw path segments are kept
 * on the instance for programmatic inspection.
 */
class PathError extends Error {
    /** Path segments this error refers to, exactly as passed to the constructor. */
    path;
    /**
     * @param reason - explanation of why resolution failed.
     * @param path - the path segments that could not be resolved.
     */
    constructor(reason, path) {
        super(`bote: cannot resolve ${formatPath(path)}: ${reason}`);
        this.path = path;
        this.name = 'PathError';
    }
}
26
+ exports.PathError = PathError;
27
+ async function runStandardSchema(schema, value, path) {
17
28
  const result = await schema['~standard'].validate(value);
18
29
  if (result.issues)
19
- throw new ValidationError(result.issues, pointer);
30
+ throw new ValidationError(result.issues, path);
20
31
  return result.value;
21
32
  }
33
/**
 * Validate a single value against a Standard Schema validator, with a
 * caller-selected policy for invalid values.
 *
 * @param schema - object exposing `['~standard'].validate`.
 * @param value - the value to validate.
 * @param path - path attached to the `ValidationError` when one is thrown.
 * @param onInvalid - `'skip'` reports an invalid value as `{ skip: true }`;
 *   any other setting throws.
 * @returns `{ value }` holding the validator's output on success, or
 *   `{ skip: true }` for an invalid value under the `'skip'` policy.
 * @throws {ValidationError} when validation reports issues and `onInvalid`
 *   is not `'skip'`.
 */
async function validateItem(schema, value, path, onInvalid) {
    const outcome = await schema['~standard'].validate(value);
    // Success path first: no issues means the validated value is usable.
    if (!outcome.issues) {
        return { value: outcome.value };
    }
    if (onInvalid !== 'skip') {
        throw new ValidationError(outcome.issues, path);
    }
    return { skip: true };
}
42
/**
 * Render a path as a readable accessor string, as embedded in the
 * `ValidationError` and `PathError` messages.
 *
 * - an empty path renders as `(root)`
 * - number segments render as `[n]`
 * - identifier-like string segments render bare when first, `.name` otherwise
 * - any other string segment renders as a JSON-quoted `["..."]`
 *
 * @param path - sequence of string / number segments.
 * @returns the formatted path.
 */
function formatPath(path) {
    if (path.length === 0)
        return '(root)';
    // Created once per call instead of once per segment. The pattern has no
    // `g`/`y` flag, so reusing it across `test` calls carries no state.
    const identifier = /^[A-Za-z_$][A-Za-z0-9_$]*$/;
    let out = '';
    for (let i = 0; i < path.length; i++) {
        const seg = path[i];
        if (typeof seg === 'number') {
            out += `[${seg}]`;
            continue;
        }
        if (identifier.test(seg)) {
            // A leading identifier needs no dot: `items[0]`, not `.items[0]`.
            out += i === 0 ? seg : `.${seg}`;
        }
        else {
            out += `[${JSON.stringify(seg)}]`;
        }
    }
    return out;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botejs/core",
3
- "version": "0.1.4",
3
+ "version": "0.3.0",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
package/dist/pointer.d.ts DELETED
@@ -1,5 +0,0 @@
1
- type ValidateTokenChars<S extends string> = S extends `${string}~${infer Rest}` ? Rest extends `0${infer After}` | `1${infer After}` ? ValidateTokenChars<After> : false : true;
2
- type ValidateTokens<S extends string> = S extends `${infer Token}/${infer Rest}` ? ValidateTokenChars<Token> extends true ? ValidateTokens<Rest> : false : ValidateTokenChars<S>;
3
- type IsJsonPointer<S extends string> = S extends '' ? true : S extends `/${infer Rest}` ? ValidateTokens<Rest> : false;
4
- export type JsonPointer<S extends string> = IsJsonPointer<S> extends true ? S : `Error: invalid JSON pointer "${S}"`;
5
- export {};
package/dist/pointer.js DELETED
@@ -1,3 +0,0 @@
1
- "use strict";
2
- // RFC 6901 JSON Pointer Static Typing Validator
3
- Object.defineProperty(exports, "__esModule", { value: true });