@botejs/core 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -115
- package/dist/args.d.ts +16 -5
- package/dist/args.js +6 -3
- package/dist/cursor.d.ts +97 -0
- package/dist/cursor.js +124 -0
- package/dist/error.d.ts +47 -0
- package/dist/error.js +113 -0
- package/dist/index.d.ts +9 -3
- package/dist/index.js +6 -3
- package/dist/open.d.ts +32 -71
- package/dist/open.js +27 -190
- package/dist/path.d.ts +3 -1
- package/dist/path.js +28 -4
- package/dist/source/base.d.ts +70 -0
- package/dist/source/base.js +1 -0
- package/dist/source/forward.d.ts +49 -0
- package/dist/source/forward.js +219 -0
- package/dist/source/seekable.d.ts +8 -0
- package/dist/{sources.js → source/seekable.js} +24 -10
- package/dist/stream.d.ts +15 -0
- package/dist/stream.js +166 -0
- package/dist/validate.d.ts +2 -16
- package/dist/validate.js +5 -53
- package/package.json +3 -2
- package/dist/sources.d.ts +0 -39
package/README.md
CHANGED
|
@@ -1,133 +1,62 @@
|
|
|
1
1
|
# bote
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
A fast, modern, and low-memory approach to processing large JSON documents:
|
|
4
4
|
|
|
5
5
|
```sh
|
|
6
6
|
npm install @botejs/core
|
|
7
7
|
```
|
|
8
8
|
|
|
9
9
|
```ts
|
|
10
|
-
|
|
11
|
-
import {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
// e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
|
|
40
|
-
await using cursor = await open(fromFile('./users.json'))
|
|
41
|
-
|
|
42
|
-
// root is an array
|
|
43
|
-
for await (const users of cursor.iter()) {
|
|
44
|
-
for (const user of users) {
|
|
45
|
-
console.log(user)
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
pass an options object as the last argument to tune what comes back: `batch`, `select`, `schema`, `onInvalid`, and `withIndex`. if you want to know more of the options, see [`arrays.js`](./examples/arrays.js).
|
|
51
|
-
|
|
52
|
-
## object access
|
|
53
|
-
|
|
54
|
-
`walk` steps over the members of an object at a path, yielding a **`[key, cursor]`** pair per member. the key is the member name, the cursor is anchored at its value. each child cursor is first-class: it outlives the loop and can be `walk`ed again, which is what lets you descend a tree of unknown depth.
|
|
55
|
-
|
|
56
|
-
```ts
|
|
57
|
-
// e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
|
|
58
|
-
await using cursor = await open(fromFile('./accounts.json'))
|
|
59
|
-
|
|
60
|
-
for await (const [name, account] of cursor.walk()) {
|
|
61
|
-
// name is the member name ('alice', 'bob', ...)
|
|
62
|
-
const role = await account.get('role')
|
|
63
|
-
console.log(`${name}: ${role}`)
|
|
64
|
-
}
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
see [`recursive.js`](./examples/recursive.js) for advanced use-cases.
|
|
68
|
-
|
|
69
|
-
## hopping
|
|
70
|
-
|
|
71
|
-
`hop` resolves a path once and hands back a **cursor** anchored at that value (or `null` if the path isn't there):
|
|
72
|
-
|
|
73
|
-
```ts
|
|
74
|
-
// e.g. { report: { sections: [{ rows: [...] }, ...] } }
|
|
75
|
-
await using cursor = await open(fromFile('./report.json'))
|
|
76
|
-
|
|
77
|
-
const section = await cursor.hop('report', 'sections', 0)
|
|
78
|
-
if (section) {
|
|
79
|
-
console.log(await section.count('rows'))
|
|
80
|
-
for await (const rows of section.iter('rows')) {
|
|
81
|
-
console.log(rows)
|
|
82
|
-
}
|
|
83
|
-
}
|
|
10
|
+
// node examples/citylots.js
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { open, fromFile } from '@botejs/core';
|
|
13
|
+
|
|
14
|
+
// 181 MB GeoJSON:
|
|
15
|
+
// { type: "...", features: [{ properties: { STREET: "..." }}] }
|
|
16
|
+
const filePath = join(import.meta.dirname, 'citylots.json');
|
|
17
|
+
|
|
18
|
+
await using cursor = await open(fromFile(filePath));
|
|
19
|
+
|
|
20
|
+
console.log(`type: ${await cursor.get('type')}`);
|
|
21
|
+
// type: 'FeatureCollection'
|
|
22
|
+
|
|
23
|
+
console.log(`features: ${await cursor.count('features')}`);
|
|
24
|
+
// features: 206_560
|
|
25
|
+
|
|
26
|
+
const byStreet = await cursor
|
|
27
|
+
.iter('features', {
|
|
28
|
+
select: ['properties', 'STREET'],
|
|
29
|
+
})
|
|
30
|
+
.reduce((tally, street) => {
|
|
31
|
+
if (typeof street === 'string') {
|
|
32
|
+
tally.set(street, (tally.get(street) ?? 0) + 1);
|
|
33
|
+
}
|
|
34
|
+
return tally;
|
|
35
|
+
}, new Map());
|
|
36
|
+
|
|
37
|
+
console.log([...byStreet].sort((a, b) => b[1] - a[1]).slice(0, 10));
|
|
38
|
+
// [[ 'UNKNOWN', 2843 ], [ 'MASON', 2651 ], [ 'PINE', 1799 ], ... ]
|
|
84
39
|
```
|
|
85
40
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
`get` and `iter` take a [Standard Schema](https://standardschema.dev) validator as their last argument (for `iter`, it can also be passed in an `options` object). the value is validated and the return type is inferred from the schema, so reads come back typed instead of `unknown`:
|
|
89
|
-
|
|
90
|
-
```ts
|
|
91
|
-
import { open, fromFile } from '@botejs/core'
|
|
92
|
-
import * as z from 'zod' // or any Standard Schema validator
|
|
93
|
-
|
|
94
|
-
// a downstream API that wants a typed list of recipients
|
|
95
|
-
declare function sendNewsletter(recipients: string[]): Promise<void>
|
|
96
|
-
|
|
97
|
-
const User = z.object({
|
|
98
|
-
id: z.string(),
|
|
99
|
-
name: z.string(),
|
|
100
|
-
email: z.string(),
|
|
101
|
-
})
|
|
102
|
-
|
|
103
|
-
const cursor = await open(fromFile('./users.json'))
|
|
104
|
-
|
|
105
|
-
// name: string
|
|
106
|
-
const name = await cursor.get('users', 1000, 'name', User.shape.name)
|
|
107
|
-
|
|
108
|
-
for await (const users of cursor.iter('users', User)) {
|
|
109
|
-
// user: User[]
|
|
110
|
-
const emails = users.map((user) => user.email)
|
|
111
|
-
await sendNewsletter(emails)
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
await cursor.close()
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
## memory
|
|
118
|
-
|
|
119
|
-
bote keeps a small **structural-index** cache: as scans walk containers (arrays and objects), it remembers where members live, so a later query that lands in an already-walked container resumes near the target instead of from the top. it caches structure, never source bytes, so it can't grow unbounded with document size.
|
|
41
|
+
Given a **seekable** source (e.g. a file, an HTTP range) or a **forward-only** source (e.g. an HTTP GET request) and a path, bote retrieves values from a JSON document without loading the whole thing into memory.
|
|
120
42
|
|
|
121
|
-
|
|
43
|
+
Here's a comparison of running the example above with each method (measured with `/usr/bin/time -l` on a 2021 Apple M1 Pro):
|
|
122
44
|
|
|
123
|
-
|
|
45
|
+
| method | mean time | mean peak footprint (MB) |
|
|
46
|
+
| ------------------ | --------- | ------------------------ |
|
|
47
|
+
| JSON.parse | 0.81 s | 647.0 |
|
|
48
|
+
| bote | 1.062 s | 89.0 |
|
|
49
|
+
| @streamparser/json | 4.363 s | 98.7 |
|
|
50
|
+
| JSONStream | 4.417 s | 60.7 |
|
|
51
|
+
| oboe.js | 9.649 s | 102.6 |
|
|
52
|
+
| stream-json | 18.693 s | 184.3 |
|
|
124
53
|
|
|
125
|
-
|
|
54
|
+
## Status
|
|
126
55
|
|
|
127
|
-
|
|
56
|
+
Pre-1.0. Still in development and APIs may change based on feedback, bugs and holy divinations from the coding gods.
|
|
128
57
|
|
|
129
|
-
|
|
58
|
+
I would say it is about 90% of the way to a satisfactory MVP, and I'm getting there.
|
|
130
59
|
|
|
131
|
-
##
|
|
60
|
+
## License
|
|
132
61
|
|
|
133
62
|
MIT.
|
package/dist/args.d.ts
CHANGED
|
@@ -1,15 +1,26 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type Path, type Segment } from './path.ts';
|
|
2
|
+
import type { StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
/** Trailing options object for `Cursor.iter`, tuning how the iteration yields items. */
|
|
2
4
|
export interface IterOptions {
|
|
5
|
+
/** Project each member before it is yielded. A single segment or path picks a
|
|
6
|
+
* sub-value; a field map (`{ name: 'name', city: ['address', 'city'] }`)
|
|
7
|
+
* builds an object from several sub-paths. */
|
|
3
8
|
select?: Segment | Path | Record<string, Segment | Path>;
|
|
4
|
-
/** How many items
|
|
9
|
+
/** How many items cross the native boundary per fetch, which also bounds the
|
|
10
|
+
* resident memory to that batch and sets the array size yielded by `IterStream.raw()`.
|
|
11
|
+
* The default item loop drains each fetch one item at a time, so this doesn't change
|
|
12
|
+
* what item iteration yields, only how much is fetched and held at once.
|
|
13
|
+
* Higher is faster but holds more in memory.
|
|
14
|
+
*
|
|
15
|
+
* Default is `1000`. */
|
|
5
16
|
batch?: number;
|
|
6
17
|
/** Validate each yielded item against this schema (after `select`). */
|
|
7
18
|
schema?: StandardSchemaV1;
|
|
19
|
+
/** Yield `[key, value]` tuples instead of bare values. `key` is the member
|
|
20
|
+
* name for objects and the zero-based index for arrays. */
|
|
21
|
+
withKey?: boolean;
|
|
8
22
|
/** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
|
|
9
23
|
onInvalid?: 'throw' | 'skip';
|
|
10
|
-
/** Yield `[index, value]` tuples instead of bare values, where `index` is
|
|
11
|
-
* the zero-based position of the element in the source array. */
|
|
12
|
-
withIndex?: boolean;
|
|
13
24
|
}
|
|
14
25
|
export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
|
|
15
26
|
export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
|
package/dist/args.js
CHANGED
|
@@ -24,10 +24,12 @@ export function isSchema(value) {
|
|
|
24
24
|
return typeof value === 'object' && value !== null && '~standard' in value;
|
|
25
25
|
}
|
|
26
26
|
export function normalizeIterTail(tail) {
|
|
27
|
-
if (!tail)
|
|
27
|
+
if (!tail) {
|
|
28
28
|
return {};
|
|
29
|
-
|
|
29
|
+
}
|
|
30
|
+
if (isSchema(tail)) {
|
|
30
31
|
return { schema: tail };
|
|
32
|
+
}
|
|
31
33
|
return tail;
|
|
32
34
|
}
|
|
33
35
|
export function serializeSelect(select) {
|
|
@@ -63,7 +65,8 @@ export function serializeSelect(select) {
|
|
|
63
65
|
return JSON.stringify({ map: entries });
|
|
64
66
|
}
|
|
65
67
|
function describeSelect(value) {
|
|
66
|
-
if (value === null)
|
|
68
|
+
if (value === null) {
|
|
67
69
|
return 'null';
|
|
70
|
+
}
|
|
68
71
|
return Array.isArray(value) ? 'array' : typeof value;
|
|
69
72
|
}
|
package/dist/cursor.d.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import type { Cursor as NativeCursor } from '@botejs/native';
|
|
2
|
+
import { type Path, type Segment } from './path.ts';
|
|
3
|
+
import { type IterStream } from './stream.ts';
|
|
4
|
+
import { type StandardSchemaV1 } from './validate.ts';
|
|
5
|
+
import { type IterOptions } from './args.ts';
|
|
6
|
+
type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
7
|
+
type SelectMapShape<S> = {
|
|
8
|
+
-readonly [K in keyof S]: unknown;
|
|
9
|
+
};
|
|
10
|
+
export type IterKey = string | number;
|
|
11
|
+
export declare const DEFAULT_ITER_BATCH = 1000;
|
|
12
|
+
export declare const MAX_ITER_BATCH = 1000000;
|
|
13
|
+
export interface Cursor {
|
|
14
|
+
/**
|
|
15
|
+
* Resolve `path` to a container and return a new cursor anchored there, or
|
|
16
|
+
* `null` if it is absent. Child cursors share the root's source and lifetime;
|
|
17
|
+
* closing the root closes them too.
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* const user = await root.hop('users', 0);
|
|
21
|
+
* const name = await user?.get('name');
|
|
22
|
+
*/
|
|
23
|
+
hop(...path: Segment[]): Promise<Cursor | null>;
|
|
24
|
+
/**
|
|
25
|
+
* Report whether a value exists at `path`. With a trailing Standard Schema,
|
|
26
|
+
* also require the value to validate against it (a parse/validation miss
|
|
27
|
+
* yields `false` rather than throwing).
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* await root.has('users', 0, 'email');
|
|
31
|
+
* await root.has('users', 0, 'age', z.number());
|
|
32
|
+
*/
|
|
33
|
+
has(...path: Segment[]): Promise<boolean>;
|
|
34
|
+
has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
|
|
35
|
+
/**
|
|
36
|
+
* Read and decode the value at `path`, or `undefined` if absent. With a
|
|
37
|
+
* trailing Standard Schema, validate and return its parsed output, throwing
|
|
38
|
+
* on failure.
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* const name = await root.get('users', 0, 'name');
|
|
42
|
+
* const age = await root.get('users', 0, 'age', z.number());
|
|
43
|
+
*/
|
|
44
|
+
get(...path: Segment[]): Promise<unknown>;
|
|
45
|
+
get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
|
|
46
|
+
/**
|
|
47
|
+
* Count the members of the array or object at `path`.
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* const total = await root.count('users');
|
|
51
|
+
*/
|
|
52
|
+
count(...path: Segment[]): Promise<number>;
|
|
53
|
+
/**
|
|
54
|
+
* Stream the members of the array or object at `path` as an async iterable.
|
|
55
|
+
* A trailing Standard Schema validates each item; a trailing {@link IterOptions}
|
|
56
|
+
* object tunes the iteration (see its fields for the available knobs).
|
|
57
|
+
*
|
|
58
|
+
* @example
|
|
59
|
+
* for await (const user of root.iter('users')) {
|
|
60
|
+
* console.log(user);
|
|
61
|
+
* }
|
|
62
|
+
*
|
|
63
|
+
* for await (const [i, name] of root.iter('users', { withKey: true, select: ['name'] })) {
|
|
64
|
+
* console.log(i, name);
|
|
65
|
+
* }
|
|
66
|
+
*/
|
|
67
|
+
iter(...path: Segment[]): IterStream<unknown>;
|
|
68
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): IterStream<InferOutput<Sch>>;
|
|
69
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
|
|
70
|
+
withKey: true;
|
|
71
|
+
schema: Sch;
|
|
72
|
+
}]): IterStream<[IterKey, InferOutput<Sch>]>;
|
|
73
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
|
|
74
|
+
schema: Sch;
|
|
75
|
+
}]): IterStream<InferOutput<Sch>>;
|
|
76
|
+
iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
|
|
77
|
+
withKey: true;
|
|
78
|
+
select: S;
|
|
79
|
+
}]): IterStream<[IterKey, SelectMapShape<S>]>;
|
|
80
|
+
iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
|
|
81
|
+
select: S;
|
|
82
|
+
}]): IterStream<SelectMapShape<S>>;
|
|
83
|
+
iter(...args: [...Segment[], IterOptions & {
|
|
84
|
+
withKey: true;
|
|
85
|
+
}]): IterStream<[IterKey, unknown]>;
|
|
86
|
+
iter(...args: [...Segment[], IterOptions]): IterStream<unknown>;
|
|
87
|
+
}
|
|
88
|
+
export interface RootCursor extends Cursor, AsyncDisposable {
|
|
89
|
+
/** Close the underlying source. Idempotent. */
|
|
90
|
+
close(): Promise<void>;
|
|
91
|
+
}
|
|
92
|
+
export type CursorState = {
|
|
93
|
+
closed: boolean;
|
|
94
|
+
};
|
|
95
|
+
export declare function wrap(native: NativeCursor, state: CursorState): Cursor;
|
|
96
|
+
export declare function ensureOpen(state: CursorState): void;
|
|
97
|
+
export {};
|
package/dist/cursor.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { deserializeNativeError, ClosedCursorError, MalformedJsonError } from "./error.js";
|
|
2
|
+
import { validatePath } from "./path.js";
|
|
3
|
+
import { makeStream } from "./stream.js";
|
|
4
|
+
import { runStandardSchema, validateItem } from "./validate.js";
|
|
5
|
+
import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
|
|
6
|
+
export const DEFAULT_ITER_BATCH = 1000;
|
|
7
|
+
export const MAX_ITER_BATCH = 1_000_000;
|
|
8
|
+
export function wrap(native, state) {
|
|
9
|
+
const cursor = {
|
|
10
|
+
async hop(...path) {
|
|
11
|
+
ensureOpen(state);
|
|
12
|
+
validatePath(path);
|
|
13
|
+
const child = await withPath(path, () => native.hop(path));
|
|
14
|
+
return child ? wrap(child, state) : null;
|
|
15
|
+
},
|
|
16
|
+
async has(...args) {
|
|
17
|
+
ensureOpen(state);
|
|
18
|
+
const { path, tail: schema } = splitArgs(args);
|
|
19
|
+
if (schema !== undefined && !isSchema(schema)) {
|
|
20
|
+
throw new TypeError('has: expected a Standard Schema as the trailing argument');
|
|
21
|
+
}
|
|
22
|
+
if (!schema) {
|
|
23
|
+
return withPath(path, () => native.has(path));
|
|
24
|
+
}
|
|
25
|
+
if (!(await withPath(path, () => native.has(path)))) {
|
|
26
|
+
return false;
|
|
27
|
+
}
|
|
28
|
+
const text = await withPath(path, () => native.get(path));
|
|
29
|
+
const value = text === undefined ? undefined : parseValue(text, path);
|
|
30
|
+
const result = await validateItem(schema, value, path, 'skip');
|
|
31
|
+
return !('skip' in result);
|
|
32
|
+
},
|
|
33
|
+
async get(...args) {
|
|
34
|
+
ensureOpen(state);
|
|
35
|
+
const { path, tail: schema } = splitArgs(args);
|
|
36
|
+
if (schema !== undefined && !isSchema(schema)) {
|
|
37
|
+
throw new TypeError('get: expected a Standard Schema as the trailing argument');
|
|
38
|
+
}
|
|
39
|
+
const text = await withPath(path, () => native.get(path));
|
|
40
|
+
const value = text === undefined ? undefined : parseValue(text, path);
|
|
41
|
+
if (!schema) {
|
|
42
|
+
return value;
|
|
43
|
+
}
|
|
44
|
+
return runStandardSchema(schema, value, path);
|
|
45
|
+
},
|
|
46
|
+
async count(...path) {
|
|
47
|
+
ensureOpen(state);
|
|
48
|
+
validatePath(path);
|
|
49
|
+
return withPath(path, () => native.count(path));
|
|
50
|
+
},
|
|
51
|
+
iter(...args) {
|
|
52
|
+
ensureOpen(state);
|
|
53
|
+
const { path, tail } = splitArgs(args);
|
|
54
|
+
const { schema, select, batch, onInvalid, withKey } = normalizeIterTail(tail);
|
|
55
|
+
if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > MAX_ITER_BATCH)) {
|
|
56
|
+
throw new RangeError(`iter: batch must be an integer in 1..=${MAX_ITER_BATCH}, got ${batch}`);
|
|
57
|
+
}
|
|
58
|
+
if (withKey !== undefined && typeof withKey !== 'boolean') {
|
|
59
|
+
throw new TypeError(`iter: withKey must be a boolean, got ${typeof withKey}`);
|
|
60
|
+
}
|
|
61
|
+
if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
|
|
62
|
+
throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
|
|
63
|
+
}
|
|
64
|
+
const resolvedBatch = batch ?? DEFAULT_ITER_BATCH;
|
|
65
|
+
const selectIr = select !== undefined ? serializeSelect(select) : undefined;
|
|
66
|
+
const wantKey = withKey ?? false;
|
|
67
|
+
const nativeWithKey = wantKey || schema !== undefined;
|
|
68
|
+
const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: nativeWithKey });
|
|
69
|
+
if (!schema) {
|
|
70
|
+
return nativeStream(inner, path, resolvedBatch, (raw) => parseValue(raw, path));
|
|
71
|
+
}
|
|
72
|
+
const policy = onInvalid ?? 'throw';
|
|
73
|
+
return nativeStream(inner, path, resolvedBatch, async (raw) => {
|
|
74
|
+
const out = [];
|
|
75
|
+
for (const [key, value] of parseValue(raw, path)) {
|
|
76
|
+
const result = await validateItem(schema, value, [...path, key], policy);
|
|
77
|
+
if ('skip' in result) {
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
out.push(wantKey ? [key, result.value] : result.value);
|
|
81
|
+
}
|
|
82
|
+
return out;
|
|
83
|
+
});
|
|
84
|
+
},
|
|
85
|
+
};
|
|
86
|
+
return cursor;
|
|
87
|
+
}
|
|
88
|
+
export function ensureOpen(state) {
|
|
89
|
+
if (state.closed) {
|
|
90
|
+
throw new ClosedCursorError();
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
/** Run a native call, retyping any addon error as the matching {@link BoteError}
|
|
94
|
+
* anchored to `path`. The single funnel every cursor operation passes through,
|
|
95
|
+
* so native faults surface uniformly. */
|
|
96
|
+
async function withPath(path, op) {
|
|
97
|
+
try {
|
|
98
|
+
return await op();
|
|
99
|
+
}
|
|
100
|
+
catch (err) {
|
|
101
|
+
throw deserializeNativeError(err, path);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
function nativeStream(inner, path, batchSize, mapBatch) {
|
|
105
|
+
async function* batches() {
|
|
106
|
+
try {
|
|
107
|
+
for await (const raw of inner) {
|
|
108
|
+
yield await mapBatch(raw);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
catch (err) {
|
|
112
|
+
throw deserializeNativeError(err, path);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
return makeStream(batches, batchSize);
|
|
116
|
+
}
|
|
117
|
+
function parseValue(text, path) {
|
|
118
|
+
try {
|
|
119
|
+
return JSON.parse(text);
|
|
120
|
+
}
|
|
121
|
+
catch (cause) {
|
|
122
|
+
throw new MalformedJsonError(path, 'malformed_json', { cause });
|
|
123
|
+
}
|
|
124
|
+
}
|
package/dist/error.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { StandardSchemaV1 } from '@standard-schema/spec';
|
|
2
|
+
import type { PathFaultCode, JsonFaultCode, SourceFaultCode } from '@botejs/native';
|
|
3
|
+
import { type Path } from './path.ts';
|
|
4
|
+
export type { PathFaultCode, JsonFaultCode, SourceFaultCode };
|
|
5
|
+
export type BoteErrorCode = PathFaultCode | JsonFaultCode | SourceFaultCode | 'validation' | 'closed' | 'forward_replay';
|
|
6
|
+
/** Base class for every error bote raises from its own logic. Catch this to
|
|
7
|
+
* catch anything bote throws; branch on {@link BoteError.code} for the precise
|
|
8
|
+
* kind. Always carries a `bote:`-prefixed message. */
|
|
9
|
+
export declare abstract class BoteError extends Error {
|
|
10
|
+
readonly code: BoteErrorCode;
|
|
11
|
+
constructor(code: BoteErrorCode, message: string, options?: ErrorOptions);
|
|
12
|
+
}
|
|
13
|
+
export declare class PathError extends BoteError {
|
|
14
|
+
readonly code: PathFaultCode;
|
|
15
|
+
readonly path: Path;
|
|
16
|
+
constructor(path: Path, code: PathFaultCode, segment?: number);
|
|
17
|
+
}
|
|
18
|
+
export declare class ValidationError extends BoteError {
|
|
19
|
+
readonly code: 'validation';
|
|
20
|
+
readonly issues: readonly StandardSchemaV1.Issue[];
|
|
21
|
+
readonly path: Path;
|
|
22
|
+
constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
|
|
23
|
+
}
|
|
24
|
+
export declare class MalformedJsonError extends BoteError {
|
|
25
|
+
readonly code: JsonFaultCode;
|
|
26
|
+
readonly path: Path;
|
|
27
|
+
constructor(path: Path, code: JsonFaultCode, options?: ErrorOptions);
|
|
28
|
+
}
|
|
29
|
+
export declare class SourceReadError extends BoteError {
|
|
30
|
+
readonly code: SourceFaultCode;
|
|
31
|
+
readonly path: Path;
|
|
32
|
+
constructor(path: Path, detail: string, options?: ErrorOptions);
|
|
33
|
+
}
|
|
34
|
+
export declare class ForwardReplayError extends BoteError {
|
|
35
|
+
readonly code: 'forward_replay';
|
|
36
|
+
readonly offset: number;
|
|
37
|
+
readonly position: number;
|
|
38
|
+
constructor(offset: number, position: number, options?: ErrorOptions);
|
|
39
|
+
}
|
|
40
|
+
export declare class ClosedCursorError extends BoteError {
|
|
41
|
+
readonly code: 'closed';
|
|
42
|
+
constructor();
|
|
43
|
+
}
|
|
44
|
+
/** Rebuild a typed {@link BoteError} from a native addon error, anchoring it to
|
|
45
|
+
* the `path` of the call it surfaced through. Pass-through for anything that
|
|
46
|
+
* isn't a recognized native error (including errors already typed here). */
|
|
47
|
+
export declare function deserializeNativeError(err: unknown, path: Path): unknown;
|
package/dist/error.js
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { formatPath } from "./path.js";
|
|
2
|
+
/** Base class for every error bote raises from its own logic. Catch this to
|
|
3
|
+
* catch anything bote throws; branch on {@link BoteError.code} for the precise
|
|
4
|
+
* kind. Always carries a `bote:`-prefixed message. */
|
|
5
|
+
export class BoteError extends Error {
|
|
6
|
+
code;
|
|
7
|
+
constructor(code, message, options) {
|
|
8
|
+
super(message, options);
|
|
9
|
+
this.code = code;
|
|
10
|
+
this.name = 'BoteError';
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
export class PathError extends BoteError {
|
|
14
|
+
path;
|
|
15
|
+
constructor(path, code, segment) {
|
|
16
|
+
const reason = (PATH_FAULT_MESSAGE[code] ?? (() => code))(segment);
|
|
17
|
+
super(code, `bote: cannot resolve ${formatPath(path)}: ${reason}`);
|
|
18
|
+
this.name = 'PathError';
|
|
19
|
+
this.path = path;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
export class ValidationError extends BoteError {
|
|
23
|
+
issues;
|
|
24
|
+
path;
|
|
25
|
+
constructor(issues, path) {
|
|
26
|
+
super('validation', `bote: schema validation failed at ${formatPath(path)}: ${issues[0]?.message ?? 'unknown'}`);
|
|
27
|
+
this.name = 'ValidationError';
|
|
28
|
+
this.issues = issues;
|
|
29
|
+
this.path = path;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
export class MalformedJsonError extends BoteError {
|
|
33
|
+
path;
|
|
34
|
+
constructor(path, code, options) {
|
|
35
|
+
const what = code === 'unexpected_eof' ? 'unexpected end of JSON input' : 'malformed JSON';
|
|
36
|
+
super(code, `bote: ${what} at ${formatPath(path)}`, options);
|
|
37
|
+
this.name = 'MalformedJsonError';
|
|
38
|
+
this.path = path;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
export class SourceReadError extends BoteError {
|
|
42
|
+
path;
|
|
43
|
+
constructor(path, detail, options) {
|
|
44
|
+
super('source_io', `bote: source read failed at ${formatPath(path)}: ${detail}`, options);
|
|
45
|
+
this.name = 'SourceReadError';
|
|
46
|
+
this.path = path;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
export class ForwardReplayError extends BoteError {
|
|
50
|
+
offset;
|
|
51
|
+
position;
|
|
52
|
+
constructor(offset, position, options) {
|
|
53
|
+
super('forward_replay', `bote: forward source cannot rewind to offset ${offset} from ${position}: the stream has already advanced. ` +
|
|
54
|
+
"Pass { rewind: 'replay' } if the producer is idempotent, { rewind: 'buffer' } to snapshot it in memory, " +
|
|
55
|
+
'or use a seekable source (fromFile/fromBuffer/fromHttpRange) for repeated or out-of-order access.', options);
|
|
56
|
+
this.name = 'ForwardReplayError';
|
|
57
|
+
this.offset = offset;
|
|
58
|
+
this.position = position;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
export class ClosedCursorError extends BoteError {
|
|
62
|
+
constructor() {
|
|
63
|
+
super('closed', 'bote: cursor is closed');
|
|
64
|
+
this.name = 'ClosedCursorError';
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
/** `bote:<code>[:<detail>]` lines the native addon emits in place of a human
|
|
68
|
+
* message, so the typed error and its message live on this side only. `<code>`
|
|
69
|
+
* is a Rust-owned native fault code; `<detail>` is a path fault's offending
|
|
70
|
+
* segment or a source fault's reason. The code groups below are typed against
|
|
71
|
+
* the Rust enums, so renaming a code in Rust breaks compilation here. */
|
|
72
|
+
const NATIVE_ERROR = /^bote:([a-z_]+)(?::([\s\S]*))?$/;
|
|
73
|
+
const PATH_CODES = ['through_scalar', 'scalar_target', 'wrong_kind'];
|
|
74
|
+
const JSON_CODES = ['malformed_json', 'unexpected_eof'];
|
|
75
|
+
const SOURCE_CODE = 'source_io';
|
|
76
|
+
const FORWARD_REWIND = /forward source cannot rewind to offset (\d+) from (\d+)/;
|
|
77
|
+
/** Rebuild a typed {@link BoteError} from a native addon error, anchoring it to
|
|
78
|
+
* the `path` of the call it surfaced through. Pass-through for anything that
|
|
79
|
+
* isn't a recognized native error (including errors already typed here). */
|
|
80
|
+
export function deserializeNativeError(err, path) {
|
|
81
|
+
if (!(err instanceof Error) || err instanceof BoteError) {
|
|
82
|
+
return err;
|
|
83
|
+
}
|
|
84
|
+
const match = NATIVE_ERROR.exec(err.message);
|
|
85
|
+
if (!match) {
|
|
86
|
+
return err;
|
|
87
|
+
}
|
|
88
|
+
const code = match[1];
|
|
89
|
+
const detail = match[2];
|
|
90
|
+
if (PATH_CODES.includes(code)) {
|
|
91
|
+
const segment = detail === undefined ? undefined : Number(detail);
|
|
92
|
+
return new PathError(path, code, segment);
|
|
93
|
+
}
|
|
94
|
+
if (JSON_CODES.includes(code)) {
|
|
95
|
+
return new MalformedJsonError(path, code, { cause: err });
|
|
96
|
+
}
|
|
97
|
+
if (code === SOURCE_CODE) {
|
|
98
|
+
// A forward reader rejects its read() with a ForwardReplayError; the native
|
|
99
|
+
// layer can only relay it as a generic source_io fault, so rebuild the typed
|
|
100
|
+
// error from the message it wrapped (offset/position survive in the detail).
|
|
101
|
+
const rewind = FORWARD_REWIND.exec(detail ?? '');
|
|
102
|
+
if (rewind) {
|
|
103
|
+
return new ForwardReplayError(Number(rewind[1]), Number(rewind[2]), { cause: err });
|
|
104
|
+
}
|
|
105
|
+
return new SourceReadError(path, detail ?? '', { cause: err });
|
|
106
|
+
}
|
|
107
|
+
return err;
|
|
108
|
+
}
|
|
109
|
+
const PATH_FAULT_MESSAGE = {
|
|
110
|
+
wrong_kind: (segment) => `path segment ${segment} does not match the container kind`,
|
|
111
|
+
scalar_target: () => 'target value is not a container',
|
|
112
|
+
through_scalar: (segment) => `path traverses a non-container value at segment ${segment}`,
|
|
113
|
+
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
export { type IterOptions } from './args.ts';
|
|
2
|
-
export {
|
|
3
|
-
export {
|
|
4
|
-
export {
|
|
2
|
+
export { type StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
export { BoteError, PathError, SourceReadError, ValidationError, ClosedCursorError, MalformedJsonError, ForwardReplayError, type BoteErrorCode, type PathFaultCode, type JsonFaultCode, type SourceFaultCode, } from './error.ts';
|
|
4
|
+
export { formatPath, type Path, type Segment } from './path.ts';
|
|
5
|
+
export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type IterKey } from './cursor.ts';
|
|
6
|
+
export { type Source, type Reader, type ReadResult, type ForwardSource, type FactoryOptions, type SeekableSource, } from './source/base.ts';
|
|
7
|
+
export { fromFile, fromBuffer, fromHttpRange, type HttpRangeOptions } from './source/seekable.ts';
|
|
8
|
+
export { fromReadable, fromHttpRequest, type ReadableOptions, type ReadableProducer, type HttpRequestOptions, } from './source/forward.ts';
|
|
9
|
+
export { type IterStream } from './stream.ts';
|
|
10
|
+
export { open, type OpenOptions, type ForwardOpenOptions } from './open.ts';
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export {
|
|
3
|
-
export {
|
|
1
|
+
export { BoteError, PathError, SourceReadError, ValidationError, ClosedCursorError, MalformedJsonError, ForwardReplayError, } from "./error.js";
|
|
2
|
+
export { formatPath } from "./path.js";
|
|
3
|
+
export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH } from "./cursor.js";
|
|
4
|
+
export { fromFile, fromBuffer, fromHttpRange } from "./source/seekable.js";
|
|
5
|
+
export { fromReadable, fromHttpRequest, } from "./source/forward.js";
|
|
6
|
+
export { open } from "./open.js";
|