@botejs/core 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -117
- package/dist/args.d.ts +14 -8
- package/dist/args.js +6 -3
- package/dist/cursor.d.ts +44 -6
- package/dist/cursor.js +41 -41
- package/dist/error.d.ts +47 -0
- package/dist/error.js +113 -0
- package/dist/index.d.ts +7 -3
- package/dist/index.js +4 -2
- package/dist/open.d.ts +31 -24
- package/dist/open.js +27 -20
- package/dist/path.d.ts +3 -1
- package/dist/path.js +28 -4
- package/dist/source/base.d.ts +70 -0
- package/dist/source/base.js +1 -0
- package/dist/source/forward.d.ts +49 -0
- package/dist/source/forward.js +219 -0
- package/dist/source/seekable.d.ts +8 -0
- package/dist/{sources.js → source/seekable.js} +24 -10
- package/dist/validate.d.ts +2 -16
- package/dist/validate.js +5 -51
- package/package.json +3 -2
- package/dist/decode.d.ts +0 -3
- package/dist/decode.js +0 -20
- package/dist/sources.d.ts +0 -39
package/README.md
CHANGED
|
@@ -1,140 +1,68 @@
|
|
|
1
1
|
# bote
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
A fast, modern, low-memory approach to processing large JSON documents:
|
|
4
4
|
|
|
5
5
|
```sh
|
|
6
6
|
npm install @botejs/core
|
|
7
7
|
```
|
|
8
8
|
|
|
9
9
|
```ts
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
|
|
13
|
-
//
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
| items[535399] | bote | 193.49 ms | 191.5 KB | 36.7 MB |
|
|
32
|
-
| items[1070797] | bote | 379.98 ms | 189.8 KB | 37.2 MB |
|
|
33
|
-
|
|
34
|
-
## array access
|
|
35
|
-
|
|
36
|
-
`iter` streams the children of a container at a path **one item at a time**, so you never hold the whole collection in memory. it works on either kind: array elements or object member values. each `for await` step yields a single item:
|
|
37
|
-
|
|
38
|
-
```ts
|
|
39
|
-
// e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
|
|
40
|
-
await using cursor = await open(fromFile('./users.json'))
|
|
41
|
-
|
|
42
|
-
// root is an array
|
|
43
|
-
for await (const user of cursor.iter()) {
|
|
44
|
-
console.log(user)
|
|
45
|
-
}
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
the item loop is the ergonomic default; it costs a flat ~10% over a full walk. for hot paths, `.raw()` hands back the raw fetch arrays with no per-item tax (the `batch` option sets their size and the memory bound):
|
|
49
|
-
|
|
50
|
-
```ts
|
|
51
|
-
for await (const users of cursor.iter().raw()) {
|
|
52
|
-
for (const user of users) {
|
|
53
|
-
console.log(user)
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
## object access
|
|
59
|
-
|
|
60
|
-
`iter` over an object yields its **member values** in document order. add `withKey: true` to get **`[key, value]`** pairs instead, where `key` is the member name (for an array, `key` is the element's index). streamed either way, so a million-member object never lands on the heap at once:
|
|
61
|
-
|
|
62
|
-
```ts
|
|
63
|
-
// e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
|
|
64
|
-
await using cursor = await open(fromFile('./accounts.json'))
|
|
65
|
-
|
|
66
|
-
for await (const [name, account] of cursor.iter({ withKey: true })) {
|
|
67
|
-
// name is the member name ('alice', 'bob', ...); account is its value
|
|
68
|
-
console.log(`${name}: ${account.role}`)
|
|
69
|
-
}
|
|
10
|
+
import { fileURLToPath } from 'node:url';
|
|
11
|
+
import { open, fromFile } from '@botejs/core';
|
|
12
|
+
|
|
13
|
+
// 181 MB GeoJSON:
|
|
14
|
+
// { type: "...", features: [{ properties: { STREET: "..." }}] }
|
|
15
|
+
const filePath = fileURLToPath(new URL('../citylots.json', import.meta.url));
|
|
16
|
+
|
|
17
|
+
await using cursor = await open(fromFile(filePath));
|
|
18
|
+
|
|
19
|
+
const byStreet = await cursor
|
|
20
|
+
.iter('features', {
|
|
21
|
+
select: ['properties', 'STREET'],
|
|
22
|
+
})
|
|
23
|
+
.reduce((tally, street) => {
|
|
24
|
+
if (typeof street === 'string') {
|
|
25
|
+
tally.set(street, (tally.get(street) ?? 0) + 1);
|
|
26
|
+
}
|
|
27
|
+
return tally;
|
|
28
|
+
}, new Map());
|
|
29
|
+
|
|
30
|
+
console.log([...byStreet].sort((a, b) => b[1] - a[1]).slice(0, 10));
|
|
70
31
|
```
|
|
71
32
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
## hopping
|
|
75
|
-
|
|
76
|
-
`hop` resolves a path once and hands back a **cursor** anchored at that value (or `null` if the path isn't there):
|
|
77
|
-
|
|
78
|
-
```ts
|
|
79
|
-
// e.g. { report: { sections: [{ rows: [...] }, ...] } }
|
|
80
|
-
await using cursor = await open(fromFile('./report.json'))
|
|
81
|
-
|
|
82
|
-
const section = await cursor.hop('report', 'sections', 0)
|
|
83
|
-
if (section) {
|
|
84
|
-
console.log(await section.count('rows'))
|
|
85
|
-
for await (const row of section.iter('rows')) {
|
|
86
|
-
console.log(row)
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
```
|
|
33
|
+
Given a **seekable** or **forward** source and a path, it retrieves values from a JSON document without loading the whole thing into memory.
|
|
90
34
|
|
|
91
|
-
|
|
35
|
+
Here's a benchmark run (Apple M1 Pro 2021, default settings, RUNS=100):
|
|
92
36
|
|
|
93
|
-
|
|
37
|
+
| method | mean time (seconds) | mean peak footprint (MB) |
|
|
38
|
+
| ------------------ | ----------------- | ------------------------ |
|
|
39
|
+
| bote | 0.517 ± 0.018 s | 40.3 ± 2.5 |
|
|
40
|
+
| JSON.parse | 0.816 ± 0.031 s | 648.9 ± 2.4 |
|
|
41
|
+
| JSONStream | 4.452 ± 0.052 s | 57.9 ± 3.9 |
|
|
42
|
+
| @streamparser/json | 5.103 ± 0.084 s | 47.9 ± 2.3 |
|
|
43
|
+
| oboe.js | 8.566 ± 0.295 s | 100.0 ± 4.6 |
|
|
44
|
+
| stream-json | 13.346 ± 0.569 s | 207.6 ± 8.4 |
|
|
94
45
|
|
|
95
|
-
|
|
96
|
-
import { open, fromFile } from '@botejs/core'
|
|
97
|
-
import * as z from 'zod' // or any Standard Schema validator
|
|
98
|
-
|
|
99
|
-
// a downstream API that wants a typed list of recipients
|
|
100
|
-
declare function sendNewsletter(recipients: string[]): Promise<void>
|
|
101
|
-
|
|
102
|
-
const User = z.object({
|
|
103
|
-
id: z.string(),
|
|
104
|
-
name: z.string(),
|
|
105
|
-
email: z.string(),
|
|
106
|
-
})
|
|
107
|
-
|
|
108
|
-
const cursor = await open(fromFile('./users.json'))
|
|
109
|
-
|
|
110
|
-
// name: string
|
|
111
|
-
const name = await cursor.get('users', 1000, 'name', User.shape.name)
|
|
112
|
-
|
|
113
|
-
let emails: string[] = []
|
|
114
|
-
// .raw() to hand each fetch's worth of recipients to the batched API at once
|
|
115
|
-
for await (const user of cursor.iter('users', User)) {
|
|
116
|
-
// user: User
|
|
117
|
-
emails.push(user.email)
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
await sendNewsletter(emails)
|
|
121
|
-
await cursor.close()
|
|
122
|
-
```
|
|
46
|
+
For comparison notes, go [here](https://github.com/jankdc/bote-comparison).
|
|
123
47
|
|
|
124
|
-
##
|
|
48
|
+
## Features
|
|
125
49
|
|
|
126
|
-
|
|
50
|
+
* Modern `AsyncIterator` API with helpers that emulate the [TC39 async iterator helpers](https://github.com/tc39/proposal-async-iterator-helpers)
|
|
51
|
+
* Validate with [Standard Schema](https://standardschema.dev/), avoiding those pesky `unknown`s
|
|
52
|
+
* Supports multiple data sources (e.g. file, network, stream), and lets you write a custom one (see [example](./examples/))
|
|
53
|
+
* For forward-only sources, there's support for replaying/buffering, allowing navigation to previous values
|
|
127
54
|
|
|
128
|
-
|
|
55
|
+
## Documentation
|
|
129
56
|
|
|
130
|
-
|
|
57
|
+
Coming soon. In the meantime, check the [./examples](./examples/) folder for usage examples. I've also heavily JSDoc'ed the hell out of the API, so have fun
|
|
58
|
+
playing around with it for now.
|
|
131
59
|
|
|
132
|
-
|
|
60
|
+
## Status
|
|
133
61
|
|
|
134
|
-
|
|
62
|
+
Pre-1.0. Still in development and APIs may change based on feedback, bugs and holy divinations from the coding gods.
|
|
135
63
|
|
|
136
|
-
|
|
64
|
+
I would say it's about 90% of the way to a satisfactory MVP, and I'm getting there.
|
|
137
65
|
|
|
138
|
-
##
|
|
66
|
+
## License
|
|
139
67
|
|
|
140
68
|
MIT.
|
package/dist/args.d.ts
CHANGED
|
@@ -1,20 +1,26 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type Path, type Segment } from './path.ts';
|
|
2
|
+
import type { StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
/** Trailing options object for `Cursor.iter`, tuning how the iteration yields items. */
|
|
2
4
|
export interface IterOptions {
|
|
5
|
+
/** Project each member before it is yielded. A single segment or path picks a
|
|
6
|
+
* sub-value; a field map (`{ name: 'name', city: ['address', 'city'] }`)
|
|
7
|
+
* builds an object from several sub-paths. */
|
|
3
8
|
select?: Segment | Path | Record<string, Segment | Path>;
|
|
4
9
|
/** How many items cross the native boundary per fetch, which also bounds the
|
|
5
|
-
* resident
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
+
* resident memory to that batch and sets the array size yielded by `IterStream.raw()`.
|
|
11
|
+
* The default item loop drains each fetch one item at a time, so this doesn't change
|
|
12
|
+
* what item iteration yields, only how much is fetched and held at once.
|
|
13
|
+
* Higher is faster but holds more in memory.
|
|
14
|
+
*
|
|
15
|
+
* Default is `1000`. */
|
|
10
16
|
batch?: number;
|
|
11
17
|
/** Validate each yielded item against this schema (after `select`). */
|
|
12
18
|
schema?: StandardSchemaV1;
|
|
13
|
-
/** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
|
|
14
|
-
onInvalid?: 'throw' | 'skip';
|
|
15
19
|
/** Yield `[key, value]` tuples instead of bare values. `key` is the member
|
|
16
20
|
* name for objects and the zero-based index for arrays. */
|
|
17
21
|
withKey?: boolean;
|
|
22
|
+
/** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
|
|
23
|
+
onInvalid?: 'throw' | 'skip';
|
|
18
24
|
}
|
|
19
25
|
export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
|
|
20
26
|
export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
|
package/dist/args.js
CHANGED
|
@@ -24,10 +24,12 @@ export function isSchema(value) {
|
|
|
24
24
|
return typeof value === 'object' && value !== null && '~standard' in value;
|
|
25
25
|
}
|
|
26
26
|
export function normalizeIterTail(tail) {
|
|
27
|
-
if (!tail)
|
|
27
|
+
if (!tail) {
|
|
28
28
|
return {};
|
|
29
|
-
|
|
29
|
+
}
|
|
30
|
+
if (isSchema(tail)) {
|
|
30
31
|
return { schema: tail };
|
|
32
|
+
}
|
|
31
33
|
return tail;
|
|
32
34
|
}
|
|
33
35
|
export function serializeSelect(select) {
|
|
@@ -63,7 +65,8 @@ export function serializeSelect(select) {
|
|
|
63
65
|
return JSON.stringify({ map: entries });
|
|
64
66
|
}
|
|
65
67
|
function describeSelect(value) {
|
|
66
|
-
if (value === null)
|
|
68
|
+
if (value === null) {
|
|
67
69
|
return 'null';
|
|
70
|
+
}
|
|
68
71
|
return Array.isArray(value) ? 'array' : typeof value;
|
|
69
72
|
}
|
package/dist/cursor.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Cursor as NativeCursor } from '@botejs/native';
|
|
2
|
+
import { type Path, type Segment } from './path.ts';
|
|
2
3
|
import { type IterStream } from './stream.ts';
|
|
3
|
-
import { type
|
|
4
|
+
import { type StandardSchemaV1 } from './validate.ts';
|
|
4
5
|
import { type IterOptions } from './args.ts';
|
|
5
6
|
type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
6
7
|
type SelectMapShape<S> = {
|
|
@@ -10,12 +11,52 @@ export type IterKey = string | number;
|
|
|
10
11
|
export declare const DEFAULT_ITER_BATCH = 1000;
|
|
11
12
|
export declare const MAX_ITER_BATCH = 1000000;
|
|
12
13
|
export interface Cursor {
|
|
14
|
+
/**
|
|
15
|
+
* Resolve `path` to a container and return a new cursor anchored there, or
|
|
16
|
+
* `null` if it is absent. Child cursors share the root's source and lifetime;
|
|
17
|
+
* closing the root closes them too.
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* const user = await root.hop('users', 0);
|
|
21
|
+
* const name = await user?.get('name');
|
|
22
|
+
*/
|
|
13
23
|
hop(...path: Segment[]): Promise<Cursor | null>;
|
|
24
|
+
/**
|
|
25
|
+
* Report whether a value exists at `path`. With a trailing Standard Schema,
|
|
26
|
+
* also require the value to validate against it (a parse/validation miss
|
|
27
|
+
* yields `false` rather than throwing).
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* await root.has('users', 0, 'email');
|
|
31
|
+
* await root.has('users', 0, 'age', z.number());
|
|
32
|
+
*/
|
|
14
33
|
has(...path: Segment[]): Promise<boolean>;
|
|
15
34
|
has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
|
|
35
|
+
/**
|
|
36
|
+
* Read and decode the value at `path`, or `undefined` if absent. With a
|
|
37
|
+
* trailing Standard Schema, validate and return its parsed output, throwing
|
|
38
|
+
* on failure.
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* const name = await root.get('users', 0, 'name');
|
|
42
|
+
* const age = await root.get('users', 0, 'age', z.number());
|
|
43
|
+
*/
|
|
16
44
|
get(...path: Segment[]): Promise<unknown>;
|
|
17
45
|
get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
|
|
18
|
-
|
|
46
|
+
/**
|
|
47
|
+
* Stream the members of the array or object at `path` as an async iterable.
|
|
48
|
+
* A trailing Standard Schema validates each item; a trailing {@link IterOptions}
|
|
49
|
+
* object tunes the iteration (see its fields for the available knobs).
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* for await (const user of root.iter('users')) {
|
|
53
|
+
* console.log(user);
|
|
54
|
+
* }
|
|
55
|
+
*
|
|
56
|
+
* for await (const [i, name] of root.iter('users', { withKey: true, select: ['name'] })) {
|
|
57
|
+
* console.log(i, name);
|
|
58
|
+
* }
|
|
59
|
+
*/
|
|
19
60
|
iter(...path: Segment[]): IterStream<unknown>;
|
|
20
61
|
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): IterStream<InferOutput<Sch>>;
|
|
21
62
|
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
|
|
@@ -44,9 +85,6 @@ export interface RootCursor extends Cursor, AsyncDisposable {
|
|
|
44
85
|
export type CursorState = {
|
|
45
86
|
closed: boolean;
|
|
46
87
|
};
|
|
47
|
-
/** Throw a uniform error for any operation on a closed cursor, so use-after-close
|
|
48
|
-
* is one defined contract regardless of source (some readers' reads keep working
|
|
49
|
-
* after close, others throw an opaque I/O error). */
|
|
50
|
-
export declare function ensureOpen(state: CursorState): void;
|
|
51
88
|
export declare function wrap(native: NativeCursor, state: CursorState): Cursor;
|
|
89
|
+
export declare function ensureOpen(state: CursorState): void;
|
|
52
90
|
export {};
|
package/dist/cursor.js
CHANGED
|
@@ -1,29 +1,16 @@
|
|
|
1
|
+
import { deserializeNativeError, ClosedCursorError, MalformedJsonError } from "./error.js";
|
|
1
2
|
import { validatePath } from "./path.js";
|
|
2
|
-
import { parseValue, deserializeError } from "./decode.js";
|
|
3
3
|
import { makeStream } from "./stream.js";
|
|
4
4
|
import { runStandardSchema, validateItem } from "./validate.js";
|
|
5
5
|
import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
|
|
6
6
|
export const DEFAULT_ITER_BATCH = 1000;
|
|
7
7
|
export const MAX_ITER_BATCH = 1_000_000;
|
|
8
|
-
/** Throw a uniform error for any operation on a closed cursor, so use-after-close
|
|
9
|
-
* is one defined contract regardless of source (some readers' reads keep working
|
|
10
|
-
* after close, others throw an opaque I/O error). */
|
|
11
|
-
export function ensureOpen(state) {
|
|
12
|
-
if (state.closed)
|
|
13
|
-
throw new Error('bote: cursor is closed');
|
|
14
|
-
}
|
|
15
8
|
export function wrap(native, state) {
|
|
16
9
|
const cursor = {
|
|
17
10
|
async hop(...path) {
|
|
18
11
|
ensureOpen(state);
|
|
19
12
|
validatePath(path);
|
|
20
|
-
|
|
21
|
-
try {
|
|
22
|
-
child = await native.hop(path);
|
|
23
|
-
}
|
|
24
|
-
catch (err) {
|
|
25
|
-
throw deserializeError(err, path);
|
|
26
|
-
}
|
|
13
|
+
const child = await withPath(path, () => native.hop(path));
|
|
27
14
|
return child ? wrap(child, state) : null;
|
|
28
15
|
},
|
|
29
16
|
async has(...args) {
|
|
@@ -32,11 +19,13 @@ export function wrap(native, state) {
|
|
|
32
19
|
if (schema !== undefined && !isSchema(schema)) {
|
|
33
20
|
throw new TypeError('has: expected a Standard Schema as the trailing argument');
|
|
34
21
|
}
|
|
35
|
-
if (!schema)
|
|
36
|
-
return native.has(path);
|
|
37
|
-
|
|
22
|
+
if (!schema) {
|
|
23
|
+
return withPath(path, () => native.has(path));
|
|
24
|
+
}
|
|
25
|
+
if (!(await withPath(path, () => native.has(path)))) {
|
|
38
26
|
return false;
|
|
39
|
-
|
|
27
|
+
}
|
|
28
|
+
const text = await withPath(path, () => native.get(path));
|
|
40
29
|
const value = text === undefined ? undefined : parseValue(text, path);
|
|
41
30
|
const result = await validateItem(schema, value, path, 'skip');
|
|
42
31
|
return !('skip' in result);
|
|
@@ -47,27 +36,12 @@ export function wrap(native, state) {
|
|
|
47
36
|
if (schema !== undefined && !isSchema(schema)) {
|
|
48
37
|
throw new TypeError('get: expected a Standard Schema as the trailing argument');
|
|
49
38
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
value = text === undefined ? undefined : parseValue(text, path);
|
|
54
|
-
}
|
|
55
|
-
catch (err) {
|
|
56
|
-
throw deserializeError(err, path);
|
|
57
|
-
}
|
|
58
|
-
if (!schema)
|
|
39
|
+
const text = await withPath(path, () => native.get(path));
|
|
40
|
+
const value = text === undefined ? undefined : parseValue(text, path);
|
|
41
|
+
if (!schema) {
|
|
59
42
|
return value;
|
|
60
|
-
return runStandardSchema(schema, value, path);
|
|
61
|
-
},
|
|
62
|
-
async count(...path) {
|
|
63
|
-
ensureOpen(state);
|
|
64
|
-
validatePath(path);
|
|
65
|
-
try {
|
|
66
|
-
return await native.count(path);
|
|
67
|
-
}
|
|
68
|
-
catch (err) {
|
|
69
|
-
throw deserializeError(err, path);
|
|
70
43
|
}
|
|
44
|
+
return runStandardSchema(schema, value, path);
|
|
71
45
|
},
|
|
72
46
|
iter(...args) {
|
|
73
47
|
ensureOpen(state);
|
|
@@ -95,8 +69,9 @@ export function wrap(native, state) {
|
|
|
95
69
|
const out = [];
|
|
96
70
|
for (const [key, value] of parseValue(raw, path)) {
|
|
97
71
|
const result = await validateItem(schema, value, [...path, key], policy);
|
|
98
|
-
if ('skip' in result)
|
|
72
|
+
if ('skip' in result) {
|
|
99
73
|
continue;
|
|
74
|
+
}
|
|
100
75
|
out.push(wantKey ? [key, result.value] : result.value);
|
|
101
76
|
}
|
|
102
77
|
return out;
|
|
@@ -105,15 +80,40 @@ export function wrap(native, state) {
|
|
|
105
80
|
};
|
|
106
81
|
return cursor;
|
|
107
82
|
}
|
|
83
|
+
export function ensureOpen(state) {
|
|
84
|
+
if (state.closed) {
|
|
85
|
+
throw new ClosedCursorError();
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
/** Run a native call, retyping any addon error as the matching {@link BoteError}
|
|
89
|
+
* anchored to `path`. The single funnel every cursor operation passes through,
|
|
90
|
+
* so native faults surface uniformly. */
|
|
91
|
+
async function withPath(path, op) {
|
|
92
|
+
try {
|
|
93
|
+
return await op();
|
|
94
|
+
}
|
|
95
|
+
catch (err) {
|
|
96
|
+
throw deserializeNativeError(err, path);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
108
99
|
function nativeStream(inner, path, batchSize, mapBatch) {
|
|
109
100
|
async function* batches() {
|
|
110
101
|
try {
|
|
111
|
-
for await (const raw of inner)
|
|
102
|
+
for await (const raw of inner) {
|
|
112
103
|
yield await mapBatch(raw);
|
|
104
|
+
}
|
|
113
105
|
}
|
|
114
106
|
catch (err) {
|
|
115
|
-
throw
|
|
107
|
+
throw deserializeNativeError(err, path);
|
|
116
108
|
}
|
|
117
109
|
}
|
|
118
110
|
return makeStream(batches, batchSize);
|
|
119
111
|
}
|
|
112
|
+
function parseValue(text, path) {
|
|
113
|
+
try {
|
|
114
|
+
return JSON.parse(text);
|
|
115
|
+
}
|
|
116
|
+
catch (cause) {
|
|
117
|
+
throw new MalformedJsonError(path, 'malformed_json', { cause });
|
|
118
|
+
}
|
|
119
|
+
}
|
package/dist/error.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { StandardSchemaV1 } from '@standard-schema/spec';
|
|
2
|
+
import type { PathFaultCode, JsonFaultCode, SourceFaultCode } from '@botejs/native';
|
|
3
|
+
import { type Path } from './path.ts';
|
|
4
|
+
export type { PathFaultCode, JsonFaultCode, SourceFaultCode };
|
|
5
|
+
export type BoteErrorCode = PathFaultCode | JsonFaultCode | SourceFaultCode | 'validation' | 'closed' | 'forward_replay';
|
|
6
|
+
/** Base class for every error bote raises from its own logic. Catch this to
|
|
7
|
+
* catch anything bote throws; branch on {@link BoteError.code} for the precise
|
|
8
|
+
* kind. Always carries a `bote:`-prefixed message. */
|
|
9
|
+
export declare abstract class BoteError extends Error {
|
|
10
|
+
readonly code: BoteErrorCode;
|
|
11
|
+
constructor(code: BoteErrorCode, message: string, options?: ErrorOptions);
|
|
12
|
+
}
|
|
13
|
+
export declare class PathError extends BoteError {
|
|
14
|
+
readonly code: PathFaultCode;
|
|
15
|
+
readonly path: Path;
|
|
16
|
+
constructor(path: Path, code: PathFaultCode, segment?: number);
|
|
17
|
+
}
|
|
18
|
+
export declare class ValidationError extends BoteError {
|
|
19
|
+
readonly code: 'validation';
|
|
20
|
+
readonly issues: readonly StandardSchemaV1.Issue[];
|
|
21
|
+
readonly path: Path;
|
|
22
|
+
constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
|
|
23
|
+
}
|
|
24
|
+
export declare class MalformedJsonError extends BoteError {
|
|
25
|
+
readonly code: JsonFaultCode;
|
|
26
|
+
readonly path: Path;
|
|
27
|
+
constructor(path: Path, code: JsonFaultCode, options?: ErrorOptions);
|
|
28
|
+
}
|
|
29
|
+
export declare class SourceReadError extends BoteError {
|
|
30
|
+
readonly code: SourceFaultCode;
|
|
31
|
+
readonly path: Path;
|
|
32
|
+
constructor(path: Path, detail: string, options?: ErrorOptions);
|
|
33
|
+
}
|
|
34
|
+
export declare class ForwardReplayError extends BoteError {
|
|
35
|
+
readonly code: 'forward_replay';
|
|
36
|
+
readonly offset: number;
|
|
37
|
+
readonly position: number;
|
|
38
|
+
constructor(offset: number, position: number, options?: ErrorOptions);
|
|
39
|
+
}
|
|
40
|
+
export declare class ClosedCursorError extends BoteError {
|
|
41
|
+
readonly code: 'closed';
|
|
42
|
+
constructor();
|
|
43
|
+
}
|
|
44
|
+
/** Rebuild a typed {@link BoteError} from a native addon error, anchoring it to
|
|
45
|
+
* the `path` of the call it surfaced through. Pass-through for anything that
|
|
46
|
+
* isn't a recognized native error (including errors already typed here). */
|
|
47
|
+
export declare function deserializeNativeError(err: unknown, path: Path): unknown;
|
package/dist/error.js
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { formatPath } from "./path.js";
|
|
2
|
+
/** Base class for every error bote raises from its own logic. Catch this to
|
|
3
|
+
* catch anything bote throws; branch on {@link BoteError.code} for the precise
|
|
4
|
+
* kind. Always carries a `bote:`-prefixed message. */
|
|
5
|
+
export class BoteError extends Error {
|
|
6
|
+
code;
|
|
7
|
+
constructor(code, message, options) {
|
|
8
|
+
super(message, options);
|
|
9
|
+
this.code = code;
|
|
10
|
+
this.name = 'BoteError';
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
export class PathError extends BoteError {
|
|
14
|
+
path;
|
|
15
|
+
constructor(path, code, segment) {
|
|
16
|
+
const reason = (PATH_FAULT_MESSAGE[code] ?? (() => code))(segment);
|
|
17
|
+
super(code, `bote: cannot resolve ${formatPath(path)}: ${reason}`);
|
|
18
|
+
this.name = 'PathError';
|
|
19
|
+
this.path = path;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
export class ValidationError extends BoteError {
|
|
23
|
+
issues;
|
|
24
|
+
path;
|
|
25
|
+
constructor(issues, path) {
|
|
26
|
+
super('validation', `bote: schema validation failed at ${formatPath(path)}: ${issues[0]?.message ?? 'unknown'}`);
|
|
27
|
+
this.name = 'ValidationError';
|
|
28
|
+
this.issues = issues;
|
|
29
|
+
this.path = path;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
export class MalformedJsonError extends BoteError {
|
|
33
|
+
path;
|
|
34
|
+
constructor(path, code, options) {
|
|
35
|
+
const what = code === 'unexpected_eof' ? 'unexpected end of JSON input' : 'malformed JSON';
|
|
36
|
+
super(code, `bote: ${what} at ${formatPath(path)}`, options);
|
|
37
|
+
this.name = 'MalformedJsonError';
|
|
38
|
+
this.path = path;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
export class SourceReadError extends BoteError {
|
|
42
|
+
path;
|
|
43
|
+
constructor(path, detail, options) {
|
|
44
|
+
super('source_io', `bote: source read failed at ${formatPath(path)}: ${detail}`, options);
|
|
45
|
+
this.name = 'SourceReadError';
|
|
46
|
+
this.path = path;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
export class ForwardReplayError extends BoteError {
|
|
50
|
+
offset;
|
|
51
|
+
position;
|
|
52
|
+
constructor(offset, position, options) {
|
|
53
|
+
super('forward_replay', `bote: forward source cannot rewind to offset ${offset} from ${position}: the stream has already advanced. ` +
|
|
54
|
+
"Pass { rewind: 'replay' } if the producer is idempotent, { rewind: 'buffer' } to snapshot it in memory, " +
|
|
55
|
+
'or use a seekable source (fromFile/fromBuffer/fromHttpRange) for repeated or out-of-order access.', options);
|
|
56
|
+
this.name = 'ForwardReplayError';
|
|
57
|
+
this.offset = offset;
|
|
58
|
+
this.position = position;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
export class ClosedCursorError extends BoteError {
|
|
62
|
+
constructor() {
|
|
63
|
+
super('closed', 'bote: cursor is closed');
|
|
64
|
+
this.name = 'ClosedCursorError';
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
/** `bote:<code>[:<detail>]` lines the native addon emits in place of a human
|
|
68
|
+
* message, so the typed error and its message live on this side only. `<code>`
|
|
69
|
+
* is a Rust-owned native fault code; `<detail>` is a path fault's offending
|
|
70
|
+
* segment or a source fault's reason. The code groups below are typed against
|
|
71
|
+
* the Rust enums, so renaming a code in Rust breaks compilation here. */
|
|
72
|
+
const NATIVE_ERROR = /^bote:([a-z_]+)(?::([\s\S]*))?$/;
|
|
73
|
+
const PATH_CODES = ['through_scalar', 'scalar_target', 'wrong_kind'];
|
|
74
|
+
const JSON_CODES = ['malformed_json', 'unexpected_eof'];
|
|
75
|
+
const SOURCE_CODE = 'source_io';
|
|
76
|
+
const FORWARD_REWIND = /forward source cannot rewind to offset (\d+) from (\d+)/;
|
|
77
|
+
/** Rebuild a typed {@link BoteError} from a native addon error, anchoring it to
|
|
78
|
+
* the `path` of the call it surfaced through. Pass-through for anything that
|
|
79
|
+
* isn't a recognized native error (including errors already typed here). */
|
|
80
|
+
export function deserializeNativeError(err, path) {
|
|
81
|
+
if (!(err instanceof Error) || err instanceof BoteError) {
|
|
82
|
+
return err;
|
|
83
|
+
}
|
|
84
|
+
const match = NATIVE_ERROR.exec(err.message);
|
|
85
|
+
if (!match) {
|
|
86
|
+
return err;
|
|
87
|
+
}
|
|
88
|
+
const code = match[1];
|
|
89
|
+
const detail = match[2];
|
|
90
|
+
if (PATH_CODES.includes(code)) {
|
|
91
|
+
const segment = detail === undefined ? undefined : Number(detail);
|
|
92
|
+
return new PathError(path, code, segment);
|
|
93
|
+
}
|
|
94
|
+
if (JSON_CODES.includes(code)) {
|
|
95
|
+
return new MalformedJsonError(path, code, { cause: err });
|
|
96
|
+
}
|
|
97
|
+
if (code === SOURCE_CODE) {
|
|
98
|
+
// A forward reader rejects its read() with a ForwardReplayError; the native
|
|
99
|
+
// layer can only relay it as a generic source_io fault, so rebuild the typed
|
|
100
|
+
// error from the message it wrapped (offset/position survive in the detail).
|
|
101
|
+
const rewind = FORWARD_REWIND.exec(detail ?? '');
|
|
102
|
+
if (rewind) {
|
|
103
|
+
return new ForwardReplayError(Number(rewind[1]), Number(rewind[2]), { cause: err });
|
|
104
|
+
}
|
|
105
|
+
return new SourceReadError(path, detail ?? '', { cause: err });
|
|
106
|
+
}
|
|
107
|
+
return err;
|
|
108
|
+
}
|
|
109
|
+
const PATH_FAULT_MESSAGE = {
|
|
110
|
+
wrong_kind: (segment) => `path segment ${segment} does not match the container kind`,
|
|
111
|
+
scalar_target: () => 'target value is not a container',
|
|
112
|
+
through_scalar: (segment) => `path traverses a non-container value at segment ${segment}`,
|
|
113
|
+
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
export { type IterOptions } from './args.ts';
|
|
2
|
-
export {
|
|
2
|
+
export { type StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
export { BoteError, PathError, SourceReadError, ValidationError, ClosedCursorError, MalformedJsonError, ForwardReplayError, type BoteErrorCode, type PathFaultCode, type JsonFaultCode, type SourceFaultCode, } from './error.ts';
|
|
4
|
+
export { formatPath, type Path, type Segment } from './path.ts';
|
|
3
5
|
export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type IterKey } from './cursor.ts';
|
|
4
|
-
export {
|
|
6
|
+
export { type Source, type Reader, type ReadResult, type ForwardSource, type FactoryOptions, type SeekableSource, } from './source/base.ts';
|
|
7
|
+
export { fromFile, fromBuffer, fromHttpRange, type HttpRangeOptions } from './source/seekable.ts';
|
|
8
|
+
export { fromReadable, fromHttpRequest, type ReadableOptions, type ReadableProducer, type HttpRequestOptions, } from './source/forward.ts';
|
|
5
9
|
export { type IterStream } from './stream.ts';
|
|
6
|
-
export { open, type OpenOptions } from './open.ts';
|
|
10
|
+
export { open, type OpenOptions, type ForwardOpenOptions } from './open.ts';
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
export {
|
|
1
|
+
export { BoteError, PathError, SourceReadError, ValidationError, ClosedCursorError, MalformedJsonError, ForwardReplayError, } from "./error.js";
|
|
2
|
+
export { formatPath } from "./path.js";
|
|
2
3
|
export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH } from "./cursor.js";
|
|
3
|
-
export {
|
|
4
|
+
export { fromFile, fromBuffer, fromHttpRange } from "./source/seekable.js";
|
|
5
|
+
export { fromReadable, fromHttpRequest, } from "./source/forward.js";
|
|
4
6
|
export { open } from "./open.js";
|