@botejs/core 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -63
- package/dist/args.js +20 -15
- package/dist/index.d.ts +2 -2
- package/dist/index.js +3 -19
- package/dist/open.d.ts +5 -2
- package/dist/open.js +164 -52
- package/dist/path.js +4 -8
- package/dist/sources.js +5 -10
- package/dist/validate.d.ts +8 -1
- package/dist/validate.js +26 -11
- package/package.json +11 -4
package/README.md
CHANGED
|
@@ -8,94 +8,121 @@ npm install @botejs/core
|
|
|
8
8
|
|
|
9
9
|
```ts
|
|
10
10
|
import { open, fromFile } from '@botejs/core'
|
|
11
|
+
import { publish } from './message-bus'
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
// e.g. { items: [...] }
|
|
14
|
+
await using cursor = await open(fromFile('./some-large.json'))
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
16
|
+
// items[0]
|
|
17
|
+
const first = await cursor.get('items', 0)
|
|
18
|
+
console.log(`first item: ${first}`)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
given a **seekable** source (e.g. a file, an HTTP range) and a path, it retrieves values out of a JSON document quickly, without loading the whole thing into memory.
|
|
22
|
+
|
|
23
|
+
here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default settings):
|
|
24
|
+
|
|
25
|
+
| operation | approach | time | js heap peak Δ | rust heap peak |
|
|
26
|
+
| -------------- | ---------- | --------: | -------------: | -------------: |
|
|
27
|
+
| items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
|
|
28
|
+
| items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
|
|
29
|
+
| items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
|
|
30
|
+
| items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
|
|
31
|
+
| items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
|
|
32
|
+
| items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
|
|
22
33
|
|
|
23
|
-
|
|
34
|
+
## array access
|
|
24
35
|
|
|
25
|
-
|
|
36
|
+
`iter` streams the elements of an array at a path, **a batch at a time**, so you never hold the whole collection in memory, and you don't wait for the heat death of the universe as you would if elements were yielded individually. each `for await` step yields an array of items (use `walk` to step over the members of an object):
|
|
26
37
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const desc1: string = await cursor.get('users', 1000, 'name', User.shape.name)
|
|
38
|
+
```ts
|
|
39
|
+
// e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
|
|
40
|
+
await using cursor = await open(fromFile('./users.json'))
|
|
31
41
|
|
|
32
|
-
//
|
|
33
|
-
for await (const
|
|
34
|
-
|
|
35
|
-
for (const user of batch) {
|
|
42
|
+
// root is an array
|
|
43
|
+
for await (const users of cursor.iter()) {
|
|
44
|
+
for (const user of users) {
|
|
36
45
|
console.log(user)
|
|
37
46
|
}
|
|
38
47
|
}
|
|
48
|
+
```
|
|
39
49
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
}
|
|
50
|
+
pass an options object as the last argument to tune what comes back: `batch`, `select`, `schema`, `onInvalid`, and `withIndex`. to learn more about these options, see [`arrays.js`](./examples/arrays.js).
|
|
51
|
+
|
|
52
|
+
## object access
|
|
53
|
+
|
|
54
|
+
`walk` steps over the members of an object at a path, yielding a **`[key, cursor]`** pair per member. the key is the member name, the cursor is anchored at its value. each child cursor is first-class: it outlives the loop and can be `walk`ed again, which is what lets you descend a tree of unknown depth.
|
|
55
|
+
|
|
56
|
+
```ts
|
|
57
|
+
// e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
|
|
58
|
+
await using cursor = await open(fromFile('./accounts.json'))
|
|
59
|
+
|
|
60
|
+
for await (const [name, account] of cursor.walk()) {
|
|
61
|
+
// name is the member name ('alice', 'bob', ...)
|
|
62
|
+
const role = await account.get('role')
|
|
63
|
+
console.log(`${name}: ${role}`)
|
|
55
64
|
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
see [`recursive.js`](./examples/recursive.js) for advanced use-cases.
|
|
56
68
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
69
|
+
## hopping
|
|
70
|
+
|
|
71
|
+
`hop` resolves a path once and hands back a **cursor** anchored at that value (or `null` if the path isn't there):
|
|
72
|
+
|
|
73
|
+
```ts
|
|
74
|
+
// e.g. { report: { sections: [{ rows: [...] }, ...] } }
|
|
75
|
+
await using cursor = await open(fromFile('./report.json'))
|
|
76
|
+
|
|
77
|
+
const section = await cursor.hop('report', 'sections', 0)
|
|
78
|
+
if (section) {
|
|
79
|
+
console.log(await section.count('rows'))
|
|
80
|
+
for await (const rows of section.iter('rows')) {
|
|
81
|
+
console.log(rows)
|
|
65
82
|
}
|
|
66
83
|
}
|
|
84
|
+
```
|
|
67
85
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
86
|
+
## validation
|
|
87
|
+
|
|
88
|
+
`get` and `iter` take a [Standard Schema](https://standardschema.dev) validator as their last argument (for `iter`, it can also be passed in an `options` object). the value is validated and the return type is inferred from the schema, so reads come back typed instead of `unknown`:
|
|
89
|
+
|
|
90
|
+
```ts
|
|
91
|
+
import { open, fromFile } from '@botejs/core'
|
|
92
|
+
import * as z from 'zod' // or any Standard Schema validator
|
|
93
|
+
|
|
94
|
+
// a downstream API that wants a typed list of recipients
|
|
95
|
+
declare function sendNewsletter(recipients: string[]): Promise<void>
|
|
96
|
+
|
|
97
|
+
const User = z.object({
|
|
98
|
+
id: z.string(),
|
|
99
|
+
name: z.string(),
|
|
100
|
+
email: z.string(),
|
|
101
|
+
})
|
|
102
|
+
|
|
103
|
+
const cursor = await open(fromFile('./users.json'))
|
|
104
|
+
|
|
105
|
+
// name: string
|
|
106
|
+
const name = await cursor.get('users', 1000, 'name', User.shape.name)
|
|
107
|
+
|
|
108
|
+
for await (const users of cursor.iter('users', User)) {
|
|
109
|
+
// users: User[]
|
|
110
|
+
const emails = users.map((user) => user.email)
|
|
111
|
+
await sendNewsletter(emails)
|
|
75
112
|
}
|
|
76
113
|
|
|
77
|
-
// 'await using' would normally clean up resources for you
|
|
78
|
-
// when it goes out of lexical scope. if you hate that,
|
|
79
|
-
// you can do it explicitly as well.
|
|
80
114
|
await cursor.close()
|
|
81
115
|
```
|
|
82
116
|
|
|
83
|
-
|
|
117
|
+
## memory
|
|
84
118
|
|
|
85
|
-
|
|
119
|
+
bote keeps a small **structural-index** cache: as scans walk containers (arrays and objects), it remembers where members live, so a later query that lands in an already-walked container resumes near the target instead of from the top. it caches structure, never source bytes, so it can't grow unbounded with document size.
|
|
86
120
|
|
|
87
|
-
|
|
88
|
-
| -------------- | ---------- | --------: | -------------: | -------------: |
|
|
89
|
-
| items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
|
|
90
|
-
| items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
|
|
91
|
-
| items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
|
|
92
|
-
| items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
|
|
93
|
-
| items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
|
|
94
|
-
| items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
|
|
121
|
+
the defaults are good, but `open` takes a few knobs (`indexCacheEntries`, `objectMemberCap`, and `arrayIndexInterval`) to bound memory more tightly or turn the cache off. see [`memory.js`](./examples/memory.js) for what each does.
|
|
95
122
|
|
|
96
123
|
## sources
|
|
97
124
|
|
|
98
|
-
bote
|
|
125
|
+
bote ships `fromFile`, `fromHttpRange`, and `fromBuffer` as pre-built sources. create your own by implementing the `Source` interface. see [`sources-custom.ts`](./examples/sources-custom.ts) or [./packages/core/src/sources.ts](./packages/core/src/sources.ts) for how it works.
|
|
99
126
|
|
|
100
127
|
## status
|
|
101
128
|
|
package/dist/args.js
CHANGED
|
@@ -1,11 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
exports.splitArgs = splitArgs;
|
|
4
|
-
exports.isSchema = isSchema;
|
|
5
|
-
exports.normalizeIterTail = normalizeIterTail;
|
|
6
|
-
exports.serializeSelect = serializeSelect;
|
|
7
|
-
const path_ts_1 = require("./path.js");
|
|
8
|
-
function splitArgs(args) {
|
|
1
|
+
import { validatePath } from "./path.js";
|
|
2
|
+
export function splitArgs(args) {
|
|
9
3
|
let pathArgs;
|
|
10
4
|
let tail;
|
|
11
5
|
if (args.length === 0) {
|
|
@@ -23,35 +17,41 @@ function splitArgs(args) {
|
|
|
23
17
|
tail = undefined;
|
|
24
18
|
}
|
|
25
19
|
}
|
|
26
|
-
|
|
20
|
+
validatePath(pathArgs);
|
|
27
21
|
return { path: pathArgs, tail };
|
|
28
22
|
}
|
|
29
|
-
function isSchema(value) {
|
|
23
|
+
export function isSchema(value) {
|
|
30
24
|
return typeof value === 'object' && value !== null && '~standard' in value;
|
|
31
25
|
}
|
|
32
|
-
function normalizeIterTail(tail) {
|
|
26
|
+
export function normalizeIterTail(tail) {
|
|
33
27
|
if (!tail)
|
|
34
28
|
return {};
|
|
35
29
|
if (isSchema(tail))
|
|
36
30
|
return { schema: tail };
|
|
37
31
|
return tail;
|
|
38
32
|
}
|
|
39
|
-
function serializeSelect(select) {
|
|
33
|
+
export function serializeSelect(select) {
|
|
40
34
|
if (typeof select === 'string' || typeof select === 'number') {
|
|
41
35
|
const one = [select];
|
|
42
|
-
|
|
36
|
+
validatePath(one);
|
|
43
37
|
return JSON.stringify({ one });
|
|
44
38
|
}
|
|
45
39
|
if (Array.isArray(select)) {
|
|
46
|
-
|
|
40
|
+
validatePath(select);
|
|
47
41
|
if (select.length === 0) {
|
|
48
42
|
throw new RangeError('iter: select sub-path must have at least one segment');
|
|
49
43
|
}
|
|
50
44
|
return JSON.stringify({ one: select });
|
|
51
45
|
}
|
|
46
|
+
if (select === null || typeof select !== 'object') {
|
|
47
|
+
throw new TypeError(`iter: select must be a segment, path, or field map, got ${describeSelect(select)}`);
|
|
48
|
+
}
|
|
52
49
|
const entries = Object.entries(select).map(([k, sub]) => {
|
|
53
50
|
const path = typeof sub === 'string' || typeof sub === 'number' ? [sub] : sub;
|
|
54
|
-
(
|
|
51
|
+
if (!Array.isArray(path)) {
|
|
52
|
+
throw new TypeError(`iter: select field ${JSON.stringify(k)} must be a segment or path, got ${describeSelect(sub)}`);
|
|
53
|
+
}
|
|
54
|
+
validatePath(path);
|
|
55
55
|
if (path.length === 0) {
|
|
56
56
|
throw new RangeError(`iter: select field ${JSON.stringify(k)} sub-path must have at least one segment`);
|
|
57
57
|
}
|
|
@@ -62,3 +62,8 @@ function serializeSelect(select) {
|
|
|
62
62
|
}
|
|
63
63
|
return JSON.stringify({ map: entries });
|
|
64
64
|
}
|
|
65
|
+
function describeSelect(value) {
|
|
66
|
+
if (value === null)
|
|
67
|
+
return 'null';
|
|
68
|
+
return Array.isArray(value) ? 'array' : typeof value;
|
|
69
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { type IterOptions } from './args.ts';
|
|
2
|
-
export { ValidationError, formatPath, type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
|
|
3
|
-
export { open, DEFAULT_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type IterIndex as IterKey, } from './open.ts';
|
|
2
|
+
export { ValidationError, PathError, formatPath, type Path, type PathFaultCode, type Segment, type StandardSchemaV1, } from './validate.ts';
|
|
3
|
+
export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type WalkEntry, type IterIndex as IterKey, } from './open.ts';
|
|
4
4
|
export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
|
package/dist/index.js
CHANGED
|
@@ -1,19 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
// Node 18 and Node 20.3 predate `Symbol.asyncDispose`; mirror what TS emits for
|
|
5
|
-
// `await using` so the well-known symbol is available across our engine range.
|
|
6
|
-
if (!Symbol.asyncDispose) {
|
|
7
|
-
;
|
|
8
|
-
Symbol.asyncDispose = Symbol.for('Symbol.asyncDispose');
|
|
9
|
-
}
|
|
10
|
-
var validate_ts_1 = require("./validate.js");
|
|
11
|
-
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
|
|
12
|
-
Object.defineProperty(exports, "formatPath", { enumerable: true, get: function () { return validate_ts_1.formatPath; } });
|
|
13
|
-
var open_ts_1 = require("./open.js");
|
|
14
|
-
Object.defineProperty(exports, "open", { enumerable: true, get: function () { return open_ts_1.open; } });
|
|
15
|
-
Object.defineProperty(exports, "DEFAULT_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.DEFAULT_ITER_BATCH; } });
|
|
16
|
-
var sources_ts_1 = require("./sources.js");
|
|
17
|
-
Object.defineProperty(exports, "fromBuffer", { enumerable: true, get: function () { return sources_ts_1.fromBuffer; } });
|
|
18
|
-
Object.defineProperty(exports, "fromFile", { enumerable: true, get: function () { return sources_ts_1.fromFile; } });
|
|
19
|
-
Object.defineProperty(exports, "fromHttpRange", { enumerable: true, get: function () { return sources_ts_1.fromHttpRange; } });
|
|
1
|
+
export { ValidationError, PathError, formatPath, } from "./validate.js";
|
|
2
|
+
export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, } from "./open.js";
|
|
3
|
+
export { fromBuffer, fromFile, fromHttpRange, } from "./sources.js";
|
package/dist/open.d.ts
CHANGED
|
@@ -7,8 +7,11 @@ type SelectMapShape<S> = {
|
|
|
7
7
|
};
|
|
8
8
|
/** Zero-based index of an array element. */
|
|
9
9
|
export type IterIndex = number;
|
|
10
|
+
/** One `walk` step: the member's key paired with a cursor anchored at its value. */
|
|
11
|
+
export type WalkEntry = [key: string, cursor: Cursor];
|
|
10
12
|
export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
|
|
11
13
|
export declare const DEFAULT_ITER_BATCH = 1000;
|
|
14
|
+
export declare const MAX_ITER_BATCH = 1000000;
|
|
12
15
|
export interface OpenOptions {
|
|
13
16
|
/**
|
|
14
17
|
* Slot budget for the structural-index cache: one slot per cached container
|
|
@@ -40,8 +43,7 @@ export interface OpenOptions {
|
|
|
40
43
|
arrayIndexInterval?: number;
|
|
41
44
|
}
|
|
42
45
|
export interface Cursor {
|
|
43
|
-
|
|
44
|
-
readonly key: string | number | null;
|
|
46
|
+
hop(...path: Segment[]): Promise<Cursor | null>;
|
|
45
47
|
has(...path: Segment[]): Promise<boolean>;
|
|
46
48
|
has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
|
|
47
49
|
get(...path: Segment[]): Promise<unknown>;
|
|
@@ -67,6 +69,7 @@ export interface Cursor {
|
|
|
67
69
|
withIndex: true;
|
|
68
70
|
}]): AsyncIterable<[IterIndex, unknown][]>;
|
|
69
71
|
iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
|
|
72
|
+
walk(...path: Segment[]): AsyncIterable<WalkEntry>;
|
|
70
73
|
walk(...path: Segment[]): AsyncIterable<Cursor>;
|
|
71
74
|
}
|
|
72
75
|
export interface RootCursor extends Cursor, AsyncDisposable {
|
package/dist/open.js
CHANGED
|
@@ -1,35 +1,41 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const args_ts_1 = require("./args.js");
|
|
9
|
-
exports.DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
|
|
10
|
-
exports.DEFAULT_ITER_BATCH = 1000;
|
|
1
|
+
import { open as openNative } from '@botejs/native';
|
|
2
|
+
import { validatePath } from "./path.js";
|
|
3
|
+
import { runStandardSchema, validateItem, formatPath, PathError, } from "./validate.js";
|
|
4
|
+
import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
|
|
5
|
+
export const DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
|
|
6
|
+
export const DEFAULT_ITER_BATCH = 1000;
|
|
7
|
+
export const MAX_ITER_BATCH = 1_000_000;
|
|
11
8
|
/**
|
|
12
9
|
* Open a cursor over a seekable source.
|
|
13
10
|
*
|
|
14
11
|
* The returned `RootCursor` owns the reader: `close()` (or `await using`)
|
|
15
12
|
* drives the reader's own `close()` exactly once.
|
|
16
13
|
*/
|
|
17
|
-
async function open(source, options) {
|
|
14
|
+
export async function open(source, options) {
|
|
18
15
|
const { indexCacheEntries, objectMemberCap, arrayIndexInterval } = options ?? {};
|
|
19
16
|
for (const [name, value] of [
|
|
20
17
|
['indexCacheEntries', indexCacheEntries],
|
|
21
18
|
['objectMemberCap', objectMemberCap],
|
|
22
19
|
['arrayIndexInterval', arrayIndexInterval],
|
|
23
20
|
]) {
|
|
24
|
-
if (value !== undefined && (!Number.
|
|
21
|
+
if (value !== undefined && (!Number.isSafeInteger(value) || value < 0)) {
|
|
25
22
|
throw new RangeError(`open: ${name} must be a non-negative integer (0 disables), got ${value}`);
|
|
26
23
|
}
|
|
27
24
|
}
|
|
28
25
|
const reader = await source.open();
|
|
29
|
-
const chunkBytes = reader.chunkBytes ??
|
|
26
|
+
const chunkBytes = reader.chunkBytes ?? DEFAULT_SOURCE_CHUNK_BYTES;
|
|
30
27
|
let native;
|
|
31
28
|
try {
|
|
32
|
-
|
|
29
|
+
if (!Number.isInteger(reader.size) || reader.size < 0) {
|
|
30
|
+
throw new RangeError(`open: source size must be a non-negative integer, got ${reader.size}`);
|
|
31
|
+
}
|
|
32
|
+
if (!Number.isSafeInteger(chunkBytes) || chunkBytes <= 0) {
|
|
33
|
+
throw new RangeError(`open: chunkBytes must be a positive integer, got ${chunkBytes}`);
|
|
34
|
+
}
|
|
35
|
+
if (chunkBytes % 64 !== 0) {
|
|
36
|
+
throw new RangeError(`open: chunkBytes must be a multiple of 64, got ${chunkBytes}`);
|
|
37
|
+
}
|
|
38
|
+
native = openNative({
|
|
33
39
|
size: reader.size,
|
|
34
40
|
chunkBytes,
|
|
35
41
|
indexCacheEntries,
|
|
@@ -39,17 +45,24 @@ async function open(source, options) {
|
|
|
39
45
|
});
|
|
40
46
|
}
|
|
41
47
|
catch (err) {
|
|
42
|
-
|
|
48
|
+
// Don't let a failing cleanup mask the original open error; attach it as cause.
|
|
49
|
+
try {
|
|
50
|
+
await closeReader(reader);
|
|
51
|
+
}
|
|
52
|
+
catch (closeErr) {
|
|
53
|
+
if (err instanceof Error)
|
|
54
|
+
err.cause ??= closeErr;
|
|
55
|
+
}
|
|
43
56
|
throw err;
|
|
44
57
|
}
|
|
45
|
-
|
|
58
|
+
const state = { closed: false };
|
|
46
59
|
const close = async () => {
|
|
47
|
-
if (closed)
|
|
60
|
+
if (state.closed)
|
|
48
61
|
return;
|
|
49
|
-
closed = true;
|
|
62
|
+
state.closed = true;
|
|
50
63
|
await closeReader(reader);
|
|
51
64
|
};
|
|
52
|
-
return Object.assign(wrap(native), {
|
|
65
|
+
return Object.assign(wrap(native, state), {
|
|
53
66
|
close,
|
|
54
67
|
[Symbol.asyncDispose]: close,
|
|
55
68
|
});
|
|
@@ -58,66 +71,157 @@ async function closeReader(reader) {
|
|
|
58
71
|
if (reader.close)
|
|
59
72
|
await reader.close();
|
|
60
73
|
}
|
|
61
|
-
|
|
74
|
+
const NATIVE_PATH_ERROR = /^bote:path:([a-z_]+)(?::(\d+))?$/;
|
|
75
|
+
function deserializeError(err, path) {
|
|
76
|
+
if (err instanceof Error && !(err instanceof PathError)) {
|
|
77
|
+
const match = NATIVE_PATH_ERROR.exec(err.message);
|
|
78
|
+
if (match) {
|
|
79
|
+
const segment = match[2] === undefined ? undefined : Number(match[2]);
|
|
80
|
+
return new PathError(path, match[1], segment);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return err;
|
|
84
|
+
}
|
|
85
|
+
/** Throw a uniform error for any operation on a closed cursor, so use-after-close
|
|
86
|
+
* is one defined contract regardless of source (some readers' reads keep working
|
|
87
|
+
* after close, others throw an opaque I/O error). */
|
|
88
|
+
function ensureOpen(state) {
|
|
89
|
+
if (state.closed)
|
|
90
|
+
throw new Error('bote: cursor is closed');
|
|
91
|
+
}
|
|
92
|
+
function wrap(native, state) {
|
|
62
93
|
const cursor = {
|
|
63
|
-
|
|
64
|
-
|
|
94
|
+
async hop(...path) {
|
|
95
|
+
ensureOpen(state);
|
|
96
|
+
validatePath(path);
|
|
97
|
+
let child;
|
|
98
|
+
try {
|
|
99
|
+
child = await native.hop(path);
|
|
100
|
+
}
|
|
101
|
+
catch (err) {
|
|
102
|
+
throw deserializeError(err, path);
|
|
103
|
+
}
|
|
104
|
+
return child ? wrap(child, state) : null;
|
|
65
105
|
},
|
|
66
106
|
async has(...args) {
|
|
67
|
-
|
|
107
|
+
ensureOpen(state);
|
|
108
|
+
const { path, tail: schema } = splitArgs(args);
|
|
109
|
+
if (schema !== undefined && !isSchema(schema)) {
|
|
110
|
+
throw new TypeError('has: expected a Standard Schema as the trailing argument');
|
|
111
|
+
}
|
|
68
112
|
if (!schema)
|
|
69
113
|
return native.has(path);
|
|
70
114
|
if (!(await native.has(path)))
|
|
71
115
|
return false;
|
|
72
|
-
const
|
|
116
|
+
const text = await native.get(path);
|
|
117
|
+
const value = text === undefined ? undefined : parseValue(text, path);
|
|
118
|
+
const result = await validateItem(schema, value, path, 'skip');
|
|
73
119
|
return !('skip' in result);
|
|
74
120
|
},
|
|
75
121
|
async get(...args) {
|
|
76
|
-
|
|
77
|
-
const
|
|
78
|
-
if (
|
|
122
|
+
ensureOpen(state);
|
|
123
|
+
const { path, tail: schema } = splitArgs(args);
|
|
124
|
+
if (schema !== undefined && !isSchema(schema)) {
|
|
125
|
+
throw new TypeError('get: expected a Standard Schema as the trailing argument');
|
|
126
|
+
}
|
|
127
|
+
let value;
|
|
128
|
+
try {
|
|
129
|
+
const text = await native.get(path);
|
|
130
|
+
value = text === undefined ? undefined : parseValue(text, path);
|
|
131
|
+
}
|
|
132
|
+
catch (err) {
|
|
133
|
+
throw deserializeError(err, path);
|
|
134
|
+
}
|
|
135
|
+
if (!schema)
|
|
79
136
|
return value;
|
|
80
|
-
return
|
|
137
|
+
return runStandardSchema(schema, value, path);
|
|
81
138
|
},
|
|
82
|
-
count(...path) {
|
|
83
|
-
(
|
|
84
|
-
|
|
139
|
+
async count(...path) {
|
|
140
|
+
ensureOpen(state);
|
|
141
|
+
validatePath(path);
|
|
142
|
+
try {
|
|
143
|
+
return await native.count(path);
|
|
144
|
+
}
|
|
145
|
+
catch (err) {
|
|
146
|
+
throw deserializeError(err, path);
|
|
147
|
+
}
|
|
85
148
|
},
|
|
86
149
|
iter(...args) {
|
|
87
|
-
|
|
88
|
-
const {
|
|
89
|
-
|
|
90
|
-
|
|
150
|
+
ensureOpen(state);
|
|
151
|
+
const { path, tail } = splitArgs(args);
|
|
152
|
+
const { schema, select, batch, onInvalid, withIndex } = normalizeIterTail(tail);
|
|
153
|
+
if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > MAX_ITER_BATCH)) {
|
|
154
|
+
throw new RangeError(`iter: batch must be an integer in 1..=${MAX_ITER_BATCH}, got ${batch}`);
|
|
155
|
+
}
|
|
156
|
+
if (withIndex !== undefined && typeof withIndex !== 'boolean') {
|
|
157
|
+
throw new TypeError(`iter: withIndex must be a boolean, got ${typeof withIndex}`);
|
|
158
|
+
}
|
|
159
|
+
if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
|
|
160
|
+
throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
|
|
161
|
+
}
|
|
162
|
+
const resolvedBatch = batch ?? DEFAULT_ITER_BATCH;
|
|
163
|
+
const selectIr = select !== undefined ? serializeSelect(select) : undefined;
|
|
164
|
+
const inner = native.iter(path, { selectIr, batch: resolvedBatch });
|
|
165
|
+
if (!schema) {
|
|
166
|
+
return {
|
|
167
|
+
async *[Symbol.asyncIterator]() {
|
|
168
|
+
let i = 0;
|
|
169
|
+
try {
|
|
170
|
+
for await (const b of inner) {
|
|
171
|
+
const batch = parseValue(b, path);
|
|
172
|
+
if (!withIndex) {
|
|
173
|
+
yield batch;
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
const out = new Array(batch.length);
|
|
177
|
+
for (let j = 0; j < batch.length; j++) {
|
|
178
|
+
out[j] = [i++, batch[j]];
|
|
179
|
+
}
|
|
180
|
+
yield out;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
catch (err) {
|
|
184
|
+
throw deserializeError(err, path);
|
|
185
|
+
}
|
|
186
|
+
},
|
|
187
|
+
};
|
|
91
188
|
}
|
|
92
|
-
const resolvedBatch = batch ?? exports.DEFAULT_ITER_BATCH;
|
|
93
|
-
const selectIr = select !== undefined ? (0, args_ts_1.serializeSelect)(select) : undefined;
|
|
94
|
-
const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: withIndex });
|
|
95
|
-
if (!schema)
|
|
96
|
-
return inner;
|
|
97
189
|
const policy = onInvalid ?? 'throw';
|
|
98
190
|
return {
|
|
99
191
|
async *[Symbol.asyncIterator]() {
|
|
100
192
|
let i = 0;
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
193
|
+
try {
|
|
194
|
+
for await (const b of inner) {
|
|
195
|
+
const out = [];
|
|
196
|
+
for (const v of parseValue(b, path)) {
|
|
197
|
+
const index = i++;
|
|
198
|
+
const result = await validateItem(schema, v, [...path, index], policy);
|
|
199
|
+
if ('skip' in result) {
|
|
200
|
+
continue;
|
|
201
|
+
}
|
|
202
|
+
out.push(withIndex ? [index, result.value] : result.value);
|
|
203
|
+
}
|
|
204
|
+
yield out;
|
|
109
205
|
}
|
|
110
|
-
|
|
206
|
+
}
|
|
207
|
+
catch (err) {
|
|
208
|
+
throw deserializeError(err, path);
|
|
111
209
|
}
|
|
112
210
|
},
|
|
113
211
|
};
|
|
114
212
|
},
|
|
115
213
|
walk(...path) {
|
|
116
|
-
(
|
|
214
|
+
ensureOpen(state);
|
|
215
|
+
validatePath(path);
|
|
117
216
|
return {
|
|
118
217
|
async *[Symbol.asyncIterator]() {
|
|
119
|
-
|
|
120
|
-
|
|
218
|
+
try {
|
|
219
|
+
for await (const [key, child] of native.walk(path)) {
|
|
220
|
+
yield [key, wrap(child, state)];
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
catch (err) {
|
|
224
|
+
throw deserializeError(err, path);
|
|
121
225
|
}
|
|
122
226
|
},
|
|
123
227
|
};
|
|
@@ -125,3 +229,11 @@ function wrap(native) {
|
|
|
125
229
|
};
|
|
126
230
|
return cursor;
|
|
127
231
|
}
|
|
232
|
+
function parseValue(text, path) {
|
|
233
|
+
try {
|
|
234
|
+
return JSON.parse(text);
|
|
235
|
+
}
|
|
236
|
+
catch {
|
|
237
|
+
throw new Error(`bote: malformed JSON value at ${formatPath(path)}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
package/dist/path.js
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.MAX_ARRAY_INDEX = void 0;
|
|
4
|
-
exports.validatePath = validatePath;
|
|
5
1
|
/** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
|
|
6
2
|
* comfortably covers any in-memory JSON array. */
|
|
7
|
-
|
|
8
|
-
function validatePath(path) {
|
|
3
|
+
export const MAX_ARRAY_INDEX = 0xffffffff;
|
|
4
|
+
export function validatePath(path) {
|
|
9
5
|
for (let i = 0; i < path.length; i++) {
|
|
10
6
|
const s = path[i];
|
|
11
7
|
if (typeof s === 'string')
|
|
12
8
|
continue;
|
|
13
|
-
if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <=
|
|
9
|
+
if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <= MAX_ARRAY_INDEX)
|
|
14
10
|
continue;
|
|
15
|
-
throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${
|
|
11
|
+
throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${MAX_ARRAY_INDEX}), got ${describeBadSegment(s)}`);
|
|
16
12
|
}
|
|
17
13
|
}
|
|
18
14
|
function describeBadSegment(s) {
|
package/dist/sources.js
CHANGED
|
@@ -1,16 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.fromBuffer = fromBuffer;
|
|
4
|
-
exports.fromFile = fromFile;
|
|
5
|
-
exports.fromHttpRange = fromHttpRange;
|
|
6
|
-
const promises_1 = require("node:fs/promises");
|
|
1
|
+
import { open as fsOpen } from 'node:fs/promises';
|
|
7
2
|
/** Default chunk size, in bytes, for in-memory sources. */
|
|
8
3
|
const DEFAULT_BUFFER_CHUNK_BYTES = 4 * 1024;
|
|
9
4
|
/** Default chunk size, in bytes, for local files: matches typical filesystem readahead. */
|
|
10
5
|
const DEFAULT_FILE_CHUNK_BYTES = 64 * 1024;
|
|
11
6
|
/** Default chunk size, in bytes, for HTTP range reads: amortizes RTT across more data. */
|
|
12
7
|
const DEFAULT_URL_CHUNK_BYTES = 256 * 1024;
|
|
13
|
-
function fromBuffer(buf, options) {
|
|
8
|
+
export function fromBuffer(buf, options) {
|
|
14
9
|
const view = buf instanceof Uint8Array ? buf : new Uint8Array(buf);
|
|
15
10
|
const chunkBytes = options?.chunkBytes ?? DEFAULT_BUFFER_CHUNK_BYTES;
|
|
16
11
|
return {
|
|
@@ -21,11 +16,11 @@ function fromBuffer(buf, options) {
|
|
|
21
16
|
}),
|
|
22
17
|
};
|
|
23
18
|
}
|
|
24
|
-
function fromFile(path, options) {
|
|
19
|
+
export function fromFile(path, options) {
|
|
25
20
|
const chunkBytes = options?.chunkBytes ?? DEFAULT_FILE_CHUNK_BYTES;
|
|
26
21
|
return {
|
|
27
22
|
open: async () => {
|
|
28
|
-
const handle = await (
|
|
23
|
+
const handle = await fsOpen(path, 'r');
|
|
29
24
|
const stat = await handle.stat();
|
|
30
25
|
let closed = false;
|
|
31
26
|
return {
|
|
@@ -52,7 +47,7 @@ function fromFile(path, options) {
|
|
|
52
47
|
},
|
|
53
48
|
};
|
|
54
49
|
}
|
|
55
|
-
function fromHttpRange(url, options) {
|
|
50
|
+
export function fromHttpRange(url, options) {
|
|
56
51
|
const init = options?.init;
|
|
57
52
|
const chunkBytes = options?.chunkBytes ?? DEFAULT_URL_CHUNK_BYTES;
|
|
58
53
|
return {
|
package/dist/validate.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { StandardSchemaV1 } from '@standard-schema/spec';
|
|
2
|
-
|
|
2
|
+
import type { PathFaultCode } from '@botejs/native';
|
|
3
|
+
export type { StandardSchemaV1, PathFaultCode };
|
|
3
4
|
export type Segment = string | number;
|
|
4
5
|
export type Path = readonly Segment[];
|
|
5
6
|
export declare class ValidationError extends Error {
|
|
@@ -7,6 +8,12 @@ export declare class ValidationError extends Error {
|
|
|
7
8
|
readonly path: Path;
|
|
8
9
|
constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
|
|
9
10
|
}
|
|
11
|
+
export declare class PathError extends Error {
|
|
12
|
+
readonly path: Path;
|
|
13
|
+
/** The fault kind; stable across versions, safe to branch on. */
|
|
14
|
+
readonly code: PathFaultCode;
|
|
15
|
+
constructor(path: Path, code: PathFaultCode, segment?: number);
|
|
16
|
+
}
|
|
10
17
|
export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path): Promise<O>;
|
|
11
18
|
export declare function validateItem<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path, onInvalid: 'throw' | 'skip'): Promise<{
|
|
12
19
|
skip: true;
|
package/dist/validate.js
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ValidationError = void 0;
|
|
4
|
-
exports.runStandardSchema = runStandardSchema;
|
|
5
|
-
exports.validateItem = validateItem;
|
|
6
|
-
exports.formatPath = formatPath;
|
|
7
|
-
class ValidationError extends Error {
|
|
1
|
+
export class ValidationError extends Error {
|
|
8
2
|
issues;
|
|
9
3
|
path;
|
|
10
4
|
constructor(issues, path) {
|
|
@@ -14,14 +8,35 @@ class ValidationError extends Error {
|
|
|
14
8
|
this.path = path;
|
|
15
9
|
}
|
|
16
10
|
}
|
|
17
|
-
|
|
18
|
-
|
|
11
|
+
/** Human message per fault kind. The native layer ships only the code (and the
|
|
12
|
+
* offending `segment` where it matters), so this is the single source of the
|
|
13
|
+
* user-facing prose. Keyed by the Rust-generated [`PathFaultCode`]. */
|
|
14
|
+
const PATH_FAULT_MESSAGE = {
|
|
15
|
+
through_scalar: (segment) => `path traverses a non-container value at segment ${segment}`,
|
|
16
|
+
wrong_kind: (segment) => `path segment ${segment} does not match the container kind`,
|
|
17
|
+
scalar_target: () => 'target value is not a container',
|
|
18
|
+
iter_on_object: () => 'iter target is an object; use walk() to iterate object members',
|
|
19
|
+
walk_on_array: () => 'walk target is an array; use iter() to iterate array elements',
|
|
20
|
+
};
|
|
21
|
+
export class PathError extends Error {
|
|
22
|
+
path;
|
|
23
|
+
/** The fault kind; stable across versions, safe to branch on. */
|
|
24
|
+
code;
|
|
25
|
+
constructor(path, code, segment) {
|
|
26
|
+
const reason = (PATH_FAULT_MESSAGE[code] ?? (() => code))(segment);
|
|
27
|
+
super(`bote: cannot resolve ${formatPath(path)}: ${reason}`);
|
|
28
|
+
this.name = 'PathError';
|
|
29
|
+
this.path = path;
|
|
30
|
+
this.code = code;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
export async function runStandardSchema(schema, value, path) {
|
|
19
34
|
const result = await schema['~standard'].validate(value);
|
|
20
35
|
if (result.issues)
|
|
21
36
|
throw new ValidationError(result.issues, path);
|
|
22
37
|
return result.value;
|
|
23
38
|
}
|
|
24
|
-
async function validateItem(schema, value, path, onInvalid) {
|
|
39
|
+
export async function validateItem(schema, value, path, onInvalid) {
|
|
25
40
|
const result = await schema['~standard'].validate(value);
|
|
26
41
|
if (result.issues) {
|
|
27
42
|
if (onInvalid === 'skip')
|
|
@@ -30,7 +45,7 @@ async function validateItem(schema, value, path, onInvalid) {
|
|
|
30
45
|
}
|
|
31
46
|
return { value: result.value };
|
|
32
47
|
}
|
|
33
|
-
function formatPath(path) {
|
|
48
|
+
export function formatPath(path) {
|
|
34
49
|
if (path.length === 0)
|
|
35
50
|
return '(root)';
|
|
36
51
|
let out = '';
|
package/package.json
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@botejs/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
|
+
"type": "module",
|
|
4
5
|
"license": "MIT",
|
|
5
6
|
"repository": {
|
|
6
7
|
"type": "git",
|
|
7
8
|
"url": "git+https://github.com/jankdc/bote.git",
|
|
8
9
|
"directory": "packages/core"
|
|
9
10
|
},
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"default": "./dist/index.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
10
17
|
"main": "dist/index.js",
|
|
11
18
|
"types": "dist/index.d.ts",
|
|
12
19
|
"files": [
|
|
@@ -14,7 +21,7 @@
|
|
|
14
21
|
"README.md"
|
|
15
22
|
],
|
|
16
23
|
"engines": {
|
|
17
|
-
"node": ">= 18.
|
|
24
|
+
"node": ">= 22.18.0"
|
|
18
25
|
},
|
|
19
26
|
"publishConfig": {
|
|
20
27
|
"registry": "https://registry.npmjs.org/",
|
|
@@ -23,12 +30,12 @@
|
|
|
23
30
|
"scripts": {
|
|
24
31
|
"build": "tsc",
|
|
25
32
|
"build:debug": "tsc --sourceMap",
|
|
26
|
-
"test": "node --test
|
|
33
|
+
"test": "node --test __test__/*.spec.ts",
|
|
27
34
|
"lint": "oxlint src",
|
|
28
35
|
"prepublishOnly": "cp ../../README.md ./README.md && tsc"
|
|
29
36
|
},
|
|
30
37
|
"dependencies": {
|
|
31
|
-
"@botejs/native": "
|
|
38
|
+
"@botejs/native": "^0.4.0"
|
|
32
39
|
},
|
|
33
40
|
"devDependencies": {
|
|
34
41
|
"@types/node": "^22.0.0",
|