@botejs/core 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -21
- package/dist/args.d.ts +21 -0
- package/dist/args.js +64 -0
- package/dist/index.d.ts +3 -3
- package/dist/index.js +5 -3
- package/dist/open.d.ts +63 -29
- package/dist/open.js +60 -24
- package/dist/path.d.ts +5 -0
- package/dist/path.js +24 -0
- package/dist/sources.d.ts +6 -10
- package/dist/sources.js +14 -15
- package/dist/validate.d.ts +11 -3
- package/dist/validate.js +36 -6
- package/package.json +1 -1
- package/dist/pointer.d.ts +0 -5
- package/dist/pointer.js +0 -3
package/README.md
CHANGED
|
@@ -13,28 +13,65 @@ import * as z from 'zod' // or bring your own Standard Schema validator
|
|
|
13
13
|
|
|
14
14
|
const User = z.object({
|
|
15
15
|
id: z.string(),
|
|
16
|
+
name: z.string(),
|
|
16
17
|
email: z.string(),
|
|
18
|
+
details: z.object({
|
|
19
|
+
lastLoggedIn: z.number(),
|
|
20
|
+
}),
|
|
17
21
|
})
|
|
18
22
|
|
|
19
23
|
type User = z.infer<typeof User>
|
|
20
24
|
|
|
21
25
|
await using cursor = await open(fromFile('./your-big.json'))
|
|
22
26
|
|
|
23
|
-
//
|
|
24
|
-
const
|
|
27
|
+
// users[1000].name
|
|
28
|
+
const desc0: unknown = await cursor.get('users', 1000, 'name')
|
|
29
|
+
// for .get and .iter, you can supply a validator as the last argument
|
|
30
|
+
const desc1: string = await cursor.get('users', 1000, 'name', User.shape.name)
|
|
31
|
+
|
|
32
|
+
// iterate an array in batches
|
|
33
|
+
for await (const batch of cursor.iter('users', User)) {
|
|
34
|
+
// batch: User[]
|
|
35
|
+
for (const user of batch) {
|
|
36
|
+
console.log(user)
|
|
37
|
+
}
|
|
38
|
+
}
|
|
25
39
|
|
|
26
|
-
//
|
|
27
|
-
|
|
40
|
+
// pick several fields into a named object to avoid resolving big items
|
|
41
|
+
for await (const batch of cursor.iter('users', {
|
|
42
|
+
select: {
|
|
43
|
+
id: 'id',
|
|
44
|
+
logged: ['details', 'lastLoggedIn'],
|
|
45
|
+
},
|
|
46
|
+
schema: z.object({
|
|
47
|
+
id: User.shape.id,
|
|
48
|
+
logged: User.shape.details.lastLoggedIn,
|
|
49
|
+
}),
|
|
50
|
+
})) {
|
|
51
|
+
// batch: { id: string, logged: number }[]
|
|
52
|
+
for (const userLog of batch) {
|
|
53
|
+
console.log(userLog)
|
|
54
|
+
}
|
|
55
|
+
}
|
|
28
56
|
|
|
29
|
-
//
|
|
30
|
-
for await (const
|
|
31
|
-
|
|
57
|
+
// or pick a single field
|
|
58
|
+
for await (const batch of cursor.iter('users', {
|
|
59
|
+
select: 'name',
|
|
60
|
+
schema: User.shape.name,
|
|
61
|
+
})) {
|
|
62
|
+
// batch: string[]
|
|
63
|
+
for (const name of batch) {
|
|
64
|
+
console.log({ name })
|
|
65
|
+
}
|
|
32
66
|
}
|
|
33
67
|
|
|
34
|
-
//
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
68
|
+
// for open-ended per-child work (e.g. conditional reads, recursive descent, nested
|
|
69
|
+
// iters), `walk` yields a subcursor positioned at each child:
|
|
70
|
+
for await (const metaCursor of cursor.walk('meta')) {
|
|
71
|
+
if (metaCursor.key === 'details') {
|
|
72
|
+
const detailsValue = await metaCursor.get()
|
|
73
|
+
console.log(detailsValue)
|
|
74
|
+
}
|
|
38
75
|
}
|
|
39
76
|
|
|
40
77
|
// 'await using' would normally clean up resources for you
|
|
@@ -43,18 +80,18 @@ for await (const userCursor of cursor.walk('/1234/users')) {
|
|
|
43
80
|
await cursor.close()
|
|
44
81
|
```
|
|
45
82
|
|
|
46
|
-
given a **seekable** source (e.g. a file, an HTTP range) and a
|
|
83
|
+
given a **seekable** source (e.g. a file or an HTTP range) and a path, it can retrieve values from a JSON document quickly, without loading the whole document into memory.
|
|
47
84
|
|
|
48
|
-
here's a run (Apple M1 Pro 2021, 500MB JSON array file, cold-cache, default settings):
|
|
85
|
+
here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default settings):
|
|
49
86
|
|
|
50
|
-
| operation
|
|
51
|
-
|
|
|
52
|
-
| items[0]
|
|
53
|
-
| items[
|
|
54
|
-
| items[
|
|
55
|
-
| items[0]
|
|
56
|
-
| items[
|
|
57
|
-
| items[
|
|
87
|
+
| operation | approach | time | js heap peak Δ | rust heap peak |
|
|
88
|
+
| -------------- | ---------- | --------: | -------------: | -------------: |
|
|
89
|
+
| items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
|
|
90
|
+
| items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
|
|
91
|
+
| items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
|
|
92
|
+
| items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
|
|
93
|
+
| items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
|
|
94
|
+
| items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
|
|
58
95
|
|
|
59
96
|
## sources
|
|
60
97
|
|
package/dist/args.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { Path, Segment, StandardSchemaV1 } from './validate.ts';
|
|
2
|
+
export interface IterOptions {
|
|
3
|
+
select?: Segment | Path | Record<string, Segment | Path>;
|
|
4
|
+
/** How many items are yielded per batch. Higher is faster, but takes more memory to materialise those items. */
|
|
5
|
+
batch?: number;
|
|
6
|
+
/** Validate each yielded item against this schema (after `select`). */
|
|
7
|
+
schema?: StandardSchemaV1;
|
|
8
|
+
/** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
|
|
9
|
+
onInvalid?: 'throw' | 'skip';
|
|
10
|
+
/** Yield `[index, value]` tuples instead of bare values, where `index` is
|
|
11
|
+
* the zero-based position of the element in the source array. */
|
|
12
|
+
withIndex?: boolean;
|
|
13
|
+
}
|
|
14
|
+
export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
|
|
15
|
+
export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
|
|
16
|
+
path: Segment[];
|
|
17
|
+
tail: TTail | undefined;
|
|
18
|
+
};
|
|
19
|
+
export declare function isSchema(value: unknown): value is StandardSchemaV1;
|
|
20
|
+
export declare function normalizeIterTail(tail: StandardSchemaV1 | IterOptions | undefined): IterOptions;
|
|
21
|
+
export declare function serializeSelect(select: Segment | Path | Record<string, Segment | Path>): string;
|
package/dist/args.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.splitArgs = splitArgs;
|
|
4
|
+
exports.isSchema = isSchema;
|
|
5
|
+
exports.normalizeIterTail = normalizeIterTail;
|
|
6
|
+
exports.serializeSelect = serializeSelect;
|
|
7
|
+
const path_ts_1 = require("./path.js");
|
|
8
|
+
function splitArgs(args) {
|
|
9
|
+
let pathArgs;
|
|
10
|
+
let tail;
|
|
11
|
+
if (args.length === 0) {
|
|
12
|
+
pathArgs = [];
|
|
13
|
+
tail = undefined;
|
|
14
|
+
}
|
|
15
|
+
else {
|
|
16
|
+
const last = args[args.length - 1];
|
|
17
|
+
if (last !== null && typeof last === 'object' && !Array.isArray(last)) {
|
|
18
|
+
pathArgs = args.slice(0, -1);
|
|
19
|
+
tail = last;
|
|
20
|
+
}
|
|
21
|
+
else {
|
|
22
|
+
pathArgs = args;
|
|
23
|
+
tail = undefined;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
(0, path_ts_1.validatePath)(pathArgs);
|
|
27
|
+
return { path: pathArgs, tail };
|
|
28
|
+
}
|
|
29
|
+
function isSchema(value) {
|
|
30
|
+
return typeof value === 'object' && value !== null && '~standard' in value;
|
|
31
|
+
}
|
|
32
|
+
function normalizeIterTail(tail) {
|
|
33
|
+
if (!tail)
|
|
34
|
+
return {};
|
|
35
|
+
if (isSchema(tail))
|
|
36
|
+
return { schema: tail };
|
|
37
|
+
return tail;
|
|
38
|
+
}
|
|
39
|
+
function serializeSelect(select) {
|
|
40
|
+
if (typeof select === 'string' || typeof select === 'number') {
|
|
41
|
+
const one = [select];
|
|
42
|
+
(0, path_ts_1.validatePath)(one);
|
|
43
|
+
return JSON.stringify({ one });
|
|
44
|
+
}
|
|
45
|
+
if (Array.isArray(select)) {
|
|
46
|
+
(0, path_ts_1.validatePath)(select);
|
|
47
|
+
if (select.length === 0) {
|
|
48
|
+
throw new RangeError('iter: select sub-path must have at least one segment');
|
|
49
|
+
}
|
|
50
|
+
return JSON.stringify({ one: select });
|
|
51
|
+
}
|
|
52
|
+
const entries = Object.entries(select).map(([k, sub]) => {
|
|
53
|
+
const path = typeof sub === 'string' || typeof sub === 'number' ? [sub] : sub;
|
|
54
|
+
(0, path_ts_1.validatePath)(path);
|
|
55
|
+
if (path.length === 0) {
|
|
56
|
+
throw new RangeError(`iter: select field ${JSON.stringify(k)} sub-path must have at least one segment`);
|
|
57
|
+
}
|
|
58
|
+
return [k, path];
|
|
59
|
+
});
|
|
60
|
+
if (entries.length === 0) {
|
|
61
|
+
throw new RangeError('iter: select must have at least one field');
|
|
62
|
+
}
|
|
63
|
+
return JSON.stringify({ map: entries });
|
|
64
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type
|
|
2
|
-
export {
|
|
1
|
+
export { type IterOptions } from './args.ts';
|
|
2
|
+
export { ValidationError, formatPath, type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
export { open, DEFAULT_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type IterIndex as IterKey, } from './open.ts';
|
|
3
4
|
export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
|
|
4
|
-
export { ValidationError, type StandardSchemaV1 } from './validate.ts';
|
package/dist/index.js
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.fromHttpRange = exports.fromFile = exports.fromBuffer = exports.DEFAULT_ITER_BATCH = exports.formatPath = exports.ValidationError = void 0;
|
|
4
4
|
// Node 18 and Node 20.3 predate `Symbol.asyncDispose`; mirror what TS emits for
|
|
5
5
|
// `await using` so the well-known symbol is available across our engine range.
|
|
6
6
|
if (!Symbol.asyncDispose) {
|
|
7
7
|
;
|
|
8
8
|
Symbol.asyncDispose = Symbol.for('Symbol.asyncDispose');
|
|
9
9
|
}
|
|
10
|
+
var validate_ts_1 = require("./validate.js");
|
|
11
|
+
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
|
|
12
|
+
Object.defineProperty(exports, "formatPath", { enumerable: true, get: function () { return validate_ts_1.formatPath; } });
|
|
10
13
|
var open_ts_1 = require("./open.js");
|
|
11
14
|
Object.defineProperty(exports, "open", { enumerable: true, get: function () { return open_ts_1.open; } });
|
|
15
|
+
Object.defineProperty(exports, "DEFAULT_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.DEFAULT_ITER_BATCH; } });
|
|
12
16
|
var sources_ts_1 = require("./sources.js");
|
|
13
17
|
Object.defineProperty(exports, "fromBuffer", { enumerable: true, get: function () { return sources_ts_1.fromBuffer; } });
|
|
14
18
|
Object.defineProperty(exports, "fromFile", { enumerable: true, get: function () { return sources_ts_1.fromFile; } });
|
|
15
19
|
Object.defineProperty(exports, "fromHttpRange", { enumerable: true, get: function () { return sources_ts_1.fromHttpRange; } });
|
|
16
|
-
var validate_ts_1 = require("./validate.js");
|
|
17
|
-
Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
|
package/dist/open.d.ts
CHANGED
|
@@ -1,34 +1,74 @@
|
|
|
1
|
-
import type { JsonPointer } from './pointer.ts';
|
|
2
1
|
import type { Source } from './sources.ts';
|
|
3
|
-
import { type StandardSchemaV1 } from './validate.ts';
|
|
4
|
-
|
|
2
|
+
import { type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
|
|
3
|
+
import { type IterOptions } from './args.ts';
|
|
4
|
+
type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
5
|
+
type SelectMapShape<S> = {
|
|
6
|
+
-readonly [K in keyof S]: unknown;
|
|
7
|
+
};
|
|
8
|
+
/** Zero-based index of an array element. */
|
|
9
|
+
export type IterIndex = number;
|
|
10
|
+
export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
|
|
11
|
+
export declare const DEFAULT_ITER_BATCH = 1000;
|
|
12
|
+
export interface OpenOptions {
|
|
13
|
+
/**
|
|
14
|
+
* Slot budget for the structural-index cache: one slot per cached container
|
|
15
|
+
* plus one per tabled object member. When a scan tips the cache over this
|
|
16
|
+
* budget, the deepest (least navigationally useful) containers are evicted
|
|
17
|
+
* first, LRU-tiebroken, keeping the shallow backbone that resumes future
|
|
18
|
+
* scans. Bounds resident cache memory regardless of document size. `0`
|
|
19
|
+
* disables the cache entirely. Omit for the native default (1024).
|
|
20
|
+
*/
|
|
21
|
+
indexCacheEntries?: number;
|
|
22
|
+
/**
|
|
23
|
+
* Max object members tabled per walked container in the structural-index
|
|
24
|
+
* cache. The table is a dense prefix; past the cap, lookups of later members
|
|
25
|
+
* resume-scan from the cap boundary. Lower trades cache memory for resume work
|
|
26
|
+
* on pathologically large objects. `0` disables object member indexing. Omit
|
|
27
|
+
* for the native default (unbounded).
|
|
28
|
+
*/
|
|
29
|
+
objectMemberCap?: number;
|
|
5
30
|
/**
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
31
|
+
* Element-index stride between sampled array members in the structural-index
|
|
32
|
+
* cache. A later index resumes from the nearest array member at or before it, so
|
|
33
|
+
* a smaller stride means denser array members (more memory, shorter resume
|
|
34
|
+
* scans). `0` disables array-member indexing. Omit for the native default (16).
|
|
10
35
|
*
|
|
11
|
-
*
|
|
36
|
+
* Setting both `objectMemberCap` and `arrayIndexInterval` to `0` disables the
|
|
37
|
+
* cache entirely (no source bytes are ever cached either way), as does
|
|
38
|
+
* `indexCacheEntries: 0`.
|
|
12
39
|
*/
|
|
13
|
-
|
|
40
|
+
arrayIndexInterval?: number;
|
|
14
41
|
}
|
|
15
|
-
type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
16
42
|
export interface Cursor {
|
|
17
43
|
/** Object-member key or array-element index that this cursor was yielded under by `walk`. `null` on the root cursor. */
|
|
18
44
|
readonly key: string | number | null;
|
|
19
|
-
has
|
|
20
|
-
has
|
|
21
|
-
get
|
|
22
|
-
get<
|
|
23
|
-
|
|
24
|
-
iter
|
|
25
|
-
|
|
45
|
+
has(...path: Segment[]): Promise<boolean>;
|
|
46
|
+
has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
|
|
47
|
+
get(...path: Segment[]): Promise<unknown>;
|
|
48
|
+
get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
|
|
49
|
+
count(...path: Segment[]): Promise<number>;
|
|
50
|
+
iter(...path: Segment[]): AsyncIterable<unknown[]>;
|
|
51
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): AsyncIterable<InferOutput<Sch>[]>;
|
|
52
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
|
|
53
|
+
withIndex: true;
|
|
54
|
+
schema: Sch;
|
|
55
|
+
}]): AsyncIterable<[IterIndex, InferOutput<Sch>][]>;
|
|
56
|
+
iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
|
|
57
|
+
schema: Sch;
|
|
58
|
+
}]): AsyncIterable<InferOutput<Sch>[]>;
|
|
59
|
+
iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
|
|
60
|
+
withIndex: true;
|
|
61
|
+
select: S;
|
|
62
|
+
}]): AsyncIterable<[IterIndex, SelectMapShape<S>][]>;
|
|
63
|
+
iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
|
|
64
|
+
select: S;
|
|
65
|
+
}]): AsyncIterable<SelectMapShape<S>[]>;
|
|
66
|
+
iter(...args: [...Segment[], IterOptions & {
|
|
67
|
+
withIndex: true;
|
|
68
|
+
}]): AsyncIterable<[IterIndex, unknown][]>;
|
|
69
|
+
iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
|
|
70
|
+
walk(...path: Segment[]): AsyncIterable<Cursor>;
|
|
26
71
|
}
|
|
27
|
-
/**
|
|
28
|
-
* The cursor returned by `open()`. Owns the underlying `Source` and exposes
|
|
29
|
-
* both an explicit `close()` and `Symbol.asyncDispose` so callers can choose
|
|
30
|
-
* between manual cleanup and `await using` scoping.
|
|
31
|
-
*/
|
|
32
72
|
export interface RootCursor extends Cursor, AsyncDisposable {
|
|
33
73
|
/** Close the underlying source. Idempotent. */
|
|
34
74
|
close(): Promise<void>;
|
|
@@ -36,14 +76,8 @@ export interface RootCursor extends Cursor, AsyncDisposable {
|
|
|
36
76
|
/**
|
|
37
77
|
* Open a cursor over a seekable source.
|
|
38
78
|
*
|
|
39
|
-
* Calls `source.open()` to acquire a reader, then constructs the native cursor
|
|
40
|
-
* over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
|
|
41
|
-
* `offset` and a `buf` whose `byteLength` equals the configured chunk size;
|
|
42
|
-
* the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
|
|
43
|
-
* native-owned memory and **MUST** not be retained past the returned promise.
|
|
44
|
-
*
|
|
45
79
|
* The returned `RootCursor` owns the reader: `close()` (or `await using`)
|
|
46
80
|
* drives the reader's own `close()` exactly once.
|
|
47
81
|
*/
|
|
48
|
-
export declare function open(source: Source, options?:
|
|
82
|
+
export declare function open(source: Source, options?: OpenOptions): Promise<RootCursor>;
|
|
49
83
|
export {};
|
package/dist/open.js
CHANGED
|
@@ -1,30 +1,41 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_ITER_BATCH = exports.DEFAULT_SOURCE_CHUNK_BYTES = void 0;
|
|
3
4
|
exports.open = open;
|
|
4
5
|
const native_1 = require("@botejs/native");
|
|
6
|
+
const path_ts_1 = require("./path.js");
|
|
5
7
|
const validate_ts_1 = require("./validate.js");
|
|
8
|
+
const args_ts_1 = require("./args.js");
|
|
9
|
+
exports.DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
|
|
10
|
+
exports.DEFAULT_ITER_BATCH = 1000;
|
|
6
11
|
/**
|
|
7
12
|
* Open a cursor over a seekable source.
|
|
8
13
|
*
|
|
9
|
-
* Calls `source.open()` to acquire a reader, then constructs the native cursor
|
|
10
|
-
* over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
|
|
11
|
-
* `offset` and a `buf` whose `byteLength` equals the configured chunk size;
|
|
12
|
-
* the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
|
|
13
|
-
* native-owned memory and **MUST** not be retained past the returned promise.
|
|
14
|
-
*
|
|
15
14
|
* The returned `RootCursor` owns the reader: `close()` (or `await using`)
|
|
16
15
|
* drives the reader's own `close()` exactly once.
|
|
17
16
|
*/
|
|
18
17
|
async function open(source, options) {
|
|
18
|
+
const { indexCacheEntries, objectMemberCap, arrayIndexInterval } = options ?? {};
|
|
19
|
+
for (const [name, value] of [
|
|
20
|
+
['indexCacheEntries', indexCacheEntries],
|
|
21
|
+
['objectMemberCap', objectMemberCap],
|
|
22
|
+
['arrayIndexInterval', arrayIndexInterval],
|
|
23
|
+
]) {
|
|
24
|
+
if (value !== undefined && (!Number.isInteger(value) || value < 0)) {
|
|
25
|
+
throw new RangeError(`open: ${name} must be a non-negative integer (0 disables), got ${value}`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
19
28
|
const reader = await source.open();
|
|
29
|
+
const chunkBytes = reader.chunkBytes ?? exports.DEFAULT_SOURCE_CHUNK_BYTES;
|
|
20
30
|
let native;
|
|
21
31
|
try {
|
|
22
32
|
native = (0, native_1.open)({
|
|
23
33
|
size: reader.size,
|
|
24
|
-
chunkBytes
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
34
|
+
chunkBytes,
|
|
35
|
+
indexCacheEntries,
|
|
36
|
+
objectMemberCap,
|
|
37
|
+
arrayIndexInterval,
|
|
38
|
+
read: async ({ offset, length }) => reader.read(offset, length),
|
|
28
39
|
});
|
|
29
40
|
}
|
|
30
41
|
catch (err) {
|
|
@@ -52,35 +63,60 @@ function wrap(native) {
|
|
|
52
63
|
get key() {
|
|
53
64
|
return native.key;
|
|
54
65
|
},
|
|
55
|
-
async has(
|
|
66
|
+
async has(...args) {
|
|
67
|
+
const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
|
|
56
68
|
if (!schema)
|
|
57
|
-
return native.has(
|
|
58
|
-
if (!(await native.has(
|
|
69
|
+
return native.has(path);
|
|
70
|
+
if (!(await native.has(path)))
|
|
59
71
|
return false;
|
|
60
|
-
const result = await
|
|
61
|
-
return
|
|
72
|
+
const result = await (0, validate_ts_1.validateItem)(schema, await native.get(path), path, 'skip');
|
|
73
|
+
return !('skip' in result);
|
|
74
|
+
},
|
|
75
|
+
async get(...args) {
|
|
76
|
+
const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
|
|
77
|
+
const value = await native.get(path);
|
|
78
|
+
if (!schema || value === undefined)
|
|
79
|
+
return value;
|
|
80
|
+
return (0, validate_ts_1.runStandardSchema)(schema, value, path);
|
|
62
81
|
},
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
return
|
|
82
|
+
count(...path) {
|
|
83
|
+
(0, path_ts_1.validatePath)(path);
|
|
84
|
+
return native.count(path);
|
|
66
85
|
},
|
|
67
|
-
iter(
|
|
68
|
-
const
|
|
86
|
+
iter(...args) {
|
|
87
|
+
const { path, tail } = (0, args_ts_1.splitArgs)(args);
|
|
88
|
+
const { schema, select, batch, onInvalid, withIndex } = (0, args_ts_1.normalizeIterTail)(tail);
|
|
89
|
+
if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0)) {
|
|
90
|
+
throw new RangeError(`iter: batch must be a positive integer, got ${batch}`);
|
|
91
|
+
}
|
|
92
|
+
const resolvedBatch = batch ?? exports.DEFAULT_ITER_BATCH;
|
|
93
|
+
const selectIr = select !== undefined ? (0, args_ts_1.serializeSelect)(select) : undefined;
|
|
94
|
+
const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: withIndex });
|
|
69
95
|
if (!schema)
|
|
70
96
|
return inner;
|
|
97
|
+
const policy = onInvalid ?? 'throw';
|
|
71
98
|
return {
|
|
72
99
|
async *[Symbol.asyncIterator]() {
|
|
73
100
|
let i = 0;
|
|
74
|
-
for await (const
|
|
75
|
-
|
|
101
|
+
for await (const b of inner) {
|
|
102
|
+
const out = [];
|
|
103
|
+
for (const v of b) {
|
|
104
|
+
const value = withIndex ? v[1] : v;
|
|
105
|
+
const result = await (0, validate_ts_1.validateItem)(schema, value, [...path, i++], policy);
|
|
106
|
+
if ('skip' in result)
|
|
107
|
+
continue;
|
|
108
|
+
out.push(withIndex ? [v[0], result.value] : result.value);
|
|
109
|
+
}
|
|
110
|
+
yield out;
|
|
76
111
|
}
|
|
77
112
|
},
|
|
78
113
|
};
|
|
79
114
|
},
|
|
80
|
-
walk(
|
|
115
|
+
walk(...path) {
|
|
116
|
+
(0, path_ts_1.validatePath)(path);
|
|
81
117
|
return {
|
|
82
118
|
async *[Symbol.asyncIterator]() {
|
|
83
|
-
for await (const child of native.walk(
|
|
119
|
+
for await (const child of native.walk(path)) {
|
|
84
120
|
yield wrap(child);
|
|
85
121
|
}
|
|
86
122
|
},
|
package/dist/path.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Segment } from './validate.ts';
|
|
2
|
+
/** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
|
|
3
|
+
* comfortably covers any in-memory JSON array. */
|
|
4
|
+
export declare const MAX_ARRAY_INDEX = 4294967295;
|
|
5
|
+
export declare function validatePath(path: readonly unknown[]): asserts path is readonly Segment[];
|
package/dist/path.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MAX_ARRAY_INDEX = void 0;
|
|
4
|
+
exports.validatePath = validatePath;
|
|
5
|
+
/** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
|
|
6
|
+
* comfortably covers any in-memory JSON array. */
|
|
7
|
+
exports.MAX_ARRAY_INDEX = 0xffffffff;
|
|
8
|
+
/**
 * Check that every entry of `path` is a usable path segment: either a string
 * (object key) or an integer in the range `0..MAX_ARRAY_INDEX` (array index).
 *
 * @param path The candidate segments, checked in order.
 * @throws {TypeError} On the first entry that is neither of the above; the
 *   message names the entry's position and describes the offending value.
 */
function validatePath(path) {
    let index = 0;
    while (index < path.length) {
        const segment = path[index];
        const isKey = typeof segment === 'string';
        const isIndex = !isKey
            && typeof segment === 'number'
            && Number.isInteger(segment)
            && segment >= 0
            && segment <= exports.MAX_ARRAY_INDEX;
        if (!isKey && !isIndex) {
            throw new TypeError(`path segment ${index}: expected string or non-negative integer (<= ${exports.MAX_ARRAY_INDEX}), got ${describeBadSegment(segment)}`);
        }
        index += 1;
    }
}
|
|
18
|
+
function describeBadSegment(s) {
|
|
19
|
+
if (typeof s === 'number')
|
|
20
|
+
return `${s}`;
|
|
21
|
+
if (s === null)
|
|
22
|
+
return 'null';
|
|
23
|
+
return typeof s;
|
|
24
|
+
}
|
package/dist/sources.d.ts
CHANGED
|
@@ -10,21 +10,17 @@ export interface SourceReader {
|
|
|
10
10
|
/** Preferred read granularity in bytes. Must be a non-zero multiple of 64. */
|
|
11
11
|
readonly chunkBytes?: number;
|
|
12
12
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
* resolves: `buf` is a view over native-owned memory whose lifetime ends
|
|
17
|
-
* once the promise settles.
|
|
13
|
+
* Read up to `length` bytes starting at `offset` and resolve with the
|
|
14
|
+
* bytes read. The returned `Uint8Array`'s `.byteLength` is the actual
|
|
15
|
+
* count, which must be `<= length`.
|
|
18
16
|
*/
|
|
19
|
-
read(offset: number,
|
|
17
|
+
read(offset: number, length: number): Promise<Uint8Array>;
|
|
20
18
|
/** Release resources held by the reader. Driven once by the `open()` lifecycle. */
|
|
21
19
|
close?(): Promise<void> | void;
|
|
22
20
|
}
|
|
23
21
|
/**
|
|
24
|
-
* Describes how to obtain a seekable byte stream.
|
|
25
|
-
*
|
|
26
|
-
* `open()` API drives. Provide your own object implementing this interface to
|
|
27
|
-
* plug in custom backends.
|
|
22
|
+
* Describes how to obtain a seekable byte stream. Provide your own object implementing
|
|
23
|
+
* this interface to plug in custom backends.
|
|
28
24
|
*/
|
|
29
25
|
export interface Source {
|
|
30
26
|
/** Acquire the stream. Resolves to a `SourceReader` that owns any underlying resources. */
|
package/dist/sources.js
CHANGED
|
@@ -17,13 +17,7 @@ function fromBuffer(buf, options) {
|
|
|
17
17
|
open: () => Promise.resolve({
|
|
18
18
|
size: view.byteLength,
|
|
19
19
|
chunkBytes,
|
|
20
|
-
read:
|
|
21
|
-
const end = Math.min(offset + dst.byteLength, view.byteLength);
|
|
22
|
-
const n = Math.max(0, end - offset);
|
|
23
|
-
if (n > 0)
|
|
24
|
-
dst.set(view.subarray(offset, end));
|
|
25
|
-
return n;
|
|
26
|
-
},
|
|
20
|
+
read: (offset, length) => Promise.resolve(view.subarray(offset, Math.min(offset + length, view.byteLength))),
|
|
27
21
|
}),
|
|
28
22
|
};
|
|
29
23
|
}
|
|
@@ -37,9 +31,16 @@ function fromFile(path, options) {
|
|
|
37
31
|
return {
|
|
38
32
|
size: stat.size,
|
|
39
33
|
chunkBytes,
|
|
40
|
-
read: async (offset,
|
|
41
|
-
const
|
|
42
|
-
|
|
34
|
+
read: async (offset, length) => {
|
|
35
|
+
const buf = Buffer.allocUnsafe(length);
|
|
36
|
+
let filled = 0;
|
|
37
|
+
while (filled < length) {
|
|
38
|
+
const { bytesRead } = await handle.read(buf, filled, length - filled, offset + filled);
|
|
39
|
+
if (bytesRead === 0)
|
|
40
|
+
break;
|
|
41
|
+
filled += bytesRead;
|
|
42
|
+
}
|
|
43
|
+
return buf.subarray(0, filled);
|
|
43
44
|
},
|
|
44
45
|
close: async () => {
|
|
45
46
|
if (closed)
|
|
@@ -85,17 +86,15 @@ function fromHttpRange(url, options) {
|
|
|
85
86
|
return {
|
|
86
87
|
size,
|
|
87
88
|
chunkBytes,
|
|
88
|
-
read: async (offset,
|
|
89
|
+
read: async (offset, length) => {
|
|
89
90
|
// HTTP ranges are inclusive on both ends.
|
|
90
|
-
const end = Math.min(offset +
|
|
91
|
+
const end = Math.min(offset + length, size) - 1;
|
|
91
92
|
const headers = new Headers(init?.headers);
|
|
92
93
|
headers.set('Range', `bytes=${offset}-${end}`);
|
|
93
94
|
headers.set('Accept-Encoding', 'identity');
|
|
94
95
|
const res = await fetch(url, { ...init, headers, method: 'GET', signal: controller.signal });
|
|
95
96
|
if (res.status === 206) {
|
|
96
|
-
|
|
97
|
-
dst.set(body);
|
|
98
|
-
return body.byteLength;
|
|
97
|
+
return new Uint8Array(await res.arrayBuffer());
|
|
99
98
|
}
|
|
100
99
|
// A 200 means the server ignored our Range request and returned the full
|
|
101
100
|
// body. We throw here since the point of using ranges is to not have to
|
package/dist/validate.d.ts
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
1
|
import type { StandardSchemaV1 } from '@standard-schema/spec';
|
|
2
2
|
export type { StandardSchemaV1 };
|
|
3
|
+
export type Segment = string | number;
|
|
4
|
+
export type Path = readonly Segment[];
|
|
3
5
|
export declare class ValidationError extends Error {
|
|
4
6
|
readonly issues: readonly StandardSchemaV1.Issue[];
|
|
5
|
-
readonly
|
|
6
|
-
constructor(issues: readonly StandardSchemaV1.Issue[],
|
|
7
|
+
readonly path: Path;
|
|
8
|
+
constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
|
|
7
9
|
}
|
|
8
|
-
export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown,
|
|
10
|
+
export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path): Promise<O>;
|
|
11
|
+
export declare function validateItem<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path, onInvalid: 'throw' | 'skip'): Promise<{
|
|
12
|
+
skip: true;
|
|
13
|
+
} | {
|
|
14
|
+
value: O;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function formatPath(path: Path): string;
|
package/dist/validate.js
CHANGED
|
@@ -2,20 +2,50 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ValidationError = void 0;
|
|
4
4
|
exports.runStandardSchema = runStandardSchema;
|
|
5
|
+
exports.validateItem = validateItem;
|
|
6
|
+
exports.formatPath = formatPath;
|
|
5
7
|
class ValidationError extends Error {
|
|
6
8
|
issues;
|
|
7
|
-
|
|
8
|
-
constructor(issues,
|
|
9
|
-
super(`bote: schema validation failed at ${
|
|
9
|
+
path;
|
|
10
|
+
constructor(issues, path) {
|
|
11
|
+
super(`bote: schema validation failed at ${formatPath(path)}: ${issues[0]?.message ?? 'unknown'}`);
|
|
10
12
|
this.name = 'ValidationError';
|
|
11
13
|
this.issues = issues;
|
|
12
|
-
this.
|
|
14
|
+
this.path = path;
|
|
13
15
|
}
|
|
14
16
|
}
|
|
15
17
|
exports.ValidationError = ValidationError;
|
|
16
|
-
async function runStandardSchema(schema, value,
|
|
18
|
+
async function runStandardSchema(schema, value, path) {
|
|
17
19
|
const result = await schema['~standard'].validate(value);
|
|
18
20
|
if (result.issues)
|
|
19
|
-
throw new ValidationError(result.issues,
|
|
21
|
+
throw new ValidationError(result.issues, path);
|
|
20
22
|
return result.value;
|
|
21
23
|
}
|
|
24
|
+
async function validateItem(schema, value, path, onInvalid) {
|
|
25
|
+
const result = await schema['~standard'].validate(value);
|
|
26
|
+
if (result.issues) {
|
|
27
|
+
if (onInvalid === 'skip')
|
|
28
|
+
return { skip: true };
|
|
29
|
+
throw new ValidationError(result.issues, path);
|
|
30
|
+
}
|
|
31
|
+
return { value: result.value };
|
|
32
|
+
}
|
|
33
|
+
function formatPath(path) {
|
|
34
|
+
if (path.length === 0)
|
|
35
|
+
return '(root)';
|
|
36
|
+
let out = '';
|
|
37
|
+
for (let i = 0; i < path.length; i++) {
|
|
38
|
+
const seg = path[i];
|
|
39
|
+
if (typeof seg === 'number') {
|
|
40
|
+
out += `[${seg}]`;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
if (/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(seg)) {
|
|
44
|
+
out += i === 0 ? seg : `.${seg}`;
|
|
45
|
+
}
|
|
46
|
+
else {
|
|
47
|
+
out += `[${JSON.stringify(seg)}]`;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
return out;
|
|
51
|
+
}
|
package/package.json
CHANGED
package/dist/pointer.d.ts
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
type ValidateTokenChars<S extends string> = S extends `${string}~${infer Rest}` ? Rest extends `0${infer After}` | `1${infer After}` ? ValidateTokenChars<After> : false : true;
|
|
2
|
-
type ValidateTokens<S extends string> = S extends `${infer Token}/${infer Rest}` ? ValidateTokenChars<Token> extends true ? ValidateTokens<Rest> : false : ValidateTokenChars<S>;
|
|
3
|
-
type IsJsonPointer<S extends string> = S extends '' ? true : S extends `/${infer Rest}` ? ValidateTokens<Rest> : false;
|
|
4
|
-
export type JsonPointer<S extends string> = IsJsonPointer<S> extends true ? S : `Error: invalid JSON pointer "${S}"`;
|
|
5
|
-
export {};
|
package/dist/pointer.js
DELETED