@botejs/core 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,106 @@
1
+ # bote
2
+
3
+ a minimal, ergonomic and low-memory approach to navigating a big JSON document:
4
+
5
+ ```sh
6
+ npm install @botejs/core
7
+ ```
8
+
9
+ ```ts
10
+ import { open, fromFile } from '@botejs/core'
11
+
12
+ import * as z from 'zod' // or bring your own Standard Schema validator
13
+
14
+ const User = z.object({
15
+ id: z.string(),
16
+ name: z.string(),
17
+ email: z.string(),
18
+ details: z.object({
19
+ lastLoggedIn: z.number(),
20
+ }),
21
+ })
22
+
23
+ type User = z.infer<typeof User>
24
+
25
+ await using cursor = await open(fromFile('./your-big.json'))
26
+
27
+ // users[1000].name
28
+ const desc0: unknown = await cursor.get('users', 1000, 'name')
29
+ // for .get, .has and .iter, you can supply a validator as the last argument
30
+ const desc1: string = await cursor.get('users', 1000, 'name', User.shape.name)
31
+
32
+ // iterate an array in batches
33
+ for await (const batch of cursor.iter('users', User)) {
34
+ // batch: User[]
35
+ for (const user of batch) {
36
+ console.log(user)
37
+ }
38
+ }
39
+
40
+ // pick several fields into a named object to avoid resolving big items
41
+ for await (const batch of cursor.iter('users', {
42
+ select: {
43
+ id: 'id',
44
+ logged: ['details', 'lastLoggedIn'],
45
+ },
46
+ schema: z.object({
47
+ id: User.shape.id,
48
+ logged: User.shape.details.shape.lastLoggedIn,
49
+ }),
50
+ })) {
51
+ // batch: { id: string, logged: number }[]
52
+ for (const userLog of batch) {
53
+ console.log(userLog)
54
+ }
55
+ }
56
+
57
+ // or pick a single field
58
+ for await (const batch of cursor.iter('users', {
59
+ select: 'name',
60
+ schema: User.shape.name,
61
+ })) {
62
+ // batch: string[]
63
+ for (const name of batch) {
64
+ console.log({ name })
65
+ }
66
+ }
67
+
68
+ // for open-ended per-child work (e.g. conditional reads, recursive descent, nested
69
+ // iters), `walk` yields a subcursor positioned at each child:
70
+ for await (const metaCursor of cursor.walk('meta')) {
71
+ if (metaCursor.key === 'details') {
72
+ const detailsValue = await metaCursor.get()
73
+ console.log(detailsValue)
74
+ }
75
+ }
76
+
77
+ // 'await using' would normally clean up resources for you
78
+ // when it goes out of lexical scope. if you hate that,
79
+ // you can do it explicitly as well.
80
+ await cursor.close()
81
+ ```
82
+
83
+ given a **seekable** source (e.g. a file, or an HTTP server that supports range requests) and a path, bote can retrieve values from a JSON document quickly, without loading the whole thing into memory.
84
+
85
+ here's a sample benchmark run (Apple M1 Pro 2021, ~500 MB JSON array file, cold cache, default settings):
86
+
87
+ | operation | approach | time | js heap peak Δ | rust heap peak |
88
+ | -------------- | ---------- | --------: | -------------: | -------------: |
89
+ | items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
90
+ | items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
91
+ | items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
92
+ | items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
93
+ | items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
94
+ | items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
95
+
96
+ ## sources
97
+
98
+ bote currently ships `fromBuffer`, `fromFile` and `fromHttpRange` as pre-built sources. create your own by implementing the `Source` interface. see [./packages/core/src/sources.ts](./packages/core/src/sources.ts) for how it works.
99
+
100
+ ## status
101
+
102
+ pre-1.0, so it is still in development and APIs may change based on feedback, bugs and holy divinations from the coding gods.
103
+
104
+ ## license
105
+
106
+ MIT.
package/dist/args.d.ts ADDED
@@ -0,0 +1,21 @@
1
+ import type { Path, Segment, StandardSchemaV1 } from './validate.ts';
2
+ export interface IterOptions {
3
+ select?: Segment | Path | Record<string, Segment | Path>;
4
+ /** How many items are yielded per batch. Higher is faster, but takes more memory to materialise those items. Defaults to `DEFAULT_ITER_BATCH` (1000). */
5
+ batch?: number;
6
+ /** Validate each yielded item against this schema (after `select`). */
7
+ schema?: StandardSchemaV1;
8
+ /** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
9
+ onInvalid?: 'throw' | 'skip';
10
+ /** Yield `[index, value]` tuples instead of bare values, where `index` is
11
+ * the zero-based position of the element in the source array. */
12
+ withIndex?: boolean;
13
+ }
14
+ export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
15
+ export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
16
+ path: Segment[];
17
+ tail: TTail | undefined;
18
+ };
19
+ export declare function isSchema(value: unknown): value is StandardSchemaV1;
20
+ export declare function normalizeIterTail(tail: StandardSchemaV1 | IterOptions | undefined): IterOptions;
21
+ export declare function serializeSelect(select: Segment | Path | Record<string, Segment | Path>): string;
package/dist/args.js ADDED
@@ -0,0 +1,64 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.splitArgs = splitArgs;
4
+ exports.isSchema = isSchema;
5
+ exports.normalizeIterTail = normalizeIterTail;
6
+ exports.serializeSelect = serializeSelect;
7
+ const path_ts_1 = require("./path.js");
8
+ function splitArgs(args) {
9
+ let pathArgs;
10
+ let tail;
11
+ if (args.length === 0) {
12
+ pathArgs = [];
13
+ tail = undefined;
14
+ }
15
+ else {
16
+ const last = args[args.length - 1];
17
+ if (last !== null && typeof last === 'object' && !Array.isArray(last)) {
18
+ pathArgs = args.slice(0, -1);
19
+ tail = last;
20
+ }
21
+ else {
22
+ pathArgs = args;
23
+ tail = undefined;
24
+ }
25
+ }
26
+ (0, path_ts_1.validatePath)(pathArgs);
27
+ return { path: pathArgs, tail };
28
+ }
29
+ function isSchema(value) {
30
+ return typeof value === 'object' && value !== null && '~standard' in value;
31
+ }
32
+ function normalizeIterTail(tail) {
33
+ if (!tail)
34
+ return {};
35
+ if (isSchema(tail))
36
+ return { schema: tail };
37
+ return tail;
38
+ }
39
+ function serializeSelect(select) {
40
+ if (typeof select === 'string' || typeof select === 'number') {
41
+ const one = [select];
42
+ (0, path_ts_1.validatePath)(one);
43
+ return JSON.stringify({ one });
44
+ }
45
+ if (Array.isArray(select)) {
46
+ (0, path_ts_1.validatePath)(select);
47
+ if (select.length === 0) {
48
+ throw new RangeError('iter: select sub-path must have at least one segment');
49
+ }
50
+ return JSON.stringify({ one: select });
51
+ }
52
+ const entries = Object.entries(select).map(([k, sub]) => {
53
+ const path = typeof sub === 'string' || typeof sub === 'number' ? [sub] : sub;
54
+ (0, path_ts_1.validatePath)(path);
55
+ if (path.length === 0) {
56
+ throw new RangeError(`iter: select field ${JSON.stringify(k)} sub-path must have at least one segment`);
57
+ }
58
+ return [k, path];
59
+ });
60
+ if (entries.length === 0) {
61
+ throw new RangeError('iter: select must have at least one field');
62
+ }
63
+ return JSON.stringify({ map: entries });
64
+ }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export type { JsonPointer } from './pointer.ts';
2
- export { open, type Cursor, type RootCursor, type SessionOptions } from './open.ts';
1
+ export { type IterOptions } from './args.ts';
2
+ export { ValidationError, formatPath, type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
3
+ export { open, DEFAULT_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type IterIndex as IterKey, } from './open.ts';
3
4
  export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
4
- export { ValidationError, type StandardSchemaV1 } from './validate.ts';
package/dist/index.js CHANGED
@@ -1,17 +1,19 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.ValidationError = exports.fromHttpRange = exports.fromFile = exports.fromBuffer = void 0;
3
+ exports.fromHttpRange = exports.fromFile = exports.fromBuffer = exports.DEFAULT_ITER_BATCH = exports.formatPath = exports.ValidationError = void 0;
4
4
  // Node 18 and Node 20.3 predate `Symbol.asyncDispose`; mirror what TS emits for
5
5
  // `await using` so the well-known symbol is available across our engine range.
6
6
  if (!Symbol.asyncDispose) {
7
7
  ;
8
8
  Symbol.asyncDispose = Symbol.for('Symbol.asyncDispose');
9
9
  }
10
+ var validate_ts_1 = require("./validate.js");
11
+ Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
12
+ Object.defineProperty(exports, "formatPath", { enumerable: true, get: function () { return validate_ts_1.formatPath; } });
10
13
  var open_ts_1 = require("./open.js");
11
14
  Object.defineProperty(exports, "open", { enumerable: true, get: function () { return open_ts_1.open; } });
15
+ Object.defineProperty(exports, "DEFAULT_ITER_BATCH", { enumerable: true, get: function () { return open_ts_1.DEFAULT_ITER_BATCH; } });
12
16
  var sources_ts_1 = require("./sources.js");
13
17
  Object.defineProperty(exports, "fromBuffer", { enumerable: true, get: function () { return sources_ts_1.fromBuffer; } });
14
18
  Object.defineProperty(exports, "fromFile", { enumerable: true, get: function () { return sources_ts_1.fromFile; } });
15
19
  Object.defineProperty(exports, "fromHttpRange", { enumerable: true, get: function () { return sources_ts_1.fromHttpRange; } });
16
- var validate_ts_1 = require("./validate.js");
17
- Object.defineProperty(exports, "ValidationError", { enumerable: true, get: function () { return validate_ts_1.ValidationError; } });
package/dist/open.d.ts CHANGED
@@ -1,34 +1,74 @@
1
- import type { JsonPointer } from './pointer.ts';
2
1
  import type { Source } from './sources.ts';
3
- import { type StandardSchemaV1 } from './validate.ts';
4
- export interface SessionOptions {
2
+ import { type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
3
+ import { type IterOptions } from './args.ts';
4
+ type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
5
+ type SelectMapShape<S> = {
6
+ -readonly [K in keyof S]: unknown;
7
+ };
8
+ /** Zero-based index of an array element. */
9
+ export type IterIndex = number;
10
+ export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
11
+ export declare const DEFAULT_ITER_BATCH = 1000;
12
+ export interface OpenOptions {
13
+ /**
14
+ * Slot budget for the structural-index cache: one slot per cached container
15
+ * plus one per tabled object member. When a scan tips the cache over this
16
+ * budget, the deepest (least navigationally useful) containers are evicted
17
+ * first, LRU-tiebroken, keeping the shallow backbone that resumes future
18
+ * scans. Bounds resident cache memory regardless of document size. `0`
19
+ * disables the cache entirely. Omit for the native default (1024).
20
+ */
21
+ indexCacheEntries?: number;
22
+ /**
23
+ * Max object members tabled per walked container in the structural-index
24
+ * cache. The table is a dense prefix; past the cap, lookups of later members
25
+ * resume-scan from the cap boundary. Lower trades cache memory for resume work
26
+ * on pathologically large objects. `0` disables object member indexing. Omit
27
+ * for the native default (unbounded).
28
+ */
29
+ objectMemberCap?: number;
5
30
  /**
6
- * Maximum number of source chunks held resident at once. Each slot
7
- * accounts for one chunk's bytes plus its bitmaps; the cache also
8
- * enforces a derived byte ceiling at roughly `maxResidentChunks x
9
- * source.chunkBytes x 2` to bound total native memory.
31
+ * Element-index stride between sampled array members in the structural-index
32
+ * cache. A later index resumes from the nearest sampled member at or before it, so
33
+ * a smaller stride means denser sampling (more memory, shorter resume
34
+ * scans). `0` disables array-member indexing. Omit for the native default (16).
10
35
  *
11
- * Defaults to 512 chunks.
36
+ * Setting both `objectMemberCap` and `arrayIndexInterval` to `0` disables the
37
+ * cache entirely (no source bytes are ever cached either way), as does
38
+ * `indexCacheEntries: 0`.
12
39
  */
13
- maxResidentChunks?: number;
40
+ arrayIndexInterval?: number;
14
41
  }
15
- type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
16
42
  export interface Cursor {
17
43
  /** Object-member key or array-element index that this cursor was yielded under by `walk`. `null` on the root cursor. */
18
44
  readonly key: string | number | null;
19
- has<S extends string>(pointer: JsonPointer<S>): Promise<boolean>;
20
- has<S extends string>(pointer: JsonPointer<S>, schema: StandardSchemaV1): Promise<boolean>;
21
- get<S extends string>(pointer: JsonPointer<S>): Promise<unknown>;
22
- get<S extends string, Sch extends StandardSchemaV1>(pointer: JsonPointer<S>, schema: Sch): Promise<InferOutput<Sch>>;
23
- iter<S extends string>(pointer: JsonPointer<S>): AsyncIterable<unknown>;
24
- iter<S extends string, Sch extends StandardSchemaV1>(pointer: JsonPointer<S>, schema: Sch): AsyncIterable<InferOutput<Sch>>;
25
- walk<S extends string>(pointer: JsonPointer<S>): AsyncIterable<Cursor>;
45
+ has(...path: Segment[]): Promise<boolean>;
46
+ has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
47
+ get(...path: Segment[]): Promise<unknown>;
48
+ get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
49
+ count(...path: Segment[]): Promise<number>;
50
+ iter(...path: Segment[]): AsyncIterable<unknown[]>;
51
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): AsyncIterable<InferOutput<Sch>[]>;
52
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
53
+ withIndex: true;
54
+ schema: Sch;
55
+ }]): AsyncIterable<[IterIndex, InferOutput<Sch>][]>;
56
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
57
+ schema: Sch;
58
+ }]): AsyncIterable<InferOutput<Sch>[]>;
59
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
60
+ withIndex: true;
61
+ select: S;
62
+ }]): AsyncIterable<[IterIndex, SelectMapShape<S>][]>;
63
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
64
+ select: S;
65
+ }]): AsyncIterable<SelectMapShape<S>[]>;
66
+ iter(...args: [...Segment[], IterOptions & {
67
+ withIndex: true;
68
+ }]): AsyncIterable<[IterIndex, unknown][]>;
69
+ iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
70
+ walk(...path: Segment[]): AsyncIterable<Cursor>;
26
71
  }
27
- /**
28
- * The cursor returned by `open()`. Owns the underlying `Source` and exposes
29
- * both an explicit `close()` and `Symbol.asyncDispose` so callers can choose
30
- * between manual cleanup and `await using` scoping.
31
- */
32
72
  export interface RootCursor extends Cursor, AsyncDisposable {
33
73
  /** Close the underlying source. Idempotent. */
34
74
  close(): Promise<void>;
@@ -36,14 +76,8 @@ export interface RootCursor extends Cursor, AsyncDisposable {
36
76
  /**
37
77
  * Open a cursor over a seekable source.
38
78
  *
39
- * Calls `source.open()` to acquire a reader, then constructs the native cursor
40
- * over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
41
- * `offset` and a `buf` whose `byteLength` equals the configured chunk size;
42
- * the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
43
- * native-owned memory and **MUST** not be retained past the returned promise.
44
- *
45
79
  * The returned `RootCursor` owns the reader: `close()` (or `await using`)
46
80
  * drives the reader's own `close()` exactly once.
47
81
  */
48
- export declare function open(source: Source, options?: SessionOptions): Promise<RootCursor>;
82
+ export declare function open(source: Source, options?: OpenOptions): Promise<RootCursor>;
49
83
  export {};
package/dist/open.js CHANGED
@@ -1,30 +1,41 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.DEFAULT_ITER_BATCH = exports.DEFAULT_SOURCE_CHUNK_BYTES = void 0;
3
4
  exports.open = open;
4
5
  const native_1 = require("@botejs/native");
6
+ const path_ts_1 = require("./path.js");
5
7
  const validate_ts_1 = require("./validate.js");
8
+ const args_ts_1 = require("./args.js");
9
+ exports.DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
10
+ exports.DEFAULT_ITER_BATCH = 1000;
6
11
  /**
7
12
  * Open a cursor over a seekable source.
8
13
  *
9
- * Calls `source.open()` to acquire a reader, then constructs the native cursor
10
- * over it. The reader's `read(offset, buf)` is invoked with chunk-aligned
11
- * `offset` and a `buf` whose `byteLength` equals the configured chunk size;
12
- * the reader fills `buf` and resolves with `bytesRead`. `buf` is a view over
13
- * native-owned memory and **MUST** not be retained past the returned promise.
14
- *
15
14
  * The returned `RootCursor` owns the reader: `close()` (or `await using`)
16
15
  * drives the reader's own `close()` exactly once.
17
16
  */
18
17
  async function open(source, options) {
18
+ const { indexCacheEntries, objectMemberCap, arrayIndexInterval } = options ?? {};
19
+ for (const [name, value] of [
20
+ ['indexCacheEntries', indexCacheEntries],
21
+ ['objectMemberCap', objectMemberCap],
22
+ ['arrayIndexInterval', arrayIndexInterval],
23
+ ]) {
24
+ if (value !== undefined && (!Number.isInteger(value) || value < 0)) {
25
+ throw new RangeError(`open: ${name} must be a non-negative integer (0 disables), got ${value}`);
26
+ }
27
+ }
19
28
  const reader = await source.open();
29
+ const chunkBytes = reader.chunkBytes ?? exports.DEFAULT_SOURCE_CHUNK_BYTES;
20
30
  let native;
21
31
  try {
22
32
  native = (0, native_1.open)({
23
33
  size: reader.size,
24
- chunkBytes: reader.chunkBytes,
25
- read: async ({ offset, buf }) => reader.read(offset, buf),
26
- }, {
27
- maxResidentChunks: options?.maxResidentChunks,
34
+ chunkBytes,
35
+ indexCacheEntries,
36
+ objectMemberCap,
37
+ arrayIndexInterval,
38
+ read: async ({ offset, length }) => reader.read(offset, length),
28
39
  });
29
40
  }
30
41
  catch (err) {
@@ -52,35 +63,60 @@ function wrap(native) {
52
63
  get key() {
53
64
  return native.key;
54
65
  },
55
- async has(pointer, schema) {
66
+ async has(...args) {
67
+ const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
56
68
  if (!schema)
57
- return native.has(pointer);
58
- if (!(await native.has(pointer)))
69
+ return native.has(path);
70
+ if (!(await native.has(path)))
59
71
  return false;
60
- const result = await schema['~standard'].validate(await native.get(pointer));
61
- return result.issues === undefined;
72
+ const result = await (0, validate_ts_1.validateItem)(schema, await native.get(path), path, 'skip');
73
+ return !('skip' in result);
74
+ },
75
+ async get(...args) {
76
+ const { path, tail: schema } = (0, args_ts_1.splitArgs)(args);
77
+ const value = await native.get(path);
78
+ if (!schema || value === undefined)
79
+ return value;
80
+ return (0, validate_ts_1.runStandardSchema)(schema, value, path);
62
81
  },
63
- async get(pointer, schema) {
64
- const value = await native.get(pointer);
65
- return schema ? (0, validate_ts_1.runStandardSchema)(schema, value, pointer) : value;
82
+ count(...path) {
83
+ (0, path_ts_1.validatePath)(path);
84
+ return native.count(path);
66
85
  },
67
- iter(pointer, schema) {
68
- const inner = native.iter(pointer);
86
+ iter(...args) {
87
+ const { path, tail } = (0, args_ts_1.splitArgs)(args);
88
+ const { schema, select, batch, onInvalid, withIndex } = (0, args_ts_1.normalizeIterTail)(tail);
89
+ if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0)) {
90
+ throw new RangeError(`iter: batch must be a positive integer, got ${batch}`);
91
+ }
92
+ const resolvedBatch = batch ?? exports.DEFAULT_ITER_BATCH;
93
+ const selectIr = select !== undefined ? (0, args_ts_1.serializeSelect)(select) : undefined;
94
+ const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: withIndex });
69
95
  if (!schema)
70
96
  return inner;
97
+ const policy = onInvalid ?? 'throw';
71
98
  return {
72
99
  async *[Symbol.asyncIterator]() {
73
100
  let i = 0;
74
- for await (const v of inner) {
75
- yield await (0, validate_ts_1.runStandardSchema)(schema, v, `${pointer}/${i++}`);
101
+ for await (const b of inner) {
102
+ const out = [];
103
+ for (const v of b) {
104
+ const value = withIndex ? v[1] : v;
105
+ const result = await (0, validate_ts_1.validateItem)(schema, value, [...path, i++], policy);
106
+ if ('skip' in result)
107
+ continue;
108
+ out.push(withIndex ? [v[0], result.value] : result.value);
109
+ }
110
+ yield out;
76
111
  }
77
112
  },
78
113
  };
79
114
  },
80
- walk(pointer) {
115
+ walk(...path) {
116
+ (0, path_ts_1.validatePath)(path);
81
117
  return {
82
118
  async *[Symbol.asyncIterator]() {
83
- for await (const child of native.walk(pointer)) {
119
+ for await (const child of native.walk(path)) {
84
120
  yield wrap(child);
85
121
  }
86
122
  },
package/dist/path.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { Segment } from './validate.ts';
2
+ /** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
3
+ * comfortably covers any in-memory JSON array. */
4
+ export declare const MAX_ARRAY_INDEX = 4294967295;
5
+ export declare function validatePath(path: readonly unknown[]): asserts path is readonly Segment[];
package/dist/path.js ADDED
@@ -0,0 +1,24 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MAX_ARRAY_INDEX = void 0;
4
+ exports.validatePath = validatePath;
5
+ /** Upper bound on numeric segments (napi takes them as `u32`). 2^32 - 1
6
+ * comfortably covers any in-memory JSON array. */
7
+ exports.MAX_ARRAY_INDEX = 0xffffffff;
8
+ function validatePath(path) {
9
+ for (let i = 0; i < path.length; i++) {
10
+ const s = path[i];
11
+ if (typeof s === 'string')
12
+ continue;
13
+ if (typeof s === 'number' && Number.isInteger(s) && s >= 0 && s <= exports.MAX_ARRAY_INDEX)
14
+ continue;
15
+ throw new TypeError(`path segment ${i}: expected string or non-negative integer (<= ${exports.MAX_ARRAY_INDEX}), got ${describeBadSegment(s)}`);
16
+ }
17
+ }
18
+ function describeBadSegment(s) {
19
+ if (typeof s === 'number')
20
+ return `${s}`;
21
+ if (s === null)
22
+ return 'null';
23
+ return typeof s;
24
+ }
package/dist/sources.d.ts CHANGED
@@ -10,21 +10,17 @@ export interface SourceReader {
10
10
  /** Preferred read granularity in bytes. Must be a non-zero multiple of 64. */
11
11
  readonly chunkBytes?: number;
12
12
  /**
13
- * Fill `buf` with up to `buf.byteLength` bytes starting at `offset` and
14
- * resolve with the number of bytes written. The implementation must not
15
- * retain a reference to `buf` or read from it after the returned promise
16
- * resolves: `buf` is a view over native-owned memory whose lifetime ends
17
- * once the promise settles.
13
+ * Read up to `length` bytes starting at `offset` and resolve with the
14
+ * bytes read. The returned `Uint8Array`'s `.byteLength` is the actual
15
+ * count, which must be `<= length`.
18
16
  */
19
- read(offset: number, buf: Uint8Array): Promise<number>;
17
+ read(offset: number, length: number): Promise<Uint8Array>;
20
18
  /** Release resources held by the reader. Driven once by the `open()` lifecycle. */
21
19
  close?(): Promise<void> | void;
22
20
  }
23
21
  /**
24
- * Describes how to obtain a seekable byte stream. Construction is cheap and
25
- * synchronous - no I/O happens until `open()` runs, which the top-level
26
- * `open()` API drives. Provide your own object implementing this interface to
27
- * plug in custom backends.
22
+ * Describes how to obtain a seekable byte stream. Provide your own object implementing
23
+ * this interface to plug in custom backends.
28
24
  */
29
25
  export interface Source {
30
26
  /** Acquire the stream. Resolves to a `SourceReader` that owns any underlying resources. */
package/dist/sources.js CHANGED
@@ -17,13 +17,7 @@ function fromBuffer(buf, options) {
17
17
  open: () => Promise.resolve({
18
18
  size: view.byteLength,
19
19
  chunkBytes,
20
- read: async (offset, dst) => {
21
- const end = Math.min(offset + dst.byteLength, view.byteLength);
22
- const n = Math.max(0, end - offset);
23
- if (n > 0)
24
- dst.set(view.subarray(offset, end));
25
- return n;
26
- },
20
+ read: (offset, length) => Promise.resolve(view.subarray(offset, Math.min(offset + length, view.byteLength))),
27
21
  }),
28
22
  };
29
23
  }
@@ -37,9 +31,16 @@ function fromFile(path, options) {
37
31
  return {
38
32
  size: stat.size,
39
33
  chunkBytes,
40
- read: async (offset, dst) => {
41
- const { bytesRead } = await handle.read(dst, 0, dst.byteLength, offset);
42
- return bytesRead;
34
+ read: async (offset, length) => {
35
+ const buf = Buffer.allocUnsafe(length);
36
+ let filled = 0;
37
+ while (filled < length) {
38
+ const { bytesRead } = await handle.read(buf, filled, length - filled, offset + filled);
39
+ if (bytesRead === 0)
40
+ break;
41
+ filled += bytesRead;
42
+ }
43
+ return buf.subarray(0, filled);
43
44
  },
44
45
  close: async () => {
45
46
  if (closed)
@@ -85,17 +86,15 @@ function fromHttpRange(url, options) {
85
86
  return {
86
87
  size,
87
88
  chunkBytes,
88
- read: async (offset, dst) => {
89
+ read: async (offset, length) => {
89
90
  // HTTP ranges are inclusive on both ends.
90
- const end = Math.min(offset + dst.byteLength, size) - 1;
91
+ const end = Math.min(offset + length, size) - 1;
91
92
  const headers = new Headers(init?.headers);
92
93
  headers.set('Range', `bytes=${offset}-${end}`);
93
94
  headers.set('Accept-Encoding', 'identity');
94
95
  const res = await fetch(url, { ...init, headers, method: 'GET', signal: controller.signal });
95
96
  if (res.status === 206) {
96
- const body = new Uint8Array(await res.arrayBuffer());
97
- dst.set(body);
98
- return body.byteLength;
97
+ return new Uint8Array(await res.arrayBuffer());
99
98
  }
100
99
  // A 200 means the server ignored our Range request and returned the full
101
100
  // body. We throw here since the point of using ranges is to not have to
@@ -1,8 +1,16 @@
1
1
  import type { StandardSchemaV1 } from '@standard-schema/spec';
2
2
  export type { StandardSchemaV1 };
3
+ export type Segment = string | number;
4
+ export type Path = readonly Segment[];
3
5
  export declare class ValidationError extends Error {
4
6
  readonly issues: readonly StandardSchemaV1.Issue[];
5
- readonly pointer: string;
6
- constructor(issues: readonly StandardSchemaV1.Issue[], pointer: string);
7
+ readonly path: Path;
8
+ constructor(issues: readonly StandardSchemaV1.Issue[], path: Path);
7
9
  }
8
- export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, pointer: string): Promise<O>;
10
+ export declare function runStandardSchema<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path): Promise<O>;
11
+ export declare function validateItem<O>(schema: StandardSchemaV1<unknown, O>, value: unknown, path: Path, onInvalid: 'throw' | 'skip'): Promise<{
12
+ skip: true;
13
+ } | {
14
+ value: O;
15
+ }>;
16
+ export declare function formatPath(path: Path): string;
package/dist/validate.js CHANGED
@@ -2,20 +2,50 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ValidationError = void 0;
4
4
  exports.runStandardSchema = runStandardSchema;
5
+ exports.validateItem = validateItem;
6
+ exports.formatPath = formatPath;
5
7
  class ValidationError extends Error {
6
8
  issues;
7
- pointer;
8
- constructor(issues, pointer) {
9
- super(`bote: schema validation failed at ${pointer || '/'}: ${issues[0]?.message ?? 'unknown'}`);
9
+ path;
10
+ constructor(issues, path) {
11
+ super(`bote: schema validation failed at ${formatPath(path)}: ${issues[0]?.message ?? 'unknown'}`);
10
12
  this.name = 'ValidationError';
11
13
  this.issues = issues;
12
- this.pointer = pointer;
14
+ this.path = path;
13
15
  }
14
16
  }
15
17
  exports.ValidationError = ValidationError;
16
- async function runStandardSchema(schema, value, pointer) {
18
+ async function runStandardSchema(schema, value, path) {
17
19
  const result = await schema['~standard'].validate(value);
18
20
  if (result.issues)
19
- throw new ValidationError(result.issues, pointer);
21
+ throw new ValidationError(result.issues, path);
20
22
  return result.value;
21
23
  }
24
+ async function validateItem(schema, value, path, onInvalid) {
25
+ const result = await schema['~standard'].validate(value);
26
+ if (result.issues) {
27
+ if (onInvalid === 'skip')
28
+ return { skip: true };
29
+ throw new ValidationError(result.issues, path);
30
+ }
31
+ return { value: result.value };
32
+ }
33
+ function formatPath(path) {
34
+ if (path.length === 0)
35
+ return '(root)';
36
+ let out = '';
37
+ for (let i = 0; i < path.length; i++) {
38
+ const seg = path[i];
39
+ if (typeof seg === 'number') {
40
+ out += `[${seg}]`;
41
+ continue;
42
+ }
43
+ if (/^[A-Za-z_$][A-Za-z0-9_$]*$/.test(seg)) {
44
+ out += i === 0 ? seg : `.${seg}`;
45
+ }
46
+ else {
47
+ out += `[${JSON.stringify(seg)}]`;
48
+ }
49
+ }
50
+ return out;
51
+ }
package/package.json CHANGED
@@ -1,16 +1,17 @@
1
1
  {
2
2
  "name": "@botejs/core",
3
- "version": "0.1.3",
3
+ "version": "0.2.0",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/jankdc/bote.git",
8
- "directory": "packages/bote"
8
+ "directory": "packages/core"
9
9
  },
10
10
  "main": "dist/index.js",
11
11
  "types": "dist/index.d.ts",
12
12
  "files": [
13
- "dist"
13
+ "dist",
14
+ "README.md"
14
15
  ],
15
16
  "engines": {
16
17
  "node": ">= 18.17.0 < 19 || >= 20.3.0 < 21 || >= 21.1.0"
@@ -24,7 +25,7 @@
24
25
  "build:debug": "tsc --sourceMap",
25
26
  "test": "node --test --experimental-strip-types --no-warnings=ExperimentalWarning __test__/*.spec.ts",
26
27
  "lint": "oxlint src",
27
- "prepublishOnly": "tsc"
28
+ "prepublishOnly": "cp ../../README.md ./README.md && tsc"
28
29
  },
29
30
  "dependencies": {
30
31
  "@botejs/native": "workspace:*"
package/dist/pointer.d.ts DELETED
@@ -1,5 +0,0 @@
1
- type ValidateTokenChars<S extends string> = S extends `${string}~${infer Rest}` ? Rest extends `0${infer After}` | `1${infer After}` ? ValidateTokenChars<After> : false : true;
2
- type ValidateTokens<S extends string> = S extends `${infer Token}/${infer Rest}` ? ValidateTokenChars<Token> extends true ? ValidateTokens<Rest> : false : ValidateTokenChars<S>;
3
- type IsJsonPointer<S extends string> = S extends '' ? true : S extends `/${infer Rest}` ? ValidateTokens<Rest> : false;
4
- export type JsonPointer<S extends string> = IsJsonPointer<S> extends true ? S : `Error: invalid JSON pointer "${S}"`;
5
- export {};
package/dist/pointer.js DELETED
@@ -1,3 +0,0 @@
1
- "use strict";
2
- // RFC 6901 JSON Pointer Static Typing Validator
3
- Object.defineProperty(exports, "__esModule", { value: true });