@botejs/core 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,43 +24,48 @@ here's a run (Apple M1 Pro 2021, ~500MB JSON array file, cold-cache, default set
24
24
 
25
25
  | operation | approach | time | js heap peak Δ | rust heap peak |
26
26
  | -------------- | ---------- | --------: | -------------: | -------------: |
27
- | items[0] | JSON.parse | 616.02 ms | 1.03 GB | n/a |
28
- | items[535399] | JSON.parse | 604.63 ms | 1.03 GB | n/a |
29
- | items[1070797] | JSON.parse | 600.68 ms | 1.03 GB | n/a |
30
- | items[0] | bote | 527.80 µs | 291.6 KB | 130.4 KB |
31
- | items[535399] | bote | 187.24 ms | 742.3 KB | 36.7 MB |
32
- | items[1070797] | bote | 371.61 ms | 828.7 KB | 37.1 MB |
27
+ | items[0] | JSON.parse | 1.81 s | 1.21 GB | n/a |
28
+ | items[535399] | JSON.parse | 1.74 s | 1.21 GB | n/a |
29
+ | items[1070797] | JSON.parse | 1.74 s | 1.21 GB | n/a |
30
+ | items[0] | bote | 1.29 ms | 63.3 KB | 130.8 KB |
31
+ | items[535399] | bote | 193.49 ms | 191.5 KB | 36.7 MB |
32
+ | items[1070797] | bote | 379.98 ms | 189.8 KB | 37.2 MB |
33
33
 
34
34
  ## array access
35
35
 
36
- `iter` streams the elements of an array at a path, **a batch at a time**, so you never hold the whole collection in memory and not wait for the heat death of the universe if this yielded individually. each `for await` step yields an array of items (use `walk` to step over the members of an object):
36
+ `iter` streams the children of a container at a path **one item at a time**, so you never hold the whole collection in memory. it works on either kind of container: an array (yielding its elements) or an object (yielding its member values). each `for await` step yields a single item:
37
37
 
38
38
  ```ts
39
39
  // e.g. [{ id: 'user-1' }, { id: 'user-2' }, ...]
40
40
  await using cursor = await open(fromFile('./users.json'))
41
41
 
42
42
  // root is an array
43
- for await (const users of cursor.iter()) {
43
+ for await (const user of cursor.iter()) {
44
+ console.log(user)
45
+ }
46
+ ```
47
+
48
+ the item loop is the ergonomic default; it costs a flat ~10% over a full walk. for hot paths, `.raw()` hands back the raw fetch arrays with no per-item tax (the `batch` option sets their size and the memory bound):
49
+
50
+ ```ts
51
+ for await (const users of cursor.iter().raw()) {
44
52
  for (const user of users) {
45
53
  console.log(user)
46
54
  }
47
55
  }
48
56
  ```
49
57
 
50
- pass an options object as the last argument to tune what comes back: `batch`, `select`, `schema`, `onInvalid`, and `withIndex`. if you want to know more of the options, see [`arrays.js`](./examples/arrays.js).
51
-
52
58
  ## object access
53
59
 
54
- `walk` steps over the members of an object at a path, yielding a **`[key, cursor]`** pair per member. the key is the member name, the cursor is anchored at its value. each child cursor is first-class: it outlives the loop and can be `walk`ed again, which is what lets you descend a tree of unknown depth.
60
+ `iter` over an object yields its **member values** in document order. add `withKey: true` to get **`[key, value]`** pairs instead, where `key` is the member name (for an array, `key` is the element's index). streamed either way, so a million-member object never lands on the heap at once:
55
61
 
56
62
  ```ts
57
63
  // e.g. { alice: { role: 'admin' }, bob: { role: 'guest' }, ... }
58
64
  await using cursor = await open(fromFile('./accounts.json'))
59
65
 
60
- for await (const [name, account] of cursor.walk()) {
61
- // name is the member name ('alice', 'bob', ...)
62
- const role = await account.get('role')
63
- console.log(`${name}: ${role}`)
66
+ for await (const [name, account] of cursor.iter({ withKey: true })) {
67
+ // name is the member name ('alice', 'bob', ...); account is its value
68
+ console.log(`${name}: ${account.role}`)
64
69
  }
65
70
  ```
66
71
 
@@ -77,8 +82,8 @@ await using cursor = await open(fromFile('./report.json'))
77
82
  const section = await cursor.hop('report', 'sections', 0)
78
83
  if (section) {
79
84
  console.log(await section.count('rows'))
80
- for await (const rows of section.iter('rows')) {
81
- console.log(rows)
85
+ for await (const row of section.iter('rows')) {
86
+ console.log(row)
82
87
  }
83
88
  }
84
89
  ```
@@ -105,12 +110,14 @@ const cursor = await open(fromFile('./users.json'))
105
110
  // name: string
106
111
  const name = await cursor.get('users', 1000, 'name', User.shape.name)
107
112
 
108
- for await (const users of cursor.iter('users', User)) {
109
- // user: User[]
110
- const emails = users.map((user) => user.email)
111
- await sendNewsletter(emails)
113
+ let emails: string[] = []
114
+ // collects every address and sends once after the loop; iterate `.raw()` instead to hand each fetch's worth of recipients to the batched API at once
115
+ for await (const user of cursor.iter('users', User)) {
116
+ // user: User
117
+ emails.push(user.email)
112
118
  }
113
119
 
120
+ await sendNewsletter(emails)
114
121
  await cursor.close()
115
122
  ```
116
123
 
package/dist/args.d.ts CHANGED
@@ -1,15 +1,20 @@
1
1
  import type { Path, Segment, StandardSchemaV1 } from './validate.ts';
2
2
  export interface IterOptions {
3
3
  select?: Segment | Path | Record<string, Segment | Path>;
4
- /** How many items are yielded per batch. Higher is faster, but takes more memory to materialise those items. */
4
+ /** How many items cross the native boundary per fetch, which also bounds the
5
+ * resident materialization window (the memory knob) and sets the array size
6
+ * yielded by `IterStream.raw()`. The default item loop drains each fetch
7
+ * one item at a time, so this doesn't change what item iteration yields, only
8
+ * how much is fetched and held at once. Higher is faster but holds more in
9
+ * memory. */
5
10
  batch?: number;
6
11
  /** Validate each yielded item against this schema (after `select`). */
7
12
  schema?: StandardSchemaV1;
8
13
  /** Policy for items failing `schema`. Default `'throw'`; `'skip'` drops them. */
9
14
  onInvalid?: 'throw' | 'skip';
10
- /** Yield `[index, value]` tuples instead of bare values, where `index` is
11
- * the zero-based position of the element in the source array. */
12
- withIndex?: boolean;
15
+ /** Yield `[key, value]` tuples instead of bare values. `key` is the member
16
+ * name for objects and the zero-based index for arrays. */
17
+ withKey?: boolean;
13
18
  }
14
19
  export type VariadicPathArgs<TTail> = [...Segment[]] | [...Segment[], TTail];
15
20
  export declare function splitArgs<TTail>(args: VariadicPathArgs<TTail>): {
@@ -0,0 +1,52 @@
1
+ import type { Cursor as NativeCursor } from '@botejs/native';
2
+ import { type IterStream } from './stream.ts';
3
+ import { type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
4
+ import { type IterOptions } from './args.ts';
5
+ type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
6
+ type SelectMapShape<S> = {
7
+ -readonly [K in keyof S]: unknown;
8
+ };
9
+ export type IterKey = string | number;
10
+ export declare const DEFAULT_ITER_BATCH = 1000;
11
+ export declare const MAX_ITER_BATCH = 1000000;
12
+ export interface Cursor {
13
+ hop(...path: Segment[]): Promise<Cursor | null>;
14
+ has(...path: Segment[]): Promise<boolean>;
15
+ has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
16
+ get(...path: Segment[]): Promise<unknown>;
17
+ get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
18
+ count(...path: Segment[]): Promise<number>;
19
+ iter(...path: Segment[]): IterStream<unknown>;
20
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): IterStream<InferOutput<Sch>>;
21
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
22
+ withKey: true;
23
+ schema: Sch;
24
+ }]): IterStream<[IterKey, InferOutput<Sch>]>;
25
+ iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
26
+ schema: Sch;
27
+ }]): IterStream<InferOutput<Sch>>;
28
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
29
+ withKey: true;
30
+ select: S;
31
+ }]): IterStream<[IterKey, SelectMapShape<S>]>;
32
+ iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
33
+ select: S;
34
+ }]): IterStream<SelectMapShape<S>>;
35
+ iter(...args: [...Segment[], IterOptions & {
36
+ withKey: true;
37
+ }]): IterStream<[IterKey, unknown]>;
38
+ iter(...args: [...Segment[], IterOptions]): IterStream<unknown>;
39
+ }
40
+ export interface RootCursor extends Cursor, AsyncDisposable {
41
+ /** Close the underlying source. Idempotent. */
42
+ close(): Promise<void>;
43
+ }
44
+ export type CursorState = {
45
+ closed: boolean;
46
+ };
47
+ /** Throw a uniform error for any operation on a closed cursor, so use-after-close
48
+ * is one defined contract regardless of source (some readers' reads keep working
49
+ * after close, others throw an opaque I/O error). */
50
+ export declare function ensureOpen(state: CursorState): void;
51
+ export declare function wrap(native: NativeCursor, state: CursorState): Cursor;
52
+ export {};
package/dist/cursor.js ADDED
@@ -0,0 +1,119 @@
1
+ import { validatePath } from "./path.js";
2
+ import { parseValue, deserializeError } from "./decode.js";
3
+ import { makeStream } from "./stream.js";
4
+ import { runStandardSchema, validateItem } from "./validate.js";
5
+ import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
6
+ export const DEFAULT_ITER_BATCH = 1000;
7
+ export const MAX_ITER_BATCH = 1_000_000;
8
+ /** Throw a uniform error for any operation on a closed cursor, so use-after-close
9
+ * is one defined contract regardless of source (some readers' reads keep working
10
+ * after close, others throw an opaque I/O error). */
11
+ export function ensureOpen(state) {
12
+ if (state.closed)
13
+ throw new Error('bote: cursor is closed');
14
+ }
15
+ export function wrap(native, state) {
16
+ const cursor = {
17
+ async hop(...path) {
18
+ ensureOpen(state);
19
+ validatePath(path);
20
+ let child;
21
+ try {
22
+ child = await native.hop(path);
23
+ }
24
+ catch (err) {
25
+ throw deserializeError(err, path);
26
+ }
27
+ return child ? wrap(child, state) : null;
28
+ },
29
+ async has(...args) {
30
+ ensureOpen(state);
31
+ const { path, tail: schema } = splitArgs(args);
32
+ if (schema !== undefined && !isSchema(schema)) {
33
+ throw new TypeError('has: expected a Standard Schema as the trailing argument');
34
+ }
35
+ if (!schema)
36
+ return native.has(path);
37
+ if (!(await native.has(path)))
38
+ return false;
39
+ const text = await native.get(path);
40
+ const value = text === undefined ? undefined : parseValue(text, path);
41
+ const result = await validateItem(schema, value, path, 'skip');
42
+ return !('skip' in result);
43
+ },
44
+ async get(...args) {
45
+ ensureOpen(state);
46
+ const { path, tail: schema } = splitArgs(args);
47
+ if (schema !== undefined && !isSchema(schema)) {
48
+ throw new TypeError('get: expected a Standard Schema as the trailing argument');
49
+ }
50
+ let value;
51
+ try {
52
+ const text = await native.get(path);
53
+ value = text === undefined ? undefined : parseValue(text, path);
54
+ }
55
+ catch (err) {
56
+ throw deserializeError(err, path);
57
+ }
58
+ if (!schema)
59
+ return value;
60
+ return runStandardSchema(schema, value, path);
61
+ },
62
+ async count(...path) {
63
+ ensureOpen(state);
64
+ validatePath(path);
65
+ try {
66
+ return await native.count(path);
67
+ }
68
+ catch (err) {
69
+ throw deserializeError(err, path);
70
+ }
71
+ },
72
+ iter(...args) {
73
+ ensureOpen(state);
74
+ const { path, tail } = splitArgs(args);
75
+ const { schema, select, batch, onInvalid, withKey } = normalizeIterTail(tail);
76
+ if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > MAX_ITER_BATCH)) {
77
+ throw new RangeError(`iter: batch must be an integer in 1..=${MAX_ITER_BATCH}, got ${batch}`);
78
+ }
79
+ if (withKey !== undefined && typeof withKey !== 'boolean') {
80
+ throw new TypeError(`iter: withKey must be a boolean, got ${typeof withKey}`);
81
+ }
82
+ if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
83
+ throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
84
+ }
85
+ const resolvedBatch = batch ?? DEFAULT_ITER_BATCH;
86
+ const selectIr = select !== undefined ? serializeSelect(select) : undefined;
87
+ const wantKey = withKey ?? false;
88
+ const nativeWithKey = wantKey || schema !== undefined;
89
+ const inner = native.iter(path, { selectIr, batch: resolvedBatch, withKey: nativeWithKey });
90
+ if (!schema) {
91
+ return nativeStream(inner, path, resolvedBatch, (raw) => parseValue(raw, path));
92
+ }
93
+ const policy = onInvalid ?? 'throw';
94
+ return nativeStream(inner, path, resolvedBatch, async (raw) => {
95
+ const out = [];
96
+ for (const [key, value] of parseValue(raw, path)) {
97
+ const result = await validateItem(schema, value, [...path, key], policy);
98
+ if ('skip' in result)
99
+ continue;
100
+ out.push(wantKey ? [key, result.value] : result.value);
101
+ }
102
+ return out;
103
+ });
104
+ },
105
+ };
106
+ return cursor;
107
+ }
108
+ function nativeStream(inner, path, batchSize, mapBatch) {
109
+ async function* batches() {
110
+ try {
111
+ for await (const raw of inner)
112
+ yield await mapBatch(raw);
113
+ }
114
+ catch (err) {
115
+ throw deserializeError(err, path);
116
+ }
117
+ }
118
+ return makeStream(batches, batchSize);
119
+ }
@@ -0,0 +1,3 @@
1
+ import { type Path } from './validate.ts';
2
+ export declare function deserializeError(err: unknown, path: Path): unknown;
3
+ export declare function parseValue(text: string, path: Path): unknown;
package/dist/decode.js ADDED
@@ -0,0 +1,20 @@
1
+ import { PathError, formatPath } from "./validate.js";
2
+ const NATIVE_PATH_ERROR = /^bote:path:([a-z_]+)(?::(\d+))?$/;
3
+ export function deserializeError(err, path) {
4
+ if (err instanceof Error && !(err instanceof PathError)) {
5
+ const match = NATIVE_PATH_ERROR.exec(err.message);
6
+ if (match) {
7
+ const segment = match[2] === undefined ? undefined : Number(match[2]);
8
+ return new PathError(path, match[1], segment);
9
+ }
10
+ }
11
+ return err;
12
+ }
13
+ export function parseValue(text, path) {
14
+ try {
15
+ return JSON.parse(text);
16
+ }
17
+ catch {
18
+ throw new Error(`bote: malformed JSON value at ${formatPath(path)}`);
19
+ }
20
+ }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,6 @@
1
1
  export { type IterOptions } from './args.ts';
2
2
  export { ValidationError, PathError, formatPath, type Path, type PathFaultCode, type Segment, type StandardSchemaV1, } from './validate.ts';
3
- export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type OpenOptions, type WalkEntry, type IterIndex as IterKey, } from './open.ts';
3
+ export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH, type Cursor, type RootCursor, type IterKey } from './cursor.ts';
4
4
  export { fromBuffer, fromFile, fromHttpRange, type FactoryOptions, type Source, type SourceReader, type HttpRangeOptions, } from './sources.ts';
5
+ export { type IterStream } from './stream.ts';
6
+ export { open, type OpenOptions } from './open.ts';
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
1
  export { ValidationError, PathError, formatPath, } from "./validate.js";
2
- export { open, DEFAULT_ITER_BATCH, MAX_ITER_BATCH, } from "./open.js";
2
+ export { DEFAULT_ITER_BATCH, MAX_ITER_BATCH } from "./cursor.js";
3
3
  export { fromBuffer, fromFile, fromHttpRange, } from "./sources.js";
4
+ export { open } from "./open.js";
package/dist/open.d.ts CHANGED
@@ -1,17 +1,6 @@
1
+ import { type RootCursor } from './cursor.ts';
1
2
  import type { Source } from './sources.ts';
2
- import { type Path, type Segment, type StandardSchemaV1 } from './validate.ts';
3
- import { type IterOptions } from './args.ts';
4
- type InferOutput<Sch> = Sch extends StandardSchemaV1<unknown, infer O> ? O : never;
5
- type SelectMapShape<S> = {
6
- -readonly [K in keyof S]: unknown;
7
- };
8
- /** Zero-based index of an array element. */
9
- export type IterIndex = number;
10
- /** One `walk` step: the member's key paired with a cursor anchored at its value. */
11
- export type WalkEntry = [key: string, cursor: Cursor];
12
3
  export declare const DEFAULT_SOURCE_CHUNK_BYTES: number;
13
- export declare const DEFAULT_ITER_BATCH = 1000;
14
- export declare const MAX_ITER_BATCH = 1000000;
15
4
  export interface OpenOptions {
16
5
  /**
17
6
  * Slot budget for the structural-index cache: one slot per cached container
@@ -42,40 +31,6 @@ export interface OpenOptions {
42
31
  */
43
32
  arrayIndexInterval?: number;
44
33
  }
45
- export interface Cursor {
46
- hop(...path: Segment[]): Promise<Cursor | null>;
47
- has(...path: Segment[]): Promise<boolean>;
48
- has(...args: [...Segment[], StandardSchemaV1]): Promise<boolean>;
49
- get(...path: Segment[]): Promise<unknown>;
50
- get<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): Promise<InferOutput<Sch>>;
51
- count(...path: Segment[]): Promise<number>;
52
- iter(...path: Segment[]): AsyncIterable<unknown[]>;
53
- iter<Sch extends StandardSchemaV1>(...args: [...Segment[], Sch]): AsyncIterable<InferOutput<Sch>[]>;
54
- iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
55
- withIndex: true;
56
- schema: Sch;
57
- }]): AsyncIterable<[IterIndex, InferOutput<Sch>][]>;
58
- iter<Sch extends StandardSchemaV1>(...args: [...Segment[], IterOptions & {
59
- schema: Sch;
60
- }]): AsyncIterable<InferOutput<Sch>[]>;
61
- iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
62
- withIndex: true;
63
- select: S;
64
- }]): AsyncIterable<[IterIndex, SelectMapShape<S>][]>;
65
- iter<S extends Record<string, Segment | Path>>(...args: [...Segment[], IterOptions & {
66
- select: S;
67
- }]): AsyncIterable<SelectMapShape<S>[]>;
68
- iter(...args: [...Segment[], IterOptions & {
69
- withIndex: true;
70
- }]): AsyncIterable<[IterIndex, unknown][]>;
71
- iter(...args: [...Segment[], IterOptions]): AsyncIterable<unknown[]>;
72
- walk(...path: Segment[]): AsyncIterable<WalkEntry>;
73
- walk(...path: Segment[]): AsyncIterable<Cursor>;
74
- }
75
- export interface RootCursor extends Cursor, AsyncDisposable {
76
- /** Close the underlying source. Idempotent. */
77
- close(): Promise<void>;
78
- }
79
34
  /**
80
35
  * Open a cursor over a seekable source.
81
36
  *
@@ -83,4 +38,3 @@ export interface RootCursor extends Cursor, AsyncDisposable {
83
38
  * drives the reader's own `close()` exactly once.
84
39
  */
85
40
  export declare function open(source: Source, options?: OpenOptions): Promise<RootCursor>;
86
- export {};
package/dist/open.js CHANGED
@@ -1,10 +1,6 @@
1
1
  import { open as openNative } from '@botejs/native';
2
- import { validatePath } from "./path.js";
3
- import { runStandardSchema, validateItem, formatPath, PathError, } from "./validate.js";
4
- import { splitArgs, isSchema, serializeSelect, normalizeIterTail, } from "./args.js";
2
+ import { wrap } from "./cursor.js";
5
3
  export const DEFAULT_SOURCE_CHUNK_BYTES = 64 * 1024;
6
- export const DEFAULT_ITER_BATCH = 1000;
7
- export const MAX_ITER_BATCH = 1_000_000;
8
4
  /**
9
5
  * Open a cursor over a seekable source.
10
6
  *
@@ -71,169 +67,3 @@ async function closeReader(reader) {
71
67
  if (reader.close)
72
68
  await reader.close();
73
69
  }
74
- const NATIVE_PATH_ERROR = /^bote:path:([a-z_]+)(?::(\d+))?$/;
75
- function deserializeError(err, path) {
76
- if (err instanceof Error && !(err instanceof PathError)) {
77
- const match = NATIVE_PATH_ERROR.exec(err.message);
78
- if (match) {
79
- const segment = match[2] === undefined ? undefined : Number(match[2]);
80
- return new PathError(path, match[1], segment);
81
- }
82
- }
83
- return err;
84
- }
85
- /** Throw a uniform error for any operation on a closed cursor, so use-after-close
86
- * is one defined contract regardless of source (some readers' reads keep working
87
- * after close, others throw an opaque I/O error). */
88
- function ensureOpen(state) {
89
- if (state.closed)
90
- throw new Error('bote: cursor is closed');
91
- }
92
- function wrap(native, state) {
93
- const cursor = {
94
- async hop(...path) {
95
- ensureOpen(state);
96
- validatePath(path);
97
- let child;
98
- try {
99
- child = await native.hop(path);
100
- }
101
- catch (err) {
102
- throw deserializeError(err, path);
103
- }
104
- return child ? wrap(child, state) : null;
105
- },
106
- async has(...args) {
107
- ensureOpen(state);
108
- const { path, tail: schema } = splitArgs(args);
109
- if (schema !== undefined && !isSchema(schema)) {
110
- throw new TypeError('has: expected a Standard Schema as the trailing argument');
111
- }
112
- if (!schema)
113
- return native.has(path);
114
- if (!(await native.has(path)))
115
- return false;
116
- const text = await native.get(path);
117
- const value = text === undefined ? undefined : parseValue(text, path);
118
- const result = await validateItem(schema, value, path, 'skip');
119
- return !('skip' in result);
120
- },
121
- async get(...args) {
122
- ensureOpen(state);
123
- const { path, tail: schema } = splitArgs(args);
124
- if (schema !== undefined && !isSchema(schema)) {
125
- throw new TypeError('get: expected a Standard Schema as the trailing argument');
126
- }
127
- let value;
128
- try {
129
- const text = await native.get(path);
130
- value = text === undefined ? undefined : parseValue(text, path);
131
- }
132
- catch (err) {
133
- throw deserializeError(err, path);
134
- }
135
- if (!schema)
136
- return value;
137
- return runStandardSchema(schema, value, path);
138
- },
139
- async count(...path) {
140
- ensureOpen(state);
141
- validatePath(path);
142
- try {
143
- return await native.count(path);
144
- }
145
- catch (err) {
146
- throw deserializeError(err, path);
147
- }
148
- },
149
- iter(...args) {
150
- ensureOpen(state);
151
- const { path, tail } = splitArgs(args);
152
- const { schema, select, batch, onInvalid, withIndex } = normalizeIterTail(tail);
153
- if (batch !== undefined && (!Number.isInteger(batch) || batch <= 0 || batch > MAX_ITER_BATCH)) {
154
- throw new RangeError(`iter: batch must be an integer in 1..=${MAX_ITER_BATCH}, got ${batch}`);
155
- }
156
- if (withIndex !== undefined && typeof withIndex !== 'boolean') {
157
- throw new TypeError(`iter: withIndex must be a boolean, got ${typeof withIndex}`);
158
- }
159
- if (onInvalid !== undefined && onInvalid !== 'throw' && onInvalid !== 'skip') {
160
- throw new RangeError(`iter: onInvalid must be "throw" or "skip", got ${JSON.stringify(onInvalid)}`);
161
- }
162
- const resolvedBatch = batch ?? DEFAULT_ITER_BATCH;
163
- const selectIr = select !== undefined ? serializeSelect(select) : undefined;
164
- const inner = native.iter(path, { selectIr, batch: resolvedBatch });
165
- if (!schema) {
166
- return {
167
- async *[Symbol.asyncIterator]() {
168
- let i = 0;
169
- try {
170
- for await (const b of inner) {
171
- const batch = parseValue(b, path);
172
- if (!withIndex) {
173
- yield batch;
174
- continue;
175
- }
176
- const out = new Array(batch.length);
177
- for (let j = 0; j < batch.length; j++) {
178
- out[j] = [i++, batch[j]];
179
- }
180
- yield out;
181
- }
182
- }
183
- catch (err) {
184
- throw deserializeError(err, path);
185
- }
186
- },
187
- };
188
- }
189
- const policy = onInvalid ?? 'throw';
190
- return {
191
- async *[Symbol.asyncIterator]() {
192
- let i = 0;
193
- try {
194
- for await (const b of inner) {
195
- const out = [];
196
- for (const v of parseValue(b, path)) {
197
- const index = i++;
198
- const result = await validateItem(schema, v, [...path, index], policy);
199
- if ('skip' in result) {
200
- continue;
201
- }
202
- out.push(withIndex ? [index, result.value] : result.value);
203
- }
204
- yield out;
205
- }
206
- }
207
- catch (err) {
208
- throw deserializeError(err, path);
209
- }
210
- },
211
- };
212
- },
213
- walk(...path) {
214
- ensureOpen(state);
215
- validatePath(path);
216
- return {
217
- async *[Symbol.asyncIterator]() {
218
- try {
219
- for await (const [key, child] of native.walk(path)) {
220
- yield [key, wrap(child, state)];
221
- }
222
- }
223
- catch (err) {
224
- throw deserializeError(err, path);
225
- }
226
- },
227
- };
228
- },
229
- };
230
- return cursor;
231
- }
232
- function parseValue(text, path) {
233
- try {
234
- return JSON.parse(text);
235
- }
236
- catch {
237
- throw new Error(`bote: malformed JSON value at ${formatPath(path)}`);
238
- }
239
- }
@@ -0,0 +1,15 @@
1
+ export interface IterStream<T> extends AsyncIterable<T> {
2
+ raw(): AsyncIterable<T[]>;
3
+ map<U>(fn: (item: T, index: number) => U | Promise<U>): IterStream<U>;
4
+ filter<U extends T>(fn: (item: T, index: number) => item is U): IterStream<U>;
5
+ filter(fn: (item: T, index: number) => boolean | Promise<boolean>): IterStream<T>;
6
+ take(limit: number): IterStream<T>;
7
+ drop(limit: number): IterStream<T>;
8
+ toArray(): Promise<T[]>;
9
+ forEach(fn: (item: T, index: number) => void | Promise<void>): Promise<void>;
10
+ reduce<A>(fn: (acc: A, item: T, index: number) => A | Promise<A>, init: A): Promise<A>;
11
+ find(fn: (item: T, index: number) => boolean | Promise<boolean>): Promise<T | undefined>;
12
+ some(fn: (item: T, index: number) => boolean | Promise<boolean>): Promise<boolean>;
13
+ every(fn: (item: T, index: number) => boolean | Promise<boolean>): Promise<boolean>;
14
+ }
15
+ export declare function makeStream<T>(batches: () => AsyncIterable<T[]>, batchSize: number, regroup?: boolean): IterStream<T>;
package/dist/stream.js ADDED
@@ -0,0 +1,166 @@
1
+ export function makeStream(batches, batchSize, regroup = false) {
2
+ const derive = (next) => makeStream(next, batchSize, true);
3
+ const stream = {
4
+ [Symbol.asyncIterator]() {
5
+ return flatten(batches())[Symbol.asyncIterator]();
6
+ },
7
+ raw() {
8
+ return regroup ? regroupBatches(batches(), batchSize) : batches();
9
+ },
10
+ map(fn) {
11
+ return derive(() => mapBatches(batches(), fn));
12
+ },
13
+ filter(fn) {
14
+ return derive(() => filterBatches(batches(), fn));
15
+ },
16
+ take(limit) {
17
+ return derive(() => takeBatches(batches(), limit));
18
+ },
19
+ drop(limit) {
20
+ return derive(() => dropBatches(batches(), limit));
21
+ },
22
+ async toArray() {
23
+ const out = [];
24
+ for await (const batch of batches()) {
25
+ for (let i = 0; i < batch.length; i++) {
26
+ out.push(batch[i]);
27
+ }
28
+ }
29
+ return out;
30
+ },
31
+ async forEach(fn) {
32
+ let index = 0;
33
+ for await (const batch of batches()) {
34
+ for (let i = 0; i < batch.length; i++) {
35
+ await fn(batch[i], index++);
36
+ }
37
+ }
38
+ },
39
+ async reduce(fn, init) {
40
+ let acc = init;
41
+ let index = 0;
42
+ for await (const batch of batches()) {
43
+ for (let i = 0; i < batch.length; i++) {
44
+ acc = await fn(acc, batch[i], index++);
45
+ }
46
+ }
47
+ return acc;
48
+ },
49
+ async find(fn) {
50
+ let index = 0;
51
+ for await (const batch of batches()) {
52
+ for (let i = 0; i < batch.length; i++) {
53
+ if (await fn(batch[i], index++)) {
54
+ return batch[i];
55
+ }
56
+ }
57
+ }
58
+ return undefined;
59
+ },
60
+ async some(fn) {
61
+ let index = 0;
62
+ for await (const batch of batches()) {
63
+ for (let i = 0; i < batch.length; i++) {
64
+ if (await fn(batch[i], index++)) {
65
+ return true;
66
+ }
67
+ }
68
+ }
69
+ return false;
70
+ },
71
+ async every(fn) {
72
+ let index = 0;
73
+ for await (const batch of batches()) {
74
+ for (let i = 0; i < batch.length; i++) {
75
+ if (!(await fn(batch[i], index++))) {
76
+ return false;
77
+ }
78
+ }
79
+ }
80
+ return true;
81
+ },
82
+ };
83
+ return stream;
84
+ }
85
+ async function* flatten(batches) {
86
+ for await (const batch of batches) {
87
+ for (let i = 0; i < batch.length; i++) {
88
+ yield batch[i];
89
+ }
90
+ }
91
+ }
92
+ async function* regroupBatches(batches, size) {
93
+ let buf = [];
94
+ for await (const batch of batches) {
95
+ for (let i = 0; i < batch.length; i++) {
96
+ buf.push(batch[i]);
97
+ if (buf.length >= size) {
98
+ yield buf;
99
+ buf = [];
100
+ }
101
+ }
102
+ }
103
+ if (buf.length > 0) {
104
+ yield buf;
105
+ }
106
+ }
107
+ async function* mapBatches(batches, fn) {
108
+ let index = 0;
109
+ for await (const batch of batches) {
110
+ const out = new Array(batch.length);
111
+ for (let i = 0; i < batch.length; i++) {
112
+ const r = fn(batch[i], index++);
113
+ out[i] = isThenable(r) ? await r : r;
114
+ }
115
+ yield out;
116
+ }
117
+ }
118
+ async function* filterBatches(batches, fn) {
119
+ let index = 0;
120
+ for await (const batch of batches) {
121
+ const out = [];
122
+ for (let i = 0; i < batch.length; i++) {
123
+ const item = batch[i];
124
+ const r = fn(item, index++);
125
+ if (isThenable(r) ? await r : r) {
126
+ out.push(item);
127
+ }
128
+ }
129
+ if (out.length > 0) {
130
+ yield out;
131
+ }
132
+ }
133
+ }
134
+ async function* takeBatches(batches, limit) {
135
+ if (limit <= 0) {
136
+ return;
137
+ }
138
+ let remaining = limit;
139
+ for await (const batch of batches) {
140
+ if (batch.length < remaining) {
141
+ remaining -= batch.length;
142
+ yield batch;
143
+ continue;
144
+ }
145
+ yield batch.length === remaining ? batch : batch.slice(0, remaining);
146
+ return;
147
+ }
148
+ }
149
+ async function* dropBatches(batches, limit) {
150
+ let remaining = limit;
151
+ for await (const batch of batches) {
152
+ if (remaining === 0) {
153
+ yield batch;
154
+ }
155
+ else if (remaining >= batch.length) {
156
+ remaining -= batch.length;
157
+ }
158
+ else {
159
+ yield batch.slice(remaining);
160
+ remaining = 0;
161
+ }
162
+ }
163
+ }
164
+ function isThenable(value) {
165
+ return value != null && typeof value.then === 'function';
166
+ }
package/dist/validate.js CHANGED
@@ -15,8 +15,6 @@ const PATH_FAULT_MESSAGE = {
15
15
  through_scalar: (segment) => `path traverses a non-container value at segment ${segment}`,
16
16
  wrong_kind: (segment) => `path segment ${segment} does not match the container kind`,
17
17
  scalar_target: () => 'target value is not a container',
18
- iter_on_object: () => 'iter target is an object; use walk() to iterate object members',
19
- walk_on_array: () => 'walk target is an array; use iter() to iterate array elements',
20
18
  };
21
19
  export class PathError extends Error {
22
20
  path;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@botejs/core",
3
- "version": "0.4.0",
3
+ "version": "0.5.0",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -35,7 +35,7 @@
35
35
  "prepublishOnly": "cp ../../README.md ./README.md && tsc"
36
36
  },
37
37
  "dependencies": {
38
- "@botejs/native": "^0.4.0"
38
+ "@botejs/native": "^0.5.0"
39
39
  },
40
40
  "devDependencies": {
41
41
  "@types/node": "^22.0.0",