@arbidocs/sdk 0.3.45 → 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -605,10 +605,17 @@ type DocumentListFields = 'full' | 'lite';
605
605
  */
606
606
  interface ListPaginatedOptions {
607
607
  /**
608
- * Number of documents per page.
608
+ * Number of documents per page (when `firstPageSize` is set, applies to every page after the first).
609
609
  * @default 5000
610
610
  */
611
611
  pageSize?: number;
612
+ /**
613
+ * Size of the first page only. When set, the initial request uses this
614
+ * smaller limit so the caller can render something on screen before the
615
+ * full `pageSize`-sized pages stream in. Subsequent pages fall back to
616
+ * `pageSize`. When unset, every page uses `pageSize`.
617
+ */
618
+ firstPageSize?: number;
612
619
  /**
613
620
  * Sort order for pagination.
614
621
  * @default 'id_asc'
@@ -625,6 +632,13 @@ interface ListPaginatedOptions {
625
632
  * AbortSignal to cancel iteration mid-stream.
626
633
  */
627
634
  signal?: AbortSignal;
635
+ /**
636
+ * Number of pages kept in flight concurrently. A higher value hides more
637
+ * backend + network latency between pages but increases peak backend load
638
+ * and memory for not-yet-consumed pages. Clamped to `[1, MAX_PAGES]`.
639
+ * @default 1
640
+ */
641
+ lookahead?: number;
628
642
  }
629
643
  /**
630
644
  * Options for `listAll` — collects all pages into a single array.
@@ -718,11 +732,24 @@ declare function listDocuments(arbi: ArbiClient): Promise<{
718
732
  } | null | undefined;
719
733
  }[]>;
720
734
  /**
721
- * Async iterator that yields pages of documents sequentially.
735
+ * Async iterator that yields pages of documents with a configurable lookahead.
736
+ *
737
+ * Uses `limit`/`offset` pagination. A FIFO queue of up to `lookahead` requests
738
+ * is kept in flight: as soon as a page is awaited off the queue the next
739
+ * request is enqueued, so the backend is continuously working on the next
740
+ * page while the consumer processes the current one. The default of 1 keeps
741
+ * one page in flight at a time (classic sequential pagination). Raise it when
742
+ * you want to pipeline — each extra slot adds one more concurrent backend
743
+ * scan and one more not-yet-consumed page held in memory.
744
+ *
745
+ * Pair `firstPageSize` with a larger `pageSize` when you need the initial
746
+ * page on screen fast: e.g. `firstPageSize: 500, pageSize: 2000` renders the
747
+ * first 500 rows in a fifth of the time of a single 2500-row request, then
748
+ * streams in 2000-row pages after.
722
749
  *
723
- * Fetches pages one at a time using `limit`/`offset` pagination. Sequential
724
- * (not parallel) because the backend decrypt step is CPU-bound on a shared
725
- * thread pool — parallel requests contend and don't finish meaningfully faster.
750
+ * Iteration stops at the first short page (fewer documents than the limit
751
+ * that request used); in-flight requests past that point are discarded.
752
+ * `MAX_PAGES` is a hard cap on the number of requests issued.
726
753
  *
727
754
  * @example
728
755
  * ```ts
@@ -732,7 +759,7 @@ declare function listDocuments(arbi: ArbiClient): Promise<{
732
759
  * ```
733
760
  *
734
761
  * @param arbi - Authenticated ArbiClient
735
- * @param options - Pagination options (pageSize, order, fields, signal)
762
+ * @param options - Pagination options (pageSize, firstPageSize, order, fields, signal, lookahead)
736
763
  * @yields Pages of documents until the backend returns a short page or signal is aborted
737
764
  */
738
765
  declare function listPaginated(arbi: ArbiClient, options?: ListPaginatedOptions): AsyncGenerator<DocResponse[]>;
@@ -605,10 +605,17 @@ type DocumentListFields = 'full' | 'lite';
605
605
  */
606
606
  interface ListPaginatedOptions {
607
607
  /**
608
- * Number of documents per page.
608
+ * Number of documents per page (when `firstPageSize` is set, applies to every page after the first).
609
609
  * @default 5000
610
610
  */
611
611
  pageSize?: number;
612
+ /**
613
+ * Size of the first page only. When set, the initial request uses this
614
+ * smaller limit so the caller can render something on screen before the
615
+ * full `pageSize`-sized pages stream in. Subsequent pages fall back to
616
+ * `pageSize`. When unset, every page uses `pageSize`.
617
+ */
618
+ firstPageSize?: number;
612
619
  /**
613
620
  * Sort order for pagination.
614
621
  * @default 'id_asc'
@@ -625,6 +632,13 @@ interface ListPaginatedOptions {
625
632
  * AbortSignal to cancel iteration mid-stream.
626
633
  */
627
634
  signal?: AbortSignal;
635
+ /**
636
+ * Number of pages kept in flight concurrently. A higher value hides more
637
+ * backend + network latency between pages but increases peak backend load
638
+ * and memory for not-yet-consumed pages. Clamped to `[1, MAX_PAGES]`.
639
+ * @default 1
640
+ */
641
+ lookahead?: number;
628
642
  }
629
643
  /**
630
644
  * Options for `listAll` — collects all pages into a single array.
@@ -718,11 +732,24 @@ declare function listDocuments(arbi: ArbiClient): Promise<{
718
732
  } | null | undefined;
719
733
  }[]>;
720
734
  /**
721
- * Async iterator that yields pages of documents sequentially.
735
+ * Async iterator that yields pages of documents with a configurable lookahead.
736
+ *
737
+ * Uses `limit`/`offset` pagination. A FIFO queue of up to `lookahead` requests
738
+ * is kept in flight: as soon as a page is awaited off the queue the next
739
+ * request is enqueued, so the backend is continuously working on the next
740
+ * page while the consumer processes the current one. The default of 1 keeps
741
+ * one page in flight at a time (classic sequential pagination). Raise it when
742
+ * you want to pipeline — each extra slot adds one more concurrent backend
743
+ * scan and one more not-yet-consumed page held in memory.
744
+ *
745
+ * Pair `firstPageSize` with a larger `pageSize` when you need the initial
746
+ * page on screen fast: e.g. `firstPageSize: 500, pageSize: 2000` renders the
747
+ * first 500 rows in a fifth of the time of a single 2500-row request, then
748
+ * streams in 2000-row pages after.
722
749
  *
723
- * Fetches pages one at a time using `limit`/`offset` pagination. Sequential
724
- * (not parallel) because the backend decrypt step is CPU-bound on a shared
725
- * thread pool — parallel requests contend and don't finish meaningfully faster.
750
+ * Iteration stops at the first short page (fewer documents than the limit
751
+ * that request used); in-flight requests past that point are discarded.
752
+ * `MAX_PAGES` is a hard cap on the number of requests issued.
726
753
  *
727
754
  * @example
728
755
  * ```ts
@@ -732,7 +759,7 @@ declare function listDocuments(arbi: ArbiClient): Promise<{
732
759
  * ```
733
760
  *
734
761
  * @param arbi - Authenticated ArbiClient
735
- * @param options - Pagination options (pageSize, order, fields, signal)
762
+ * @param options - Pagination options (pageSize, firstPageSize, order, fields, signal, lookahead)
736
763
  * @yields Pages of documents until the backend returns a short page or signal is aborted
737
764
  */
738
765
  declare function listPaginated(arbi: ArbiClient, options?: ListPaginatedOptions): AsyncGenerator<DocResponse[]>;
package/dist/browser.cjs CHANGED
@@ -4160,13 +4160,18 @@ async function listDocuments(arbi) {
4160
4160
  return requireData(await arbi.fetch.GET("/v1/document/list"), "Failed to fetch documents");
4161
4161
  }
4162
4162
  async function* listPaginated(arbi, options = {}) {
4163
- const { pageSize = 5e3, order = "id_asc", fields, signal } = options;
4164
- let offset = 0;
4165
- let pagesFetched = 0;
4166
- while (!signal?.aborted && pagesFetched < MAX_PAGES) {
4163
+ const {
4164
+ pageSize = 5e3,
4165
+ firstPageSize,
4166
+ order = "id_asc",
4167
+ fields,
4168
+ signal,
4169
+ lookahead = 1
4170
+ } = options;
4171
+ const fetchPage = async (pageOffset, limit) => {
4167
4172
  const query = {
4168
- limit: pageSize,
4169
- offset,
4173
+ limit,
4174
+ offset: pageOffset,
4170
4175
  order
4171
4176
  };
4172
4177
  if (fields) query.fields = fields;
@@ -4177,16 +4182,42 @@ async function* listPaginated(arbi, options = {}) {
4177
4182
  if (error) {
4178
4183
  throw new Error(typeof error === "string" ? error : JSON.stringify(error));
4179
4184
  }
4185
+ return data ?? [];
4186
+ };
4187
+ if (signal?.aborted) return;
4188
+ const depth = Math.max(1, Math.min(lookahead, MAX_PAGES));
4189
+ let issued = 0;
4190
+ let pagesYielded = 0;
4191
+ let nextOffsetToIssue = 0;
4192
+ let done = false;
4193
+ const queue = [];
4194
+ const tryEnqueue = () => {
4195
+ while (!done && queue.length < depth && issued < MAX_PAGES) {
4196
+ const limit = issued === 0 && firstPageSize !== void 0 ? firstPageSize : pageSize;
4197
+ const promise = fetchPage(nextOffsetToIssue, limit);
4198
+ promise.catch(() => {
4199
+ });
4200
+ queue.push({ limit, promise });
4201
+ nextOffsetToIssue += limit;
4202
+ issued++;
4203
+ }
4204
+ };
4205
+ tryEnqueue();
4206
+ while (queue.length > 0 && !signal?.aborted) {
4207
+ const { limit, promise } = queue.shift();
4208
+ const page = await promise;
4180
4209
  if (signal?.aborted) return;
4181
- const page = data ?? [];
4210
+ const isShort = page.length < limit;
4211
+ if (isShort) done = true;
4212
+ tryEnqueue();
4182
4213
  if (page.length > 0) {
4183
4214
  yield page;
4184
- offset += page.length;
4185
- pagesFetched++;
4215
+ pagesYielded++;
4186
4216
  }
4187
- if (page.length < pageSize) return;
4217
+ if (isShort) return;
4218
+ if (pagesYielded >= MAX_PAGES) break;
4188
4219
  }
4189
- if (pagesFetched >= MAX_PAGES) {
4220
+ if (!signal?.aborted && !done && pagesYielded >= MAX_PAGES) {
4190
4221
  console.warn(
4191
4222
  `[arbi-sdk] listPaginated hit MAX_PAGES (${MAX_PAGES}) \u2014 stopped. Workspace may have more than ${MAX_PAGES * pageSize} documents.`
4192
4223
  );