@pyxmate/memory 0.11.0 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,24 +11,31 @@ var MemoryServerError = class extends Error {
11
11
  return this.status === 404;
12
12
  }
13
13
  };
14
+ var DEFAULT_REQUEST_TIMEOUT_MS = 3e4;
14
15
  var MemoryClient = class {
15
16
  baseUrl;
16
17
  _authHeaders;
18
+ _requestTimeoutMs;
17
19
  constructor(memoryUrl, apiKeyOrOptions) {
18
20
  this.baseUrl = memoryUrl.replace(/\/$/, "");
19
21
  let apiKey;
20
22
  let defaultHeaders = {};
23
+ let requestTimeoutMs = DEFAULT_REQUEST_TIMEOUT_MS;
21
24
  if (typeof apiKeyOrOptions === "string") {
22
25
  apiKey = apiKeyOrOptions;
23
26
  } else if (apiKeyOrOptions) {
24
27
  apiKey = apiKeyOrOptions.apiKey;
25
28
  defaultHeaders = apiKeyOrOptions.defaultHeaders ?? {};
29
+ if (apiKeyOrOptions.requestTimeoutMs !== void 0) {
30
+ requestTimeoutMs = apiKeyOrOptions.requestTimeoutMs;
31
+ }
26
32
  }
27
33
  const trimmed = apiKey?.trim();
28
34
  this._authHeaders = {
29
35
  ...trimmed ? { Authorization: `Bearer ${trimmed}` } : {},
30
36
  ...defaultHeaders
31
37
  };
38
+ this._requestTimeoutMs = requestTimeoutMs;
32
39
  }
33
40
  /** Encode a path segment to prevent URL injection */
34
41
  encodePathSegment(segment) {
@@ -335,16 +342,44 @@ var MemoryClient = class {
335
342
  return result.entries;
336
343
  }
337
344
  async fetchApi(path, options) {
338
- const res = await fetch(`${this.baseUrl}${path}`, {
339
- ...options,
340
- headers: {
341
- "Content-Type": "application/json",
342
- ...options?.headers,
343
- ...this._authHeaders
344
- }
345
- });
345
+ const signal = options?.signal ?? AbortSignal.timeout(this._requestTimeoutMs);
346
+ let res;
347
+ try {
348
+ res = await fetch(`${this.baseUrl}${path}`, {
349
+ ...options,
350
+ headers: {
351
+ "Content-Type": "application/json",
352
+ ...options?.headers,
353
+ ...this._authHeaders
354
+ },
355
+ signal
356
+ });
357
+ } catch (err) {
358
+ throw this.translateFetchError(err, path);
359
+ }
346
360
  return this.parseApiResponse(res);
347
361
  }
362
+ /**
363
+ * Map fetch-layer rejections into a typed `MemoryServerError` so callers
364
+ * can react uniformly. AbortSignal.timeout fires a `TimeoutError`; the
365
+ * caller's signal generally fires an `AbortError`. Anything else (DNS,
366
+ * TCP reset, TLS) becomes a wrapped error with status 0.
367
+ */
368
+ translateFetchError(err, path) {
369
+ if (err instanceof Error) {
370
+ if (err.name === "TimeoutError") {
371
+ return new MemoryServerError(
372
+ `Memory server request timed out after ${this._requestTimeoutMs}ms (${path})`,
373
+ 504
374
+ );
375
+ }
376
+ if (err.name === "AbortError") {
377
+ return new MemoryServerError(`Memory server request aborted (${path})`, 499);
378
+ }
379
+ return new MemoryServerError(`Memory server request failed: ${err.message} (${path})`, 0);
380
+ }
381
+ return new MemoryServerError(`Memory server request failed: ${String(err)} (${path})`, 0);
382
+ }
348
383
  /** Parse and validate a JSON API response, throwing MemoryServerError on any failure. */
349
384
  async parseApiResponse(res) {
350
385
  let body;
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  MemoryClient
3
- } from "./chunk-5IERAVVW.mjs";
3
+ } from "./chunk-4YIKI2BA.mjs";
4
4
 
5
5
  // ../dashboard/src/aggregations/consolidation-analytics.ts
6
6
  function analyzeConsolidationLog(entries) {
@@ -11,8 +11,8 @@ import {
11
11
  toGraphologyFormat,
12
12
  transformGraphData,
13
13
  unreachableHealth
14
- } from "./chunk-DGVOXKYV.mjs";
15
- import "./chunk-5IERAVVW.mjs";
14
+ } from "./chunk-DZZHJ66P.mjs";
15
+ import "./chunk-4YIKI2BA.mjs";
16
16
  export {
17
17
  DashboardClient,
18
18
  Poller,
package/dist/index.d.ts CHANGED
@@ -128,10 +128,26 @@ interface MemoryClientOptions {
128
128
  apiKey?: string;
129
129
  /** Additional headers to send with every request (e.g., X-Caller-Access-Level). */
130
130
  defaultHeaders?: Record<string, string>;
131
+ /**
132
+ * Default per-request timeout in milliseconds. Without this, a wedged
133
+ * memory server (e.g. event-loop blocked by inference) makes every
134
+ * caller hang forever — that was the Korens demo wedge in 2026-04 where
135
+ * a 161-second pyx-memory stall propagated through the runtime to the
136
+ * browser. Defaults to 30 s, which is high enough that normal
137
+ * `/search` and `/stats` requests never hit it but low enough that a
138
+ * stuck server fails loudly.
139
+ *
140
+ * Only applied when the caller does NOT pass their own `signal` via
141
+ * RequestInit. Long-running operations (large `consolidate`, `reindex`,
142
+ * file ingest with enrichment) should pass their own AbortSignal —
143
+ * that signal fully replaces the default ceiling.
144
+ */
145
+ requestTimeoutMs?: number;
131
146
  }
132
147
  declare class MemoryClient implements ExtendedMemoryInterface {
133
148
  protected baseUrl: string;
134
149
  private readonly _authHeaders;
150
+ private readonly _requestTimeoutMs;
135
151
  constructor(memoryUrl: string, apiKeyOrOptions?: string | MemoryClientOptions);
136
152
  /** Encode a path segment to prevent URL injection */
137
153
  private encodePathSegment;
@@ -191,6 +207,13 @@ declare class MemoryClient implements ExtendedMemoryInterface {
191
207
  queryAsOf(asOfDate: string, filters?: TemporalQueryFilters): Promise<MemoryEntry$1[]>;
192
208
  queryByEventTime(startTime: string, endTime: string, filters?: TemporalQueryFilters): Promise<MemoryEntry$1[]>;
193
209
  protected fetchApi<T>(path: string, options?: RequestInit): Promise<T>;
210
+ /**
211
+ * Map fetch-layer rejections into a typed `MemoryServerError` so callers
212
+ * can react uniformly. AbortSignal.timeout fires a `TimeoutError`; the
213
+ * caller's signal generally fires an `AbortError`. Anything else (DNS,
214
+ * TCP reset, TLS) becomes a wrapped error with status 0.
215
+ */
216
+ private translateFetchError;
194
217
  /** Parse and validate a JSON API response, throwing MemoryServerError on any failure. */
195
218
  private parseApiResponse;
196
219
  }
package/dist/index.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  MemoryClient,
3
3
  MemoryServerError
4
- } from "./chunk-5IERAVVW.mjs";
4
+ } from "./chunk-4YIKI2BA.mjs";
5
5
 
6
6
  // ../shared/src/constants/defaults.ts
7
7
  var DEFAULTS = {
package/dist/react.mjs CHANGED
@@ -11,8 +11,8 @@ import {
11
11
  toGraphologyFormat,
12
12
  transformGraphData,
13
13
  unreachableHealth
14
- } from "./chunk-DGVOXKYV.mjs";
15
- import "./chunk-5IERAVVW.mjs";
14
+ } from "./chunk-DZZHJ66P.mjs";
15
+ import "./chunk-4YIKI2BA.mjs";
16
16
 
17
17
  // ../dashboard/src/hooks/use-consolidation-log.ts
18
18
  import { useCallback as useCallback2, useMemo } from "react";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pyxmate/memory",
3
- "version": "0.11.0",
3
+ "version": "0.12.2",
4
4
  "type": "module",
5
5
  "description": "SDK for pyx-memory — Memory as a Service for AI agents",
6
6
  "license": "MIT",
@@ -93,7 +93,16 @@ curl -s -X POST {{ENDPOINT}}/api/memory/ingest/file \
93
93
 
94
94
  Supported formats: txt, md, csv, tsv, log, pdf, docx, xlsx, pptx, json, jsonl, html, htm, png, jpg, jpeg, webp, gif, bmp, tiff, svg (100MB limit).
95
95
 
96
- Ingestion streams from disk — parsers read row-by-row (csv/xlsx), line-by-line (txt/jsonl), page-by-page (pdf), or slide-by-slide (pptx) so peak memory is bounded regardless of file size. The only exception is `.docx`, which is hard-capped at 10MB because the underlying library (mammoth) loads the whole document into memory; above 10MB the server returns an error asking you to pre-extract the text upstream and re-upload as `.txt` or `.md`.
96
+ Ingestion memory profile, by format:
97
+
98
+ - **Plain-text + structured-text** (`csv`, `tsv`, `txt`, `log`, `json`, `jsonl`, `html`, `htm`, `md`): truly streaming — peak memory ≈ a few MB regardless of file size, up to the 100 MB file cap.
99
+ - **`pdf`**: streaming via poppler's `pdftotext`, the primary path — peak memory ≈ a few MB. The `pdf-parse` fallback (used when poppler is absent) loads the whole file into memory; install `poppler-utils` to get streaming.
100
+ - **`xlsx`**: row-by-row streaming via `ExcelJS.stream.xlsx.WorkbookReader`, but **shared strings are cached for the whole workbook** (`sharedStrings: 'cache'`). Peak memory grows with shared-string count, not just file size — workbooks with dense, repeated cell strings can use significantly more memory than the file cap suggests.
101
+ - **`pptx`**: the full ZIP is decompressed in memory (same shape as docx); only one slide's XML is decoded at a time. For a typical 30 MB presentation, peak memory is ~90 MB. Capped at 100 MB file / 200 MB decompressed.
102
+ - **`docx`**: hard-capped at 10 MB. The underlying library (mammoth) has no streaming API and loads the whole document into memory; above 10 MB the server returns a `MemoryError` asking you to pre-extract the text upstream and re-upload as `.txt` or `.md`.
103
+ - **Images** (`png`, `jpg`, `jpeg`, `webp`, `gif`, `bmp`, `tiff`, `svg`): the file is held in memory once for embedding/storage; peak memory scales with file size, not with content complexity.
104
+
105
+ If you need deterministic memory or timing for production UX (e.g. RAG over large user-supplied workbooks), prefer pre-extracting `.pptx` and large `.xlsx` upstream and uploading the text as `.txt`. See `patterns/file-uploads.md` for the consumer-side pattern.
97
106
 
98
107
  **Images require a `description`** — this is how the content gets embedded and becomes searchable. Without it, the image is stored but not findable. Use your vision capabilities to generate the description when the user doesn't provide one.
99
108
 
@@ -0,0 +1,78 @@
1
+ # Pattern: File Uploads
2
+
3
+ This is consumer-side guidance: how to decide whether to forward a file
4
+ straight to `ingestFile()` or to pre-extract its text upstream first.
5
+
6
+ ## TL;DR
7
+
8
+ | Format | Default action |
9
+ |---|---|
10
+ | `txt`, `md`, `csv`, `tsv`, `log`, `json`, `jsonl`, `html`, `htm` | Forward raw — pyx-memory streams. |
11
+ | `pdf` | Forward raw — pyx-memory streams via poppler. Install `poppler-utils` on the server image. |
12
+ | Images (`png`, `jpg`, `jpeg`, `webp`, `gif`, `bmp`, `tiff`, `svg`) | Forward raw with a `description` (use vision capability). |
13
+ | `docx` ≤ 10 MB | Forward raw. |
14
+ | `docx` > 10 MB | **Pre-extract upstream** as `.txt` or `.md`. The server returns a `MemoryError` if you don't. |
15
+ | `xlsx` (large or shared-string-heavy) | **Pre-extract upstream** and upload the text as `<name>.xlsx.txt` for deterministic UX. |
16
+ | `pptx` (production UX) | **Pre-extract upstream** and upload the text as `<name>.pptx.txt` for deterministic UX. |
17
+
18
+ "Production UX" here means: you can't afford a single hung upload to wedge
19
+ the user-facing layer for 30+ seconds, you need actionable error messages
20
+ on every failure, and you have your own copy of the original file
21
+ (separate from pyx-memory's internal storage).
22
+
23
+ ## Why pre-extract pptx and large xlsx
24
+
25
+ The server's pptx parser decompresses the full ZIP in memory (~3× file
26
+ size peak). The xlsx parser streams rows but caches shared strings for
27
+ the entire workbook (`ExcelJS.WorkbookReader { sharedStrings: 'cache' }`).
28
+ Both are bounded by the 100 MB file / 200 MB decompressed caps, but
29
+ "bounded" is not "constant" — pathological files (huge shared-string
30
+ tables, dense cell formulas, embedded media) can push peak memory and
31
+ parse time well past what naive callers expect.
32
+
33
+ If you control the upload boundary (e.g. you operate a runtime/proxy
34
+ service that fronts pyx-memory), upstream pre-extraction lets you:
35
+
36
+ 1. **Catch parse failures at your boundary**, where you can return an
37
+ actionable error to the user (`"Excel formula evaluation failed at
38
+ sheet 'Q3 Revenue', row 412"`) instead of a generic upstream 5xx.
39
+ 2. **Bound the wire payload to pyx-memory** — text/plain only — so the
40
+ memory server's parser is never the bottleneck.
41
+ 3. **Keep the original binary in your own storage**, so users can still
42
+ download the file. pyx-memory's catalog only holds the indexed text.
43
+
44
+ ## Reference implementation
45
+
46
+ [ai-rag-hub](https://github.com/fysoul17/one-query-v1) (a consumer of
47
+ pyx-memory) implements this pattern in its runtime:
48
+
49
+ - `packages/server/src/text-extractors.ts` — local extractors for `pptx`
50
+ and `xlsx`, both wrapped in the same OOXML safety envelope (zip-bomb
51
+ defense, path-traversal check, macro reject, decompressed-size cap,
52
+ char limit).
53
+ - `packages/server/src/routes/memory.ts` — `prepareFileForIngest`
54
+ dispatches via `getTextExtractor(mimeType)`; matched formats are
55
+ re-uploaded as `<original>.txt` with `text/plain`. Catalog metadata
56
+ flags `downloadableFromMemory: false` so the consumer's own
57
+ `/api/team/documents/[id]/download` route serves the original
58
+ binary instead.
59
+
60
+ ## When NOT to pre-extract
61
+
62
+ Skip pre-extraction for single-tenant lab usage, internal tools, batch jobs where a 30-second
63
+ parse latency is acceptable, or any case where you don't have your own
64
+ copy of the file and need pyx-memory's `GET /api/memory/files/download/:filename`
65
+ to return the original binary. In those cases, the native pyx-memory
66
+ parsers are exactly what you want.
67
+
68
+ ## What about other formats
69
+
70
+ - **HTML**: pyx-memory strips `<script>` and `<style>` during parse
71
+ (`parsers/html.ts`). No upstream sanitizer needed for indexing.
72
+ - **PDF with images**: use the SDK's two-phase enrichment via
73
+ `EnrichmentCallbacks` — see `reference/sdk-guide.md`. Don't pre-extract
74
+ the PDF as text and lose image enrichment.
75
+ - **SVG**: currently classified as an image but pyx-memory's image
76
+ parser only stores it as a placeholder; if you need SVG text indexed,
77
+ pre-extract the `<text>` and `<desc>` content yourself or convert to
78
+ raster + describeImage.
@@ -71,7 +71,15 @@ const client = new MemoryClient('http://localhost:7822', process.env.MEMORY_API_
71
71
 
72
72
  **Supported formats**: `.txt`, `.md`, `.csv`, `.tsv`, `.log`, `.pdf`, `.docx`, `.xlsx`, `.pptx`, `.json`, `.jsonl`, `.html`, `.htm`, `.png`, `.jpg`, `.jpeg`, `.webp`, `.gif`, `.bmp`, `.tiff`, `.svg`
73
73
 
74
- **Memory behavior**: All text parsers (csv, tsv, txt, log, pdf, json, jsonl, html, xlsx, pptx) stream from disk during ingestion — peak server memory is bounded to a few MB regardless of file size, up to the 100MB file limit. `.docx` is the exception: mammoth (the parser) has no streaming API, so `.docx` is hard-capped at 10MB on the server. Above 10MB the server returns a `MemoryError` asking you to pre-extract the text upstream and re-upload as `.txt` or `.md`.
74
+ **Memory behavior, by format**:
75
+
76
+ - **Plain-text + structured-text** (`csv`, `tsv`, `txt`, `log`, `json`, `jsonl`, `html`, `htm`, `md`) — fully streaming; peak server memory ≈ a few MB regardless of file size up to the 100 MB cap.
77
+ - **`pdf`** — streaming via `pdftotext` (poppler-utils); the fallback path (`pdf-parse`) buffers the whole file, so install poppler-utils on the server for streaming behavior.
78
+ - **`xlsx`** — `ExcelJS.stream.xlsx.WorkbookReader` yields rows as it parses, but the shared-string table is cached for the whole workbook. Peak memory grows with shared-string count; dense workbooks can exceed "a few MB" significantly.
79
+ - **`pptx`** — the full ZIP is decompressed in memory (~3× file size peak for typical decks). One slide's XML is decoded at a time. Bounded by the 100 MB file cap and a 200 MB decompressed cap.
80
+ - **`docx`** — hard-capped at 10 MB. mammoth has no streaming API; above 10 MB the server returns a `MemoryError` asking you to pre-extract the text upstream and re-upload as `.txt` or `.md`.
81
+
82
+ For deterministic peak memory or timing (production UX), consumers should pre-extract `.pptx` and large `.xlsx` upstream and upload the text as `.txt` — see `patterns/file-uploads.md`.
75
83
 
76
84
  **What happens on upload**:
77
85
  1. Original file is saved to `{DATA_DIR}/files/{filename}` (persistent across restarts)