@allus-fyi/company-data 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +706 -0
- package/dist/cjs/buffer.js +352 -0
- package/dist/cjs/client.js +396 -0
- package/dist/cjs/config.js +241 -0
- package/dist/cjs/crypto.js +288 -0
- package/dist/cjs/errors.js +96 -0
- package/dist/cjs/http.js +272 -0
- package/dist/cjs/index.js +74 -0
- package/dist/cjs/models.js +300 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/pump.js +279 -0
- package/dist/cjs/webhooks.js +335 -0
- package/dist/cjs/xml.js +257 -0
- package/dist/esm/buffer.js +348 -0
- package/dist/esm/client.js +392 -0
- package/dist/esm/config.js +237 -0
- package/dist/esm/crypto.js +281 -0
- package/dist/esm/errors.js +86 -0
- package/dist/esm/http.js +267 -0
- package/dist/esm/index.js +37 -0
- package/dist/esm/models.js +292 -0
- package/dist/esm/package.json +1 -0
- package/dist/esm/pump.js +275 -0
- package/dist/esm/webhooks.js +329 -0
- package/dist/esm/xml.js +252 -0
- package/dist/types/buffer.d.ts +109 -0
- package/dist/types/client.d.ts +150 -0
- package/dist/types/config.d.ts +86 -0
- package/dist/types/crypto.d.ts +125 -0
- package/dist/types/errors.d.ts +73 -0
- package/dist/types/http.d.ts +80 -0
- package/dist/types/index.d.ts +36 -0
- package/dist/types/models.d.ts +154 -0
- package/dist/types/pump.d.ts +118 -0
- package/dist/types/webhooks.d.ts +99 -0
- package/dist/types/xml.d.ts +42 -0
- package/docs/config.md +93 -0
- package/docs/errors.md +87 -0
- package/docs/model.md +141 -0
- package/docs/pump.md +130 -0
- package/docs/webhooks.md +140 -0
- package/package.json +54 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Webhook receiver helpers.
|
|
3
|
+
*
|
|
4
|
+
* The lower-latency push alternative to polling the changes feed. The platform
|
|
5
|
+
* delivers each change event to the company's configured webhook URL with:
|
|
6
|
+
*
|
|
7
|
+
* - `X-Allus-Webhook-Id` — which webhook this is (selects the HMAC secret).
|
|
8
|
+
* - `X-Allus-Signature` — `HMAC-SHA256(rawBody, secret)` as lowercase hex.
|
|
9
|
+
* - the body — the same slug-keyed {@link Change} shape as the pull feed,
|
|
10
|
+
* JSON or XML. If the webhook has `encrypt_payload` on, the body is REPLACED
|
|
11
|
+
* by a `{"_enc":1,...}` envelope encrypted to the company **account** key (and
|
|
12
|
+
* the HMAC is then over that envelope — it is the final body that was sent).
|
|
13
|
+
*
|
|
14
|
+
* Webhook delivery auth is per-webhook and may be any of five methods (hmac,
|
|
15
|
+
* bearer, basic, custom header, or none); {@link verifyWebhook} dispatches on the
|
|
16
|
+
* single method configured in {@link Config} ({@link Config.webhookAuthMethod}).
|
|
17
|
+
*
|
|
18
|
+
* All secrets/keys come from {@link Config}. **These helpers take NO key or secret
|
|
19
|
+
* arguments** — only the raw body, the headers, the config, and (for value typing)
|
|
20
|
+
* the same decrypt/type closures the {@link Client} already holds.
|
|
21
|
+
*
|
|
22
|
+
* The account-key envelope is webhook-specific: the platform wraps it with
|
|
23
|
+
* OpenSSL's DEFAULT OAEP padding (MGF1-**SHA1**), NOT the SHA-256 wrapper used for
|
|
24
|
+
* person field values. So unwrapping the envelope uses an OAEP-SHA1 path here
|
|
25
|
+
* (Node's default `oaepHash`, pinned explicitly to `'sha1'` for clarity), while the
|
|
26
|
+
* inner field `value` (still a service-key wrapper) decrypts with the normal
|
|
27
|
+
* SHA-256 {@link decrypt}. HMAC is always computed over the raw bytes, never the
|
|
28
|
+
* parsed tree.
|
|
29
|
+
*/
|
|
30
|
+
import { createDecipheriv, createHmac, createPrivateKey, privateDecrypt, timingSafeEqual, constants as cryptoConstants, } from 'node:crypto';
|
|
31
|
+
import { readFileSync } from 'node:fs';
|
|
32
|
+
import { GCM_IV_LEN, GCM_TAG_LEN } from './crypto.js';
|
|
33
|
+
import { WebhookError } from './errors.js';
|
|
34
|
+
import { Change } from './models.js';
|
|
35
|
+
import { parseXml } from './xml.js';
|
|
36
|
+
const HDR_WEBHOOK_ID = 'x-allus-webhook-id';
|
|
37
|
+
const HDR_SIGNATURE = 'x-allus-signature';
|
|
38
|
+
const ENC_MARKER = '_enc';
|
|
39
|
+
// ── header helpers ─────────────────────────────────────────────────────────────
|
|
40
|
+
/**
 * Case-insensitive header lookup (frameworks normalize casing inconsistently).
 *
 * Works on a plain header object and also on containers that keep their entries
 * internally and expose them through `entries()` — a Fetch `Headers` instance or
 * a `Map`. Those have no own enumerable properties, so `Object.entries()` alone
 * would see nothing and every lookup would silently miss.
 *
 * @param headers Header container; may be null/undefined.
 * @param name Header name to look up, in any casing.
 * @returns The value as a string (the first element when the value is an array),
 *   or `null` when the header is absent, null/undefined, or an empty array.
 */
function header(headers, name) {
    if (!headers)
        return null;
    const target = name.toLowerCase();
    // Arrays also have entries(), but their keys are indices — keep treating them
    // like any other object so behavior for that input is unchanged.
    const pairs = typeof headers.entries === 'function' && !Array.isArray(headers)
        ? headers.entries()
        : Object.entries(headers);
    for (const [key, value] of pairs) {
        if (String(key).toLowerCase() !== target)
            continue;
        // The first matching key decides the result, even when its value is empty.
        if (Array.isArray(value))
            return value.length > 0 ? String(value[0]) : null;
        return value != null ? String(value) : null;
    }
    return null;
}
|
|
54
|
+
/**
 * Coerce a webhook body to a `Buffer` of its raw bytes.
 *
 * A string is encoded as UTF-8, a `Buffer` is returned as-is, and any other
 * `Uint8Array` is copied into a new `Buffer`.
 *
 * @param rawBody The body as received: Buffer, Uint8Array, or string.
 * @returns The body bytes.
 * @throws {WebhookError} When `rawBody` is none of the accepted types.
 */
function asBytes(rawBody) {
    if (typeof rawBody === 'string') {
        return Buffer.from(rawBody, 'utf8');
    }
    // Check Buffer before Uint8Array: a Buffer is also a Uint8Array, and it must be
    // handed back untouched rather than copied.
    if (Buffer.isBuffer(rawBody)) {
        return rawBody;
    }
    if (rawBody instanceof Uint8Array) {
        return Buffer.from(rawBody);
    }
    throw new WebhookError('webhook rawBody must be a Buffer, Uint8Array, or string');
}
|
|
63
|
+
// ── verify ─────────────────────────────────────────────────────────────────────
|
|
64
|
+
/**
 * Verify a webhook against the SINGLE configured auth method.
 *
 * The method comes from `config.webhookAuthMethod()`:
 *
 * - `null`   — no method configured: returns `false`.
 * - `none`   — explicit opt-out: returns `true`.
 * - `bearer` — `Authorization` must equal `Bearer <config.webhookBearerToken>`.
 * - `basic`  — `Authorization` must equal `Basic <base64(username:password)>`
 *              built from `config.webhookBasic`.
 * - `header` — the header named `config.webhookHeader.name` must equal
 *              `config.webhookHeader.value`.
 * - anything else is treated as `hmac` — `HMAC-SHA256(rawBody, secret)` as hex,
 *   with the secret chosen by `config.webhookSecret(<X-Allus-Webhook-Id>)`, must
 *   equal `X-Allus-Signature` (trimmed, lower-cased).
 *
 * All credential comparisons go through the constant-time string compare. A
 * missing or mismatched credential returns `false`; so does a selected method
 * whose credential is absent from the config (fail closed), instead of throwing a
 * `TypeError` or matching an empty token.
 *
 * @param rawBody Raw request body (Buffer, Uint8Array, or string); only read for `hmac`.
 * @param headers Request headers; looked up case-insensitively.
 * @param config Config exposing `webhookAuthMethod()` and the per-method credentials.
 * @returns `true` when the request carries the expected credential.
 * @throws {WebhookError} Only for `hmac` when `rawBody` has an unsupported type.
 */
export function verifyWebhook(rawBody, headers, config) {
    const method = config.webhookAuthMethod();
    if (method === null)
        return false;
    if (method === 'none')
        return true;
    if (method === 'bearer') {
        const token = config.webhookBearerToken;
        // Fail closed: without a token the expected value would be just "Bearer ".
        if (token == null || token === '')
            return false;
        const got = header(headers, 'authorization');
        if (got === null)
            return false;
        return constantTimeStringEqual(got, 'Bearer ' + token);
    }
    if (method === 'basic') {
        const basic = config.webhookBasic;
        // Fail closed rather than dereferencing a missing credentials object.
        if (!basic)
            return false;
        const got = header(headers, 'authorization');
        if (got === null)
            return false;
        const creds = `${basic.username}:${basic.password}`;
        const token = Buffer.from(creds, 'utf8').toString('base64');
        return constantTimeStringEqual(got, 'Basic ' + token);
    }
    if (method === 'header') {
        const hdr = config.webhookHeader;
        // Fail closed when the custom header name or value is not configured.
        if (!hdr || !hdr.name || hdr.value == null)
            return false;
        const got = header(headers, hdr.name);
        if (got === null)
            return false;
        return constantTimeStringEqual(got, hdr.value);
    }
    // method === 'hmac'
    const body = asBytes(rawBody);
    const signature = header(headers, HDR_SIGNATURE);
    if (!signature)
        return false;
    const webhookId = header(headers, HDR_WEBHOOK_ID);
    const secret = config.webhookSecret(webhookId);
    if (!secret)
        return false;
    // The digest is computed over the exact raw bytes, never a re-serialized body.
    const expected = createHmac('sha256', secret).update(body).digest('hex');
    return constantTimeStringEqual(expected, signature.trim().toLowerCase());
}
|
|
122
|
+
/**
 * Compare two strings by their UTF-8 bytes without short-circuiting.
 *
 * `timingSafeEqual` requires equal-length inputs, so on a length mismatch the
 * first buffer is compared against itself (a comparison still runs) and the
 * result is forced to `false`.
 *
 * @param a First string.
 * @param b Second string.
 * @returns `true` only when both strings encode to identical bytes.
 */
function constantTimeStringEqual(a, b) {
    const left = Buffer.from(a, 'utf8');
    const right = Buffer.from(b, 'utf8');
    const sameLength = left.length === right.length;
    const bytesMatch = timingSafeEqual(left, sameLength ? right : left);
    return sameLength && bytesMatch;
}
|
|
136
|
+
// ── parse ──────────────────────────────────────────────────────────────────────
|
|
137
|
+
/**
 * Turn a webhook body into a typed {@link Change} — WITHOUT verifying it.
 *
 * Use {@link handleWebhook} when the credential must be checked first. The body
 * may be JSON, XML, or an `encrypt_payload` envelope (`{"_enc":1,...}`); decoding
 * and envelope unwrapping are delegated to the payload decoder, and the decoded
 * object is handed to `Change.fromApi` together with the `typeForSlug`,
 * `decryptValue` and `binaryFetch` closures from `deps`.
 *
 * @param rawBody Raw request body (Buffer, Uint8Array, or string).
 * @param headers Request headers; accepted for signature symmetry with
 *   {@link verifyWebhook} but not consulted — the envelope is self-describing.
 * @param config Config used to load the account key when one is not supplied.
 * @param deps Closures for value typing plus an optional pre-loaded
 *   `accountKey`; when `accountKey` is undefined the key is loaded from config
 *   on demand.
 * @returns The typed change.
 * @throws {WebhookError} When the body cannot be decoded or does not decode to a
 *   non-array object.
 */
export function parseWebhook(rawBody, headers, config, deps) {
    // Deliberately unused: decoding does not depend on any header.
    void headers;
    const payload = decodePayload(asBytes(rawBody), config, deps.accountKey);
    const isObject = payload !== null && typeof payload === 'object' && !Array.isArray(payload);
    if (!isObject) {
        throw new WebhookError('webhook payload is not a JSON/XML object');
    }
    const { typeForSlug, decryptValue, binaryFetch } = deps;
    return Change.fromApi(payload, { typeForSlug, decryptValue, binaryFetch });
}
|
|
169
|
+
/**
 * One-call webhook entry point: verify first, then parse.
 *
 * @param rawBody Raw request body (Buffer, Uint8Array, or string).
 * @param headers Request headers.
 * @param config Config holding the webhook auth method and credentials.
 * @param deps Same dependencies as {@link parseWebhook}, including the optional
 *   pre-loaded `accountKey` used for an `encrypt_payload` envelope.
 * @returns The typed {@link Change} when verification succeeds.
 * @throws {WebhookError} When {@link verifyWebhook} returns `false`, or when
 *   parsing the body fails.
 */
export function handleWebhook(rawBody, headers, config, deps) {
    const verified = verifyWebhook(rawBody, headers, config);
    if (verified) {
        return parseWebhook(rawBody, headers, config, deps);
    }
    throw new WebhookError('webhook signature verification failed');
}
|
|
183
|
+
// ── payload decoding (JSON / XML / encrypt_payload envelope) ────────────────────
|
|
184
|
+
/**
 * Decode the raw body bytes into a JS value.
 *
 * The body is read as UTF-8 and trimmed, then dispatched on its first character:
 * `<` is parsed as XML, `{` as JSON, anything else is rejected. A JSON object
 * carrying `_enc === 1` together with `k`, `iv` and `d` is treated as an
 * `encrypt_payload` envelope: it is unwrapped with the account key and the inner
 * text is decoded again (JSON or XML). Any other JSON value is returned as parsed.
 *
 * @param body Body bytes.
 * @param config Config used to load the account key when none is supplied.
 * @param accountKey Optional pre-loaded account private key.
 * @returns The decoded payload.
 * @throws {WebhookError} On invalid JSON/XML or a body that is neither.
 */
function decodePayload(body, config, accountKey) {
    const text = body.toString('utf8').trim();
    const lead = text.charAt(0);
    if (lead === '<') {
        // The platform's <response> XML serialization.
        try {
            return parseXml(text);
        }
        catch (exc) {
            throw new WebhookError(`webhook body is not valid XML: ${exc.message}`);
        }
    }
    if (lead !== '{') {
        throw new WebhookError('webhook body is neither JSON nor XML');
    }
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch (exc) {
        throw new WebhookError(`webhook body is not valid JSON: ${exc.message}`);
    }
    const isEnvelope = parsed !== null &&
        typeof parsed === 'object' &&
        !Array.isArray(parsed) &&
        parsed[ENC_MARKER] === 1 &&
        'k' in parsed &&
        'iv' in parsed &&
        'd' in parsed;
    if (!isEnvelope) {
        return parsed;
    }
    // The envelope itself is always JSON; its inner payload may be JSON or XML.
    return decodeInner(unwrapAccountEnvelope(parsed, config, accountKey));
}
|
|
217
|
+
/**
 * Decode the plaintext recovered from an `encrypt_payload` envelope.
 *
 * The trimmed text is parsed as XML when it starts with `<`, otherwise as JSON.
 *
 * @param innerText Decrypted envelope contents.
 * @returns The decoded payload.
 * @throws {WebhookError} When the text is not valid in the detected format.
 */
function decodeInner(innerText) {
    const payload = innerText.trim();
    if (!payload.startsWith('<')) {
        try {
            return JSON.parse(payload);
        }
        catch (exc) {
            throw new WebhookError(`decrypted webhook payload is not valid JSON: ${exc.message}`);
        }
    }
    try {
        return parseXml(payload);
    }
    catch (exc) {
        throw new WebhookError(`decrypted webhook payload is not valid XML: ${exc.message}`);
    }
}
|
|
234
|
+
// ── account-key envelope unwrap (OAEP-SHA1 — webhook-specific) ───────────────────
|
|
235
|
+
/**
 * Read and parse the account private key named by `config.accountPrivateKey`.
 *
 * Intended to be called once and the resulting key object reused, so an
 * `encrypt_payload` webhook does not re-read and re-parse the PEM on every
 * request. The passphrase is `config.accountPassphrase`, defaulting to the empty
 * string when it is null/undefined.
 *
 * @param config Config providing `accountPrivateKey` (a PEM file path) and
 *   optionally `accountPassphrase`.
 * @returns The private key object, or `null` when no `accountPrivateKey` is set.
 * @throws {WebhookError} When the file cannot be read or the key cannot be
 *   parsed/decrypted.
 */
export function loadAccountKey(config) {
    const configured = Boolean(config.accountPrivateKey);
    if (!configured) {
        return null;
    }
    let pem;
    try {
        pem = readFileSync(config.accountPrivateKey);
    }
    catch (exc) {
        throw new WebhookError(`could not read accountPrivateKey PEM: ${config.accountPrivateKey}: ${exc.message}`);
    }
    const passphrase = config.accountPassphrase ?? '';
    try {
        const keyInput = { key: pem, passphrase };
        return createPrivateKey(keyInput);
    }
    catch (exc) {
        throw new WebhookError(`could not load account private key: ${exc.message}`);
    }
}
|
|
263
|
+
/**
 * Decrypt an `encrypt_payload` envelope with the account private key.
 *
 * Uses the supplied `accountKey` when present, otherwise loads the key from
 * config on demand.
 *
 * @param envelope The parsed `{"_enc":1,"k":…,"iv":…,"d":…}` object.
 * @param config Config used to load the key when `accountKey` is null/undefined.
 * @param accountKey Optional pre-loaded account private key.
 * @returns The decrypted inner payload text.
 * @throws {WebhookError} When no account key is available, or decryption fails.
 */
function unwrapAccountEnvelope(envelope, config, accountKey) {
    const privateKey = accountKey ?? loadAccountKey(config);
    if (privateKey == null) {
        throw new WebhookError('received an encrypt_payload webhook but no accountPrivateKey is configured');
    }
    return decryptOaepSha1(envelope, privateKey);
}
|
|
270
|
+
/**
 * Strictly decode one base64 envelope field.
 *
 * Node's decoder is lenient, so the decoded bytes are re-encoded and compared
 * with the input (whitespace removed, trailing `=` padding ignored on both
 * sides); any difference means the input was not clean base64.
 *
 * @param value The field value from the envelope.
 * @param name Field name, used only in error messages.
 * @returns The decoded bytes.
 * @throws {WebhookError} When `value` is not a string or does not round-trip.
 */
function b64(value, name) {
    if (typeof value !== 'string') {
        throw new WebhookError(`envelope field '${name}' must be a base64 string`);
    }
    const withoutPadding = (text) => text.replace(/=+$/, '');
    const decoded = Buffer.from(value, 'base64');
    const canonical = withoutPadding(decoded.toString('base64'));
    const supplied = withoutPadding(value.replace(/\s+/g, ''));
    if (canonical !== supplied) {
        throw new WebhookError(`envelope field '${name}' is not valid base64`);
    }
    return decoded;
}
|
|
281
|
+
/**
 * Open an account-key envelope: RSA-OAEP (hash pinned to SHA-1) unwrap of the
 * AES key, then AES-256-GCM decryption of the payload to a UTF-8 string.
 *
 * Envelope fields (all base64): `k` is the RSA-wrapped AES key, `iv` the GCM
 * nonce, and `d` the ciphertext with the GCM tag appended at the end. `oaepHash`
 * is set to `'sha1'` explicitly rather than relying on the runtime default.
 *
 * @param wrapper The envelope object with `k`, `iv` and `d`.
 * @param privateKey The account private key.
 * @returns The decrypted payload text.
 * @throws {WebhookError} On malformed fields, wrong IV/key sizes, a failed RSA
 *   unwrap, a GCM authentication failure, or plaintext that is not valid UTF-8.
 */
function decryptOaepSha1(wrapper, privateKey) {
    const wrappedKey = b64(wrapper.k, 'k');
    const iv = b64(wrapper.iv, 'iv');
    const sealed = b64(wrapper.d, 'd');
    if (iv.length !== GCM_IV_LEN) {
        throw new WebhookError(`envelope iv must be ${GCM_IV_LEN} bytes, got ${iv.length}`);
    }
    // Offset where the trailing GCM tag begins; negative means there is no room for one.
    const tagOffset = sealed.length - GCM_TAG_LEN;
    if (tagOffset < 0) {
        throw new WebhookError('envelope ciphertext too short to contain a GCM tag');
    }
    let aesKey;
    try {
        const rsaOptions = {
            key: privateKey,
            padding: cryptoConstants.RSA_PKCS1_OAEP_PADDING,
            oaepHash: 'sha1',
        };
        aesKey = privateDecrypt(rsaOptions, wrappedKey);
    }
    catch (exc) {
        throw new WebhookError(`account-key envelope RSA-OAEP unwrap failed (wrong account key?): ${exc.message}`);
    }
    if (aesKey.length !== 32) {
        throw new WebhookError(`unwrapped envelope AES key must be 32 bytes, got ${aesKey.length}`);
    }
    const tag = sealed.subarray(tagOffset);
    const ciphertext = sealed.subarray(0, tagOffset);
    let plaintext;
    try {
        const decipher = createDecipheriv('aes-256-gcm', aesKey, iv);
        decipher.setAuthTag(tag);
        const head = decipher.update(ciphertext);
        // final() is where the authentication tag is actually checked.
        const tail = decipher.final();
        plaintext = Buffer.concat([head, tail]);
    }
    catch {
        throw new WebhookError('account-key envelope AES-GCM tag mismatch');
    }
    // Decoding replaces invalid sequences, so a lossless re-encode proves the
    // plaintext was well-formed UTF-8.
    const text = plaintext.toString('utf8');
    const roundTrips = Buffer.from(text, 'utf8').equals(plaintext);
    if (!roundTrips) {
        throw new WebhookError('decrypted account-key envelope is not valid UTF-8');
    }
    return text;
}
|
package/dist/esm/xml.js
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal, XXE-safe XML parser for the platform's wire serialization.
|
|
3
|
+
*
|
|
4
|
+
* The company-data API can serve XML (`Accept: application/xml` / `format: "xml"`).
|
|
5
|
+
* The platform serializer renders:
|
|
6
|
+
*
|
|
7
|
+
* - a `<response>` document root;
|
|
8
|
+
* - a list (int keys) as repeated `<item>` children — so an element whose
|
|
9
|
+
* every child is `<item>` becomes an array;
|
|
10
|
+
* - an associative array as named child tags — an object;
|
|
11
|
+
* - scalars as element text (booleans were written as `"true"`/`"false"`).
|
|
12
|
+
*
|
|
13
|
+
* **XXE-safe by construction.** This is a hand-written recursive-descent parser
|
|
14
|
+
* (NOT a general XML library). It supports ONLY elements, text, comments, the XML
|
|
15
|
+
* declaration, CDATA, and the five built-in entities. It does NOT process a DOCTYPE
|
|
16
|
+
* / DTD, does NOT define or expand custom/general entities, and never resolves
|
|
17
|
+
* external entities or system identifiers — the classic XXE / billion-laughs
|
|
18
|
+
* vectors cannot occur because the machinery for them is simply absent. A DOCTYPE,
|
|
19
|
+
* a processing instruction other than the XML decl, or an unknown `&entity;`
|
|
20
|
+
* reference is rejected — entity expansion and external entity resolution don't
|
|
21
|
+
* exist here at all. HMAC verification is always computed over the raw bytes,
|
|
22
|
+
* never the parsed tree.
|
|
23
|
+
*
|
|
24
|
+
* This is intentionally small — JSON is the default wire format; XML is the opt-in
|
|
25
|
+
* alternative — and it only needs to invert the company-data payloads (dicts of
|
|
26
|
+
* lists of dicts of scalars).
|
|
27
|
+
*/
|
|
28
|
+
/** Raised for any input this parser refuses: malformed XML or a disallowed construct. */
export class XmlParseError extends Error {
    constructor(message) {
        super(message);
        // Without this the instance reports the inherited name "Error" in logs.
        this.name = 'XmlParseError';
    }
}
// The ONLY entities recognized — the five XML built-ins. No custom/general/external
// entity is ever defined or expanded (XXE-safe). The map has a null prototype so an
// inherited key such as `constructor` or `toString` can never pass for an entity.
const BUILTIN_ENTITIES = Object.assign(Object.create(null), {
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    apos: "'",
});
/**
 * Replace entity and character references in a text run.
 *
 * Recognized: the five built-in entities, decimal references (`&#65;`) and
 * lowercase-`x` hex references (`&#x41;`). Everything else of the form `&name;`
 * or `&#…;` is rejected rather than expanded or passed through:
 *
 * - unknown names, including names using `_`, `-`, `.` or `:`;
 * - numeric references with stray characters (`&#1F;`, `&#X41;`, `&#;`) — these
 *   are validated against a strict pattern instead of trusting `parseInt`, which
 *   would silently accept the leading digits;
 * - code points above U+10FFFF and UTF-16 surrogates (U+D800–U+DFFF), which
 *   would produce an ill-formed string.
 *
 * A bare `&` that is not part of such a reference is left untouched.
 *
 * @param s Raw text between tags (not CDATA).
 * @returns The text with references replaced.
 * @throws {XmlParseError} On an unsupported entity or invalid character reference.
 */
function decodeEntities(s) {
    return s.replace(/&(#[0-9A-Za-z]*|[A-Za-z_:][A-Za-z0-9_\-.:]*);/g, (_m, body) => {
        if (body[0] === '#') {
            const hex = /^#x([0-9a-fA-F]+)$/.exec(body);
            const dec = hex ? null : /^#([0-9]+)$/.exec(body);
            if (!hex && !dec) {
                throw new XmlParseError(`invalid numeric character reference &${body};`);
            }
            const code = hex ? parseInt(hex[1], 16) : parseInt(dec[1], 10);
            const isSurrogate = code >= 0xd800 && code <= 0xdfff;
            if (Number.isNaN(code) || code < 0 || code > 0x10ffff || isSurrogate) {
                throw new XmlParseError(`invalid numeric character reference &${body};`);
            }
            return String.fromCodePoint(code);
        }
        const replacement = BUILTIN_ENTITIES[body];
        if (replacement === undefined) {
            // A non-builtin entity reference — reject rather than expand. This is the
            // XXE / entity-expansion guard: we never define or look up custom entities.
            throw new XmlParseError(`unsupported XML entity &${body}; (custom/external entities are disabled)`);
        }
        return replacement;
    });
}
|
|
59
|
+
// Upper bound on element nesting. Without it, hostile deeply nested input
// overflows the call stack and surfaces as a RangeError instead of an XmlParseError.
const MAX_ELEMENT_DEPTH = 1000;
/**
 * Recursive-descent parser over a single XML string.
 *
 * Produces a tree of `{ tag, children, text }` nodes. Supports elements, text,
 * comments, CDATA, and an XML declaration in the prolog; attributes are skipped.
 * A DOCTYPE or any processing instruction is rejected.
 */
class Parser {
    constructor(text) {
        this.i = 0; // current read offset into `s`
        this.s = text;
    }
    /**
     * Parse the whole document and return the root node.
     * @throws {XmlParseError} On malformed input or content after the root element.
     */
    parse() {
        this.skipProlog();
        const root = this.parseElement();
        this.skipMisc();
        if (this.i < this.s.length) {
            throw new XmlParseError('trailing content after the document root element');
        }
        return root;
    }
    /** Advance past any whitespace at the current offset. */
    skipWhitespace() {
        while (this.i < this.s.length && /\s/.test(this.s[this.i]))
            this.i++;
    }
    /**
     * True when the current offset starts an XML declaration: `<?xml` followed by
     * whitespace or `?`. A plain prefix test would also accept other processing
     * instructions whose target merely begins with "xml" (e.g. `<?xml-stylesheet`).
     */
    isXmlDeclaration() {
        if (!this.s.startsWith('<?xml', this.i))
            return false;
        const next = this.s[this.i + 5];
        // `undefined` (input ends here) is let through so the caller reports it as
        // an unterminated declaration.
        return next === undefined || next === '?' || /\s/.test(next);
    }
    // Skip the XML declaration, comments, and whitespace BEFORE the root element.
    // A DOCTYPE is explicitly rejected (no DTD processing — XXE-safe).
    skipProlog() {
        for (;;) {
            this.skipWhitespace();
            if (this.isXmlDeclaration()) {
                this.skipUntil('?>');
                continue;
            }
            if (this.s.startsWith('<!--', this.i)) {
                this.skipUntil('-->');
                continue;
            }
            if (this.s.startsWith('<!DOCTYPE', this.i) || this.s.startsWith('<!doctype', this.i)) {
                throw new XmlParseError('DOCTYPE / DTD is not allowed (XXE-safe parser)');
            }
            if (this.s.startsWith('<?', this.i)) {
                throw new XmlParseError('processing instructions are not allowed');
            }
            return;
        }
    }
    // Skip comments + whitespace AFTER the root element (epilogue).
    skipMisc() {
        for (;;) {
            this.skipWhitespace();
            if (this.s.startsWith('<!--', this.i)) {
                this.skipUntil('-->');
                continue;
            }
            return;
        }
    }
    /**
     * Move the offset just past the next occurrence of `marker`.
     * @throws {XmlParseError} When `marker` does not occur in the rest of the input.
     */
    skipUntil(marker) {
        const idx = this.s.indexOf(marker, this.i);
        if (idx === -1)
            throw new XmlParseError(`unterminated '${marker}'`);
        this.i = idx + marker.length;
    }
    /**
     * Read a name (element or attribute) at the current offset.
     * @throws {XmlParseError} When no name character is present.
     */
    parseName() {
        const start = this.i;
        // XML name chars (a permissive but safe subset): letters, digits, _, -, ., :
        while (this.i < this.s.length && /[A-Za-z0-9_\-.:]/.test(this.s[this.i]))
            this.i++;
        if (this.i === start)
            throw new XmlParseError(`expected an element name at offset ${this.i}`);
        return this.s.slice(start, this.i);
    }
    // Skip attributes within a start tag (the platform serializer emits none, but be
    // tolerant). Attribute VALUES are read but never define entities.
    skipAttributes() {
        for (;;) {
            this.skipWhitespace();
            const c = this.s[this.i];
            // End of the attribute list: tag close, self-close, or end of input.
            if (c === '>' || c === '/' || c === undefined)
                return;
            // name
            this.parseName();
            this.skipWhitespace();
            if (this.s[this.i] !== '=')
                throw new XmlParseError('malformed attribute (expected =)');
            this.i++; // '='
            this.skipWhitespace();
            const quote = this.s[this.i];
            if (quote !== '"' && quote !== "'")
                throw new XmlParseError('attribute value must be quoted');
            this.i++;
            const end = this.s.indexOf(quote, this.i);
            if (end === -1)
                throw new XmlParseError('unterminated attribute value');
            this.i = end + 1;
        }
    }
    /**
     * Parse one element (start tag, content, matching end tag) at the current offset.
     *
     * @param depth Nesting level of this element; callers outside the class omit it.
     * @returns The node `{ tag, children, text }`, where `text` is the
     *   concatenation of the element's own text runs and CDATA sections.
     * @throws {XmlParseError} On malformed markup, a DOCTYPE, a processing
     *   instruction, or nesting deeper than `MAX_ELEMENT_DEPTH`.
     */
    parseElement(depth = 0) {
        if (depth > MAX_ELEMENT_DEPTH) {
            throw new XmlParseError(`element nesting exceeds ${MAX_ELEMENT_DEPTH} levels`);
        }
        if (this.s[this.i] !== '<')
            throw new XmlParseError(`expected '<' at offset ${this.i}`);
        this.i++; // '<'
        const tag = this.parseName();
        this.skipAttributes();
        if (this.s.startsWith('/>', this.i)) {
            this.i += 2; // self-closing
            return { tag, children: [], text: '' };
        }
        if (this.s[this.i] !== '>')
            throw new XmlParseError(`malformed start tag <${tag}>`);
        this.i++; // '>'
        const node = { tag, children: [], text: '' };
        const textParts = [];
        for (;;) {
            if (this.i >= this.s.length)
                throw new XmlParseError(`unterminated element <${tag}>`);
            if (this.s.startsWith('</', this.i)) {
                this.i += 2;
                const closeName = this.parseName();
                this.skipWhitespace();
                if (this.s[this.i] !== '>')
                    throw new XmlParseError(`malformed end tag </${closeName}>`);
                this.i++; // '>'
                if (closeName !== tag) {
                    throw new XmlParseError(`mismatched end tag: </${closeName}> closing <${tag}>`);
                }
                break;
            }
            if (this.s.startsWith('<!--', this.i)) {
                this.skipUntil('-->');
                continue;
            }
            if (this.s.startsWith('<![CDATA[', this.i)) {
                const end = this.s.indexOf(']]>', this.i + 9);
                if (end === -1)
                    throw new XmlParseError('unterminated CDATA section');
                textParts.push(this.s.slice(this.i + 9, end)); // raw — no entity decode in CDATA
                this.i = end + 3;
                continue;
            }
            if (this.s.startsWith('<!DOCTYPE', this.i) || this.s.startsWith('<!doctype', this.i)) {
                throw new XmlParseError('DOCTYPE / DTD is not allowed (XXE-safe parser)');
            }
            if (this.s.startsWith('<?', this.i)) {
                throw new XmlParseError('processing instructions are not allowed');
            }
            if (this.s[this.i] === '<') {
                node.children.push(this.parseElement(depth + 1));
                continue;
            }
            // Text run up to the next '<'.
            const lt = this.s.indexOf('<', this.i);
            const end = lt === -1 ? this.s.length : lt;
            textParts.push(decodeEntities(this.s.slice(this.i, end)));
            this.i = end;
        }
        node.text = textParts.join('');
        return node;
    }
}
|
|
212
|
+
function nodeToValue(node) {
|
|
213
|
+
if (node.children.length === 0) {
|
|
214
|
+
// A leaf node: its text. Callers coerce types from the known schema; we keep
|
|
215
|
+
// the raw string (booleans came over as "true"/"false").
|
|
216
|
+
return node.text;
|
|
217
|
+
}
|
|
218
|
+
// All children are <item> → an array (PHP int-keyed list).
|
|
219
|
+
if (node.children.every((c) => c.tag === 'item')) {
|
|
220
|
+
return node.children.map(nodeToValue);
|
|
221
|
+
}
|
|
222
|
+
// Otherwise an object: named tags → keys. Repeated tags collapse to a list.
|
|
223
|
+
const result = {};
|
|
224
|
+
for (const child of node.children) {
|
|
225
|
+
const value = nodeToValue(child);
|
|
226
|
+
if (child.tag in result) {
|
|
227
|
+
const existing = result[child.tag];
|
|
228
|
+
if (Array.isArray(existing)) {
|
|
229
|
+
existing.push(value);
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
result[child.tag] = [existing, value];
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
else {
|
|
236
|
+
result[child.tag] = value;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
return result;
|
|
240
|
+
}
|
|
241
|
+
/**
 * Parse an XML document in the platform's serialization into plain JS data.
 *
 * The text is parsed into a node tree and the root element is then converted:
 * a leaf becomes its text, an all-`<item>` element becomes an array, anything
 * else an object keyed by child tag.
 *
 * @param text The XML document.
 * @returns The converted value of the document's root element.
 * @throws {XmlParseError} On malformed XML, a DOCTYPE/DTD, a processing
 *   instruction, or a non-builtin entity reference.
 */
export function parseXml(text) {
    const parser = new Parser(text);
    return nodeToValue(parser.parse());
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Durable plain-file buffer for the crash-safe changes pump.
|
|
3
|
+
*
|
|
4
|
+
* The changes feed is a server-side **drain-on-fetch queue**: a fetch returns up to
|
|
5
|
+
* N events and deletes those rows in the same transaction — the API keeps no copy.
|
|
6
|
+
* So a drained batch MUST be persisted locally BEFORE any delivery, or a consumer
|
|
7
|
+
* crash mid-batch loses events the API already deleted. This module is that
|
|
8
|
+
* persistence: a zero-dependency, plain-file buffer under `cacheDir`.
|
|
9
|
+
*
|
|
10
|
+
* Layout:
|
|
11
|
+
*
|
|
12
|
+
* <cacheDir>/pending/<seq>_<change_id>.json # one un-acked event, oldest-first
|
|
13
|
+
* <cacheDir>/deadletter/<seq>_<change_id>.json # events that exhausted retries
|
|
14
|
+
*
|
|
15
|
+
* - The stored event is the **raw hardened API event object** — its `value` /
|
|
16
|
+
* `value_url` is **CIPHERTEXT**, never the decrypted plaintext. No PII is ever
|
|
17
|
+
* written to disk ("ciphertext at rest").
|
|
18
|
+
* - `<seq>` is a zero-padded, monotonically increasing sequence number persisted
|
|
19
|
+
* in `<cacheDir>/.seq`. Because {@link FileBuffer.append} is called in drain
|
|
20
|
+
* order (oldest-first), sorting filenames lexicographically yields oldest-first
|
|
21
|
+
* — a stable order even if event `at` timestamps are missing or equal.
|
|
22
|
+
* - Writes are **crash-safe**: each file is written to a temp name, fsync'd,
|
|
23
|
+
* atomically renamed into place, and the containing directory is fsync'd — so a
|
|
24
|
+
* crash never leaves a half-written pending file.
|
|
25
|
+
* - `ack(id)` deletes the pending file; `deadLetter(id, error, attempts)` moves it
|
|
26
|
+
* to `deadletter/` with the error + attempt count appended. Neither re-fetches
|
|
27
|
+
* from the API (it already deleted the row) — the buffer is the only home.
|
|
28
|
+
*
|
|
29
|
+
* All operations are SYNCHRONOUS (Node `fs` + `fsyncSync`) so the fsync discipline
|
|
30
|
+
* is exact. The pump calls them between awaits.
|
|
31
|
+
*/
|
|
32
|
+
/**
 * One buffered event: the raw API event object, stored with its value still
 * encrypted. Keys are arbitrary strings and values are untyped.
 */
export type BufferedEvent = {
    [key: string]: unknown;
};
/**
 * A dead-lettered event: every field of the stored event plus the failure
 * context lifted to the top level.
 */
export interface DeadLetterRecord extends BufferedEvent {
    /** Number of delivery attempts recorded, or `null` when none is recorded. */
    attempts: number | null;
    /** Last recorded error message, or `null` when none is recorded. */
    error: string | null;
}
|
|
39
|
+
/**
 * Ordered, durable event buffer kept as plain files under `cacheDir`; events are
 * stored with their values still encrypted.
 *
 * Constructing a `FileBuffer` on a directory that already holds files picks up
 * whatever is on disk, which is how the pump replays after a restart.
 */
export declare class FileBuffer {
    private readonly pendingDir;
    private readonly deadletterDir;
    private readonly seqPath;
    /** @param cacheDir Directory that holds the buffer's files. */
    constructor(cacheDir: string);
    private nextSeq;
    private readSeq;
    private writeSeq;
    private maxOnDiskSeq;
    /**
     * Write a drained batch to disk, one fsync'd file per event, in the order
     * given (oldest first).
     *
     * Events are stored verbatim, ciphertext included. This copy is the only one
     * left once the API has handed the batch over, so it has to finish before the
     * pump delivers anything.
     *
     * @returns The pending filenames that were written.
     */
    append(events: BufferedEvent[]): string[];
    /** Every event not yet acknowledged, oldest first (sorted by filename). */
    pending(): BufferedEvent[];
    private pendingFiles;
    private readEvent;
    private findPendingFile;
    /** Acknowledge one event by deleting its pending file. Idempotent. */
    ack(changeId: unknown): boolean;
    /**
     * Park a poison event: move it from pending to deadletter together with the
     * error and the attempt count.
     *
     * Crash safety: the dead-letter copy is written BEFORE the pending copy is
     * removed, so the event is never lost. A crash between the two steps leaves it
     * in both directories, which is harmless — replay delivers it again and the
     * id-dedup handler absorbs the duplicate. Do NOT reorder this to delete first.
     *
     * The stored event keeps its ciphertext value; the failure context goes under
     * a reserved key so it is never silently dropped.
     */
    deadLetter(changeId: unknown, error: string, attempts: number): boolean;
    private deadletterFiles;
    /**
     * Every dead-lettered event, oldest first.
     *
     * Each record is the stored (ciphertext) event with `error` and `attempts`
     * lifted out of the reserved `_deadletter` block to the top level, plus the
     * event's own `id` for convenience.
     */
    deadLetters(): DeadLetterRecord[];
    private findDeadletterFile;
    /**
     * Replace a dead-letter record's error and attempt count IN PLACE.
     *
     * Used when a re-drive (`retryDeadLetters`) fails again. The record stays in
     * `deadletter/` and is updated atomically: a temp file inside `deadletter/` is
     * written, fsync'd, and renamed over the same path. It never passes back
     * through `pending/`, so a crash at any point leaves either the old record or
     * the new one — never a resurrected live pending event. The file keeps its seq
     * prefix, so oldest-first ordering is unaffected. Idempotent: returns `false`
     * when the record no longer exists.
     *
     * The stored attempt count never decreases across re-drive runs: it is clamped
     * to `max(existing, new)`, so a later run with a smaller `maxRetries` cannot
     * lower the recorded total.
     */
    updateDeadLetter(changeId: unknown, error: string, attempts: number): boolean;
    /** Remove a dead-letter record once a re-drive has succeeded. Idempotent. */
    removeDeadLetter(changeId: unknown): boolean;
}
|