@adobe/spacecat-shared-tokowaka-client 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +3 -3
- package/src/index.js +202 -12
- package/src/utils/custom-html-utils.js +194 -0
- package/src/utils/s3-utils.js +5 -1
- package/test/index.test.js +396 -2
- package/test/mappers/headings-mapper.test.js +0 -2
- package/test/utils/html-utils.test.js +434 -0
- package/test/mappers/faq-mapper.test.js.backup +0 -1264
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/* eslint-env mocha */
|
|
14
|
+
|
|
15
|
+
import { expect } from 'chai';
|
|
16
|
+
import sinon from 'sinon';
|
|
17
|
+
import { fetchHtmlWithWarmup } from '../../src/utils/custom-html-utils.js';
|
|
18
|
+
|
|
19
|
+
describe('HTML Utils', () => {
|
|
20
|
+
describe('fetchHtmlWithWarmup', () => {
|
|
21
|
+
let fetchStub;
|
|
22
|
+
let log;
|
|
23
|
+
|
|
24
|
+
beforeEach(() => {
  // Replace the global fetch so no test in this suite touches the network.
  fetchStub = sinon.stub(global, 'fetch');

  // Logger double: one independent stub per log level used by the tests.
  const debug = sinon.stub();
  const warn = sinon.stub();
  const error = sinon.stub();
  const info = sinon.stub();
  log = {
    debug,
    warn,
    error,
    info,
  };
});
|
|
33
|
+
|
|
34
|
+
// Undo every stub created through the default sinon sandbox (including the
// global fetch replacement) so state never leaks between tests.
afterEach(() => sinon.restore());
|
|
37
|
+
|
|
38
|
+
it('should throw error when URL is missing', async () => {
  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      '', // empty URL is the input under test
      'api-key',
      'host',
      'edge-url',
      log,
      false,
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.equal('URL is required for fetching HTML');
});
|
|
53
|
+
|
|
54
|
+
it('should throw error when apiKey is missing', async () => {
  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      '', // empty API key is the input under test
      'host',
      'edge-url',
      log,
      false,
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.equal('Tokowaka API key is required for fetching HTML');
});
|
|
69
|
+
|
|
70
|
+
it('should throw error when forwardedHost is missing', async () => {
  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      '', // empty forwarded host is the input under test
      'edge-url',
      log,
      false,
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.equal('Forwarded host is required for fetching HTML');
});
|
|
85
|
+
|
|
86
|
+
it('should throw error when tokowakaEdgeUrl is missing', async () => {
  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      '', // empty edge URL is the input under test
      log,
      false,
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.equal('TOKOWAKA_EDGE_URL is not configured');
});
|
|
101
|
+
|
|
102
|
+
it('should successfully fetch HTML with all required parameters', async () => {
  // A single canned response answers every fetch call made during this test.
  const expectedHtml = '<html>Test HTML</html>';
  const cachedResponse = {
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: (name) => (name === 'x-tokowaka-cache' ? 'HIT' : null),
    },
    text: async () => expectedHtml,
  };
  fetchStub.resolves(cachedResponse);

  const options = { warmupDelayMs: 0 };
  const html = await fetchHtmlWithWarmup(
    'https://example.com/page',
    'api-key',
    'host',
    'https://edge.example.com',
    log,
    false,
    options,
  );

  expect(html).to.equal(expectedHtml);
  // Two fetches are expected in total: the warmup call plus the actual call.
  expect(fetchStub.callCount).to.equal(2);
});
|
|
126
|
+
|
|
127
|
+
it('should handle URL with existing query parameters when fetching optimized HTML', async () => {
  // A single canned response answers every fetch call made during this test.
  const expectedHtml = '<html>Optimized HTML</html>';
  const cachedResponse = {
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: (name) => (name === 'x-tokowaka-cache' ? 'HIT' : null),
    },
    text: async () => expectedHtml,
  };
  fetchStub.resolves(cachedResponse);

  const isOptimized = true;
  const html = await fetchHtmlWithWarmup(
    'https://example.com/page?param=value',
    'api-key',
    'host',
    'https://edge.example.com',
    log,
    isOptimized,
    { warmupDelayMs: 0 },
  );

  expect(html).to.equal(expectedHtml);
  // Two fetches are expected in total: the warmup call plus the actual call.
  expect(fetchStub.callCount).to.equal(2);

  // The preview flag must be appended with '&' because the URL already
  // carries a query string; a second '?' would be wrong.
  const [actualUrl] = fetchStub.getCall(1).args;
  expect(actualUrl).to.include('param=value');
  expect(actualUrl).to.include('&tokowakaPreview=true');
  expect(actualUrl).to.not.include('?tokowakaPreview=true');
});
|
|
157
|
+
|
|
158
|
+
it('should throw error when HTTP response is not ok', async () => {
  // Warmup succeeds
  fetchStub.onCall(0).resolves({
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: () => null,
    },
    text: async () => 'warmup',
  });
  // Actual call returns 404
  fetchStub.onCall(1).resolves({
    ok: false,
    status: 404,
    statusText: 'Not Found',
    headers: {
      get: () => null,
    },
  });

  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      'https://edge.example.com',
      log,
      false,
      { warmupDelayMs: 0, maxRetries: 0 },
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.include('Failed to fetch original HTML');
  expect(caught.message).to.include('0 retries');
});
|
|
195
|
+
|
|
196
|
+
it('should retry and eventually throw error after max retries', async () => {
  // Warmup succeeds (this response intentionally carries no headers object,
  // exactly as in the original fixture)
  fetchStub.onCall(0).resolves({
    ok: true,
    status: 200,
    statusText: 'OK',
    text: async () => 'warmup',
  });
  // All actual calls fail
  fetchStub.onCall(1).rejects(new Error('Network error'));
  fetchStub.onCall(2).rejects(new Error('Network error'));
  fetchStub.onCall(3).rejects(new Error('Network error'));

  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      'https://edge.example.com',
      log,
      false,
      { warmupDelayMs: 0, maxRetries: 2, retryDelayMs: 0 },
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.include('Failed to fetch original HTML');
  expect(caught.message).to.include('Network error');

  // Should have tried 3 times (initial + 2 retries) plus warmup
  expect(fetchStub.callCount).to.equal(4);
});
|
|
228
|
+
|
|
229
|
+
it('should handle zero maxRetries value', async () => {
  // Warmup succeeds (this response intentionally carries no headers object,
  // exactly as in the original fixture)
  fetchStub.onCall(0).resolves({
    ok: true,
    status: 200,
    statusText: 'OK',
    text: async () => 'warmup',
  });
  // Actual call fails
  fetchStub.onCall(1).rejects(new Error('Network error'));

  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      'https://edge.example.com',
      log,
      false,
      { warmupDelayMs: 0, maxRetries: 0 },
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.include('Network error');

  // Should have tried only once (no retries) plus warmup
  expect(fetchStub.callCount).to.equal(2);
});
|
|
258
|
+
|
|
259
|
+
it('should handle negative maxRetries as edge case', async () => {
  // Warmup succeeds
  fetchStub.onCall(0).resolves({
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: () => null,
    },
    text: async () => 'warmup',
  });

  // Author's stated intent: with maxRetries: -1 the retry loop does not
  // execute, so this exercises the defensive fallback throw.
  //
  // The rejection is captured and asserted AFTER the try/catch. Previously
  // expect.fail() lived inside the try block, so its AssertionError landed in
  // the catch and satisfied `expect(error).to.exist`, letting the test pass
  // even when nothing was thrown.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      'https://edge.example.com',
      log,
      false,
      { warmupDelayMs: 0, maxRetries: -1 },
    );
  } catch (error) {
    caught = error;
  }

  // A defined error must have been thrown by fetchHtmlWithWarmup itself.
  expect(caught, 'Should have thrown error').to.exist;
});
|
|
289
|
+
|
|
290
|
+
it('should stop retrying when x-tokowaka-cache header is found', async () => {
  // Builds a 200 response whose headers never expose any value.
  const withoutCacheHeader = (body) => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: () => null,
    },
    text: async () => body,
  });

  // Call 0 is the warmup; call 1 is the first real attempt (no cache header).
  fetchStub.onCall(0).resolves(withoutCacheHeader('warmup'));
  fetchStub.onCall(1).resolves(withoutCacheHeader('<html>No cache</html>'));
  // Call 2 is the second real attempt and finally carries the cache header.
  fetchStub.onCall(2).resolves({
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: (name) => (name === 'x-tokowaka-cache' ? 'HIT' : null),
    },
    text: async () => '<html>Cached HTML</html>',
  });

  const retryOptions = { warmupDelayMs: 0, maxRetries: 3, retryDelayMs: 0 };
  const html = await fetchHtmlWithWarmup(
    'https://example.com/page',
    'api-key',
    'host',
    'https://edge.example.com',
    log,
    false,
    retryOptions,
  );

  expect(html).to.equal('<html>Cached HTML</html>');
  // Fetching stops once the header shows up: warmup + 2 attempts.
  expect(fetchStub.callCount).to.equal(3);
});
|
|
336
|
+
|
|
337
|
+
it('should throw error when cache header not found after max retries', async () => {
  // Builds a successful response that never exposes the cache header.
  const withoutCacheHeader = (body) => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: () => null,
    },
    text: async () => body,
  });

  // Warmup succeeds
  fetchStub.onCall(0).resolves(withoutCacheHeader('warmup'));
  // All actual calls succeed but no cache header
  fetchStub.onCall(1).resolves(withoutCacheHeader('<html>No cache 1</html>'));
  fetchStub.onCall(2).resolves(withoutCacheHeader('<html>No cache 2</html>'));
  fetchStub.onCall(3).resolves(withoutCacheHeader('<html>No cache 3</html>'));

  // Capture the rejection and assert after the try/catch. Calling
  // expect.fail() inside the try block would be swallowed by the catch and
  // resurface as a misleading "message mismatch" failure.
  let caught;
  try {
    await fetchHtmlWithWarmup(
      'https://example.com/page',
      'api-key',
      'host',
      'https://edge.example.com',
      log,
      false,
      { warmupDelayMs: 0, maxRetries: 2, retryDelayMs: 0 },
    );
  } catch (error) {
    caught = error;
  }

  expect(caught, 'Should have thrown error').to.be.an.instanceOf(Error);
  expect(caught.message).to.include('Failed to fetch original HTML');
  expect(caught.message).to.include('Cache header (x-tokowaka-cache) not found after 2 retries');

  // Should have tried 3 times (initial + 2 retries) plus warmup
  expect(fetchStub.callCount).to.equal(4);
});
|
|
396
|
+
|
|
397
|
+
it('should return immediately on first attempt if cache header is present', async () => {
  const cachedHtml = '<html>Cached HTML</html>';

  // Call 0 is the warmup request; its headers expose nothing.
  const warmupResponse = {
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: () => null,
    },
    text: async () => 'warmup',
  };
  // Call 1 is the first real attempt and already carries the cache header.
  const cachedResponse = {
    ok: true,
    status: 200,
    statusText: 'OK',
    headers: {
      get: (name) => (name === 'x-tokowaka-cache' ? 'HIT' : null),
    },
    text: async () => cachedHtml,
  };
  fetchStub.onCall(0).resolves(warmupResponse);
  fetchStub.onCall(1).resolves(cachedResponse);

  const retryOptions = { warmupDelayMs: 0, maxRetries: 3, retryDelayMs: 0 };
  const html = await fetchHtmlWithWarmup(
    'https://example.com/page',
    'api-key',
    'host',
    'https://edge.example.com',
    log,
    false,
    retryOptions,
  );

  expect(html).to.equal(cachedHtml);
  // No retry is expected when the header is present on the first attempt:
  // warmup + 1 actual call.
  expect(fetchStub.callCount).to.equal(2);
});
|
|
433
|
+
});
|
|
434
|
+
});
|