javascript-solid-server 0.0.160 → 0.0.162
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/handlers/resource.js +75 -5
- package/src/mashlib/index.js +85 -6
- package/test/data-island.test.js +293 -0
package/package.json
CHANGED
package/src/handlers/resource.js
CHANGED
|
@@ -14,7 +14,8 @@ import {
|
|
|
14
14
|
} from '../rdf/conneg.js';
|
|
15
15
|
import { emitChange } from '../notifications/events.js';
|
|
16
16
|
import { checkIfMatch, checkIfNoneMatchForGet, checkIfNoneMatchForWrite } from '../utils/conditional.js';
|
|
17
|
-
import { generateDatabrowserHtml, generateModuleDatabrowserHtml, shouldServeMashlib } from '../mashlib/index.js';
|
|
17
|
+
import { generateDatabrowserHtml, generateModuleDatabrowserHtml, shouldServeMashlib, DATA_ISLAND_MAX_BYTES } from '../mashlib/index.js';
|
|
18
|
+
import { turtleToJsonLd } from '../rdf/turtle.js';
|
|
18
19
|
|
|
19
20
|
/**
|
|
20
21
|
* Live reload script - injected into HTML when --live-reload is enabled
|
|
@@ -238,9 +239,21 @@ export async function handleGet(request, reply) {
|
|
|
238
239
|
|
|
239
240
|
// Check if we should serve Mashlib data browser for containers
|
|
240
241
|
if (shouldServeMashlib(request, request.mashlibEnabled, 'application/ld+json')) {
|
|
242
|
+
// Phase 1 of #7: also embed the container's JSON-LD listing as a
|
|
243
|
+
// data island so consumers that look for `<script
|
|
244
|
+
// type="application/ld+json">` (search-engine rich-results,
|
|
245
|
+
// archival crawlers, future mashlib zero-fetch path) get the data
|
|
246
|
+
// without a second request. Use compact (no-whitespace) form for
|
|
247
|
+
// the embed so we don't burn bytes against DATA_ISLAND_MAX_BYTES
|
|
248
|
+
// on indentation that nothing will ever read.
|
|
249
|
+
const embedJsonLd = JSON.stringify(jsonLd);
|
|
241
250
|
const html = request.mashlibModule
|
|
242
|
-
? generateModuleDatabrowserHtml(request.mashlibModule)
|
|
243
|
-
: generateDatabrowserHtml(
|
|
251
|
+
? generateModuleDatabrowserHtml(request.mashlibModule, resourceUrl, { embedJsonLd })
|
|
252
|
+
: generateDatabrowserHtml(
|
|
253
|
+
resourceUrl,
|
|
254
|
+
request.mashlibCdn ? request.mashlibVersion : null,
|
|
255
|
+
{ embedJsonLd }
|
|
256
|
+
);
|
|
244
257
|
const headers = getAllHeaders({
|
|
245
258
|
isContainer: true,
|
|
246
259
|
etag: stats.etag,
|
|
@@ -318,9 +331,66 @@ export async function handleGet(request, reply) {
|
|
|
318
331
|
// Check if we should serve Mashlib data browser
|
|
319
332
|
// Only for RDF resources when Accept: text/html is requested
|
|
320
333
|
if (shouldServeMashlib(request, request.mashlibEnabled, storedContentType)) {
|
|
334
|
+
// #7 / #344: embed the resource as a JSON-LD data island so
|
|
335
|
+
// non-mashlib consumers (search-engine rich-results, archival
|
|
336
|
+
// crawlers) get the data without a second request, and so the
|
|
337
|
+
// shape is uniform regardless of the URL extension.
|
|
338
|
+
//
|
|
339
|
+
// JSS stores all RDF as JSON-LD on disk (PUT converts Turtle/N3
|
|
340
|
+
// before write — see the conneg branch in handlePut), so for
|
|
341
|
+
// `.ttl` / `.n3` URLs the bytes on disk are usually already
|
|
342
|
+
// JSON-LD. Try JSON parse first; only fall back to a Turtle parse
|
|
343
|
+
// when that fails (covers files placed on the filesystem
|
|
344
|
+
// out-of-band in their native format).
|
|
345
|
+
//
|
|
346
|
+
// Cap-aware short-circuit: skip the read entirely when the file
|
|
347
|
+
// is already over the embed cap. The island would be dropped
|
|
348
|
+
// anyway, and large RDF resources would otherwise load into
|
|
349
|
+
// memory on every HTML navigation. Other formats (rdf+xml, etc.)
|
|
350
|
+
// are not handled — the wrapper still loads and mashlib
|
|
351
|
+
// XHR-fetches them as before.
|
|
352
|
+
const islandConvertible =
|
|
353
|
+
storedContentType === RDF_TYPES.JSON_LD ||
|
|
354
|
+
storedContentType === RDF_TYPES.TURTLE ||
|
|
355
|
+
storedContentType === RDF_TYPES.N3;
|
|
356
|
+
let embedJsonLd;
|
|
357
|
+
if (islandConvertible && stats.size <= DATA_ISLAND_MAX_BYTES) {
|
|
358
|
+
const buf = await storage.read(storagePath);
|
|
359
|
+
if (buf) {
|
|
360
|
+
if (storedContentType === RDF_TYPES.JSON_LD) {
|
|
361
|
+
// Pass the Buffer through. dataIsland() decodes once when
|
|
362
|
+
// it needs to; we don't pre-validate or pre-decode here.
|
|
363
|
+
embedJsonLd = buf;
|
|
364
|
+
} else {
|
|
365
|
+
// Turtle / N3 URL. JSS stores everything as JSON-LD on
|
|
366
|
+
// disk (PUT converts), so try JSON parse first and pass
|
|
367
|
+
// the *decoded text* through (avoids a second decode
|
|
368
|
+
// inside dataIsland's String() coercion). Fall back to a
|
|
369
|
+
// Turtle parse for files placed on the filesystem
|
|
370
|
+
// out-of-band in their native format.
|
|
371
|
+
const text = buf.toString('utf8');
|
|
372
|
+
try {
|
|
373
|
+
JSON.parse(text);
|
|
374
|
+
embedJsonLd = text;
|
|
375
|
+
} catch {
|
|
376
|
+
try {
|
|
377
|
+
const jsonLd = await turtleToJsonLd(text, resourceUrl);
|
|
378
|
+
embedJsonLd = JSON.stringify(jsonLd);
|
|
379
|
+
} catch {
|
|
380
|
+
// Both parses failed → drop the island. The wrapper
|
|
381
|
+
// still renders and mashlib XHR-fetches the original.
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
}
|
|
321
387
|
const html = request.mashlibModule
|
|
322
|
-
? generateModuleDatabrowserHtml(request.mashlibModule)
|
|
323
|
-
: generateDatabrowserHtml(
|
|
388
|
+
? generateModuleDatabrowserHtml(request.mashlibModule, resourceUrl, { embedJsonLd })
|
|
389
|
+
: generateDatabrowserHtml(
|
|
390
|
+
resourceUrl,
|
|
391
|
+
request.mashlibCdn ? request.mashlibVersion : null,
|
|
392
|
+
{ embedJsonLd }
|
|
393
|
+
);
|
|
324
394
|
const headers = getAllHeaders({
|
|
325
395
|
isContainer: false,
|
|
326
396
|
etag: stats.etag,
|
package/src/mashlib/index.js
CHANGED
|
@@ -4,23 +4,97 @@
|
|
|
4
4
|
* Generates HTML wrapper that loads SolidOS Mashlib from CDN.
|
|
5
5
|
* When a browser requests an RDF resource with Accept: text/html,
|
|
6
6
|
* we return this wrapper which then fetches and renders the data.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1 of #7: when the originating resource is reasonably small
|
|
9
|
+
* RDF, the JSON-LD bytes are embedded in the wrapper as a `<script
|
|
10
|
+
* type="application/ld+json" id="dataisland" data-uri="…">` block.
|
|
11
|
+
* Browsers ignore non-JS script bodies, so this is harmless to all
|
|
12
|
+
* existing clients (mashlib still XHR-fetches today). It immediately
|
|
13
|
+
* benefits anything that knows to look for `application/ld+json`
|
|
14
|
+
* islands — search engine rich-results, archival crawlers, scrapers,
|
|
15
|
+
* static-site exporters — and gives Phase 2 a zero-network fast path.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Cap on how much JSON-LD we'll inline. A 256 KB resource fits any
|
|
20
|
+
* realistic profile, type index, or container listing. Above that we
|
|
21
|
+
* drop the island and let the existing XHR path handle it so we don't
|
|
22
|
+
* make every navigation re-download a multi-megabyte resource.
|
|
23
|
+
*/
|
|
24
|
+
export const DATA_ISLAND_MAX_BYTES = 256 * 1024;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Escape a JSON-LD body for safe inclusion inside `<script
|
|
28
|
+
* type="application/ld+json">…</script>`.
|
|
29
|
+
*
|
|
30
|
+
* Browsers don't execute the script (wrong MIME), but the HTML parser
|
|
31
|
+
* still scans the body for an end-of-script tag. The relevant rule:
|
|
32
|
+
* any `</` followed by `script` (case-insensitive) terminates the
|
|
33
|
+
* element regardless of what follows — `</script>`, `</script >`,
|
|
34
|
+
* `</script\n>`, `</SCRIPT>` and friends all close it. Escaping just
|
|
35
|
+
* the literal `</script>` token is too narrow.
|
|
36
|
+
*
|
|
37
|
+
* The robust fix is to replace every literal `<` byte in the body with
|
|
38
|
+
* the JSON string-escape for U+003C — the six characters
|
|
39
|
+
* backslash-u-0-0-3-c (the same form the implementation emits below).
|
|
40
|
+
* JSON-LD is JSON, and a JSON parser decodes that escape back to a
|
|
41
|
+
* literal `<` natively, so document semantics are preserved. After
|
|
42
|
+
* this transform the body literally cannot contain a `<` byte — so no
|
|
43
|
+
* end-tag (or comment, CDATA, etc.) can possibly start.
|
|
44
|
+
*/
|
|
45
|
+
function escapeForScriptBlock(jsonLdString) {
|
|
46
|
+
return String(jsonLdString).replace(/</g, '\\u003c');
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Build the data-island `<script>` block for the given JSON-LD payload.
|
|
51
|
+
* Returns an empty string if the payload is missing or over the size
|
|
52
|
+
* cap so callers can unconditionally interpolate `dataIsland(...)`.
|
|
53
|
+
*
|
|
54
|
+
* The cap applies to the *escaped* body — i.e. the bytes that will
|
|
55
|
+
* actually appear in the HTTP response. `escapeForScriptBlock` can
|
|
56
|
+
* expand input up to 6x (each literal `<` becomes the 6-byte JSON
|
|
57
|
+
* escape sequence backslash-u-0-0-3-c), so checking the raw input
|
|
58
|
+
* size alone could let an HTML response balloon past the cap.
|
|
59
|
+
*
|
|
60
|
+
* Two-stage check:
|
|
61
|
+
* 1. Cheap raw-byte pre-check — escape can only grow the body,
|
|
62
|
+
* so a raw payload already over the cap is guaranteed to be
|
|
63
|
+
* over after escaping; drop without doing the work.
|
|
64
|
+
* 2. Post-escape check — catches the rare case where input was
|
|
65
|
+
* under the cap but expanded above it (`<`-heavy bodies).
|
|
7
66
|
*/
|
|
67
|
+
function dataIsland(resourceUrl, jsonLdString) {
|
|
68
|
+
if (!jsonLdString) return '';
|
|
69
|
+
const raw = String(jsonLdString);
|
|
70
|
+
if (Buffer.byteLength(raw, 'utf8') > DATA_ISLAND_MAX_BYTES) return '';
|
|
71
|
+
const safeBody = escapeForScriptBlock(raw);
|
|
72
|
+
if (Buffer.byteLength(safeBody, 'utf8') > DATA_ISLAND_MAX_BYTES) return '';
|
|
73
|
+
const safeUri = escapeHtml(String(resourceUrl));
|
|
74
|
+
return `<script type="application/ld+json" id="dataisland" data-uri="${safeUri}">${safeBody}</script>`;
|
|
75
|
+
}
|
|
8
76
|
|
|
9
77
|
/**
|
|
10
78
|
* Generate Mashlib databrowser HTML
|
|
11
79
|
*
|
|
12
|
-
* @param {string} resourceUrl - The URL of the resource being viewed
|
|
80
|
+
* @param {string} resourceUrl - The URL of the resource being viewed
|
|
13
81
|
* @param {string} cdnVersion - If provided, load mashlib from unpkg CDN (e.g., "2.0.0")
|
|
82
|
+
* @param {object} [opts]
|
|
83
|
+
* @param {string|Buffer} [opts.embedJsonLd] - JSON-LD body to inline
|
|
84
|
+
* as a `<script type="application/ld+json">` data island. Accepts a
|
|
85
|
+
* UTF-8 string or a Buffer (coerced via `String()`). Honors a 256 KB
|
|
86
|
+
* size cap; oversize payloads are silently dropped. Phase 1 of #7.
|
|
14
87
|
* @returns {string} HTML content
|
|
15
88
|
*/
|
|
16
|
-
export function generateDatabrowserHtml(resourceUrl, cdnVersion = null) {
|
|
89
|
+
export function generateDatabrowserHtml(resourceUrl, cdnVersion = null, opts = {}) {
|
|
90
|
+
const island = dataIsland(resourceUrl, opts.embedJsonLd);
|
|
17
91
|
if (cdnVersion) {
|
|
18
92
|
// CDN mode - use script.onload to ensure mashlib is fully loaded before init
|
|
19
93
|
// This avoids race conditions with defer + DOMContentLoaded
|
|
20
94
|
const cdnBase = `https://unpkg.com/mashlib@${cdnVersion}/dist`;
|
|
21
95
|
return `<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title>
|
|
22
96
|
<link href="${cdnBase}/mash.css" rel="stylesheet"></head>
|
|
23
|
-
<body id="PageBody"><header id="PageHeader"></header>
|
|
97
|
+
<body id="PageBody">${island}<header id="PageHeader"></header>
|
|
24
98
|
<div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div>
|
|
25
99
|
<footer id="PageFooter"></footer>
|
|
26
100
|
<script>
|
|
@@ -37,22 +111,27 @@ export function generateDatabrowserHtml(resourceUrl, cdnVersion = null) {
|
|
|
37
111
|
// Local mode - use defer (reliable when served locally)
|
|
38
112
|
return `<!doctype html><html><head><meta charset="utf-8"/><title>SolidOS Web App</title><script>document.addEventListener('DOMContentLoaded', function() {
|
|
39
113
|
panes.runDataBrowser()
|
|
40
|
-
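})</script><script defer="defer" src="/mashlib.min.js"></script><link href="/mash.css" rel="stylesheet"></head><body id="PageBody"><header id="PageHeader"></header><div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div><footer id="PageFooter"></footer></body></html>`;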
|
|
114
|
+
})</script><script defer="defer" src="/mashlib.min.js"></script><link href="/mash.css" rel="stylesheet"></head><body id="PageBody">${island}<header id="PageHeader"></header><div class="TabulatorOutline" id="DummyUUID" role="main"><table id="outline"></table><div id="GlobalDashboard"></div></div><footer id="PageFooter"></footer></body></html>`;
|
|
41
115
|
}
|
|
42
116
|
|
|
43
117
|
/**
|
|
44
118
|
* Generate ES module-based databrowser HTML
|
|
45
119
|
*
|
|
46
120
|
* @param {string} moduleUrl - URL to the ES module entry point
|
|
121
|
+
* @param {string} resourceUrl - The URL of the resource being viewed
|
|
122
|
+
* @param {object} [opts]
|
|
123
|
+
* @param {string|Buffer} [opts.embedJsonLd] - JSON-LD body for the
|
|
124
|
+
* data island, same contract as `generateDatabrowserHtml`. Phase 1 of #7.
|
|
47
125
|
* @returns {string} HTML content
|
|
48
126
|
*/
|
|
49
|
-
export function generateModuleDatabrowserHtml(moduleUrl) {
|
|
127
|
+
export function generateModuleDatabrowserHtml(moduleUrl, resourceUrl = '', opts = {}) {
|
|
50
128
|
const cssUrl = moduleUrl.replace(/\.js$/, '.css');
|
|
129
|
+
const island = dataIsland(resourceUrl, opts.embedJsonLd);
|
|
51
130
|
return `<!doctype html><html lang="en"><head><meta charset="utf-8"/>
|
|
52
131
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
53
132
|
<title>Solid Data Browser</title>
|
|
54
133
|
<link rel="stylesheet" href="${cssUrl}"></head>
|
|
55
|
-
<body><div id="mashlib"></div>
|
|
134
|
+
<body>${island}<div id="mashlib"></div>
|
|
56
135
|
<script type="module" src="${moduleUrl}"></script>
|
|
57
136
|
</body></html>`;
|
|
58
137
|
}
|
package/test/data-island.test.js
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase-1 tests for the JSON-LD data island (#7).
|
|
3
|
+
*
|
|
4
|
+
* The mashlib HTML wrapper now carries the resource's JSON-LD bytes
|
|
5
|
+
* inside a `<script type="application/ld+json" id="dataisland"
|
|
6
|
+
* data-uri="...">` block. Phase 1 doesn't change mashlib's runtime behaviour — the
|
|
7
|
+
* island is purely additive — so these tests pin:
|
|
8
|
+
* - emission shape (script tag, id, MIME, data-uri)
|
|
9
|
+
* - escape: any `</script>` substring inside the body must not
|
|
10
|
+
* prematurely close the script tag
|
|
11
|
+
* - size cap: oversized payloads silently drop the island so we
|
|
12
|
+
* don't make every navigation re-download a multi-megabyte file
|
|
13
|
+
* - presence in the live HTTP response (resource and container)
|
|
14
|
+
* - safe URI attribute against quote / angle-bracket injection
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, before, after } from 'node:test';
|
|
18
|
+
import assert from 'node:assert';
|
|
19
|
+
import fs from 'node:fs/promises';
|
|
20
|
+
import path from 'node:path';
|
|
21
|
+
import {
|
|
22
|
+
startTestServer,
|
|
23
|
+
stopTestServer,
|
|
24
|
+
request,
|
|
25
|
+
createTestPod,
|
|
26
|
+
assertStatus,
|
|
27
|
+
assertHeaderContains
|
|
28
|
+
} from './helpers.js';
|
|
29
|
+
import {
|
|
30
|
+
generateDatabrowserHtml,
|
|
31
|
+
generateModuleDatabrowserHtml,
|
|
32
|
+
DATA_ISLAND_MAX_BYTES
|
|
33
|
+
} from '../src/mashlib/index.js';
|
|
34
|
+
|
|
35
|
+
describe('mashlib data island — emission (unit, #7)', () => {
|
|
36
|
+
it('emits <script type="application/ld+json" id="dataisland" data-uri="..."> when payload supplied', () => {
|
|
37
|
+
const html = generateDatabrowserHtml(
|
|
38
|
+
'https://test.solid.social/profile/card.jsonld',
|
|
39
|
+
'2.0.0',
|
|
40
|
+
{ embedJsonLd: '{"@id":"#me","foaf:name":"Alice"}' }
|
|
41
|
+
);
|
|
42
|
+
assert.match(html, /<script type="application\/ld\+json" id="dataisland" data-uri="https:\/\/test\.solid\.social\/profile\/card\.jsonld">/);
|
|
43
|
+
assert.match(html, /"@id":"#me"/);
|
|
44
|
+
assert.match(html, /<\/script>/);
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it('omits the data island when no payload is supplied (back-compat)', () => {
|
|
48
|
+
const html = generateDatabrowserHtml('https://x.test/foo', '2.0.0');
|
|
49
|
+
assert.doesNotMatch(html, /id="dataisland"/);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it('omits the data island for oversized payloads (size cap)', () => {
|
|
53
|
+
// Construct a JSON-LD body just over the cap.
|
|
54
|
+
const filler = 'x'.repeat(DATA_ISLAND_MAX_BYTES + 1024);
|
|
55
|
+
const oversized = `{"content":"${filler}"}`;
|
|
56
|
+
const html = generateDatabrowserHtml(
|
|
57
|
+
'https://x.test/big',
|
|
58
|
+
'2.0.0',
|
|
59
|
+
{ embedJsonLd: oversized }
|
|
60
|
+
);
|
|
61
|
+
assert.doesNotMatch(html, /id="dataisland"/,
|
|
62
|
+
'island must drop silently above DATA_ISLAND_MAX_BYTES');
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it('cap applies post-escape (defends against `<`-heavy expansion)', () => {
|
|
66
|
+
// A pathological body that's well under the cap as raw bytes but
|
|
67
|
+
// explodes 6x after escaping — every literal `<` byte becomes the
|
|
68
|
+
// 6-byte JSON escape sequence backslash-u-0-0-3-c. Without the
|
|
69
|
+
// post-escape check, this would emit a multi-megabyte island.
|
|
70
|
+
const halfCap = Math.floor(DATA_ISLAND_MAX_BYTES / 2);
|
|
71
|
+
const payload = '<'.repeat(halfCap); // 128 KB raw, ~768 KB escaped
|
|
72
|
+
const html = generateDatabrowserHtml(
|
|
73
|
+
'https://x.test/expand',
|
|
74
|
+
'2.0.0',
|
|
75
|
+
{ embedJsonLd: payload }
|
|
76
|
+
);
|
|
77
|
+
assert.doesNotMatch(html, /id="dataisland"/,
|
|
78
|
+
'island must drop when the escaped body exceeds the cap');
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
// The escape strategy is "encode every `<` byte as the six-character
|
|
82
|
+
// JSON escape `\\u003c`". Test the wide variety of strings that an
|
|
83
|
+
// HTML parser would otherwise treat as a closing tag — `</script>`,
|
|
84
|
+
// `</script >`, `</script\n>`, `</SCRIPT>`, `</scRIPT>` — plus
|
|
85
|
+
// `<!--`, all of which require a literal `<` to start the dangerous
|
|
86
|
+
// sequence. After escaping, no literal `<` exists in the body and
|
|
87
|
+
// every transformed location appears as `\\u003c`.
|
|
88
|
+
const escapeTrojans = [
|
|
89
|
+
['exact </script>', '{"x":"a</script>b"}'],
|
|
90
|
+
['with space </script >', '{"x":"a</script >b"}'],
|
|
91
|
+
['with newline </script\\n>', '{"x":"a</script\n>b"}'],
|
|
92
|
+
['uppercase </SCRIPT>', '{"x":"a</SCRIPT>b"}'],
|
|
93
|
+
['mixed </ScRiPt>', '{"x":"a</ScRiPt>b"}'],
|
|
94
|
+
['<!-- comment', '{"x":"<!-- hidden -->"}']
|
|
95
|
+
];
|
|
96
|
+
for (const [label, body] of escapeTrojans) {
|
|
97
|
+
it(`escape blocks ${label} from prematurely terminating the tag`, () => {
|
|
98
|
+
const html = generateDatabrowserHtml(
|
|
99
|
+
'https://x.test/r',
|
|
100
|
+
'2.0.0',
|
|
101
|
+
{ embedJsonLd: body }
|
|
102
|
+
);
|
|
103
|
+
const start = html.indexOf('id="dataisland"');
|
|
104
|
+
assert.ok(start > 0, 'data island should be present');
|
|
105
|
+
// Slice from the island's `>` (end of opening tag) forward.
|
|
106
|
+
const open = html.indexOf('>', start) + 1;
|
|
107
|
+
const close = html.indexOf('</script>', open);
|
|
108
|
+
const inner = html.slice(open, close);
|
|
109
|
+
// After our escape, the body must contain NO literal `<`.
|
|
110
|
+
assert.doesNotMatch(inner, /</,
|
|
111
|
+
`script body must not contain a literal "<" — got: ${JSON.stringify(inner)}`);
|
|
112
|
+
// The escaped form (the six characters backslash-u-0-0-3-c)
|
|
113
|
+
// should be present.
|
|
114
|
+
assert.match(inner, /\\u003c/,
|
|
115
|
+
'escaped form `\\u003c` must appear');
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
it('accepts a Buffer payload (handler may pass storage.read() result directly)', () => {
|
|
120
|
+
// The src/handlers/resource.js path used to convert with .toString()
|
|
121
|
+
// before passing in; tightening that contract risks regressions, so
|
|
122
|
+
// the helper accepts a Buffer transparently.
|
|
123
|
+
const buf = Buffer.from('{"@id":"#me","foaf:name":"BufferAlice"}', 'utf8');
|
|
124
|
+
const html = generateDatabrowserHtml(
|
|
125
|
+
'https://x.test/r',
|
|
126
|
+
'2.0.0',
|
|
127
|
+
{ embedJsonLd: buf }
|
|
128
|
+
);
|
|
129
|
+
assert.match(html, /id="dataisland"/);
|
|
130
|
+
assert.match(html, /"foaf:name":"BufferAlice"/);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it('the module-mode wrapper also emits the data island', () => {
|
|
134
|
+
const html = generateModuleDatabrowserHtml(
|
|
135
|
+
'https://example.test/mashlib.js',
|
|
136
|
+
'https://test.solid.social/profile/card.jsonld',
|
|
137
|
+
{ embedJsonLd: '{"@id":"#me"}' }
|
|
138
|
+
);
|
|
139
|
+
assert.match(html, /<script type="application\/ld\+json" id="dataisland" data-uri="https:\/\/test\.solid\.social\/profile\/card\.jsonld">/);
|
|
140
|
+
assert.match(html, /"@id":"#me"/);
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
it('escapes the data-uri attribute against quote / angle-bracket injection', () => {
|
|
144
|
+
const html = generateDatabrowserHtml(
|
|
145
|
+
'https://x.test/r"><img src=x onerror=alert(1)>',
|
|
146
|
+
'2.0.0',
|
|
147
|
+
{ embedJsonLd: '{}' }
|
|
148
|
+
);
|
|
149
|
+
// The dangerous characters must be HTML-entity encoded inside the
|
|
150
|
+
// attribute, so the attribute can't be broken open.
|
|
151
|
+
assert.match(html, /data-uri="https:\/\/x\.test\/r&quot;&gt;&lt;img/);
|
|
152
|
+
assert.doesNotMatch(html, /data-uri="https:\/\/x\.test\/r"><img/);
|
|
153
|
+
});
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
// Integration coverage — the data island must actually appear in the
|
|
157
|
+
// HTTP response when a browser asks for the wrapper.
|
|
158
|
+
describe('mashlib data island — integration (#7)', () => {
|
|
159
|
+
before(async () => {
|
|
160
|
+
await startTestServer({ mashlibCdn: true });
|
|
161
|
+
await createTestPod('islandtest');
|
|
162
|
+
// Put a small JSON-LD resource we can fetch back as HTML.
|
|
163
|
+
await request('/islandtest/public/note.jsonld', {
|
|
164
|
+
method: 'PUT',
|
|
165
|
+
headers: { 'Content-Type': 'application/ld+json' },
|
|
166
|
+
body: JSON.stringify({
|
|
167
|
+
'@context': { foaf: 'http://xmlns.com/foaf/0.1/' },
|
|
168
|
+
'@id': '#note',
|
|
169
|
+
'foaf:name': 'island test'
|
|
170
|
+
}),
|
|
171
|
+
auth: 'islandtest'
|
|
172
|
+
});
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
after(async () => { await stopTestServer(); });
|
|
176
|
+
|
|
177
|
+
it('a browser GET to a JSON-LD resource carries the data island', async () => {
|
|
178
|
+
const res = await request('/islandtest/public/note.jsonld', {
|
|
179
|
+
headers: { Accept: 'text/html,application/xhtml+xml,*/*;q=0.8' }
|
|
180
|
+
});
|
|
181
|
+
assertStatus(res, 200);
|
|
182
|
+
assertHeaderContains(res, 'Content-Type', 'text/html');
|
|
183
|
+
const body = await res.text();
|
|
184
|
+
assert.match(body, /id="dataisland"/);
|
|
185
|
+
assert.match(body, /<script type="application\/ld\+json"/);
|
|
186
|
+
assert.match(body, /"foaf:name":"island test"/);
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
it('a browser GET to a container carries the listing as a data island', async () => {
|
|
190
|
+
const res = await request('/islandtest/public/', {
|
|
191
|
+
headers: { Accept: 'text/html,application/xhtml+xml,*/*;q=0.8' }
|
|
192
|
+
});
|
|
193
|
+
assertStatus(res, 200);
|
|
194
|
+
assertHeaderContains(res, 'Content-Type', 'text/html');
|
|
195
|
+
const body = await res.text();
|
|
196
|
+
assert.match(body, /id="dataisland"/);
|
|
197
|
+
assert.match(body, /<script type="application\/ld\+json"/);
|
|
198
|
+
// Contains an ldp:contains pointing at the resource we just PUT.
|
|
199
|
+
assert.match(body, /note\.jsonld/);
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
it('non-HTML Accept (mashlib XHR) does NOT trigger the wrapper', async () => {
|
|
203
|
+
const res = await request('/islandtest/public/note.jsonld', {
|
|
204
|
+
headers: { Accept: 'application/ld+json' }
|
|
205
|
+
});
|
|
206
|
+
assertHeaderContains(res, 'Content-Type', 'application/ld+json');
|
|
207
|
+
const body = await res.text();
|
|
208
|
+
assert.doesNotMatch(body, /id="dataisland"/);
|
|
209
|
+
assert.doesNotMatch(body, /<!doctype html>/i);
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
// #344: data island also covers Turtle and N3 stored resources, by
|
|
214
|
+
// parsing them server-side and re-emitting the body as JSON-LD inside
|
|
215
|
+
// the script tag. Embedded shape is uniform across stored formats.
|
|
216
|
+
describe('mashlib data island — Turtle/N3 translation (#344)', () => {
|
|
217
|
+
before(async () => {
|
|
218
|
+
await startTestServer({ mashlibCdn: true, conneg: true });
|
|
219
|
+
await createTestPod('turtleisland');
|
|
220
|
+
await request('/turtleisland/public/note.ttl', {
|
|
221
|
+
method: 'PUT',
|
|
222
|
+
headers: { 'Content-Type': 'text/turtle' },
|
|
223
|
+
body: '@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n' +
|
|
224
|
+
'<#note> foaf:name "turtle island" .\n',
|
|
225
|
+
auth: 'turtleisland'
|
|
226
|
+
});
|
|
227
|
+
await request('/turtleisland/public/note.n3', {
|
|
228
|
+
method: 'PUT',
|
|
229
|
+
headers: { 'Content-Type': 'text/n3' },
|
|
230
|
+
body: '@prefix foaf: <http://xmlns.com/foaf/0.1/> .\n' +
|
|
231
|
+
'<#note> foaf:name "n3 island" .\n',
|
|
232
|
+
auth: 'turtleisland'
|
|
233
|
+
});
|
|
234
|
+
// The both-parses-fail branch in the handler is unreachable via
|
|
235
|
+
// HTTP — handlePut validates Turtle/N3 input and rejects malformed
|
|
236
|
+
// bodies with 400 before they ever reach storage. To exercise the
|
|
237
|
+
// defensive guard we plant a file directly on disk in the test
|
|
238
|
+
// data dir, mimicking the "out-of-band placement" case the
|
|
239
|
+
// production code handles.
|
|
240
|
+
const brokenPath = path.resolve('./data/turtleisland/public/broken.ttl');
|
|
241
|
+
await fs.writeFile(
|
|
242
|
+
brokenPath,
|
|
243
|
+
'@prefix foaf: <http://xmlns.com/foaf/0.1/>\n' +
|
|
244
|
+
'<#note> foaf:name "broken — missing dot above" .\n'
|
|
245
|
+
);
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
after(async () => { await stopTestServer(); });
|
|
249
|
+
|
|
250
|
+
it('a browser GET to a Turtle resource embeds parsed JSON-LD', async () => {
|
|
251
|
+
const res = await request('/turtleisland/public/note.ttl', {
|
|
252
|
+
headers: { Accept: 'text/html,application/xhtml+xml,*/*;q=0.8' }
|
|
253
|
+
});
|
|
254
|
+
assertStatus(res, 200);
|
|
255
|
+
assertHeaderContains(res, 'Content-Type', 'text/html');
|
|
256
|
+
const body = await res.text();
|
|
257
|
+
assert.match(body, /id="dataisland"/);
|
|
258
|
+
assert.match(body, /<script type="application\/ld\+json"/);
|
|
259
|
+
// The Turtle name literal must round-trip into the embedded JSON-LD.
|
|
260
|
+
assert.match(body, /"turtle island"/);
|
|
261
|
+
// No raw Turtle prefix syntax should leak into the script body.
|
|
262
|
+
assert.doesNotMatch(body, /id="dataisland"[^>]*>[^<]*@prefix/);
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
it('a browser GET to an N3 resource embeds parsed JSON-LD', async () => {
|
|
266
|
+
const res = await request('/turtleisland/public/note.n3', {
|
|
267
|
+
headers: { Accept: 'text/html,application/xhtml+xml,*/*;q=0.8' }
|
|
268
|
+
});
|
|
269
|
+
assertStatus(res, 200);
|
|
270
|
+
assertHeaderContains(res, 'Content-Type', 'text/html');
|
|
271
|
+
const body = await res.text();
|
|
272
|
+
assert.match(body, /id="dataisland"/);
|
|
273
|
+
assert.match(body, /"n3 island"/);
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
it('out-of-band malformed Turtle drops the island, wrapper still renders', async () => {
|
|
277
|
+
// File was planted on disk directly (in `before`), bypassing the
|
|
278
|
+
// PUT validator. The handler's two-stage parse (JSON, then Turtle)
|
|
279
|
+
// both fail; the island is dropped silently and the mashlib
|
|
280
|
+
// wrapper is still served so the browser can XHR-fetch the
|
|
281
|
+
// resource and surface the parse problem to the user.
|
|
282
|
+
const res = await request('/turtleisland/public/broken.ttl', {
|
|
283
|
+
headers: { Accept: 'text/html,application/xhtml+xml,*/*;q=0.8' }
|
|
284
|
+
});
|
|
285
|
+
assertStatus(res, 200);
|
|
286
|
+
assertHeaderContains(res, 'Content-Type', 'text/html');
|
|
287
|
+
const body = await res.text();
|
|
288
|
+
assert.doesNotMatch(body, /id="dataisland"/,
|
|
289
|
+
'island must drop when both JSON and Turtle parses fail');
|
|
290
|
+
assert.match(body, /<!doctype html>/i);
|
|
291
|
+
assert.match(body, /mashlib/);
|
|
292
|
+
});
|
|
293
|
+
});
|