npm - html-json-extractor - Versions diffs - 0.0.1 → 0.2.0 - Mend

html-json-extractor 0.0.1 → 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 # html-json-extractor
-Fast, forgiving extraction of `<script type="application/ld+json">` blocks from an HTML string.
+Fast, forgiving extraction of `<script type="application/json">` and `<script type="application/ld+json">` blocks from an HTML string.
 - No DOM parser or runtime dependencies
 - Returns one result per matching script block
-- Malformed JSON-LD blocks do not break the rest
+- Malformed JSON blocks do not break the rest
 ## Install
@@ -15,31 +15,53 @@ npm install html-json-extractor
 ## Usage
 ```ts
-import { extractJsonLd, extractJsonLdStrings } from 'html-json-extractor';
+import {
+  extractJson,
+  extractJsonStrings,
+  getJsonLdItems,
+  getJsonLdRecords
+} from 'html-json-extractor';
 const html = `
+  <script type="application/json">{"featureFlags":{"search":true}}</script>
   <script type="application/ld+json">{"@type":"WebSite","name":"Example"}</script>
-  <script type="application/ld+json">{"broken":</script>
+  <script type="application/json">{"broken":</script>
   <script type="application/ld+json">[{"@type":"Person","name":"Ada"}]</script>
 `;
-const raw = extractJsonLdStrings(html);
-// ['{"@type":"WebSite","name":"Example"}', '{"broken":', '[{"@type":"Person","name":"Ada"}]']
+const raw = extractJsonStrings(html);
+// ['{"featureFlags":{"search":true}}', '{"@type":"WebSite","name":"Example"}', '{"broken":', '[{"@type":"Person","name":"Ada"}]']
-const parsed = extractJsonLd(html);
-// [{ '@type': 'WebSite', name: 'Example' }, [{ '@type': 'Person', name: 'Ada' }]]
+const parsed = extractJson(html);
+// [{ featureFlags: { search: true } }, { '@type': 'WebSite', name: 'Example' }, [{ '@type': 'Person', name: 'Ada' }]]
+const items = parsed.flatMap(getJsonLdItems);
+// [{ featureFlags: { search: true } }, { '@type': 'WebSite', name: 'Example' }, { '@type': 'Person', name: 'Ada' }]
+const records = parsed.flatMap(getJsonLdRecords);
+// [{ featureFlags: { search: true } }, { '@type': 'WebSite', name: 'Example' }, { '@type': 'Person', name: 'Ada' }]
 ```
 ## API
-### `extractJsonLdStrings(html: string): string[]`
+### `extractJsonStrings(html: string): string[]`
-Returns normalized JSON-LD script contents as strings.
+Returns normalized `application/json` and `application/ld+json` script contents as strings.
-### `extractJsonLd<T = JsonValue>(html: string): T[]`
+### `extractJson<T = JsonValue>(html: string): T[]`
 Parses the extracted strings with `JSON.parse`. Entries that fail to parse are skipped.
+### `getJsonLdItems(value: unknown): unknown[]`
+Normalizes a single parsed entry or parsed array entry into a flat list of items.
+### `getJsonLdRecords(value: unknown): Record<string, unknown>[]`
+Returns record-shaped items and follows nested `@graph` content.
+`getJsonLdItems` and `getJsonLdRecords` remain JSON-LD-oriented helpers for structured-data use cases on top of the generic extractor.
 ## License
 MIT

package/dist/index.cjs CHANGED Viewed

@@ -1,7 +1,7 @@
 Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
 //#region src/scan.ts
 const SCRIPT_TAG_NAME = "script";
-const APPLICATION_LD_JSON_MIME = "application/ld+json";
+const SUPPORTED_JSON_SCRIPT_MIME_TYPES = ["application/ld+json", "application/json"];
 const CHAR_TAB = 9;
 const CHAR_LINE_FEED = 10;
 const CHAR_FORM_FEED = 12;
@@ -45,7 +45,7 @@ const TEXT_LITERAL_CONTAINERS = [
 		isTerminal: true
 	}
 ];
-function extractJsonLdStrings(html) {
+function extractJsonStrings(html) {
 	if (html.length === 0) return [];
 	const results = [];
 	let cursor = 0;
@@ -57,8 +57,8 @@ function extractJsonLdStrings(html) {
 		const contentStart = openTag.tagEnd + 1;
 		const closeTagStart = findNextCloseTag(html, contentStart, SCRIPT_TAG_NAME);
 		const contentEnd = closeTagStart === -1 ? html.length : closeTagStart;
-		if (openTag.isJsonLd) {
-			const content = normalizeJsonLdContent(html.slice(contentStart, contentEnd));
+		if (openTag.hasSupportedMime) {
+			const content = normalizeJsonScriptContent(html.slice(contentStart, contentEnd));
 			if (content.length > 0) results.push(content);
 		}
 		if (closeTagStart === -1) break;
@@ -105,13 +105,13 @@ function findNextScriptOpenTag(html, start) {
 }
 function parseScriptOpenTag(html, openTagStart) {
 	let cursor = openTagStart + 1 + 6;
-	let isJsonLd = false;
+	let hasSupportedMime = false;
 	while (cursor < html.length) {
 		cursor = skipLeadingHtmlWhitespace(html, cursor, html.length);
 		if (cursor >= html.length) break;
 		const code = html.charCodeAt(cursor);
 		if (code === CHAR_GREATER_THAN) return {
-			isJsonLd,
+			hasSupportedMime,
 			tagEnd: cursor
 		};
 		if (isHtmlWhitespace(code) || code === CHAR_SLASH) {
@@ -147,11 +147,11 @@ function parseScriptOpenTag(html, openTagStart) {
 			}
 			valueEnd = cursor;
 		}
-		if (isTypeAttribute && isApplicationLdJsonMime(html, valueStart, valueEnd)) isJsonLd = true;
+		if (isTypeAttribute && isSupportedJsonScriptMime(html, valueStart, valueEnd)) hasSupportedMime = true;
 	}
 	return null;
 }
-function normalizeJsonLdContent(content) {
+function normalizeJsonScriptContent(content) {
 	let start = 0;
 	let end = content.length;
 	let changed = false;
@@ -225,13 +225,17 @@ function findNextCloseTag(html, start, tagName) {
 	}
 	return -1;
 }
-function isApplicationLdJsonMime(value, start, end) {
+function isSupportedJsonScriptMime(value, start, end) {
 	start = skipLeadingHtmlWhitespace(value, start, end);
 	end = skipTrailingHtmlWhitespace(value, start, end);
-	if (end - start < 19) return false;
-	if (!matchesAsciiLiteral(value, start, APPLICATION_LD_JSON_MIME)) return false;
-	if (end - start === 19) return true;
-	const suffixStart = start + 19;
+	for (const mime of SUPPORTED_JSON_SCRIPT_MIME_TYPES) if (matchesMimeType(value, start, end, mime)) return true;
+	return false;
+}
+function matchesMimeType(value, start, end, expectedMime) {
+	if (end - start < expectedMime.length) return false;
+	if (!matchesAsciiLiteral(value, start, expectedMime)) return false;
+	if (end - start === expectedMime.length) return true;
+	const suffixStart = start + expectedMime.length;
 	const nextCode = value.charCodeAt(suffixStart);
 	if (nextCode === CHAR_SEMICOLON) return true;
 	if (!isHtmlWhitespace(nextCode)) return false;
@@ -306,8 +310,8 @@ function isTagStartChar(code) {
 }
 //#endregion
 //#region src/parse.ts
-function extractJsonLd(html) {
-	const rawEntries = extractJsonLdStrings(html);
+function extractJson(html) {
+	const rawEntries = extractJsonStrings(html);
 	const results = [];
 	for (const rawEntry of rawEntries) try {
 		results.push(JSON.parse(rawEntry));
@@ -315,5 +319,29 @@ function extractJsonLd(html) {
 	return results;
 }
 //#endregion
-exports.extractJsonLd = extractJsonLd;
-exports.extractJsonLdStrings = extractJsonLdStrings;
+//#region src/helpers.ts
+const isRecord = (value) => {
+	return typeof value === "object" && value !== null && !Array.isArray(value);
+};
+const asRecord = (value) => {
+	return isRecord(value) ? value : void 0;
+};
+const asArray = (value) => {
+	return Array.isArray(value) ? value : void 0;
+};
+function getJsonLdRecords(value) {
+	const items = asArray(value);
+	if (items) return items.flatMap(getJsonLdRecords);
+	const record = asRecord(value);
+	if (!record) return [];
+	return [record, ...getJsonLdRecords(record["@graph"])];
+}
+function getJsonLdItems(value) {
+	if (value === void 0) return [];
+	return asArray(value) ?? [value];
+}
+//#endregion
+exports.extractJson = extractJson;
+exports.extractJsonStrings = extractJsonStrings;
+exports.getJsonLdItems = getJsonLdItems;
+exports.getJsonLdRecords = getJsonLdRecords;

package/dist/index.d.cts CHANGED Viewed

@@ -7,9 +7,13 @@ type JsonArray = JsonValue[];
 type JsonValue = JsonArray | JsonObject | JsonPrimitive;
 //#endregion
 //#region src/parse.d.ts
-declare function extractJsonLd<T extends JsonValue = JsonValue>(html: string): T[];
+declare function extractJson<T extends JsonValue = JsonValue>(html: string): T[];
 //#endregion
 //#region src/scan.d.ts
-declare function extractJsonLdStrings(html: string): string[];
+declare function extractJsonStrings(html: string): string[];
 //#endregion
-export { type JsonArray, type JsonObject, type JsonPrimitive, type JsonValue, extractJsonLd, extractJsonLdStrings };
+//#region src/helpers.d.ts
+declare function getJsonLdRecords(value: unknown): Record<string, unknown>[];
+declare function getJsonLdItems(value: unknown): unknown[];
+//#endregion
+export { type JsonArray, type JsonObject, type JsonPrimitive, type JsonValue, extractJson, extractJsonStrings, getJsonLdItems, getJsonLdRecords };

package/dist/index.d.ts CHANGED Viewed

@@ -7,9 +7,13 @@ type JsonArray = JsonValue[];
 type JsonValue = JsonArray | JsonObject | JsonPrimitive;
 //#endregion
 //#region src/parse.d.ts
-declare function extractJsonLd<T extends JsonValue = JsonValue>(html: string): T[];
+declare function extractJson<T extends JsonValue = JsonValue>(html: string): T[];
 //#endregion
 //#region src/scan.d.ts
-declare function extractJsonLdStrings(html: string): string[];
+declare function extractJsonStrings(html: string): string[];
 //#endregion
-export { type JsonArray, type JsonObject, type JsonPrimitive, type JsonValue, extractJsonLd, extractJsonLdStrings };
+//#region src/helpers.d.ts
+declare function getJsonLdRecords(value: unknown): Record<string, unknown>[];
+declare function getJsonLdItems(value: unknown): unknown[];
+//#endregion
+export { type JsonArray, type JsonObject, type JsonPrimitive, type JsonValue, extractJson, extractJsonStrings, getJsonLdItems, getJsonLdRecords };

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 //#region src/scan.ts
 const SCRIPT_TAG_NAME = "script";
-const APPLICATION_LD_JSON_MIME = "application/ld+json";
+const SUPPORTED_JSON_SCRIPT_MIME_TYPES = ["application/ld+json", "application/json"];
 const CHAR_TAB = 9;
 const CHAR_LINE_FEED = 10;
 const CHAR_FORM_FEED = 12;
@@ -44,7 +44,7 @@ const TEXT_LITERAL_CONTAINERS = [
 		isTerminal: true
 	}
 ];
-function extractJsonLdStrings(html) {
+function extractJsonStrings(html) {
 	if (html.length === 0) return [];
 	const results = [];
 	let cursor = 0;
@@ -56,8 +56,8 @@ function extractJsonLdStrings(html) {
 		const contentStart = openTag.tagEnd + 1;
 		const closeTagStart = findNextCloseTag(html, contentStart, SCRIPT_TAG_NAME);
 		const contentEnd = closeTagStart === -1 ? html.length : closeTagStart;
-		if (openTag.isJsonLd) {
-			const content = normalizeJsonLdContent(html.slice(contentStart, contentEnd));
+		if (openTag.hasSupportedMime) {
+			const content = normalizeJsonScriptContent(html.slice(contentStart, contentEnd));
 			if (content.length > 0) results.push(content);
 		}
 		if (closeTagStart === -1) break;
@@ -104,13 +104,13 @@ function findNextScriptOpenTag(html, start) {
 }
 function parseScriptOpenTag(html, openTagStart) {
 	let cursor = openTagStart + 1 + 6;
-	let isJsonLd = false;
+	let hasSupportedMime = false;
 	while (cursor < html.length) {
 		cursor = skipLeadingHtmlWhitespace(html, cursor, html.length);
 		if (cursor >= html.length) break;
 		const code = html.charCodeAt(cursor);
 		if (code === CHAR_GREATER_THAN) return {
-			isJsonLd,
+			hasSupportedMime,
 			tagEnd: cursor
 		};
 		if (isHtmlWhitespace(code) || code === CHAR_SLASH) {
@@ -146,11 +146,11 @@ function parseScriptOpenTag(html, openTagStart) {
 			}
 			valueEnd = cursor;
 		}
-		if (isTypeAttribute && isApplicationLdJsonMime(html, valueStart, valueEnd)) isJsonLd = true;
+		if (isTypeAttribute && isSupportedJsonScriptMime(html, valueStart, valueEnd)) hasSupportedMime = true;
 	}
 	return null;
 }
-function normalizeJsonLdContent(content) {
+function normalizeJsonScriptContent(content) {
 	let start = 0;
 	let end = content.length;
 	let changed = false;
@@ -224,13 +224,17 @@ function findNextCloseTag(html, start, tagName) {
 	}
 	return -1;
 }
-function isApplicationLdJsonMime(value, start, end) {
+function isSupportedJsonScriptMime(value, start, end) {
 	start = skipLeadingHtmlWhitespace(value, start, end);
 	end = skipTrailingHtmlWhitespace(value, start, end);
-	if (end - start < 19) return false;
-	if (!matchesAsciiLiteral(value, start, APPLICATION_LD_JSON_MIME)) return false;
-	if (end - start === 19) return true;
-	const suffixStart = start + 19;
+	for (const mime of SUPPORTED_JSON_SCRIPT_MIME_TYPES) if (matchesMimeType(value, start, end, mime)) return true;
+	return false;
+}
+function matchesMimeType(value, start, end, expectedMime) {
+	if (end - start < expectedMime.length) return false;
+	if (!matchesAsciiLiteral(value, start, expectedMime)) return false;
+	if (end - start === expectedMime.length) return true;
+	const suffixStart = start + expectedMime.length;
 	const nextCode = value.charCodeAt(suffixStart);
 	if (nextCode === CHAR_SEMICOLON) return true;
 	if (!isHtmlWhitespace(nextCode)) return false;
@@ -305,8 +309,8 @@ function isTagStartChar(code) {
 }
 //#endregion
 //#region src/parse.ts
-function extractJsonLd(html) {
-	const rawEntries = extractJsonLdStrings(html);
+function extractJson(html) {
+	const rawEntries = extractJsonStrings(html);
 	const results = [];
 	for (const rawEntry of rawEntries) try {
 		results.push(JSON.parse(rawEntry));
@@ -314,4 +318,26 @@ function extractJsonLd(html) {
 	return results;
 }
 //#endregion
-export { extractJsonLd, extractJsonLdStrings };
+//#region src/helpers.ts
+const isRecord = (value) => {
+	return typeof value === "object" && value !== null && !Array.isArray(value);
+};
+const asRecord = (value) => {
+	return isRecord(value) ? value : void 0;
+};
+const asArray = (value) => {
+	return Array.isArray(value) ? value : void 0;
+};
+function getJsonLdRecords(value) {
+	const items = asArray(value);
+	if (items) return items.flatMap(getJsonLdRecords);
+	const record = asRecord(value);
+	if (!record) return [];
+	return [record, ...getJsonLdRecords(record["@graph"])];
+}
+function getJsonLdItems(value) {
+	if (value === void 0) return [];
+	return asArray(value) ?? [value];
+}
+//#endregion
+export { extractJson, extractJsonStrings, getJsonLdItems, getJsonLdRecords };

package/package.json CHANGED Viewed

@@ -1,9 +1,17 @@
 {
   "name": "html-json-extractor",
-  "version": "0.0.1",
-  "description": "Fast, forgiving extraction of application/ld+json script blocks from HTML strings.",
+  "version": "0.2.0",
+  "description": "Fast, forgiving extraction of application/json and application/ld+json script blocks from HTML strings.",
   "license": "MIT",
   "author": "VastBlast",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/VastBlast/html-json-extractor.git"
+  },
+  "bugs": {
+    "url": "https://github.com/VastBlast/html-json-extractor/issues"
+  },
+  "homepage": "https://github.com/VastBlast/html-json-extractor#readme",
   "type": "module",
   "sideEffects": false,
   "files": [
@@ -11,6 +19,8 @@
   ],
   "keywords": [
     "html",
+    "json",
+    "application/json",
     "json-ld",
     "ld+json",
     "schema",
@@ -42,10 +52,9 @@
   },
   "main": "./dist/index.cjs",
   "module": "./dist/index.js",
-  "types": "./dist/index.d.ts",
+  "types": "./dist/index.d.cts",
   "exports": {
     ".": {
-      "types": "./dist/index.d.ts",
       "import": "./dist/index.js",
       "require": "./dist/index.cjs"
     },