steamutils 1.5.55 → 1.5.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "steamutils",
3
- "version": "1.5.55",
3
+ "version": "1.5.56",
4
4
  "main": "index.js",
5
5
  "dependencies": {
6
6
  "alpha-common-utils": "^1.0.6",
package/parse_html.js CHANGED
@@ -3,6 +3,7 @@ import { formatMarketHistoryDate, getAvatarHashFromUrl } from "./utils.js";
3
3
  import * as cheerio from "cheerio";
4
4
  import { getJSObjectFronXML } from "./xml2json.js";
5
5
  import moment from "moment";
6
+ import { parseString } from "xml2js";
6
7
 
7
8
  /**
8
9
  * @typedef {Object} HoverItem
@@ -271,6 +272,8 @@ export function parseMarketListings(html) {
271
272
  * console.log(profile.name);
272
273
  */
273
274
  export function parseSteamProfileXmlToJson(xml) {
275
+ if (guessDocumentType(xml) !== "xml") return;
276
+
274
277
  const parsed = getJSObjectFronXML(xml);
275
278
  if (!parsed || !parsed.profile) return;
276
279
 
@@ -346,3 +349,61 @@ export function parseSteamProfileXmlToJson(xml) {
346
349
 
347
350
  return profile;
348
351
  }
352
+
353
/**
 * Heuristically classifies a markup string as XML, HTML, or unknown.
 *
 * Checks run in the order below; the first one that matches decides the result:
 *   1. An XML declaration (`<?xml ...`) at the start                  => "xml"
 *   2. An `<html ...>` tag at the start                               => "html"
 *   3. The first element's tag name: `html`                           => "html",
 *      a known XML root (rss, feed, svg, profile, ...) or any
 *      prefixed name such as `soap:Envelope`                          => "xml"
 *   4. An HTML DOCTYPE anywhere in the input                          => "html"
 *   5. Common HTML error-page patterns (meta refresh, or a `<title>`
 *      mentioning error / not found / forbidden)                      => "html"
 *   6. An `xmlns="..."` attribute anywhere in the input               => "xml"
 *   7. A strict xml2js parse that reports no error                    => "xml"
 *
 * The root-element check (3) runs before the content heuristics (4, 5) so that
 * an XML document is not mistaken for HTML merely because of what it contains,
 * e.g. an RSS feed whose `<title>` includes the word "error", or a feed entry
 * that embeds an HTML page inside CDATA.
 *
 * Note: xml2js is imported statically by this module, so it is a required
 * dependency; the try/catch around the parse only guards against the parser
 * throwing on malformed input.
 *
 * @param {string} input - The markup document as a string to be analyzed.
 * @returns {"xml"|"html"|"unknown"} - 'xml' if input appears to be XML, 'html' for HTML,
 *   or 'unknown' if indeterminate (including non-string, empty, or whitespace-only input).
 *
 * @example
 * guessDocumentType('<?xml version="1.0"?><profile></profile>'); // 'xml'
 * guessDocumentType('<!DOCTYPE html><html><body></body></html>'); // 'html'
 * guessDocumentType('<svg xmlns="http://www.w3.org/2000/svg"></svg>'); // 'xml'
 * guessDocumentType('<rss><channel><title>Error log</title></channel></rss>'); // 'xml'
 * guessDocumentType('random text'); // 'unknown'
 */
export function guessDocumentType(input) {
  if (typeof input !== "string") return "unknown";

  // All patterns below run against the trimmed text, so `^` anchors directly
  // on the first non-whitespace character.
  const trimmed = input.trim();
  if (!trimmed) return "unknown";

  // XML declaration
  if (/^<\?xml\b/i.test(trimmed)) return "xml";

  // <html ...> as the very first tag
  if (/^<html\b[^>]*>/i.test(trimmed)) return "html";

  // Root tag clues. Checked before the content-based HTML heuristics so the
  // document's own root element outranks text that merely appears inside it.
  const rootTagMatch = trimmed.match(/^<([\w:-]+)/);
  if (rootTagMatch) {
    const tag = rootTagMatch[1].toLowerCase();
    // Common XML API roots
    const xmlLikeRoots = ["rss", "feed", "svg", "profile", "users", "user", "response", "data", "opml", "atom", "plist", "soap:envelope", "xsl:stylesheet"];
    if (tag === "html") return "html";
    // A prefixed tag name (`ns:tag`) is treated as XML regardless of the list.
    if (xmlLikeRoots.includes(tag) || tag.includes(":")) return "xml";
  }

  // HTML DOCTYPE (may follow leading comments, hence not anchored)
  if (/<!DOCTYPE\s+html\b[^>]*>/i.test(trimmed)) return "html";

  // Common HTML error patterns
  if (/<meta\s[^>]*http-equiv=["']?refresh["']?/i.test(trimmed)) return "html";
  if (/<title>.*(error|not\s*found|forbidden).*<\/title>/i.test(trimmed)) return "html";

  // Namespace? (usually in XML, rarely HTML)
  if (/xmlns\s*=\s*['"]/i.test(trimmed)) return "xml";

  // Last resort: strict XML parse via xml2js.
  // NOTE(review): this relies on parseString invoking its callback before it
  // returns (xml2js's default, non-async mode) — confirm if options change.
  let isXml = false;
  try {
    parseString(trimmed, { strict: true }, (err) => {
      isXml = !err;
    });
  } catch (e) {
    // Deliberate best-effort: a parser that throws means "could not confirm XML".
    isXml = false;
  }

  return isXml ? "xml" : "unknown";
}