@chrischall/mcp-utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +235 -0
  2. package/dist/auth/index.d.ts +223 -0
  3. package/dist/auth/index.d.ts.map +1 -0
  4. package/dist/auth/index.js +267 -0
  5. package/dist/auth/index.js.map +1 -0
  6. package/dist/config/index.d.ts +86 -0
  7. package/dist/config/index.d.ts.map +1 -0
  8. package/dist/config/index.js +121 -0
  9. package/dist/config/index.js.map +1 -0
  10. package/dist/errors/index.d.ts +90 -0
  11. package/dist/errors/index.d.ts.map +1 -0
  12. package/dist/errors/index.js +157 -0
  13. package/dist/errors/index.js.map +1 -0
  14. package/dist/fetchproxy/index.d.ts +156 -0
  15. package/dist/fetchproxy/index.d.ts.map +1 -0
  16. package/dist/fetchproxy/index.js +197 -0
  17. package/dist/fetchproxy/index.js.map +1 -0
  18. package/dist/html/index.d.ts +142 -0
  19. package/dist/html/index.d.ts.map +1 -0
  20. package/dist/html/index.js +321 -0
  21. package/dist/html/index.js.map +1 -0
  22. package/dist/http/index.d.ts +202 -0
  23. package/dist/http/index.d.ts.map +1 -0
  24. package/dist/http/index.js +341 -0
  25. package/dist/http/index.js.map +1 -0
  26. package/dist/index.d.ts +23 -0
  27. package/dist/index.d.ts.map +1 -0
  28. package/dist/index.js +23 -0
  29. package/dist/index.js.map +1 -0
  30. package/dist/response/index.d.ts +22 -0
  31. package/dist/response/index.d.ts.map +1 -0
  32. package/dist/response/index.js +61 -0
  33. package/dist/response/index.js.map +1 -0
  34. package/dist/server/index.d.ts +109 -0
  35. package/dist/server/index.d.ts.map +1 -0
  36. package/dist/server/index.js +95 -0
  37. package/dist/server/index.js.map +1 -0
  38. package/dist/session/index.d.ts +233 -0
  39. package/dist/session/index.d.ts.map +1 -0
  40. package/dist/session/index.js +404 -0
  41. package/dist/session/index.js.map +1 -0
  42. package/dist/test/index.d.ts +124 -0
  43. package/dist/test/index.d.ts.map +1 -0
  44. package/dist/test/index.js +181 -0
  45. package/dist/test/index.js.map +1 -0
  46. package/dist/zod/index.d.ts +130 -0
  47. package/dist/zod/index.d.ts.map +1 -0
  48. package/dist/zod/index.js +184 -0
  49. package/dist/zod/index.js.map +1 -0
  50. package/package.json +77 -0
@@ -0,0 +1,142 @@
1
+ /**
2
+ * Opt-in HTML helpers for the chrischall MCP fleet.
3
+ *
4
+ * Isolated behind the `@chrischall/mcp-utils/html` subpath because it pulls the
5
+ * heavy `node-html-parser` dependency — lean API-only MCPs shouldn't pay for it.
6
+ *
7
+ * Consolidates the HTML-scraping primitives that were independently
8
+ * re-implemented across the realty cohort (homes / redfin / compass /
9
+ * zillow / onehome) and a couple of content MCPs (opentable, infinitecampus):
10
+ *
11
+ * - `parsePropertyTable` / `findLinksUnderHeading` — heading-anchored DOM
12
+ * scraping for the SSR property-detail tables and link lists.
13
+ * - `extractJsonFromHtml` — the balanced-brace `__INITIAL_STATE__` walker
14
+ * (opentable / compass / realty); regex can't handle nested objects and
15
+ * escaped strings.
16
+ * - `extractPlainTextFromHtml` — dependency-free script/style strip + entity
17
+ * decode used to render Infinite Campus message bodies as plain text.
18
+ * - `urlToPath` / `locationToSlug` / `buildIdExtractor` — the small URL atoms
19
+ * that were byte-identical across the cohort.
20
+ */
21
+ import { type HTMLElement } from 'node-html-parser';
22
+ export type { HTMLElement };
23
+ /** A scraped HTML table: column headers plus row-major cell text. */
24
+ export interface PropertyTable {
25
+ /** Trimmed text of each header cell, in document order. */
26
+ headers: string[];
27
+ /** Each body row as an array of trimmed cell strings, in document order. */
28
+ rows: string[][];
29
+ }
30
+ /**
31
+ * Locate the `<table>` under the heading matching `heading` and return its
32
+ * column headers and body rows.
33
+ *
34
+ * Header cells are scoped to `<thead>` when present (falling back to all `<th>`
35
+ * in the table). Body rows collect **both** `<th>` and `<td>` because the realty
36
+ * portals use `<th scope="row">` for the leading cell of every data row (the
37
+ * year / date column) — dropping those would silently shift every column left
38
+ * and corrupt the parsed record.
39
+ *
40
+ * Cell text has internal whitespace collapsed to single spaces and is trimmed.
41
+ *
42
+ * @param html Raw page HTML (or an HTML fragment), or an already-parsed root element.
43
+ * @param heading Case-insensitive substring of the heading above the table.
44
+ * @returns The parsed table, or `null` when no matching heading+table is found.
45
+ *
46
+ * @example parsePropertyTable(detailHtml, 'Tax History')
47
+ * // { headers: ['Year', 'Property Tax'], rows: [['2023', '$1,200'], ...] }
48
+ */
49
+ export declare function parsePropertyTable(html: string | HTMLElement, heading: string): PropertyTable | null;
50
+ /**
51
+ * Find every `<a>` (or `selector`-matching element) that follows the first
52
+ * heading matching `heading`, up to — but not including — the next sibling
53
+ * heading. Useful for the "Homes for Sale Near" link lists at the bottom of a
54
+ * detail page.
55
+ *
56
+ * Collects both direct-sibling anchors and anchors nested inside the
57
+ * intervening sibling elements (lists, cards), in document order.
58
+ *
59
+ * @param root Raw HTML string or an already-parsed root element.
60
+ * @param heading Case-insensitive substring of the heading to anchor on.
61
+ * @param selector CSS selector for the elements to collect (default `'a'`).
62
+ * @returns The matching elements, or an empty array when the heading is absent.
63
+ */
64
+ export declare function findLinksUnderHeading(root: string | HTMLElement, heading: string, selector?: string): HTMLElement[];
65
+ /**
66
+ * Extract an embedded JSON state object from a server-rendered HTML page by
67
+ * walking the balanced brace/string structure after a marker.
68
+ *
69
+ * Handles both rendering forms seen across the fleet:
70
+ * 1. `window.__INITIAL_STATE__ = {...};` — a JS assignment in a `<script>`.
71
+ * 2. `"__INITIAL_STATE__":{...}` — a JSON key inside a larger embedded blob.
72
+ *
73
+ * A regex can't be used: the state contains nested objects and escaped strings.
74
+ * This walks the structure tracking string/escape context so braces inside
75
+ * string literals don't terminate the object early.
76
+ *
77
+ * Unlike the per-MCP `extractInitialState`, this returns `null` (rather than
78
+ * throwing) on any failure — no marker, unbalanced braces, or invalid JSON —
79
+ * so callers can branch on a missing blob without a try/catch.
80
+ *
81
+ * @param html The page HTML.
82
+ * @param marker The state key to search for (default `'__INITIAL_STATE__'`).
83
+ * Both the `window.<marker>` and `"<marker>"` forms are tried.
84
+ * @returns The parsed object, or `null` if not found / not parseable.
85
+ */
86
+ export declare function extractJsonFromHtml(html: string, marker?: string): Record<string, unknown> | null;
87
+ /**
88
+ * Strip an HTML document down to its readable plain text: remove `<script>` and
89
+ * `<style>` blocks (and their contents), drop all remaining tags, decode common
90
+ * HTML entities, then collapse whitespace.
91
+ *
92
+ * Dependency-free (no DOM parser) — this mirrors the Infinite Campus message
93
+ * extractor, which renders `messageView.xsl` bodies into a flat string. Removing
94
+ * `<script>`/`<style>` *content* first is load-bearing for safety: it ensures
95
+ * inline JS/CSS text never leaks into the surfaced message body.
96
+ *
97
+ * @param html The HTML (full document or fragment).
98
+ * @returns The decoded, whitespace-collapsed plain text (`''` for empty input).
99
+ */
100
+ export declare function extractPlainTextFromHtml(html: string): string;
101
+ /**
102
+ * Reduce a portal URL (or path) to its `pathname + search` portion.
103
+ *
104
+ * Accepts an absolute URL (any host — only the path is kept), a path already
105
+ * starting with `/` (returned unchanged), or a bare segment which is coerced to
106
+ * a leading-slash path. Malformed input that `new URL()` can't parse falls
107
+ * through to the same coercion branch, so the function never throws.
108
+ *
109
+ * @example urlToPath('https://www.zillow.com/homedetails/foo/7_zpid/')
110
+ * // '/homedetails/foo/7_zpid/'
111
+ * @example urlToPath('homedetails/7_zpid/') // '/homedetails/7_zpid/'
112
+ * @example urlToPath('/already/a/path/') // '/already/a/path/'
113
+ */
114
+ export declare function urlToPath(input: string): string;
115
+ /**
116
+ * Slugify a free-text location into a portal search-URL segment.
117
+ *
118
+ * NFKD-normalises, strips diacritics, lowercases, collapses runs of
119
+ * non-alphanumerics to a single `-`, then trims leading/trailing `-`. A bare ZIP
120
+ * passes through unchanged.
121
+ *
122
+ * @example locationToSlug('Brooklyn, NY') // 'brooklyn-ny'
123
+ * @example locationToSlug('Cañon City, CO') // 'canon-city-co'
124
+ * @example locationToSlug('94110') // '94110'
125
+ */
126
+ export declare function locationToSlug(input: string): string;
127
+ /**
128
+ * Build a stateless id-extractor from a regular expression.
129
+ *
130
+ * Returns a function that runs `regex` against a URL/path and yields the first
131
+ * capture group (or the whole match when the pattern has no groups), or
132
+ * `undefined` on no match / nullish input.
133
+ *
134
+ * The extractor is **stateless**: a fresh `RegExp` is constructed per call so a
135
+ * `g`/`y`-flagged source can't carry `lastIndex` between invocations and
136
+ * silently skip matches.
137
+ *
138
+ * @example buildIdExtractor(/\/home\/(\d+)(?:[/?#]|$)/)('/x/home/42/') // '42'
139
+ * @example buildIdExtractor(/\/([A-Za-z0-9]+)_pid\/?$/)('/s/abc_pid/') // 'abc'
140
+ */
141
+ export declare function buildIdExtractor(regex: RegExp): (input: string | undefined | null) => string | undefined;
142
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/html/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAS,KAAK,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAE3D,YAAY,EAAE,WAAW,EAAE,CAAC;AAE5B,qEAAqE;AACrE,MAAM,WAAW,aAAa;IAC5B,2DAA2D;IAC3D,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,4EAA4E;IAC5E,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AA4CD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GAAG,WAAW,EAC1B,OAAO,EAAE,MAAM,GACd,aAAa,GAAG,IAAI,CAetB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,GAAG,WAAW,EAC1B,OAAO,EAAE,MAAM,EACf,QAAQ,SAAM,GACb,WAAW,EAAE,CAgBf;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,MAAM,SAAsB,GAC3B,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAkDhC;AAYD;;;;;;;;;;;;GAYG;AACH,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAqB7D;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAO/C;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAOpD;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,MAAM,GACZ,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,KAAK,MAAM,GAAG,SAAS,CAU1D"}
@@ -0,0 +1,321 @@
1
+ /**
2
+ * Opt-in HTML helpers for the chrischall MCP fleet.
3
+ *
4
+ * Isolated behind the `@chrischall/mcp-utils/html` subpath because it pulls the
5
+ * heavy `node-html-parser` dependency — lean API-only MCPs shouldn't pay for it.
6
+ *
7
+ * Consolidates the HTML-scraping primitives that were independently
8
+ * re-implemented across the realty cohort (homes / redfin / compass /
9
+ * zillow / onehome) and a couple of content MCPs (opentable, infinitecampus):
10
+ *
11
+ * - `parsePropertyTable` / `findLinksUnderHeading` — heading-anchored DOM
12
+ * scraping for the SSR property-detail tables and link lists.
13
+ * - `extractJsonFromHtml` — the balanced-brace `__INITIAL_STATE__` walker
14
+ * (opentable / compass / realty); regex can't handle nested objects and
15
+ * escaped strings.
16
+ * - `extractPlainTextFromHtml` — dependency-free script/style strip + entity
17
+ * decode used to render Infinite Campus message bodies as plain text.
18
+ * - `urlToPath` / `locationToSlug` / `buildIdExtractor` — the small URL atoms
19
+ * that were byte-identical across the cohort.
20
+ */
21
+ import { parse } from 'node-html-parser';
22
/**
 * Return an element's `text` with every run of whitespace squeezed down to a
 * single space and both ends trimmed.
 */
function cellText(el) {
    const collapsed = el.text.replace(/\s+/g, ' ');
    return collapsed.trim();
}
26
/**
 * Accept either raw markup or an already-parsed element and always hand back an
 * element. Strings are run through `parse` with `lowerCaseTagName: false` and
 * `comment: false`; anything that is not a string is returned untouched.
 */
function toRoot(input) {
    if (typeof input !== 'string') {
        return input;
    }
    return parse(input, { lowerCaseTagName: false, comment: false });
}
32
/**
 * Resolve the `<table>` that belongs to a heading.
 *
 * Every `<h1>`–`<h4>` under `root` is visited in query order; a heading
 * qualifies when its lower-cased text contains the trimmed, lower-cased
 * `heading`. For each qualifying heading:
 *
 * 1. Its following element siblings are scanned until another `H1`–`H4` is hit.
 *    A sibling that is itself a `TABLE`, or the first `table` nested inside a
 *    sibling, is returned immediately.
 * 2. Otherwise, if the heading's parent is a `SECTION`, `ARTICLE` or `ASIDE`,
 *    the first `table` inside that parent is returned. Other parents are never
 *    searched, because peer headings there may own peer tables.
 *
 * If neither step produces a table the next qualifying heading is tried.
 *
 * NOTE(review): step 2 takes the first table anywhere in the wrapper, so it can
 * return a table that sits before the heading or after a peer heading inside
 * the same wrapper.
 *
 * @param root Parsed element to search within.
 * @param heading Substring to look for in heading text (case-insensitive).
 * @returns The table element, or `null` when no qualifying heading yields one.
 */
function findTableByHeading(root, heading) {
    const wanted = heading.trim().toLowerCase();
    const isHeadingTag = (tag) => /^H[1-4]$/.test(tag);
    const isWrapperTag = (tag) => /^(SECTION|ARTICLE|ASIDE)$/.test(tag);

    for (const candidate of root.querySelectorAll('h1, h2, h3, h4')) {
        const matches = candidate.text.toLowerCase().includes(wanted);
        if (matches) {
            // Step 1: scan forward through siblings up to the next heading.
            for (let sib = candidate.nextElementSibling; sib; sib = sib.nextElementSibling) {
                if (isHeadingTag(sib.tagName)) {
                    break;
                }
                if (sib.tagName === 'TABLE') {
                    return sib;
                }
                const nestedTable = sib.querySelector('table');
                if (nestedTable) {
                    return nestedTable;
                }
            }

            // Step 2: fall back to a dedicated wrapper element, if there is one.
            const wrapper = candidate.parentNode;
            if (wrapper && isWrapperTag(wrapper.tagName)) {
                const wrapped = wrapper.querySelector('table');
                if (wrapped) {
                    return wrapped;
                }
            }
        }
    }
    return null;
}
66
/**
 * Parse the `<table>` that sits under a given heading into header labels and
 * row-major cell text.
 *
 * Headers are every `<th>` inside the table's `<thead>`; when the table has no
 * `<thead>`, every `<th>` anywhere in the table is used instead. Rows are every
 * `<tr>` inside `<tbody>` (or inside the whole table when there is no
 * `<tbody>`). Each row keeps **both** its `<th>` and `<td>` cells, because the
 * realty portals mark the leading cell of a data row as `<th scope="row">` and
 * dropping it would shift every column left. Rows that contain no cells are
 * discarded.
 *
 * All cell text has whitespace runs collapsed to one space and is trimmed.
 *
 * @param html Raw page HTML / fragment, or an already-parsed root element.
 * @param heading Case-insensitive substring of the heading above the table.
 * @returns `{ headers, rows }`, or `null` when no matching heading+table exists.
 *
 * @example parsePropertyTable(detailHtml, 'Tax History')
 * // { headers: ['Year', 'Property Tax'], rows: [['2023', '$1,200'], ...] }
 */
export function parsePropertyTable(html, heading) {
    const table = findTableByHeading(toRoot(html), heading);
    if (!table) {
        return null;
    }

    const headerScope = table.querySelector('thead') ?? table;
    const headers = headerScope.querySelectorAll('th').map(cellText);

    const bodyScope = table.querySelector('tbody') ?? table;
    const rows = [];
    for (const tr of bodyScope.querySelectorAll('tr')) {
        const cells = tr.querySelectorAll('th, td').map(cellText);
        if (cells.length > 0) {
            rows.push(cells);
        }
    }

    return { headers, rows };
}
99
/**
 * Collect the elements matching `selector` that appear after a heading and
 * before the next sibling heading. Handy for the "Homes for Sale Near" link
 * lists at the foot of a detail page.
 *
 * Only the **first** `<h1>`–`<h4>` whose text contains `heading` is used as the
 * anchor. Its following element siblings are walked until another `H1`–`H4`
 * sibling is reached; for each sibling, the sibling itself (when it matches
 * `selector`) and then its matching descendants are appended, so results come
 * out in document order.
 *
 * @param root Raw HTML string or an already-parsed root element.
 * @param heading Case-insensitive substring of the heading to anchor on.
 * @param selector CSS selector for the elements to collect (default `'a'`).
 * @returns The collected elements; an empty array when no heading matches.
 */
export function findLinksUnderHeading(root, heading, selector = 'a') {
    const doc = toRoot(root);
    const wanted = heading.trim().toLowerCase();

    // Locate the anchor heading: the first one whose text contains the needle.
    let anchor = null;
    for (const h of doc.querySelectorAll('h1, h2, h3, h4')) {
        if (h.text.toLowerCase().includes(wanted)) {
            anchor = h;
            break;
        }
    }
    if (anchor === null) {
        return [];
    }

    const found = [];
    for (let sib = anchor.nextElementSibling; sib; sib = sib.nextElementSibling) {
        if (/^H[1-4]$/.test(sib.tagName)) {
            break;
        }
        // `matches` is called optionally: not every element object provides it.
        if (sib.matches?.(selector)) {
            found.push(sib);
        }
        for (const el of sib.querySelectorAll(selector)) {
            found.push(el);
        }
    }
    return found;
}
134
/**
 * Parse the first balanced `{...}` object that starts at or after `from`.
 *
 * Walks the text tracking JSON string and backslash-escape context so that
 * braces inside string literals do not affect the depth count.
 *
 * @param html Text to scan.
 * @param from Index at which to start looking for the opening `{`.
 * @returns The parsed value, or `null` when there is no `{`, the braces never
 *   balance, or the balanced slice is not valid JSON.
 */
function parseBalancedObjectAt(html, from) {
    const start = html.indexOf('{', from);
    if (start < 0) {
        return null;
    }
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < html.length; i++) {
        const ch = html[i];
        if (escaped) {
            // Character after a backslash inside a string: always skipped.
            escaped = false;
            continue;
        }
        if (inString) {
            if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}' && --depth === 0) {
            try {
                return JSON.parse(html.slice(start, i + 1));
            }
            catch {
                return null;
            }
        }
    }
    return null;
}
/**
 * Extract an embedded JSON state object from a server-rendered HTML page by
 * walking the balanced brace/string structure after a marker.
 *
 * Handles both rendering forms seen across the fleet:
 * 1. `window.__INITIAL_STATE__ = {...};` — a JS assignment in a `<script>`.
 * 2. `"__INITIAL_STATE__":{...}` — a JSON key inside a larger embedded blob.
 *
 * A regex can't be used: the state contains nested objects and escaped strings.
 *
 * The `window.<marker>` form is searched first, then the `"<marker>"` form.
 * Within each form every occurrence is tried in document order, and the first
 * one followed by a balanced, JSON-parseable object wins. A page that merely
 * *mentions* the marker first (e.g. `if (window.__INITIAL_STATE__) { ... }`)
 * therefore no longer hides a real blob further down. Each failed attempt
 * re-walks the text after that occurrence, which is negligible for the
 * handful of occurrences a real page contains.
 *
 * Returns `null` (rather than throwing) on any failure — no marker, unbalanced
 * braces, or invalid JSON — so callers can branch on a missing blob without a
 * try/catch.
 *
 * @param html The page HTML.
 * @param marker The state key to search for (default `'__INITIAL_STATE__'`).
 * @returns The parsed object, or `null` if not found / not parseable.
 */
export function extractJsonFromHtml(html, marker = '__INITIAL_STATE__') {
    const forms = [`window.${marker}`, `"${marker}"`];
    for (const form of forms) {
        let at = html.indexOf(form);
        while (at >= 0) {
            const parsed = parseBalancedObjectAt(html, at + form.length);
            if (parsed !== null) {
                return parsed;
            }
            at = html.indexOf(form, at + form.length);
        }
    }
    return null;
}
212
/** The small set of named entities IC message bodies actually contain. */
const NAMED_ENTITIES = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
};
/**
 * A `<script>` or `<style>` element together with its contents. The end tag may
 * carry whitespace or junk before `>` (`</script >`), as browsers accept; an
 * unterminated block runs to the end of the input.
 */
const SCRIPT_OR_STYLE_BLOCK = /<(script|style)\b[\s\S]*?(?:<\/\1\b[^>]*>|$)/gi;
/** One character reference: decimal (group 1), hex (group 2) or named (group 3). */
const CHARACTER_REFERENCE = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([a-zA-Z]+));/g;
/** Largest valid Unicode code point; `String.fromCodePoint` throws above it. */
const MAX_CODE_POINT = 0x10ffff;
/**
 * Replacement callback for `CHARACTER_REFERENCE`: decode one reference, or
 * return it unchanged when it is unknown or out of range.
 */
function decodeCharacterReference(whole, dec, hex, name) {
    if (name !== undefined) {
        const key = name.toLowerCase();
        // Own-property check: a bare lookup would resolve `&constructor;` to
        // the inherited `Object.prototype.constructor` function.
        return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, key)
            ? NAMED_ENTITIES[key]
            : whole;
    }
    const codePoint = dec !== undefined ? Number(dec) : parseInt(hex, 16);
    return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : whole;
}
/**
 * Strip an HTML document down to its readable plain text: remove `<script>` and
 * `<style>` blocks (and their contents), drop all remaining tags, decode common
 * HTML entities, then collapse whitespace.
 *
 * Dependency-free (no DOM parser) — this mirrors the Infinite Campus message
 * extractor, which renders `messageView.xsl` bodies into a flat string. Removing
 * `<script>`/`<style>` *content* first is load-bearing for safety: it ensures
 * inline JS/CSS text never leaks into the surfaced message body. That includes
 * blocks closed with `</script >` and blocks that are never closed at all (the
 * remainder of the input is dropped, as a browser would treat it).
 *
 * Character references are decoded in a single pass, so text that spells out
 * an entity (`&#38;lt;` → `&lt;`) is not decoded a second time. Unknown names
 * and code points above U+10FFFF are left as written instead of throwing.
 *
 * @param html The HTML (full document or fragment).
 * @returns The decoded, whitespace-collapsed plain text (`''` for empty input).
 */
export function extractPlainTextFromHtml(html) {
    if (!html) {
        return '';
    }
    return html
        .replace(SCRIPT_OR_STYLE_BLOCK, ' ')
        // Drop all remaining tags.
        .replace(/<[^>]+>/g, ' ')
        // Decode numeric and named character references in one pass.
        .replace(CHARACTER_REFERENCE, decodeCharacterReference)
        // Collapse whitespace runs and trim.
        .replace(/\s+/g, ' ')
        .trim();
}
253
/**
 * Reduce a portal URL (or path) to its `pathname + search` portion.
 *
 * Accepts an absolute URL (any host — only the path and query are kept; the
 * fragment is dropped), a path already starting with `/` (returned unchanged),
 * or a bare segment which is coerced to a leading-slash path.
 *
 * Input is treated as an absolute URL only when `new URL()` parses it **and**
 * the parsed pathname starts with `/`. Strings such as `localhost:3000/foo` or
 * `zip:94110` do parse — as a scheme followed by an opaque path — but their
 * pathname has no leading slash, so they go through the same coercion branch
 * as unparseable input. The result therefore always starts with `/` and the
 * function never throws.
 *
 * @example urlToPath('https://www.zillow.com/homedetails/foo/7_zpid/')
 * // '/homedetails/foo/7_zpid/'
 * @example urlToPath('homedetails/7_zpid/') // '/homedetails/7_zpid/'
 * @example urlToPath('/already/a/path/') // '/already/a/path/'
 * @example urlToPath('localhost:3000/foo') // '/localhost:3000/foo'
 */
export function urlToPath(input) {
    let parsed;
    try {
        parsed = new URL(input);
    }
    catch {
        parsed = undefined;
    }
    if (parsed !== undefined && parsed.pathname.startsWith('/')) {
        return `${parsed.pathname}${parsed.search}`;
    }
    return input.startsWith('/') ? input : `/${input}`;
}
275
/**
 * Slugify a free-text location into a portal search-URL segment.
 *
 * NFKD-normalises, strips the combining diacritical marks (U+0300–U+036F) that
 * normalisation splits off, lowercases, collapses runs of non-alphanumerics to
 * a single `-`, then trims leading/trailing `-`. A bare ZIP passes through
 * unchanged.
 *
 * The mark range is written with `\u` escapes rather than raw combining
 * characters: the raw form is invisible in most editors and can be silently
 * damaged by tooling that re-encodes the file.
 *
 * @example locationToSlug('Brooklyn, NY') // 'brooklyn-ny'
 * @example locationToSlug('Cañon City, CO') // 'canon-city-co'
 * @example locationToSlug('94110') // '94110'
 */
export function locationToSlug(input) {
    return input
        .normalize('NFKD')
        .replace(/[\u0300-\u036f]/g, '')
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+|-+$/g, '');
}
294
/**
 * Turn a regular expression into a reusable, stateless id-extractor.
 *
 * The returned function matches the pattern against a URL/path and gives back
 * capture group 1 when it is defined, otherwise the whole match. It returns
 * `undefined` for falsy input (`null`, `undefined`, `''`) and when nothing
 * matches.
 *
 * Statelessness comes from two things: the `g` and `y` flags are removed when
 * the extractor is built, and a brand-new `RegExp` is created on every call, so
 * no `lastIndex` can carry over between invocations and skip a match.
 *
 * @example buildIdExtractor(/\/home\/(\d+)(?:[/?#]|$)/)('/x/home/42/') // '42'
 * @example buildIdExtractor(/\/([A-Za-z0-9]+)_pid\/?$/)('/s/abc_pid/') // 'abc'
 */
export function buildIdExtractor(regex) {
    const { source } = regex;
    // Keep every flag except the two that make matching position-dependent.
    const statelessFlags = regex.flags.replace(/[gy]/g, '');
    return (input) => {
        if (!input) {
            return undefined;
        }
        const match = new RegExp(source, statelessFlags).exec(input);
        return match ? (match[1] ?? match[0]) : undefined;
    };
}
321
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/html/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,KAAK,EAAoB,MAAM,kBAAkB,CAAC;AAY3D,+EAA+E;AAC/E,SAAS,QAAQ,CAAC,EAAe;IAC/B,OAAO,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC7C,CAAC;AAED,gFAAgF;AAChF,SAAS,MAAM,CAAC,KAA2B;IACzC,OAAO,OAAO,KAAK,KAAK,QAAQ;QAC9B,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,gBAAgB,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;QAC3D,CAAC,CAAC,KAAK,CAAC;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,kBAAkB,CAAC,IAAiB,EAAE,OAAe;IAC5D,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC5C,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,EAAE,CAAC;QACxD,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,SAAS;QACrD,IAAI,GAAG,GAAuB,CAAC,CAAC,kBAAwC,CAAC;QACzE,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC;gBAAE,MAAM;YACxC,IAAI,GAAG,CAAC,OAAO,KAAK,OAAO;gBAAE,OAAO,GAAG,CAAC;YACxC,MAAM,MAAM,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;YAC1C,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;YAC1B,GAAG,GAAG,GAAG,CAAC,kBAAwC,CAAC;QACrD,CAAC;QACD,MAAM,MAAM,GAAG,CAAC,CAAC,UAAgC,CAAC;QAClD,IAAI,MAAM,IAAI,2BAA2B,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;YAC/D,MAAM,MAAM,GAAG,MAAM,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;YAC7C,IAAI,MAAM;gBAAE,OAAO,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,kBAAkB,CAChC,IAA0B,EAC1B,OAAe;IAEf,MAAM,KAAK,GAAG,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;IACxD,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,KAAK,GAAG,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC3C,MAAM,WAAW,GAAG,KAAK,IAAI,KAAK,CAAC;IACnC,MAAM,OAAO,GAAG,WAAW,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAEjE,MAAM,KAAK,GAAG,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC;IACpD,MAAM,IAAI,GAAG,KAAK;SACf,gBAAgB,CAAC,IAAI,CAAC;SACtB,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;SACxD,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC
,MAAM,GAAG,CAAC,CAAC,CAAC;IAEvC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC3B,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,qBAAqB,CACnC,IAA0B,EAC1B,OAAe,EACf,QAAQ,GAAG,GAAG;IAEd,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC5C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,EAAE,CAAC;QAC1D,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,SAAS;QACrD,MAAM,GAAG,GAAkB,EAAE,CAAC;QAC9B,IAAI,GAAG,GAAuB,CAAC,CAAC,kBAAwC,CAAC;QACzE,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC;gBAAE,MAAM;YACxC,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC;gBAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC3C,KAAK,MAAM,EAAE,IAAI,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC;gBAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC9D,GAAG,GAAG,GAAG,CAAC,kBAAwC,CAAC;QACrD,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,UAAU,mBAAmB,CACjC,IAAY,EACZ,MAAM,GAAG,mBAAmB;IAE5B,MAAM,UAAU,GAAG,CAAC,UAAU,MAAM,EAAE,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC;IACvD,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACX,GAAG,GAAG,CAAC,CAAC;YACR,SAAS,GAAG,CAAC,CAAC,MAAM,CAAC;YACrB,MAAM;QACR,CAAC;IACH,CAAC;IACD,IAAI,GAAG,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEzB,IAAI,KAAK,GAAG,GAAG,GAAG,SAAS,CAAC;IAC5B,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG;QAAE,KAAK,EAAE,CAAC;IAC3D,IAAI,KAAK,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,GAAG,KAAK,CAAC;YACf,SAAS;QACX,CAAC;QACD,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,EAAE,KAAK,IAAI;gBAAE,MAAM,GAAG,IAAI,CAAC;iBAC1B,IAAI,EAAE,KAAK,GAAG;gBAAE,QA
AQ,GAAG,KAAK,CAAC;YACtC,SAAS;QACX,CAAC;QACD,IAAI,EAAE,KAAK,GAAG;YAAE,QAAQ,GAAG,IAAI,CAAC;aAC3B,IAAI,EAAE,KAAK,GAAG;YAAE,KAAK,EAAE,CAAC;aACxB,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACpB,KAAK,EAAE,CAAC;YACR,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;gBACZ,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IACD,IAAI,GAAG,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEzB,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAA4B,CAAC;IACvE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,0EAA0E;AAC1E,MAAM,cAAc,GAA2B;IAC7C,IAAI,EAAE,GAAG;IACT,GAAG,EAAE,GAAG;IACR,EAAE,EAAE,GAAG;IACP,EAAE,EAAE,GAAG;IACP,IAAI,EAAE,GAAG;IACT,IAAI,EAAE,GAAG;CACV,CAAC;AAEF;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,wBAAwB,CAAC,IAAY;IACnD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,IAAI,IAAI,GAAG,IAAI;SACZ,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC;SAC3C,OAAO,CAAC,2BAA2B,EAAE,GAAG,CAAC,CAAC;IAC7C,2BAA2B;IAC3B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACrC,uDAAuD;IACvD,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,CAAS,EAAE,EAAE,CAChD,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAChC,CAAC;IACF,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAAC,EAAE,CAAS,EAAE,EAAE,CAC1D,MAAM,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CACtC,CAAC;IACF,oCAAoC;IACpC,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;QAC5D,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;QACnD,OAAO,OAAO,IAAI,KAAK,CAAC;IAC1B,CAAC,CAAC,CAAC;IACH,qCAAqC;IACrC,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC1C,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,SAAS,CAAC,KAAa;IACrC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;QACzB,OAAO,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC;IACrD,CAAC;AACH,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,OAAO,KAAK;SACT,SAAS,CAAC,MAAM,CAAC;SACjB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;SACrB
,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,gBAAgB,CAC9B,KAAa;IAEb,qEAAqE;IACrE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAC5B,OAAO,CAAC,KAAK,EAAE,EAAE;QACf,IAAI,CAAC,KAAK;YAAE,OAAO,SAAS,CAAC;QAC7B,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,IAAI,CAAC,CAAC;YAAE,OAAO,SAAS,CAAC;QACzB,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC,CAAC;AACJ,CAAC"}