html2any 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/_cli.js ADDED
@@ -0,0 +1,848 @@
1
+ #!/usr/bin/env node
2
+ import { readFile } from 'node:fs/promises';
3
+ import path from 'node:path';
4
+
5
/** Lower-case names of the HTML void elements, i.e. tags that never take a closing tag. */
const voidElementTags = [
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
  'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'
];

/**
 * Report whether a tag name denotes a void element.
 *
 * @param {string} tagName - Tag name in any letter case.
 * @returns {boolean} True when the lower-cased name is listed in voidElementTags.
 */
function isSelfClose(tagName) {
  const normalized = tagName.toLowerCase();
  return voidElementTags.includes(normalized);
}
24
/**
 * Decide whether an end-tag token closes a given start-tag token.
 *
 * HTML tag names are case-insensitive, so `<DIV>` is closed by `</div>`; the
 * names are therefore compared after lower-casing.
 *
 * @param {{type: string, name?: string}|undefined} tagX - Candidate opening token.
 * @param {{type: string, name?: string}|undefined} tagY - Candidate closing token.
 * @returns {boolean} True only for a start/end pair carrying the same tag name.
 */
function isPair(tagX, tagY) {
  // A missing token on either side can never form a pair.
  if (!tagX || !tagY || tagY.type === 'string') {
    return false;
  }
  if (tagX.type !== 'start' || tagY.type !== 'end') {
    return false;
  }
  return String(tagX.name).toLowerCase() === String(tagY.name).toLowerCase();
}
30
// Namespace object grouping the tag helpers; makeToken reaches isSelfClose
// through it and parse reaches isPair through it.
const utils = {
  isPair: isPair,
  isSelfClose: isSelfClose
};
34
+
35
// Elements whose content splitTokens keeps as a single text token up to the
// matching close tag instead of scanning it for nested tags.
const RAW_TEXT_TAGS = ['script', 'style', 'textarea', 'title'];
41
/**
 * Parse the attribute portion of a start tag (everything after the tag name)
 * into a plain object.
 *
 * Values may be double-quoted, single-quoted or unquoted. An attribute with no
 * value maps to `true`. When a name repeats, the last occurrence wins.
 *
 * @param {string} str - Text between the tag name and the closing `>`.
 * @returns {Object<string, string|true>} Attribute name to value map.
 */
function extraAttrs(str) {
  const attrs = {};
  const isSpace = (ch) => ch !== undefined && /\s/.test(ch);
  let i = 0;
  while (i < str.length) {
    while (isSpace(str[i])) i++;
    if (i >= str.length) {
      break;
    }
    if (str[i] === '/') {
      // A solidus is not part of any attribute (e.g. `<img/src="x">` or the
      // trailing `/` of `<br />`); skip it rather than abandoning the rest.
      i++;
      continue;
    }
    const nameStart = i;
    while (i < str.length && !/[\s=/>]/.test(str[i])) i++;
    const key = str.slice(nameStart, i);
    let value = true;
    while (isSpace(str[i])) i++;
    if (str[i] === '=') {
      i++;
      while (isSpace(str[i])) i++;
      const quote = str[i];
      if (quote === '"' || quote === "'") {
        const close = str.indexOf(quote, i + 1);
        // An unterminated quote consumes the remainder of the text.
        const valueEnd = close === -1 ? str.length : close;
        value = str.slice(i + 1, valueEnd);
        i = close === -1 ? str.length : close + 1;
      } else {
        const valueStart = i;
        while (i < str.length && !/[\s>]/.test(str[i])) i++;
        value = str.slice(valueStart, i);
      }
    }
    if (key) {
      attrs[key] = value;
    } else if (i === nameStart) {
      // Nothing was consumed (a stray `>` inside malformed markup): step over
      // the character so the scan always makes progress and cannot hang.
      i++;
    }
  }
  return attrs;
}
76
/**
 * Convert one raw chunk produced by splitTokens into a token object.
 *
 * @param {string} tag - A trimmed, non-empty chunk: either text or a `<...>` tag.
 * @returns {({type: 'string', value: string}
 *   | {type: 'end', name: string}
 *   | {type: 'start'|'self-close', name: string, attributes: Object}
 *   | null)} The token, or null for markup that produces no node (comments,
 *   doctype and other `<!...>` declarations, processing instructions, and
 *   tags without a name).
 */
function makeToken(tag) {
  const isTag = tag[0] === '<' && tag[tag.length - 1] === '>';
  if (!isTag) {
    return {
      type: 'string',
      value: tag
    };
  }
  // `<!-- -->`, `<!doctype>`, `<![CDATA[ ]]>`, other `<!...>` declarations and
  // `<?...?>` are not elements. getStartTagName applies the same rule.
  if (/^<[!?]/.test(tag)) {
    return null;
  }
  if (tag.startsWith('</')) {
    return {
      type: 'end',
      name: tag.slice(2, -1).trim().split(/\s+/)[0]
    };
  }
  const body = tag.slice(1, -1).trim();
  const match = body.match(/^([^\s/>]+)/);
  if (!match) {
    return null;
  }
  const tagName = match[1];
  const tagBody = body.slice(tagName.length);
  // Void elements and tags written with a trailing `/` never get a closing tag.
  const selfClosing = utils.isSelfClose(tagName) || tagBody[tagBody.length - 1] === '/';
  return {
    type: selfClosing ? 'self-close' : 'start',
    name: tagName,
    attributes: extraAttrs(tagBody)
  };
}
105
/**
 * Locate the `>` that terminates the tag opening at `start`, ignoring any `>`
 * found inside a single- or double-quoted run.
 *
 * @param {string} html - Markup being scanned.
 * @param {number} start - Index of the tag's opening `<`.
 * @returns {number} Index of the terminating `>`, or -1 when there is none.
 */
function findTagEnd(html, start) {
  let openQuote = null;
  let pos = start + 1;
  while (pos < html.length) {
    const ch = html[pos];
    if (openQuote !== null) {
      // Inside a quoted run only the matching quote character is significant.
      if (ch === openQuote) {
        openQuote = null;
      }
    } else if (ch === '>') {
      return pos;
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    }
    pos += 1;
  }
  return -1;
}
119
/**
 * Extract the element name from a start tag.
 *
 * @param {string} tag - A complete `<...>` tag.
 * @returns {string|null} The name with its original letter case, or null for
 *   end tags, `<!...>` declarations, `<?...?>` instructions and nameless tags.
 */
function getStartTagName(tag) {
  const nonElementPrefixes = ['</', '<!', '<?'];
  if (nonElementPrefixes.some((prefix) => tag.startsWith(prefix))) {
    return null;
  }
  const inner = tag.slice(1, -1).trim();
  const found = inner.match(/^([^\s/>]+)/);
  return found && found[1];
}
126
/**
 * Cut markup into an ordered list of raw chunks: text runs, comments and tags.
 *
 * The content of a raw-text element (see RAW_TEXT_TAGS) is emitted as a single
 * text chunk up to its close tag, so `<` inside a script or style is not
 * mistaken for markup. Scanning stops at an unterminated comment or tag, and
 * whatever remains is emitted as one final text chunk.
 *
 * @param {string} html - Source markup.
 * @returns {string[]} Raw chunks in document order.
 */
function splitTokens(html) {
  const tokens = [];
  let i = 0; // scan position
  let j = 0; // start of the text run that has not been emitted yet
  while (i < html.length) {
    if (html[i] !== '<') {
      i++;
      continue;
    }
    if (html.startsWith('<!--', i)) {
      const commentEnd = html.indexOf('-->', i + 4);
      if (commentEnd === -1) break;
      if (j < i) {
        tokens.push(html.slice(j, i));
      }
      tokens.push(html.slice(i, commentEnd + 3));
      i = j = commentEnd + 3;
      continue;
    }
    if (j < i) {
      tokens.push(html.slice(j, i));
      j = i;
    }
    const k = findTagEnd(html, i);
    if (k === -1) {
      break;
    }
    const tag = html.slice(i, k + 1);
    tokens.push(tag);
    const tagName = getStartTagName(tag);
    if (tagName && RAW_TEXT_TAGS.indexOf(tagName.toLowerCase()) > -1) {
      // Search the ORIGINAL string case-insensitively. Looking the close tag up
      // in `html.toLowerCase()` is unsafe because lower-casing can change the
      // string length (e.g. U+0130), which shifts every later index. It also
      // lower-cased the whole document once per raw-text element.
      // tagName lower-cases to a RAW_TEXT_TAGS entry, so it holds only ASCII
      // letters and needs no regex escaping.
      const closePattern = new RegExp('</' + tagName, 'gi');
      closePattern.lastIndex = k + 1;
      const closeMatch = closePattern.exec(html);
      if (closeMatch) {
        const closeTagStart = closeMatch.index;
        const closeTagEnd = findTagEnd(html, closeTagStart);
        if (closeTagEnd > -1) {
          if (k + 1 < closeTagStart) {
            tokens.push(html.slice(k + 1, closeTagStart));
          }
          tokens.push(html.slice(closeTagStart, closeTagEnd + 1));
          i = j = closeTagEnd + 1;
          continue;
        }
      }
    }
    i = j = k + 1;
  }
  if (j < html.length) {
    tokens.push(html.slice(j));
  }
  return tokens;
}
177
/**
 * Turn markup into a flat token list.
 *
 * Each raw chunk from splitTokens is trimmed. Chunks that end up empty and
 * chunks for which makeToken returns null are left out.
 *
 * @param {string} html - Source markup.
 * @returns {Array<Object>} Tokens in document order.
 */
function tokenize(html) {
  const tokens = [];
  for (const raw of splitTokens(html)) {
    const text = raw.replace(/^\n+$/g, '').trim();
    if (!text) {
      continue;
    }
    const token = makeToken(text);
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
180
+
181
/**
 * @param {Array} stack - Any array.
 * @returns {boolean} True when the array holds no elements.
 */
function isEmpty(stack) {
  const { length } = stack;
  return length === 0;
}
184
/**
 * @param {Array} stack - Any array.
 * @returns {*} The last element, or undefined when the array is empty.
 */
function getTop(stack) {
  const lastIndex = stack.length - 1;
  return stack[lastIndex];
}
187
/**
 * Append a child to a node, creating the node's `children` array on first use.
 * The child is passed through filterProps before it is stored.
 *
 * @param {Object} node - Parent node; mutated in place.
 * @param {Object|string} child - Element token or text to attach.
 * @returns {void}
 */
function appendChild(node, child) {
  const kids = node.children || (node.children = []);
  kids.push(filterProps(child));
}
193
/**
 * Reduce a token to the fields kept in the output tree.
 *
 * @param {Object|string} node - Element token or text.
 * @returns {Object|string} Text unchanged; otherwise a new object carrying only
 *   `name`, `children` and `attributes`.
 */
function filterProps(node) {
  if (typeof node !== 'string') {
    const { name, children, attributes } = node;
    return { name, children, attributes };
  }
  return node;
}
203
/**
 * Build a node tree from markup.
 *
 * Text and self-closing tags are attached to the element currently open. An
 * end tag closes the nearest open element with the same name, compared
 * case-insensitively. Elements opened inside it and never closed (for example
 * `<li>` or `<p>` without end tags) are closed along with it, so a single
 * missing end tag no longer captures the rest of the document. An end tag
 * with no matching open element is ignored. Elements still open at the end of
 * input are closed in order.
 *
 * @param {string} src - Source markup.
 * @returns {Array<Object|string>} Top-level nodes; elements are
 *   `{name, children, attributes}` objects and text nodes are strings.
 */
function parse(src) {
  const tokens = tokenize(src);
  const tree = {
    type: 'root',
    children: [],
    name: 'root',
    attributes: {}
  };
  // Index 0 is always the synthetic root; it is never popped or matched.
  const stack = [tree];
  const sameName = (a, b) => String(a).toLowerCase() === String(b).toLowerCase();
  const closeTop = () => {
    const node = stack.pop();
    appendChild(getTop(stack), node);
  };
  // Iterate rather than repeatedly shift(): shifting a large array is O(n)
  // per call, which made parsing quadratic in the token count.
  for (const curr of tokens) {
    if (curr.type === 'string') {
      appendChild(getTop(stack), curr.value);
    } else if (curr.type === 'self-close') {
      appendChild(getTop(stack), curr);
    } else if (curr.type === 'start') {
      stack.push(curr);
    } else if (curr.type === 'end') {
      let open = stack.length - 1;
      while (open > 0 && !sameName(stack[open].name, curr.name)) {
        open--;
      }
      if (open > 0) {
        // Close the matched element and everything left open inside it.
        while (stack.length > open) {
          closeTop();
        }
      }
    }
  }
  while (stack.length > 1) {
    closeTop();
  }
  return tree.children || [];
}
235
+
236
// Elements that shouldDrop rejects outright, together with their subtree.
const DROP_TAGS = new Set(['script', 'style', 'noscript', 'template', 'iframe', 'svg', 'canvas']);

// Elements that extractContext hands to collectNavigation only.
const NAV_TAGS = new Set(['nav']);

// Page furniture: extractContext collects links and actions from these
// elements but does not add their text to any section.
const CHROME_TAGS = new Set(['header', 'footer', 'aside']);

// Containers that extractContext may flatten into a single content entry when
// they hold no heading, list, paragraph, pre, table or form child.
const BLOCK_TAGS = new Set([
  'article', 'blockquote', 'dd', 'details', 'div', 'dl', 'dt',
  'figcaption', 'figure', 'li', 'main', 'p', 'section', 'summary'
]);

// Heading elements; each non-empty heading starts a new section.
const HEADING_TAGS = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']);

// List containers rendered by listToMarkdown.
const LIST_TAGS = new Set(['ul', 'ol']);
281
/**
 * Lower-cased element name of a node.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {string} The name in lower case, or '' for text nodes, missing
 *   nodes and nodes without a name.
 */
function tagName(node) {
  const isElement = Boolean(node) && typeof node !== 'string';
  return isElement ? String(node.name || '').toLowerCase() : '';
}
287
/**
 * Attribute map of a node.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {Object} The node's `attributes`, or a fresh empty object for text
 *   nodes, missing nodes and nodes without attributes.
 */
function attrs(node) {
  if (!node || typeof node === 'string') {
    return {};
  }
  return node.attributes || {};
}
290
// Named character references understood by decodeEntity, mapped to plain-text
// replacements. Built once instead of on every call.
const NAMED_ENTITIES = Object.freeze({
  amp: '&',
  apos: "'",
  copy: '(c)',
  hellip: '...',
  gt: '>',
  lt: '<',
  mdash: '--',
  nbsp: ' ',
  ndash: '-',
  reg: '(r)',
  rsquo: "'",
  lsquo: "'",
  rdquo: '"',
  ldquo: '"',
  trade: '(tm)',
  quot: '"'
});

/**
 * Decode the body of one character reference (the text between `&` and `;`).
 *
 * @param {string} entity - For example `amp`, `#65` or `#x41`.
 * @returns {string} The replacement text, or the reference re-assembled as
 *   `&...;` when it is unknown or does not denote a valid code point.
 */
function decodeEntity(entity) {
  if (entity[0] === '#') {
    const isHex = Boolean(entity[1]) && entity[1].toLowerCase() === 'x';
    const code = isHex ? parseInt(entity.slice(2), 16) : parseInt(entity.slice(1), 10);
    // String.fromCodePoint throws RangeError outside 0..0x10FFFF, so a
    // reference such as `&#99999999;` must be left as written instead.
    const isCodePoint = Number.isInteger(code) && code >= 0 && code <= 0x10ffff;
    return isCodePoint ? String.fromCodePoint(code) : `&${entity};`;
  }
  // Own-property check keeps names like `constructor` from resolving to
  // something inherited from Object.prototype.
  return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, entity)
    ? NAMED_ENTITIES[entity]
    : `&${entity};`;
}
315
/**
 * Replace character references in a string using decodeEntity.
 *
 * Recognised forms are `&name;`, `&#123;` and hexadecimal `&#x7b;` / `&#X7B;`.
 * Anything else is left untouched.
 *
 * @param {*} value - Text to decode; falsy values are treated as ''.
 * @returns {string} The decoded text.
 */
function decodeHtml(value) {
  // `[xX]`: the hexadecimal marker is case-insensitive in HTML, and
  // decodeEntity already accepts both spellings.
  const entityPattern = /&([a-zA-Z][a-zA-Z0-9]+|#[0-9]+|#[xX][0-9a-fA-F]+);/g;
  return String(value || '').replace(entityPattern, (_, entity) => decodeEntity(entity));
}
318
/**
 * Decode character references, collapse every whitespace run to one space and
 * trim both ends.
 *
 * @param {*} value - Text to normalise.
 * @returns {string} Single-line text.
 */
function compactText(value) {
  const decoded = decodeHtml(value);
  const singleSpaced = decoded.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
321
/**
 * Decode character references and tidy multi-line text: CR and CRLF become LF,
 * trailing whitespace is removed from each line, and the whole result is
 * trimmed. Leading whitespace on inner lines is kept.
 *
 * @param {*} value - Text to normalise.
 * @returns {string} Multi-line text.
 */
function compactLines(value) {
  const unixText = decodeHtml(value).replace(/\r\n?/g, '\n');
  const trimmedLines = [];
  for (const line of unixText.split('\n')) {
    trimmedLines.push(line.trimEnd());
  }
  return trimmedLines.join('\n').trim();
}
324
/**
 * Like compactText, and additionally removes whitespace that sits directly
 * before `.`, `,`, `;`, `:`, `!` or `?`.
 *
 * @param {*} value - Text to normalise.
 * @returns {string} Single-line text.
 */
function compactInline(value) {
  const flattened = compactText(value);
  return flattened.replace(/\s+([.,;:!?])/g, '$1');
}
327
/**
 * Report whether a node is marked as not rendered.
 *
 * A node counts as hidden when it carries a `hidden` attribute, has
 * `aria-hidden="true"`, or has an inline style containing `display: none` or
 * `visibility: hidden`.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {boolean} True when the node should be treated as hidden.
 */
function isHidden(node) {
  const nodeAttrs = attrs(node);
  // `hidden` is a boolean attribute: its presence hides the element whatever
  // its value, so `hidden=""` and `hidden="hidden"` count as well as the
  // valueless form (which the attribute parser stores as `true`).
  if (Object.prototype.hasOwnProperty.call(nodeAttrs, 'hidden')) {
    return true;
  }
  if (String(nodeAttrs['aria-hidden']).toLowerCase() === 'true') {
    return true;
  }
  const style = String(nodeAttrs.style || '').toLowerCase();
  return /display\s*:\s*none/.test(style) || /visibility\s*:\s*hidden/.test(style);
}
332
/**
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {boolean} True when the node's tag is in DROP_TAGS or isHidden
 *   reports it as hidden.
 */
function shouldDrop(node) {
  if (DROP_TAGS.has(tagName(node))) {
    return true;
  }
  return isHidden(node);
}
335
/**
 * Child list of a node.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {Array} The node's `children` when that is an array, otherwise a
 *   fresh empty array.
 */
function childrenOf(node) {
  if (!node || typeof node === 'string') {
    return [];
  }
  return Array.isArray(node.children) ? node.children : [];
}
338
/**
 * Plain text of a node and its descendants.
 *
 * Dropped or hidden nodes yield ''. `<br>` yields a newline. `<img>` yields
 * its `alt` text, or else its `title`. Child texts are joined with a space,
 * or with a newline when `options.preserveLines` is set.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @param {{preserveLines?: boolean}} [options] - When `preserveLines` is true,
 *   line breaks are kept and only trailing whitespace per line is removed.
 * @returns {string} The extracted text.
 */
function textOf(node, options = {}) {
  if (typeof node === 'string') {
    // Text leaves are the only place where character references are decoded.
    return options.preserveLines ? compactLines(node) : compactText(node);
  }
  if (!node || shouldDrop(node)) {
    return '';
  }
  const name = tagName(node);
  if (name === 'br') {
    return '\n';
  }
  if (name === 'img') {
    return compactText(attrs(node).alt || attrs(node).title || '');
  }
  const parts = childrenOf(node).map((child) => textOf(child, options)).filter(Boolean);
  // The parts are already decoded. Decoding the joined text again at every
  // nesting level turned source text such as `&amp;lt;` (meant to display
  // `&lt;`) into `<`, so only whitespace is normalised here.
  if (options.preserveLines) {
    return parts
      .join('\n')
      .replace(/\r\n?/g, '\n')
      .split('\n')
      .map((line) => line.trimEnd())
      .join('\n')
      .trim();
  }
  return parts.join(' ').replace(/\s+/g, ' ').trim();
}
355
/**
 * Render a node as one line of Markdown-flavoured inline text.
 *
 * `<code>` becomes a backtick span, `<a>` becomes `[label](href)` and is also
 * recorded in `links`, `<img>` becomes its alt or title text, `<br>` becomes a
 * newline. Any other element is the space-joined rendering of its children.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @param {Array<{label: string, href: string}>} links - Receives every link
 *   found that has both a label and an href; mutated in place.
 * @returns {string} The inline text, '' for dropped or missing nodes.
 */
function inlineText(node, links) {
  if (typeof node === 'string') {
    return compactText(node);
  }
  if (!node || shouldDrop(node)) {
    return '';
  }
  const name = tagName(node);
  const nodeAttrs = attrs(node);
  if (name === 'br') {
    return '\n';
  }
  if (name === 'code') {
    const code = textOf(node);
    return code ? `\`${code.replace(/`/g, '\\`')}\`` : '';
  }
  if (name === 'a') {
    const label = textOf(node) || compactText(nodeAttrs.href || '');
    const href = compactText(nodeAttrs.href || '');
    if (label && href) {
      links.push({
        label,
        href
      });
      return `[${escapeMarkdown(label)}](${href})`;
    }
    return label;
  }
  if (name === 'img') {
    return compactText(nodeAttrs.alt || nodeAttrs.title || '');
  }
  const joined = childrenOf(node)
    .map((child) => inlineText(child, links))
    .filter(Boolean)
    .join(' ');
  // Every part is already decoded. Normalise whitespace and pull punctuation
  // back against the preceding word, but do not decode character references a
  // second time (that turned `&amp;lt;` into `<`).
  return joined.replace(/\s+/g, ' ').trim().replace(/\s+([.,;:!?])/g, '$1');
}
388
/**
 * Backslash-escape square brackets so the text can be used as a Markdown link
 * label.
 *
 * @param {*} value - Text to escape; converted with String().
 * @returns {string} The text with `[` and `]` escaped.
 */
function escapeMarkdown(value) {
  const text = String(value);
  const escaped = text.replace(/([\[\]])/g, '\\$1');
  return escaped;
}
391
/**
 * Append an item unless the list already holds one with the same key.
 *
 * @param {Array} list - Target list; mutated in place.
 * @param {*} item - Candidate; ignored when falsy or when its key is falsy.
 * @param {function(*): *} key - Derives the identity value compared with `===`.
 * @returns {void}
 */
function pushUnique(list, item, key) {
  if (!item || !key(item)) {
    return;
  }
  const duplicate = list.some((existing) => key(existing) === key(item));
  if (!duplicate) {
    list.push(item);
  }
}
399
/**
 * Collect page-level metadata from the tree.
 *
 * For each field the first value encountered in document order wins:
 * - title: `<title>` text or `<meta og:title>` content;
 * - description: `<meta description>` or `<meta og:description>` content;
 * - url: `sourceUrl` when given, otherwise `<meta og:url>` content or
 *   `<link rel="canonical">` href.
 *
 * @param {Array<Object|string>} roots - Top-level nodes.
 * @param {string} [sourceUrl] - Known source URL; takes precedence.
 * @returns {{title: string, description: string, url: string}} Page metadata.
 */
function extractMeta(roots, sourceUrl) {
  const page = {
    title: '',
    description: '',
    url: sourceUrl || ''
  };
  const walk = (node) => {
    if (!node || typeof node === 'string') {
      return;
    }
    const nodeAttrs = attrs(node);
    switch (tagName(node)) {
      case 'title':
        if (!page.title) {
          page.title = textOf(node);
        }
        break;
      case 'meta': {
        const metaName = String(nodeAttrs.name || nodeAttrs.property || '').toLowerCase();
        const content = () => compactText(nodeAttrs.content || '');
        const isDescription = metaName === 'description' || metaName === 'og:description';
        if (isDescription && !page.description) {
          page.description = content();
        } else if (metaName === 'og:title' && !page.title) {
          page.title = content();
        } else if (metaName === 'og:url' && !page.url) {
          page.url = content();
        }
        break;
      }
      case 'link':
        if (String(nodeAttrs.rel || '').toLowerCase() === 'canonical' && !page.url) {
          page.url = compactText(nodeAttrs.href || '');
        }
        break;
      default:
        break;
    }
    for (const child of childrenOf(node)) {
      walk(child);
    }
  };
  for (const root of roots) {
    walk(root);
  }
  return page;
}
430
/**
 * Collect table rows as arrays of cell text.
 *
 * Every `<tr>` found under `node` contributes one row built from its direct
 * `<td>`/`<th>` children. Empty cells are kept as '' so each value stays in
 * its own column. Rows whose cells are all empty are skipped.
 *
 * @param {Object|string|null|undefined} node - Usually a `<table>` node.
 * @returns {string[][]} Rows in document order.
 */
function extractRows(node) {
  const rows = [];
  function visit(rowNode) {
    if (!rowNode || typeof rowNode === 'string' || shouldDrop(rowNode)) {
      return;
    }
    if (tagName(rowNode) === 'tr') {
      const cells = childrenOf(rowNode)
        .filter((child) => ['td', 'th'].includes(tagName(child)))
        .map((cell) => textOf(cell));
      // Dropping empty cells shifted every later cell one column to the left;
      // the row is only discarded when it carries no text at all.
      if (cells.some(Boolean)) {
        rows.push(cells);
      }
      return;
    }
    childrenOf(rowNode).forEach(visit);
  }
  visit(node);
  return rows;
}
451
/**
 * Render rows as a Markdown pipe table.
 *
 * The first row is used as the header. Short rows are padded with empty cells
 * to the width of the longest row, and `|` inside a cell is escaped.
 *
 * @param {string[][]} rows - Table rows.
 * @returns {string} The table, or '' when there are no rows.
 */
function tableToMarkdown(rows) {
  if (rows.length === 0) {
    return '';
  }
  const width = Math.max(...rows.map((row) => row.length));
  const grid = rows.map((row) => {
    const cells = [];
    for (let column = 0; column < width; column++) {
      cells.push(compactText(row[column] || ''));
    }
    return cells;
  });
  const [header, ...body] = grid;
  const separator = header.map(() => '---');
  const renderRow = (row) => {
    const escapedCells = row.map((cell) => cell.replace(/\|/g, '\\|'));
    return `| ${escapedCells.join(' | ')} |`;
  };
  return [header, separator, ...body].map(renderRow).join('\n');
}
467
/**
 * Render a `<ul>` or `<ol>` node as Markdown list lines.
 *
 * Only direct `<li>` children are rendered. Items of an `<ol>` are numbered
 * from 1 by position; other lists use `-`. Lists nested inside an item are
 * rendered recursively at `depth + 1` and placed below the item's own line.
 *
 * @param {Object} node - List node.
 * @param {number} [depth=0] - Nesting level; sets the indentation prefix.
 * @returns {string} Newline-joined list lines.
 */
function listToMarkdown(node, depth = 0) {
  const ordered = tagName(node) === 'ol';
  const indent = ' '.repeat(depth);
  const items = childrenOf(node).filter((child) => tagName(child) === 'li');
  const rendered = items.map((item, index) => {
    const links = []; // required by inlineText; the collected links are not used here
    const textParts = [];
    const sublists = [];
    for (const part of childrenOf(item)) {
      if (LIST_TAGS.has(tagName(part))) {
        const sublist = listToMarkdown(part, depth + 1);
        if (sublist) {
          sublists.push(sublist);
        }
      } else {
        const text = inlineText(part, links);
        if (text) {
          textParts.push(text);
        }
      }
    }
    const direct = textParts.join(' ');
    const nested = sublists.join('\n');
    const marker = ordered ? `${index + 1}.` : '-';
    const line = `${indent}${marker} ${compactText(direct || textOf(item))}`;
    return nested ? `${line}\n${nested}` : line;
  });
  return rendered.filter(Boolean).join('\n');
}
478
/**
 * Describe a form control as a field record.
 *
 * @param {Object|string|null|undefined} node - Tree node.
 * @returns {{name: string, label: string, type: string, required: boolean}|null}
 *   The field, or null when the node is not an `<input>`, `<select>` or
 *   `<textarea>`, or is an input of type hidden, submit, button or reset.
 */
function fieldFromInput(node) {
  const name = tagName(node);
  if (!['input', 'select', 'textarea'].includes(name)) {
    return null;
  }
  const nodeAttrs = attrs(node);
  const controlType = String(nodeAttrs.type || '').toLowerCase();
  if (['hidden', 'submit', 'button', 'reset'].includes(controlType)) {
    return null;
  }
  return {
    name: compactText(nodeAttrs.name || nodeAttrs.id || ''),
    label: compactText(nodeAttrs['aria-label'] || nodeAttrs.placeholder || ''),
    type: compactText(nodeAttrs.type || name),
    // `required` is a boolean attribute: presence is what matters, so
    // `required=""` and `required="required"` count as well as the valueless
    // form (which the attribute parser stores as `true`).
    required: Object.prototype.hasOwnProperty.call(nodeAttrs, 'required')
  };
}
503
/**
 * Summarise a form subtree.
 *
 * Walks every descendant that is not dropped, collecting field records from
 * fieldFromInput and submit candidates: any `<button>`, plus `<input>` of
 * type submit or button. Only the first candidate is reported.
 *
 * @param {Object|string|null|undefined} node - Usually a `<form>` node.
 * @returns {{fields: Array<Object>, submit: ({label: string, role: string}|null)}}
 */
function extractForm(node) {
  const fields = [];
  const submitCandidates = [];
  const submitInputTypes = ['submit', 'button'];
  const walk = (child) => {
    if (!child || typeof child === 'string' || shouldDrop(child)) {
      return;
    }
    const name = tagName(child);
    const nodeAttrs = attrs(child);
    const field = fieldFromInput(child);
    if (field) {
      fields.push(field);
    }
    const isSubmitControl = name === 'button'
      || (name === 'input' && submitInputTypes.includes(String(nodeAttrs.type || '').toLowerCase()));
    if (isSubmitControl) {
      submitCandidates.push({
        label: compactText(textOf(child) || nodeAttrs.value || nodeAttrs['aria-label'] || 'submit'),
        role: 'submit'
      });
    }
    for (const grandchild of childrenOf(child)) {
      walk(grandchild);
    }
  };
  walk(node);
  return {
    fields,
    submit: submitCandidates.length > 0 ? submitCandidates[0] : null
  };
}
533
/**
 * Create an empty section record.
 *
 * @param {string} [heading=''] - Section heading text.
 * @param {number} [level=1] - Heading level.
 * @returns {{heading: string, level: number, summary: string, content: Array,
 *   code_examples: Array, links: Array}} A section with fresh, empty lists.
 */
function createSection(heading = '', level = 1) {
  const section = { heading, level, summary: '' };
  section.content = [];
  section.code_examples = [];
  section.links = [];
  return section;
}
543
/**
 * Parse markup and extract a structured description of the page.
 *
 * The tree is walked in document order. Each non-empty heading starts a new
 * section; paragraphs, lists, tables and flattened blocks become content
 * entries of the current section; `<pre>` blocks become code examples.
 * `<nav>` contributes navigation links only. `<header>`, `<footer>` and
 * `<aside>` contribute navigation links and actions but no content.
 *
 * @param {string} html - Source markup.
 * @param {{url?: string}} [options] - `url` is recorded as the page URL.
 * @returns {{page: Object, sections: Array<Object>, actions: Array<Object>,
 *   forms: Array<Object>, navigation: Array<Object>, code_examples: Array<Object>}}
 */
function extractContext(html, options = {}) {
  const roots = parse(html);
  const page = extractMeta(roots, options.url || '');
  const sections = [];
  const actions = [];
  const forms = [];
  const navigation = [];
  const codeExamples = [];
  let current = createSection('', 1);

  // Whitespace-only tidy-up: CR/CRLF to LF, trailing whitespace off each line,
  // trimmed overall. Everything handed to addContent/addCode has already had
  // its character references decoded, so it must not be decoded a second time
  // (that turned source text such as `&amp;lt;` into `<`).
  function tidyLines(value) {
    return String(value || '')
      .replace(/\r\n?/g, '\n')
      .split('\n')
      .map((line) => line.trimEnd())
      .join('\n')
      .trim();
  }
  // Store the current section if it gathered anything; its summary is the
  // first content entry.
  function commitSection() {
    if (current.content.length || current.code_examples.length || current.links.length) {
      current.summary = current.content.find(Boolean) || '';
      sections.push(current);
    }
  }
  // Add a content entry to the current section, skipping empties and exact repeats.
  function addContent(value) {
    const text = tidyLines(value);
    if (text && !current.content.includes(text)) {
      current.content.push(text);
    }
  }
  // Record a code example on both the current section and the page-wide list.
  function addCode(code, language = '') {
    const cleanCode = tidyLines(code);
    if (!cleanCode) {
      return;
    }
    const item = {
      language: language || '',
      code: cleanCode,
      section: current.heading
    };
    current.code_examples.push(item);
    codeExamples.push(item);
  }
  // Attach links found by inlineText to the current section, without duplicates.
  function addLinks(links) {
    links.forEach((link) => pushUnique(current.links, link, (item) => `${item.label}\n${item.href}`));
  }
  function visit(node, inChrome = false) {
    if (!node || typeof node === 'string' || shouldDrop(node)) {
      return;
    }
    const name = tagName(node);
    const chrome = inChrome || CHROME_TAGS.has(name);
    if (NAV_TAGS.has(name)) {
      collectNavigation(node, navigation);
      return;
    }
    if (chrome) {
      collectNavigation(node, navigation);
      collectActions(node, actions);
      return;
    }
    if (HEADING_TAGS.has(name)) {
      const heading = textOf(node);
      if (heading) {
        commitSection();
        // 'h3' -> level 3
        current = createSection(heading, Number(name.slice(1)));
      }
      return;
    }
    if (name === 'pre') {
      const codeNode = childrenOf(node).find((child) => tagName(child) === 'code');
      const languageClass = compactText(attrs(codeNode).class || attrs(codeNode).className || attrs(node).class || '');
      addCode(textOf(codeNode || node, {
        preserveLines: true
      }), languageClass.replace(/^language-/, ''));
      return;
    }
    if (name === 'table') {
      const table = tableToMarkdown(extractRows(node));
      if (table) {
        addContent(table);
      }
      return;
    }
    if (LIST_TAGS.has(name)) {
      addContent(listToMarkdown(node));
      return;
    }
    if (name === 'form') {
      forms.push(extractForm(node));
      childrenOf(node).forEach((child) => visit(child, chrome));
      return;
    }
    if (name === 'a' || name === 'button') {
      collectAction(node, actions);
    }
    if (name === 'p' || name === 'blockquote' || name === 'summary' || name === 'figcaption') {
      const links = [];
      const text = inlineText(node, links);
      addLinks(links);
      addContent(text);
      return;
    }
    // A <code> element holding only text becomes an inline code entry.
    if (name === 'code' && !childrenOf(node).some((child) => typeof child !== 'string')) {
      addContent(`\`${textOf(node)}\``);
      return;
    }
    if (BLOCK_TAGS.has(name)) {
      const blockChildren = childrenOf(node);
      const hasStructuredChild = blockChildren.some((child) => {
        const childName = tagName(child);
        return HEADING_TAGS.has(childName) || LIST_TAGS.has(childName) || [
          'p',
          'pre',
          'table',
          'form'
        ].includes(childName);
      });
      // A block without structured children is flattened into one entry;
      // otherwise its children are visited one by one below.
      if (!hasStructuredChild && textOf(node)) {
        const links = [];
        const text = inlineText(node, links);
        addLinks(links);
        addContent(text);
        return;
      }
    }
    childrenOf(node).forEach((child) => visit(child, chrome));
  }
  roots.forEach((root) => visit(root));
  commitSection();
  return {
    page,
    sections: sections.filter((section) => section.content.length || section.code_examples.length || section.heading !== 'Page'),
    actions,
    forms: forms.filter((form) => form.fields.length || form.submit),
    navigation,
    code_examples: codeExamples
  };
}
671
/**
 * Gather every link under a node into the navigation list.
 *
 * Each `<a>` that has both text and an href is added once per distinct
 * label/href pair. Dropped subtrees are skipped.
 *
 * @param {Object|string|null|undefined} node - Subtree root.
 * @param {Array<{label: string, href: string}>} navigation - Mutated in place.
 * @returns {void}
 */
function collectNavigation(node, navigation) {
  const isElement = Boolean(node) && typeof node !== 'string';
  if (!isElement || shouldDrop(node)) {
    return;
  }
  if (tagName(node) === 'a') {
    const label = textOf(node);
    const href = compactText(attrs(node).href || '');
    if (label && href) {
      const identity = (item) => `${item.label}\n${item.href}`;
      pushUnique(navigation, { label, href }, identity);
    }
  }
  for (const child of childrenOf(node)) {
    collectNavigation(child, navigation);
  }
}
687
/**
 * Record one link or button as an action.
 *
 * The label is the node's text, else its `value`, `aria-label` or `title`.
 * The role is the `role` attribute, else `button` for `<button>`, else `link`
 * when an href is present. An action is stored only when it has a label and
 * either an href or a role, and only once per label/href/role combination.
 *
 * @param {Object} node - An `<a>` or `<button>` node.
 * @param {Array<Object>} actions - Mutated in place.
 * @returns {void}
 */
function collectAction(node, actions) {
  const nodeAttrs = attrs(node);
  const isButton = tagName(node) === 'button';
  const label = compactText(textOf(node) || nodeAttrs.value || nodeAttrs['aria-label'] || nodeAttrs.title || '');
  const href = compactText(nodeAttrs.href || '');
  let impliedRole = '';
  if (isButton) {
    impliedRole = 'button';
  } else if (href) {
    impliedRole = 'link';
  }
  const role = compactText(nodeAttrs.role || impliedRole);
  if (!label || !(href || role)) {
    return;
  }
  const action = { label, role, href, selector: selectorFor(node) };
  pushUnique(actions, action, (item) => `${item.label}\n${item.href}\n${item.role}`);
}
702
/**
 * Apply collectAction to every `<a>` and `<button>` under a node, skipping
 * dropped subtrees.
 *
 * @param {Object|string|null|undefined} node - Subtree root.
 * @param {Array<Object>} actions - Mutated in place.
 * @returns {void}
 */
function collectActions(node, actions) {
  const isElement = Boolean(node) && typeof node !== 'string';
  if (!isElement || shouldDrop(node)) {
    return;
  }
  const name = tagName(node);
  if (name === 'a' || name === 'button') {
    collectAction(node, actions);
  }
  for (const child of childrenOf(node)) {
    collectActions(child, actions);
  }
}
714
/**
 * Build a CSS selector that points at a node.
 *
 * Preference order: id, then `name` attribute, then `href` attribute, then the
 * bare tag name.
 *
 * @param {Object} node - Element node.
 * @returns {string} A selector string.
 */
function selectorFor(node) {
  const nodeAttrs = attrs(node);
  // Escape for use inside a double-quoted CSS string: backslash and quote are
  // backslash-escaped, and a line break is written as the `\a ` escape.
  const cssString = (value) => String(value)
    .replace(/["\\]/g, '\\$&')
    .replace(/\r\n?|\n/g, '\\a ');
  if (nodeAttrs.id) {
    const id = String(nodeAttrs.id);
    // `#id` is only valid for identifier-like ids; anything else (dots,
    // colons, spaces, leading digits) would form a different or invalid
    // selector, so fall back to an attribute selector.
    return /^[A-Za-z_][\w-]*$/.test(id) ? `#${id}` : `[id="${cssString(id)}"]`;
  }
  if (nodeAttrs.name) {
    return `${tagName(node)}[name="${cssString(nodeAttrs.name)}"]`;
  }
  if (nodeAttrs.href) {
    return `${tagName(node)}[href="${cssString(nodeAttrs.href)}"]`;
  }
  return tagName(node);
}
727
/**
 * Render an extracted page context as Markdown.
 *
 * Output order: `# title`, description, `Source: url`, then each section as a
 * heading (levels clamped to 2..6, omitted when it repeats the page title),
 * its content entries and its fenced code examples. Runs of blank lines are
 * collapsed and the result ends with exactly one newline.
 *
 * @param {{page: {title: string, description: string, url: string},
 *   sections: Array<{heading: string, level: number, content: string[],
 *   code_examples: Array<{language: string, code: string}>}>}} context
 * @returns {string} The Markdown document.
 */
function renderMarkdown(context) {
  const { page, sections } = context;
  // A fence must be longer than any backtick run in the code it wraps,
  // otherwise a ``` inside the example would close the block early.
  const fenceFor = (code) => {
    const runs = String(code).match(/`+/g) || [];
    const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
    return '`'.repeat(Math.max(3, longest + 1));
  };
  const lines = [];
  if (page.title) {
    lines.push(`# ${page.title}`);
  }
  if (page.description) {
    lines.push(page.description);
  }
  if (page.url) {
    lines.push(`Source: ${page.url}`);
  }
  sections.forEach((section) => {
    if (section.heading && section.heading !== page.title) {
      lines.push('', `${'#'.repeat(Math.min(Math.max(section.level, 2), 6))} ${section.heading}`);
    }
    section.content.forEach((item) => {
      lines.push('', item);
    });
    section.code_examples.forEach((example) => {
      const fence = fenceFor(example.code);
      lines.push('', `${fence}${example.language || ''}`, example.code, fence);
    });
  });
  // Keep a blank line only when the previous line was not blank as well.
  const collapsed = lines.filter((line, index) => line !== '' || lines[index - 1] !== '');
  return `${collapsed.join('\n').trim()}\n`;
}
751
/**
 * Convert markup to Markdown by extracting the page context and rendering it.
 *
 * @param {string} html - Source markup.
 * @param {{url?: string}} [options] - Forwarded to extractContext.
 * @returns {string} The Markdown document.
 */
function htmlToMarkdown(html, options = {}) {
  const context = extractContext(html, options);
  return renderMarkdown(context);
}
754
+
755
// Help text printed by main for --help and for invalid invocations.
const USAGE = [
  'Usage:',
  'html2any md <file|url|->',
  'html2 md <file|url|->',
  '',
  'Options:',
  '--url <url> Set source URL metadata for stdin or local files',
  '--help Show this help',
  ''
].join('\n');
763
/**
 * Parse command-line arguments.
 *
 * Flags may appear anywhere, including before the command. `--help` / `-h`
 * wins immediately. `--url` consumes the following argument. The first
 * remaining argument is the command, the second is the input; further
 * positional arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{help: true}|{command: (string|undefined), input: string, url: string}}
 */
function parseArgs(argv) {
  let command;
  let hasCommand = false;
  let input = '';
  let url = '';
  for (let index = 0; index < argv.length; index++) {
    const arg = argv[index];
    if (arg === '--help' || arg === '-h') {
      return {
        help: true
      };
    }
    if (arg === '--url') {
      url = argv[++index] || '';
      continue;
    }
    if (!hasCommand) {
      // Previously the first argument was always taken as the command, so a
      // leading `--help` or `--url` was misread and the flag never took effect.
      command = arg;
      hasCommand = true;
    } else if (!input) {
      input = arg;
    }
  }
  return {
    command,
    input,
    url
  };
}
788
/**
 * @param {string} value - Candidate input string.
 * @returns {boolean} True when the value starts with `http://` or `https://`,
 *   in any letter case.
 */
function isUrl(value) {
  const httpScheme = /^https?:\/\//i;
  return httpScheme.test(value);
}
791
/**
 * Read standard input to the end.
 *
 * @returns {Promise<string>} Everything read, decoded as UTF-8.
 */
async function readStdin() {
  const pieces = [];
  for await (const piece of process.stdin) {
    pieces.push(piece);
  }
  const whole = Buffer.concat(pieces);
  return whole.toString('utf8');
}
798
/**
 * Load the HTML to convert.
 *
 * - '' or '-': read from standard input; no URL is reported.
 * - `http://` / `https://`: fetch the document; the URL is reported as given.
 * - anything else: read as a UTF-8 file; the resolved absolute path is
 *   reported as the URL.
 *
 * @param {string} input - '-', a URL or a file path.
 * @returns {Promise<{html: string, url: string}>} The markup and its source.
 * @throws {Error} When the HTTP response status is not OK. Errors from
 *   fetch and from reading the file propagate unchanged.
 */
async function readInput(input) {
  const fromStdin = !input || input === '-';
  if (fromStdin) {
    const html = await readStdin();
    return { html, url: '' };
  }
  if (!isUrl(input)) {
    const html = await readFile(input, 'utf8');
    return { html, url: path.resolve(input) };
  }
  const requestHeaders = {
    accept: 'text/html,application/xhtml+xml',
    'user-agent': 'html2any/0.1'
  };
  const response = await fetch(input, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
  }
  const html = await response.text();
  return { html, url: input };
}
825
/**
 * CLI entry point: parse arguments, load the input and write Markdown to
 * standard output.
 *
 * Exit status is 0 on success and for `--help`, and 1 for a missing command
 * or input and for an unknown command. Failures are signalled through
 * `process.exitCode` followed by a normal return rather than `process.exit()`,
 * so output that is still buffered gets flushed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    process.stdout.write(USAGE);
    return;
  }
  if (!args.command || !args.input) {
    // An invalid invocation is an error: report it on stderr, as the
    // unknown-command branch below does.
    process.stderr.write(USAGE);
    process.exitCode = 1;
    return;
  }
  const command = args.command.toLowerCase();
  if (![
    'md',
    'markdown'
  ].includes(command)) {
    process.stderr.write(`Unknown command: ${args.command}\n\n${USAGE}`);
    process.exitCode = 1;
    return;
  }
  const input = await readInput(args.input);
  const options = {
    // An explicit --url overrides the URL derived from the input itself.
    url: args.url || input.url
  };
  process.stdout.write(htmlToMarkdown(input.html, options));
}
845
// Run the CLI; any rejection is reported as a one-line message on stderr and
// the process ends with status 1.
main().catch((error) => {
  const message = `${error.message}\n`;
  process.stderr.write(message);
  process.exit(1);
});