@plurnk/plurnk-mimetypes-text-html 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @plurnk/plurnk-mimetypes-text-html
2
2
 
3
- `text/html` AND `application/xhtml+xml` mimetype handler for the [plurnk](https://github.com/plurnk) ecosystem. Structural extraction via [parse5](https://www.npmjs.com/package/parse5); real-DOM xpath via [@xmldom/xmldom](https://www.npmjs.com/package/@xmldom/xmldom) + [xpath](https://www.npmjs.com/package/xpath).
3
+ `text/html` AND `application/xhtml+xml` mimetype handler for the [plurnk](https://github.com/plurnk) ecosystem. Two faces: **structural** extraction via [parse5](https://www.npmjs.com/package/parse5) (symbols, deep-json/deep-xml, real-DOM xpath via [@xmldom/xmldom](https://www.npmjs.com/package/@xmldom/xmldom) + [xpath](https://www.npmjs.com/package/xpath)) and **readable** projection — the page's main content as clean reading markdown via [@mozilla/readability](https://www.npmjs.com/package/@mozilla/readability) + [turndown](https://www.npmjs.com/package/turndown) over a [linkedom](https://www.npmjs.com/package/linkedom) DOM.
4
4
 
5
5
  ## install
6
6
 
@@ -10,15 +10,15 @@ npm i @plurnk/plurnk-mimetypes-text-html
10
10
 
11
11
  ## what it does
12
12
 
13
+ - `content(content)` — the **content channel** (SPEC §18): the page's markup-free reading markdown. Main-content extraction via Readability strips nav, ads, and chrome; turndown renders the article body as markdown. Non-article pages (apps, forms, fragments, very short HTML) degrade to best-effort markdown of the `<body>` — never raw HTML, never a throw. Empty/whitespace input → absent. This is also the embed-source: an HTML entry's embedding reflects the article, not `<div class>` noise. HTML is the only mimetype that populates this channel.
13
14
  - `extractRaw(content)` — h1–h6 headings as `heading` symbols (with `level`), `<title>` as an h1 fallback when no headings exist, and code blocks as `module` symbols. Source line numbers come from parse5's location info.
14
- - `preview(content)` — hybrid per SPEC §1: a `SymbolPreview` when structural signals were found, otherwise a head-oriented `TextPreview` over the raw HTML (the framework truncates and marks it).
15
15
  - `deepJson(content)` — the parse5 DOM as a nested node tree, with source-algebra attributes under the `attrs` convention (framework projects this to the deep-xml channel).
16
- - `query(content, dialect, pattern)` — overrides xpath to dispatch against the real parsed DOM (XPath 1.0) instead of the projected deep-xml.
16
+ - `query(content, dialect, pattern)` — overrides xpath to dispatch against the real parsed DOM (XPath 1.0) instead of the projected deep-xml. Regex/glob queries run against the same readable markdown the content channel produces (one projection, shared via `toText`), falling back to the raw HTML only when no readable markdown can be produced.
17
17
  - `validate(content)` — no-op (HTML is forgiving).
18
18
 
19
- ## not in scope
19
+ ## two faces, one handler
20
20
 
21
- Web-page denoising (Readability-style filtering of nav/ads/comments) belongs in the fetcher layer (`plurnk-schemes-http` when it lands), not in a mimetype handler. Markdown conversion of rendered content likewise — the preview channel is a structural-or-truncated radar, not a substitute for fetching the content.
21
+ The structural channels (`extractRaw`/`deepJson`/`query` xpath) stay parse5-based with source positions — they answer "where is this tag, on what line." The content channel answers a different question — "what does this page *say*" — and for that the raw markup is noise. Readability + turndown denoise it into reading markdown. Web-page denoising used to be deferred to the fetcher layer; SPEC §18 moved it here, because the readable projection is a pure function of the HTML bytes (whatever a browser scheme rendered and serialized, or a file on disk) and belongs with the mimetype that owns HTML.
22
22
 
23
23
  ## license
24
24
 
@@ -3,6 +3,8 @@ import type { HandlerContent, MimeSymbol, QueryDialect, QueryMatch } from "@plur
3
3
  export default class TextHtml extends BaseHandler {
4
4
  extractRaw(content: string | Uint8Array): MimeSymbol[];
5
5
  deepJson(content: HandlerContent): unknown;
6
+ content(content: HandlerContent): string | undefined;
7
+ protected toText(content: HandlerContent): string;
6
8
  query(content: HandlerContent, dialect: QueryDialect, pattern: string, flags?: string): Promise<QueryMatch[]>;
7
9
  }
8
10
  //# sourceMappingURL=TextHtml.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"TextHtml.d.ts","sourceRoot":"","sources":["../src/TextHtml.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,WAAW,EAGd,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EACR,cAAc,EACd,UAAU,EACV,YAAY,EACZ,UAAU,EACb,MAAM,0BAA0B,CAAC;AA+BlC,MAAM,CAAC,OAAO,OAAO,QAAS,SAAQ,WAAW;IACpC,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,GAAG,UAAU,EAAE;IAkCtD,QAAQ,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO;IAyBpC,KAAK,CAChB,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,YAAY,EACrB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,UAAU,EAAE,CAAC;CAiC3B"}
1
+ {"version":3,"file":"TextHtml.d.ts","sourceRoot":"","sources":["../src/TextHtml.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,WAAW,EAGd,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EACR,cAAc,EACd,UAAU,EACV,YAAY,EACZ,UAAU,EACb,MAAM,0BAA0B,CAAC;AAgClC,MAAM,CAAC,OAAO,OAAO,QAAS,SAAQ,WAAW;IACpC,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,GAAG,UAAU,EAAE;IAkCtD,QAAQ,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO;IA4B1C,OAAO,CAAC,OAAO,EAAE,cAAc,GAAG,MAAM,GAAG,SAAS;cAa1C,MAAM,CAAC,OAAO,EAAE,cAAc,GAAG,MAAM;IAa3C,KAAK,CAChB,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,YAAY,EACrB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,UAAU,EAAE,CAAC;CAiC3B"}
package/dist/TextHtml.js CHANGED
@@ -2,6 +2,7 @@ import { BaseHandler, InvalidExpressionError, QueryParseFailureError, } from "@p
2
2
  import { parse } from "parse5";
3
3
  import { DOMParser } from "@xmldom/xmldom";
4
4
  import * as xpath from "xpath";
5
+ import { htmlToMarkdown } from "./htmlToMarkdown.js";
5
6
  const HEADING_TAGS = new Set(["h1", "h2", "h3", "h4", "h5", "h6"]);
6
7
  export default class TextHtml extends BaseHandler {
7
8
  extractRaw(content) {
@@ -53,6 +54,33 @@ export default class TextHtml extends BaseHandler {
53
54
  };
54
55
  return root;
55
56
  }
57
+ // Content channel (SPEC §18) — the model-facing readable markdown. HTML
58
+ // is the only mimetype that populates this channel: an already-textual but
59
+ // markup-noisy body projected to clean reading markdown via Readability
60
+ // (main-content extraction, strips nav/ads/chrome) + turndown. Absent
61
+ // (undefined) for empty/whitespace input so the channel stays absent when
62
+ // there is no readable content. Also the embed-source — the framework
63
+ // embeds content() over the raw bytes, so HTML embeddings carry the
64
+ // article, not the chrome. Binary content is decoded utf-8 first, mirroring
65
+ // extractRaw/deepJson.
66
+ content(content) {
67
+ const html = typeof content === "string"
68
+ ? content
69
+ : new TextDecoder("utf-8").decode(content);
70
+ return htmlToMarkdown(html);
71
+ }
72
+ // Route the regex/glob query surface (and, transitively, the framework's
73
+ // content()??toText() embed-source) through the SAME markdown projection,
74
+ // so body matchers scan the readable text, not raw `<div class>` markup.
75
+ // xpath is unaffected — query() overrides it to hit the real DOM. When the
76
+ // page has no readable content, fall back to the raw body so regex/glob
77
+ // still have something to match rather than throwing.
78
+ toText(content) {
79
+ const html = typeof content === "string"
80
+ ? content
81
+ : new TextDecoder("utf-8").decode(content);
82
+ return htmlToMarkdown(html) ?? html;
83
+ }
56
84
  // Override xpath dispatch. parse5's tree isn't xpath-traversable, so we
57
85
  // re-parse via @xmldom/xmldom (which produces a real DOM that the `xpath`
58
86
  // package can walk). Line numbers default to 1 because xmldom doesn't
@@ -1 +1 @@
1
- {"version":3,"file":"TextHtml.js","sourceRoot":"","sources":["../src/TextHtml.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,WAAW,EACX,sBAAsB,EACtB,sBAAsB,GACzB,MAAM,0BAA0B,CAAC;AAOlC,OAAO,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAE/B,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAyB/B,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AAEnE,MAAM,CAAC,OAAO,OAAO,QAAS,SAAQ,WAAW;IACpC,UAAU,CAAC,OAA4B;QAC5C,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE/C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,EAAE,EAAE,sBAAsB,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAiB,EAAE,CAAC;QACjC,MAAM,SAAS,GAA0C,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAEzF,wEAAwE;QACxE,qEAAqE;QACrE,oEAAoE;QACpE,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;QACzE,IAAI,SAAS,KAAK,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC/B,OAAO,CAAC,OAAO,CAAC;gBACZ,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,CAAC;gBACR,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,OAAO,EAAE,SAAS,CAAC,IAAI;aAC1B,CAAC,CAAC;QACP,CAAC;QAED,OAAO,OAAO,CAAC;IACnB,CAAC;IAED,yEAAyE;IACzE,mEAAmE;IACnE,wEAAwE;IACxE,sEAAsE;IACtE,iEAAiE;IACjE,oEAAoE;IACpE,qEAAqE;IACrE,0CAA0C;IACjC,QAAQ,CAAC,OAAuB;QACrC,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,IAAI,GAAG,CAAC;QACR,IAAI,CAAC;YACD,GAAG,GAAG,KAAK,CAAC,IAAI,EAAE,EAAE,sBAAsB,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,IAAI,CAAC;QAChB,CAAC;QACD,MAAM,IAAI,GAA4B;YAClC,IAAI,EAAE,UAAU;YAChB,IAAI,EAAE,CAAC;YACP,OAAO,EAAE,CAAC;YACV,QAAQ,EAAE,eAAe,CAAC,GAAG,CAAC;SACjC,CAAC;QACF,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,wEAAwE;IACxE,0EAA0E;IAC1E,sEAAsE;IACtE,uEAAuE;IACvE,yEAAyE;IACzE,yEAAyE;IAChE,KAAK,CAAC,KAAK,CAChB,OAAuB,EACvB,OAAqB,EACrB,OAAe,EACf,KAAc;QAEd,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,OAAO,O
AAO,KAAK,QAAQ;gBACpC,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAE/C,2DAA2D;YAC3D,kEAAkE;YAClE,iEAAiE;YACjE,6DAA6D;YAC7D,6DAA6D;YAC7D,+DAA+D;YAC/D,6DAA6D;YAC7D,+DAA+D;YAC/D,qDAAqD;YACrD,IAAI,GAAG,CAAC;YACR,IAAI,CAAC;gBACD,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;YAC5D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,MAAM,IAAI,sBAAsB,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,IAAI,MAA8B,CAAC;YACnC,IAAI,CAAC;gBACD,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,GAAsB,CAAC,CAAC;YAC3D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,MAAM,IAAI,sBAAsB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YAED,OAAO,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC7C,CAAC;QACD,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;IACzD,CAAC;CACJ;AAED,0EAA0E;AAC1E,SAAS,gBAAgB,CAAC,OAAe,EAAE,MAA8B;IACrE,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACxB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAc,EAAE,CAAC,CAAC;YACxC,IAAI,EAAE,CAAC;YACP,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC;YAC5B,QAAQ,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS;SACrE,CAAC,CAAC,CAAC;IACR,CAAC;IACD,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,CAAC;IACvD,0EAA0E;IAC1E,0CAA0C;IAC1C,OAAO,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AACxF,CAAC;AAED,4EAA4E;AAC5E,4EAA4E;AAC5E,6EAA6E;AAC7E,wEAAwE;AACxE,yCAAyC;AACzC,MAAM,cAAc,GAAG,CAAC,CAAC;AACzB,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,2BAA2B,GAAG,CAAC,CAAC;AACtC,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,SAAS,aAAa,CAAC,IAAU;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;IACzB,IAAI,EAAE,KAAK,cAAc;QAAE,OAAQ,IAAa,CAAC,KAAK,CAAC;IACvD,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,kBAAkB;QAAE,OAAQ,IAAa,CAAC,IAAI,CAAC;IAC9E,IAAI,EAAE,KAAK,YAAY;QAAE,OAAQ,IAAgB,CAAC,IAAI,CAAC;IACvD,IAAI,EAAE,KAAK,2BAA2B;QAAE,OAAQ,IAA8B,CAAC,IAAI,CA
AC;IACpF,OAAQ,IAA8C,CAAC,QAAQ,EAAE,CAAC;AACtE,CAAC;AAED,4EAA4E;AAC5E,uEAAuE;AACvE,wBAAwB;AACxB,SAAS,iBAAiB,CACtB,IAAgB,EAChB,GAAiB;IAEjB,IAAI,KAAK,GAA0C,IAAI,CAAC;IACxD,sEAAsE;IACtE,yEAAyE;IACzE,iEAAiE;IACjE,MAAM,IAAI,GAA2C,EAAE,CAAC;IAExD,SAAS,IAAI,CAAC,IAA4B;QACtC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU;oBAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YACrD,CAAC;YACD,OAAO;QACX,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC;QACzB,IAAI,GAAG,KAAK,OAAO,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClB,KAAK,GAAG;oBACJ,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,IAAI,CAAC,kBAAkB,EAAE,SAAS,IAAI,CAAC;iBAChD,CAAC;YACN,CAAC;QACL,CAAC;aAAM,IAAI,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;YACpC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC7B,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;oBAC7D,IAAI,CAAC,GAAG,EAAE,CAAC;gBACf,CAAC;gBACD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpD,GAAG,CAAC,IAAI,CAAC;oBACL,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,SAAS;oBACf,KAAK;oBACL,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;oBACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;oBAC5C,GAAG,CAAC,GAAG,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC;oBAC3D,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;iBAC7C,CAAC,CAAC;gBACH,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;aAAM,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;YACvB,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACjE,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;YACpC,MAAM,
SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC;gBACL,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;gBACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;gBAC5C,GAAG,CAAC,GAAG,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC;gBAC3D,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;aAC7C,CAAC,CAAC;YACH,OAAO;QACX,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU;YAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC;IACX,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,SAAS,eAAe,CAAC,MAAe;IACpC,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;IAC/D,IAAI,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC;IAC9B,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC9D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAkB,EAAE,OAAe;IACzD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,KAAK,OAAO;YAAE,OAAO,KAAK,CAAC;IACpE,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,SAAS,WAAW,CAAC,IAA4B;IAC7C,IAAI,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC;IACxC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU;QAAE,GAAG,IAAI,WAAW,CAAC,KAAK,CAAC,CAAC;IAC/D,OAAO,GAAG,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,IAA4B;IAC3C,OAAQ,IAAgB,CAAC,OAAO,KAAK,SAAS,IAAK,IAAgB,CAAC,KAAK,KAAK,SAAS,CAAC;AAC5F,CAAC;AAED,SAAS,UAAU,CAAC,IAA4B;IAC5C,OAAQ,IAA8B,CAAC,QAAQ,KAAK,OAAO,CAAC;AAChE,CAAC;AAED,SAAS,aAAa,CAAC,IAAa;IAChC,OAAO,KAAK,CAAC,OAAO,CAAE,IAAiC,CAAC,UAAU,CAAC,CAAC;AACxE,CAAC;AAED,6EAA6E;AAC7E,8EAA8E;AAC9E,8EAA8E;AAC9E,+EAA+E;AAC/E,qCAAqC;AACrC,SAAS,eAAe,CAAC,MAAkB;IACvC,MAAM,GAAG,GAAc,EAAE,CAAC;IAC1B,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,GAAG,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;Y
ACpB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC;YACzB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAChC,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAClC,SAAS;QACb,CAAC;QACD,IAAI,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;YACnB,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;YAC/B,SAAS;QACb,CAAC;QACD,2DAA2D;IAC/D,CAAC;IACD,OAAO,GAAG,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,EAAW;IAC9B,MAAM,GAAG,GAAG,EAAE,CAAC,kBAAkB,CAAC;IAClC,MAAM,IAAI,GAA4B;QAClC,IAAI,EAAE,EAAE,CAAC,OAAO;QAChB,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;QACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;KAC/C,CAAC;IACF,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,KAAK,GAA2B,EAAE,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IACD,MAAM,QAAQ,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;IACrC,uEAAuE;IACvE,sBAAsB;IACtB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;WAClB,OAAO,QAAQ,CAAC,CAAC,CAAC,KAAK,QAAQ;WAC/B,QAAQ,CAAC,CAAC,CAAC,KAAK,IAAI;WACnB,QAAQ,CAAC,CAAC,CAAsB,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QACxD,IAAI,CAAC,IAAI,GAAI,QAAQ,CAAC,CAAC,CAAsB,CAAC,IAAI,CAAC;IACvD,CAAC;SAAM,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"TextHtml.js","sourceRoot":"","sources":["../src/TextHtml.ts"],"names":[],"mappings":"AAAA,OAAO,EACH,WAAW,EACX,sBAAsB,EACtB,sBAAsB,GACzB,MAAM,0BAA0B,CAAC;AAOlC,OAAO,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAE/B,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,KAAK,KAAK,MAAM,OAAO,CAAC;AAC/B,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAyBrD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AAEnE,MAAM,CAAC,OAAO,OAAO,QAAS,SAAQ,WAAW;IACpC,UAAU,CAAC,OAA4B;QAC5C,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE/C,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,EAAE,EAAE,sBAAsB,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,OAAO,GAAiB,EAAE,CAAC;QACjC,MAAM,SAAS,GAA0C,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAEzF,wEAAwE;QACxE,qEAAqE;QACrE,oEAAoE;QACpE,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;QACzE,IAAI,SAAS,KAAK,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC/B,OAAO,CAAC,OAAO,CAAC;gBACZ,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,CAAC;gBACR,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,OAAO,EAAE,SAAS,CAAC,IAAI;aAC1B,CAAC,CAAC;QACP,CAAC;QAED,OAAO,OAAO,CAAC;IACnB,CAAC;IAED,yEAAyE;IACzE,mEAAmE;IACnE,wEAAwE;IACxE,sEAAsE;IACtE,iEAAiE;IACjE,oEAAoE;IACpE,qEAAqE;IACrE,0CAA0C;IACjC,QAAQ,CAAC,OAAuB;QACrC,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,IAAI,GAAG,CAAC;QACR,IAAI,CAAC;YACD,GAAG,GAAG,KAAK,CAAC,IAAI,EAAE,EAAE,sBAAsB,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,IAAI,CAAC;QAChB,CAAC;QACD,MAAM,IAAI,GAA4B;YAClC,IAAI,EAAE,UAAU;YAChB,IAAI,EAAE,CAAC;YACP,OAAO,EAAE,CAAC;YACV,QAAQ,EAAE,eAAe,CAAC,GAAG,CAAC;SACjC,CAAC;QACF,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,wEAAwE;IACxE,2EAA2E;IAC3E,wEAAwE;IACxE,sEAAsE;IACtE,0EAA0E;IAC1E,sEAAsE;IACtE,oEAAoE;IACpE,4EAA4E;IAC5E,uBAAuB;IACd,OAAO,CAAC,OAAuB;QACpC,MAAM,IAAI,GAAG,OAAO
,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,yEAAyE;IACzE,0EAA0E;IAC1E,yEAAyE;IACzE,2EAA2E;IAC3E,wEAAwE;IACxE,sDAAsD;IACnC,MAAM,CAAC,OAAuB;QAC7C,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;YACpC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/C,OAAO,cAAc,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IACxC,CAAC;IAED,wEAAwE;IACxE,0EAA0E;IAC1E,sEAAsE;IACtE,uEAAuE;IACvE,yEAAyE;IACzE,yEAAyE;IAChE,KAAK,CAAC,KAAK,CAChB,OAAuB,EACvB,OAAqB,EACrB,OAAe,EACf,KAAc;QAEd,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,OAAO,OAAO,KAAK,QAAQ;gBACpC,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAE/C,2DAA2D;YAC3D,kEAAkE;YAClE,iEAAiE;YACjE,6DAA6D;YAC7D,6DAA6D;YAC7D,+DAA+D;YAC/D,6DAA6D;YAC7D,+DAA+D;YAC/D,qDAAqD;YACrD,IAAI,GAAG,CAAC;YACR,IAAI,CAAC;gBACD,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;YAC5D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,MAAM,IAAI,sBAAsB,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;YACzE,CAAC;YAED,IAAI,MAA8B,CAAC;YACnC,IAAI,CAAC;gBACD,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,GAAsB,CAAC,CAAC;YAC3D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACb,MAAM,IAAI,sBAAsB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YAED,OAAO,gBAAgB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC7C,CAAC;QACD,OAAO,KAAK,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;IACzD,CAAC;CACJ;AAED,0EAA0E;AAC1E,SAAS,gBAAgB,CAAC,OAAe,EAAE,MAA8B;IACrE,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACxB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAc,EAAE,CAAC,CAAC;YACxC,IAAI,EAAE,CAAC;YACP,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC;YAC5B,QAAQ,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,OAAO,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS;SACrE,CAAC,CAAC,CAAC;IACR,CAAC;IACD,IAAI,MAAM,KAAK,IAAI,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,CAAC;IACvD,0EAA0E;IAC1E,0CAA0C;IAC1C,OAAO,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,O
AAO,EAAE,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AACxF,CAAC;AAED,4EAA4E;AAC5E,4EAA4E;AAC5E,6EAA6E;AAC7E,wEAAwE;AACxE,yCAAyC;AACzC,MAAM,cAAc,GAAG,CAAC,CAAC;AACzB,MAAM,SAAS,GAAG,CAAC,CAAC;AACpB,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,2BAA2B,GAAG,CAAC,CAAC;AACtC,MAAM,YAAY,GAAG,CAAC,CAAC;AACvB,SAAS,aAAa,CAAC,IAAU;IAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;IACzB,IAAI,EAAE,KAAK,cAAc;QAAE,OAAQ,IAAa,CAAC,KAAK,CAAC;IACvD,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,kBAAkB;QAAE,OAAQ,IAAa,CAAC,IAAI,CAAC;IAC9E,IAAI,EAAE,KAAK,YAAY;QAAE,OAAQ,IAAgB,CAAC,IAAI,CAAC;IACvD,IAAI,EAAE,KAAK,2BAA2B;QAAE,OAAQ,IAA8B,CAAC,IAAI,CAAC;IACpF,OAAQ,IAA8C,CAAC,QAAQ,EAAE,CAAC;AACtE,CAAC;AAED,4EAA4E;AAC5E,uEAAuE;AACvE,wBAAwB;AACxB,SAAS,iBAAiB,CACtB,IAAgB,EAChB,GAAiB;IAEjB,IAAI,KAAK,GAA0C,IAAI,CAAC;IACxD,sEAAsE;IACtE,yEAAyE;IACzE,iEAAiE;IACjE,MAAM,IAAI,GAA2C,EAAE,CAAC;IAExD,SAAS,IAAI,CAAC,IAA4B;QACtC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;YACnB,IAAI,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU;oBAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YACrD,CAAC;YACD,OAAO;QACX,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC;QACzB,IAAI,GAAG,KAAK,OAAO,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClB,KAAK,GAAG;oBACJ,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,IAAI,CAAC,kBAAkB,EAAE,SAAS,IAAI,CAAC;iBAChD,CAAC;YACN,CAAC;QACL,CAAC;aAAM,IAAI,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;YACpC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC7B,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;oBAC7D,IAAI,CAAC,GAAG,EAAE,CAAC;gBACf,CAAC;gBACD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpD,GAAG,CAAC,IAAI,CAAC;oBACL,IAAI,EAAE
,IAAI;oBACV,IAAI,EAAE,SAAS;oBACf,KAAK;oBACL,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;oBACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;oBAC5C,GAAG,CAAC,GAAG,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC;oBAC3D,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;iBAC7C,CAAC,CAAC;gBACH,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;aAAM,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;YACvB,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACjE,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;YACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC;gBACL,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;gBACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;gBAC5C,GAAG,CAAC,GAAG,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,QAAQ,EAAE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC;gBAC3D,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;aAC7C,CAAC,CAAC;YACH,OAAO;QACX,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,UAAU;YAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC;IACX,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,SAAS,eAAe,CAAC,MAAe;IACpC,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;IAC/D,IAAI,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC;IAC9B,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC9D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;AACrC,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAkB,EAAE,OAAe;IACzD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,KAAK,OAAO;YAAE,OAAO,KAAK,CAAC;IACpE,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,SAAS,WAAW,CAAC,IAA4B;IAC7C,IAAI,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC;IACxC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,KA
AK,IAAI,IAAI,CAAC,UAAU;QAAE,GAAG,IAAI,WAAW,CAAC,KAAK,CAAC,CAAC;IAC/D,OAAO,GAAG,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,IAA4B;IAC3C,OAAQ,IAAgB,CAAC,OAAO,KAAK,SAAS,IAAK,IAAgB,CAAC,KAAK,KAAK,SAAS,CAAC;AAC5F,CAAC;AAED,SAAS,UAAU,CAAC,IAA4B;IAC5C,OAAQ,IAA8B,CAAC,QAAQ,KAAK,OAAO,CAAC;AAChE,CAAC;AAED,SAAS,aAAa,CAAC,IAAa;IAChC,OAAO,KAAK,CAAC,OAAO,CAAE,IAAiC,CAAC,UAAU,CAAC,CAAC;AACxE,CAAC;AAED,6EAA6E;AAC7E,8EAA8E;AAC9E,8EAA8E;AAC9E,+EAA+E;AAC/E,qCAAqC;AACrC,SAAS,eAAe,CAAC,MAAkB;IACvC,MAAM,GAAG,GAAc,EAAE,CAAC;IAC1B,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,GAAG,CAAC;IACvC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACpB,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC;YACzB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAChC,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAClC,SAAS;QACb,CAAC;QACD,IAAI,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;YACnB,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;YAC/B,SAAS;QACb,CAAC;QACD,2DAA2D;IAC/D,CAAC;IACD,OAAO,GAAG,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,EAAW;IAC9B,MAAM,GAAG,GAAG,EAAE,CAAC,kBAAkB,CAAC;IAClC,MAAM,IAAI,GAA4B;QAClC,IAAI,EAAE,EAAE,CAAC,OAAO;QAChB,IAAI,EAAE,GAAG,EAAE,SAAS,IAAI,CAAC;QACzB,OAAO,EAAE,GAAG,EAAE,OAAO,IAAI,GAAG,EAAE,SAAS,IAAI,CAAC;KAC/C,CAAC;IACF,IAAI,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,KAAK,GAA2B,EAAE,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC;IACD,MAAM,QAAQ,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;IACrC,uEAAuE;IACvE,sBAAsB;IACtB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;WAClB,OAAO,QAAQ,CAAC,CAAC,CAAC,KAAK,QAAQ;WAC/B,QAAQ,CAAC,CAAC,CAAC,KAAK,IAAI;WACnB,QAAQ,CAAC,CAAC,CAAsB,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QACxD,IAAI,CAAC,IAAI,GAAI,QAAQ,CAAC,CAAC,CAAsB,CAAC,IAAI,CAAC;IACvD,CAAC;SAAM,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC7B,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function htmlToMarkdown(html: string): string | undefined;
2
+ //# sourceMappingURL=htmlToMarkdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"htmlToMarkdown.d.ts","sourceRoot":"","sources":["../src/htmlToMarkdown.ts"],"names":[],"mappings":"AAuBA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAuB/D"}
@@ -0,0 +1,53 @@
1
+ import { parseHTML } from "linkedom";
2
+ import { Readability } from "@mozilla/readability";
3
+ import TurndownService from "turndown";
4
+ // The single readable-text projection backing both content() (the content
5
+ // channel) and toText() (the regex/glob query surface + the framework's
6
+ // embed-source). One implementation, so the markdown a model READs, the text
7
+ // a regex/glob body-matcher scans, and the bytes the embedder vectorizes are
8
+ // all the same denoised markdown — raw HTML only via toText()'s fallback when this projection yields nothing.
9
+ //
10
+ // Pipeline (SPEC §18): main-content extraction via @mozilla/readability over a
11
+ // linkedom DOM, then HTML→markdown via turndown. Readability strips nav, ads,
12
+ // and chrome and returns the article body; turndown renders it as markdown.
13
+ // When Readability finds no article (apps, forms, fragments, very short HTML)
14
+ // it returns null — we degrade to best-effort markdown of the <body> (or the
15
+ // whole document for unwrapped fragments). Never raw HTML, never a throw.
16
+ const turndown = new TurndownService({
17
+ headingStyle: "atx",
18
+ bulletListMarker: "-",
19
+ codeBlockStyle: "fenced",
20
+ });
21
+ export function htmlToMarkdown(html) {
22
+ if (html.trim().length === 0)
23
+ return undefined;
24
+ const { document } = parseHTML(html);
25
+ // Whitespace/degenerate input can leave linkedom without a root element.
26
+ if (document.documentElement === null)
27
+ return undefined;
28
+ let articleHtml;
29
+ try {
30
+ // Readability mutates the document it walks; hand it a clone so the
31
+ // original stays intact for the body fallback below.
32
+ articleHtml = new Readability(document.cloneNode(true)).parse()?.content;
33
+ }
34
+ catch {
35
+ // Readability is best-effort denoising — its failure is not ours.
36
+ articleHtml = undefined;
37
+ }
38
+ if (articleHtml === null || articleHtml === undefined || articleHtml.trim().length === 0) {
39
+ articleHtml = readableBody(document);
40
+ }
41
+ const markdown = turndown.turndown(articleHtml).trim();
42
+ return markdown.length === 0 ? undefined : markdown;
43
+ }
44
+ // Best-effort source for the fallback turndown: the <body> when it carries
45
+ // content, else the whole document element (unwrapped fragments like a bare
46
+ // <form> or <div> become documentElement with an empty auto-inserted <body>).
47
+ function readableBody(document) {
48
+ const bodyHtml = document.body?.innerHTML ?? "";
49
+ if (bodyHtml.trim().length > 0)
50
+ return bodyHtml;
51
+ return document.documentElement?.innerHTML ?? "";
52
+ }
53
+ //# sourceMappingURL=htmlToMarkdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"htmlToMarkdown.js","sourceRoot":"","sources":["../src/htmlToMarkdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,0EAA0E;AAC1E,wEAAwE;AACxE,6EAA6E;AAC7E,6EAA6E;AAC7E,mDAAmD;AACnD,EAAE;AACF,+EAA+E;AAC/E,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAC9E,6EAA6E;AAC7E,0EAA0E;AAE1E,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACjC,YAAY,EAAE,KAAK;IACnB,gBAAgB,EAAE,GAAG;IACrB,cAAc,EAAE,QAAQ;CAC3B,CAAC,CAAC;AAEH,MAAM,UAAU,cAAc,CAAC,IAAY;IACvC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAE/C,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACrC,yEAAyE;IACzE,IAAI,QAAQ,CAAC,eAAe,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC;IAExD,IAAI,WAAsC,CAAC;IAC3C,IAAI,CAAC;QACD,oEAAoE;QACpE,qDAAqD;QACrD,WAAW,GAAG,IAAI,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAa,CAAC,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC;IACzF,CAAC;IAAC,MAAM,CAAC;QACL,kEAAkE;QAClE,WAAW,GAAG,SAAS,CAAC;IAC5B,CAAC;IAED,IAAI,WAAW,KAAK,IAAI,IAAI,WAAW,KAAK,SAAS,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvF,WAAW,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;IACvD,OAAO,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;AACxD,CAAC;AAED,2EAA2E;AAC3E,4EAA4E;AAC5E,8EAA8E;AAC9E,SAAS,YAAY,CAAC,QAAkB;IACpC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC;IAChD,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IAChD,OAAO,QAAQ,CAAC,eAAe,EAAE,SAAS,IAAI,EAAE,CAAC;AACrD,CAAC"}
package/package.json CHANGED
@@ -1,14 +1,14 @@
1
1
  {
2
2
  "name": "@plurnk/plurnk-mimetypes-text-html",
3
- "version": "0.6.2",
4
- "description": "text/html and application/xhtml+xml mimetype handler for plurnk-service. Extracts headings, code blocks, and title via parse5.",
3
+ "version": "0.6.4",
4
+ "description": "text/html and application/xhtml+xml mimetype handler for plurnk-service. Structural extraction via parse5; readable markdown (content channel) via Readability + turndown.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "publishConfig": {
8
8
  "access": "public"
9
9
  },
10
10
  "engines": {
11
- "node": ">=25"
11
+ "node": ">=26"
12
12
  },
13
13
  "plurnk": {
14
14
  "kind": "mimetype",
@@ -50,16 +50,20 @@
50
50
  "prepare": "npm run build"
51
51
  },
52
52
  "dependencies": {
53
+ "@mozilla/readability": "^0.6.0",
53
54
  "@xmldom/xmldom": "^0.9.10",
55
+ "linkedom": "^0.18.12",
54
56
  "parse5": "^8.0.1",
57
+ "turndown": "^7.2.4",
55
58
  "xpath": "^0.0.34"
56
59
  },
57
60
  "devDependencies": {
58
- "@types/node": "^25.8.0",
61
+ "@types/node": "^26.0.0",
62
+ "@types/turndown": "^5.0.6",
59
63
  "typescript": "^6.0.3",
60
- "@plurnk/plurnk-mimetypes": "^0.15.0"
64
+ "@plurnk/plurnk-mimetypes": "0.15.25"
61
65
  },
62
66
  "peerDependencies": {
63
- "@plurnk/plurnk-mimetypes": "^0.15.0"
67
+ "@plurnk/plurnk-mimetypes": "^0.15.10"
64
68
  }
65
69
  }