@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
package/dist/constants.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":";;;AAAA,uBAAuB;AACvB,oEAAoE;AACvD,QAAA,oBAAoB,GAAG;IACnC,OAAO;IACP,eAAe;IACf,YAAY;IACZ,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,kBAAkB;IAClB,gBAAgB;IAChB,kBAAkB;IAClB,kBAAkB;IAClB,OAAO;IACP,gBAAgB;IAChB,SAAS;IACT,kBAAkB;IAClB,MAAM;IACN,eAAe;IACf,UAAU;IACV,MAAM,CAAC,kCAAkC;CACzC,CAAC;AAEW,QAAA,YAAY,GAAG,GAAG,CAAC;AACnB,QAAA,cAAc,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;AACtG,QAAA,uBAAuB,GAAG,sBAAc,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACnD,QAAA,kBAAkB,GAAG,IAAI,GAAG,CAAC,sBAAc,CAAC,CAAC;AAE1D,oFAAoF;AACvE,QAAA,oBAAoB,GAAG,IAAI,GAAG,CAAC;IAC3C,GAAG,sBAAc;IACjB,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IACvC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,KAAK,EAAE,YAAY,EAAE,QAAQ,EAAE,YAAY;IAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IACpD,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI;IACrC,MAAM,EAAE,UAAU;CAClB,CAAC,CAAC;AAEH,wCAAwC;AAC3B,QAAA,iBAAiB,GAAG,IAAI,GAAG,CAAC;IACxC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAC1D,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;IAClC,QAAQ,EAAE,YAAY,EAAE,SAAS;IACjC,SAAS,EAAE,SAAS;IACpB,YAAY;IACZ,MAAM,EAAE,UAAU;CAClB,CAAC,CAAC;AAEH,+CAA+C;AAClC,QAAA,eAAe,GAAG,IAAI,GAAG,CAAC;IACtC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO;IACjE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,eAAe,EAAE,MAAM;IACxF,MAAM;CACN,CAAC,CAAC;AAEH,6EAA6E;AAC7E,uEAAuE;AACvE,8EAA8E;AAC9E,mCAAmC;AACtB,QAAA,wBAAwB,GAAG;IACvC,MAAM,EAAE,eAAe;IACvB,QAAQ,EAAE,eAAe,EAAE,gBAAgB;IAC3C,UAAU,EAAE,kBAAkB,EAAE,cAAc;IAC9C,eAAe;IACf,KAAK,EAAE,MAAM;IACb,OAAO;IACP,KAAK,EAAE,SAAS,EAAE,OAAO;IACzB,YAAY;IACZ,QAAQ;CACR,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,0BAA0B;AACb,QAAA,2BAA2B,GAAG;IAC1C,UAAU;IACV,sBAAsB;IACtB,SAAS;IACT,YAAY;CACZ,CAAC;AAEW
,QAAA,sBAAsB,GAAG,mCAA2B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CACzE,CAAC,KAAK,sBAAsB,CAAC,CAAC,CAAC,2CAA2C,CAAC,CAAC,CAAC,CAAC,CAC9E,CAAC;AAEF,0DAA0D;AAC7C,QAAA,qBAAqB,GAAG,8BAAsB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzD,QAAA,0BAA0B,GAAG,mCAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEnE,QAAA,eAAe,GAAG;IAC9B,kBAAkB;IAClB,UAAU;IACV,6BAA6B;IAC7B,OAAO;IACP,MAAM;IACN,MAAM;IAEN,MAAM;IACN,8BAA8B;IAC9B,kBAAkB;IAClB,kBAAkB;IAClB,eAAe;IACf,eAAe;IACf,mBAAmB;IACnB,mBAAmB;IACnB,QAAQ;IACR,QAAQ;IACR,eAAe,EAAE,SAAS;IAC1B,QAAQ;IAER,WAAW;IACX,mBAAmB;IACnB,kBAAkB;IAElB,eAAe;IACf,sBAAsB;IACtB,mBAAmB;IAEnB,cAAc;IACd,QAAQ;IACR,sBAAsB;IACtB,SAAS;IACT,SAAS;IACT,SAAS;IACT,SAAS;IACT,KAAK;IACL,aAAa;IACb,aAAa;IACb,6BAA6B;IAC7B,uBAAuB;IACvB,mBAAmB;IACnB,2BAA2B;IAC3B,yBAAyB;IACzB,OAAO;IACP,6BAA6B;IAC7B,UAAU;IACV,0BAA0B;IAC1B,WAAW;IAEX,WAAW;IACX,SAAS;IACT,SAAS;IACT,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,OAAO;IACP,OAAO;IACP,aAAa;IACb,aAAa;IACb,OAAO;IACP,OAAO;IACP,OAAO;IACP,aAAa;IACb,MAAM;IACN,MAAM;IACN,MAAM;IACN,WAAW;IACX,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,aAAa;IACb,2CAA2C;IAC3C,6CAA6C;IAC7C,iBAAiB;IACjB,kBAAkB;IAClB,yCAAyC;IACzC,oBAAoB;IACpB,oBAAoB;IACpB,mBAAmB;IACnB,0BAA0B;IAC1B,mCAAmC;IACnC,gBAAgB;IAChB,eAAe;IACf,eAAe;IACf,uBAAuB;IACvB,mBAAmB;IACnB,wBAAwB;IACxB,wBAAwB;IACxB,iBAAiB;IACjB,iBAAiB;IAEjB,SAAS;IACT,QAAQ;IAER,0BAA0B;IAC1B,QAAQ;IACR,+BAA+B;IAC/B,QAAQ;IACP,mCAAmC;IACpC,QAAQ;IACR,MAAM;IACN,QAAQ;IACR,UAAU;IACV,MAAM;IACN,8BAA8B;IAC9B,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,kBAAkB;IAClB,iBAAiB;IACjB,UAAU;IACV,4BAA4B;IAC5B,qCAAqC;IAErC,SAAS;IACT,GAAG,8BAAsB;IACzB,kFAAkF;IAClF,mFAAmF;IACnF,2DAA2D;IAE3D,UAAU;IACV,kBAAkB;IAClB,6IAA6I;IAE7I,QAAQ;IACR,kBAAkB;IAClB,OAAO;IACP,OAAO;IAEP,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IAEZ,mBAAmB;IACnB,UAAU;IACV,8BAA8B;IAC9B,6BAA6B;IAE7B,uBAAuB;IACvB,6BAA6B;IAC7B,sDAAsD;IACtD,iCAAiC;IACjC,4CAA4C;IAE5C,aAAa;IACb,mCAAmC;IAEnC,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,UAAU;IACV,WAAW;IACX,UAAU;IAEV,aAAa;IACb,4BAA4B;IAC5B,wBAAwB;IAExB,QAAQ;IACR,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,MAAM;IACN,OAAO;IACP,SAAS;
IACT,eAAe,EAAE,MAAM;IACvB,gCAAgC;IAChC,oDAAoD,EAAE,iBAAiB;IACvE,eAAe;IACf,gDAAgD,EAAE,gBAAgB;IAClE,+BAA+B;IAC/B,mBAAmB,EAAE,SAAS;IAC9B,uCAAuC,EAAE,SAAS;CAClD,CAAC;AAEW,QAAA,sBAAsB,GAAG,uBAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEhE,iDAAiD;AACpC,QAAA,eAAe,GAAG;IAC9B,OAAO;IACP,IAAI;IACJ,WAAW;IACX,aAAa;IACb,cAAc;IACd,SAAS;IACT,SAAS;CACT,CAAC;AAEF,mDAAmD;AACnD,4CAA4C;AAC/B,QAAA,iBAAiB,GAAG;IAChC,aAAa;IACb,aAAa;IACb,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,QAAQ;IACT,wBAAwB;IACvB,UAAU;IACV,SAAS;IACT,cAAc;IACd,eAAe;IACf,MAAM;IACN,UAAU;IACV,QAAQ;IACR,eAAe;IACf,oBAAoB;IACpB,WAAW;IACX,UAAU;IACV,SAAS;IACT,WAAW;IACZ,cAAc;IACb,UAAU;IACV,gBAAgB;IAChB,aAAa;IACb,gBAAgB;IAChB,gBAAgB;IAChB,gBAAgB;IAChB,wBAAwB;IACxB,gBAAgB;IAChB,kBAAkB;IAClB,cAAc;IACd,kBAAkB;IAClB,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,eAAe;IACf,eAAe;IACf,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IACjB,iBAAiB;IACjB,mBAAmB;IACnB,gBAAgB;IAChB,iBAAiB;IACjB,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,eAAe;IACf,eAAe;IACf,eAAe;IACf,gBAAgB;IACjB,kBAAkB;IACjB,iBAAiB;IACjB,eAAe,EAAE,YAAY;IAC7B,aAAa;IACb,mBAAmB;IACnB,YAAY;IACb,kBAAkB;IAClB,aAAa;IACZ,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,aAAa;IACb,SAAS;IACT,iBAAiB;IACjB,aAAa;IACb,qBAAqB;IACrB,aAAa;IACb,QAAQ;IAER,aAAa;IACb,oBAAoB;IACpB,mBAAmB;IACpB,YAAY;IACX,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,WAAW;IACX,YAAY;IACZ,WAAW;IACX,mBAAmB;IACnB,gBAAgB;IAChB,WAAW;IACX,QAAQ;IACR,YAAY;IACZ,UAAU;IACV,gBAAgB;IAChB,mBAAmB;IACnB,MAAM;IACN,MAAM;IACN,QAAQ;IAER,SAAS;IACT,WAAW;IACX,YAAY;IACZ,WAAW;IACZ,cAAc;IACb,mBAAmB;IACnB,oBAAoB;IACpB,YAAY;IACZ,UAAU;IACV,aAAa;IACb,aAAa;IACb,cAAc;IACd,cAAc,EAAE,gBAAgB;IAChC,aAAa;IACb,UAAU;IACV,UAAU,EAAE,0DAA0D;IACtE,YAAY;IACZ,gBAAgB;IAChB,aAAa;IACb,iBAAiB;IACjB,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,iBAAiB;IACjB,gBAAgB;IAChB,cAAc;IACd,eAAe;IACf,SAAS;IACT,UAAU;IACV,cAAc,EAAE,YAAY;IAC5B,gBAAgB;IAChB,cAAc;IACd,aAAa;IACb,gBAAgB,EAAE,UAAU;IAC5B,iBAAiB;IACjB,aAAa;IACb,aAAa;IACb,YAAY;IACZ,cAAc;IACd,mBAAmB;IACnB,aAAa;IACb,MAA
M;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,eAAe,EAAE,aAAa;IAC9B,oBAAoB;IAEpB,UAAU;IACV,YAAY;IACZ,aAAa;IACb,UAAU;IACX,YAAY;IACX,YAAY;IACZ,YAAY;IACZ,YAAY;IACZ,UAAU;IACV,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,UAAU,EAAE,eAAe;IAE3B,mBAAmB;IACnB,UAAU;IACV,aAAa;IACb,WAAW;IACX,mBAAmB;IACnB,cAAc;IACd,mBAAmB;IACnB,kBAAkB;IAClB,YAAY;IACb,gBAAgB;IACf,aAAa;IACb,eAAe;IACf,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,eAAe;IACf,iBAAiB;IACjB,0BAA0B,EAAE,iBAAiB;IAC7C,gBAAgB;IAChB,aAAa;IAEb,UAAU;IACV,WAAW;IACX,UAAU;IACV,kBAAkB;IAClB,cAAc;IACd,UAAU;IACV,YAAY;IACZ,qBAAqB;IACrB,WAAW;IACX,cAAc;IACf,YAAY;IACX,UAAU;IACV,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,SAAS;IACT,aAAa;IACb,iBAAiB;IACjB,iBAAiB;IAEjB,eAAe;IACf,QAAQ;IACR,SAAS;IACT,WAAW;IACZ,YAAY;IACZ,YAAY;IACX,OAAO;IACP,YAAY;IAEZ,WAAW;IACX,aAAa;IACb,aAAa;IACb,gBAAgB,EAAE,YAAY;IAC/B,2BAA2B;IAC1B,WAAW;IACZ,aAAa;IACZ,gBAAgB;IAChB,YAAY;IACZ,qBAAqB;IACrB,cAAc;IACd,iBAAiB;IACjB,sBAAsB;IAEtB,UAAU;IACV,cAAc;IACd,sBAAsB;IACtB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,WAAW;IAEX,gBAAgB;IAChB,iBAAiB;IACjB,WAAW;IACX,YAAY;IACZ,UAAU;IACV,QAAQ;IACR,UAAU;IACV,oBAAoB;IAEpB,aAAa;IACb,cAAc;IACd,cAAc;IACf,4CAA4C;IAC3C,cAAc;IACd,QAAQ;IAER,SAAS,EAAE,QAAQ;IACnB,SAAS;IACT,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,SAAS,EAAE,YAAY;IACvB,UAAU;IACV,UAAU;IACV,gBAAgB;IAChB,aAAa;IACb,UAAU;IACV,YAAY,EAAE,MAAM;IACpB,aAAa,EAAE,MAAM;IACrB,uBAAuB,EAAE,gBAAgB;IACzC,WAAW;IACX,UAAU;IACV,SAAS;IACT,UAAU;IACV,gBAAgB;IAChB,iBAAiB,EAAE,QAAQ;IAC3B,iBAAiB;IACjB,WAAW;IAEX,UAAU;IACV,WAAW;IACX,eAAe;IACf,OAAO;IACP,OAAO;IACR,+BAA+B;IAC9B,UAAU;IACV,WAAW;IACX,UAAU;IACV,YAAY;IACZ,SAAS;IACT,YAAY;IACZ,aAAa;IACb,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,UAAU;IACV,aAAa;IACb,cAAc;IACd,WAAW;IACX,aAAa;IACb,gBAAgB;IAChB,kBAAkB;IAClB,eAAe;IACf,cAAc;IAEd,MAAM;IACN,MAAM;IACP,YAAY;IACZ,gBAAgB;IACf,iBAAiB;IACjB,OAAO;IACP,aAAa;IACb,kBAAkB;IACnB,iCAAiC;IAChC,aAAa;IACb,kBAAkB;IAClB,qBAAqB;IACrB,iBAAiB;IACjB,mBAAmB;IACnB,kBAAkB;IAClB,kBAAkB;IAClB,mBAAmB;IACnB,WAAW;IACX,cAAc;IACd,UAAU;IACV,SAAS;IAET,gBAAgB;IAChB,sBAAsB,EAAE,eAAe;IACvC,aAAa;IACb,cAAc;IA
Cf,aAAa;IAEZ,UAAU;IACV,aAAa;IACb,YAAY;IACZ,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,MAAM;IACN,SAAS;IACV,iBAAiB;IAChB,aAAa;IACd,aAAa;IACZ,aAAa;IACb,QAAQ;IACR,aAAa;IACb,aAAa;IACb,gBAAgB;IAChB,aAAa;IACb,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,kBAAkB;IAClB,YAAY;IACZ,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,UAAU;IACV,WAAW;IACX,UAAU;IACV,iBAAiB;IACjB,UAAU;IACV,cAAc;IACd,aAAa;IACb,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,UAAU;IACV,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,WAAW;IACX,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,iBAAiB;IAClB,sCAAsC;IACrC,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,cAAc;IACd,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACd,cAAc;IACd,gBAAgB;IAChB,kBAAkB;IAClB,SAAS;IACV,WAAW;IACV,eAAe;IACf,WAAW;IACX,WAAW;IACX,SAAS;IACT,UAAU;IACV,UAAU;IACV,cAAc;IACd,cAAc;IACd,kBAAkB;IAClB,iBAAiB,EAAE,SAAS;IAE5B,SAAS;IACT,SAAS;IACT,UAAU;IAEV,OAAO;IACP,gBAAgB;IAChB,WAAW;IACX,UAAU;IACV,WAAW;IACX,WAAW;IACX,WAAW;IACX,WAAW;IACX,cAAc;IACd,cAAc;IACd,cAAc;IACd,SAAS;IACT,iBAAiB;IACjB,YAAY;IACZ,aAAa;IACb,aAAa;IACb,WAAW;IACX,gBAAgB;IAChB,QAAQ;IACR,UAAU;IACV,SAAS;IACT,UAAU;IACV,iBAAiB;IACjB,kBAAkB;IAClB,MAAM;IACN,UAAU;IAEV,oBAAoB;IACpB,WAAW;IACX,WAAW;IACX,SAAS;IACT,SAAS;IACT,aAAa;IACb,eAAe;IAChB,WAAW;IACX,6BAA6B;IAC5B,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,YAAY;IACZ,aAAa;IACb,eAAe;IACf,UAAU;IACV,gBAAgB;IAChB,cAAc;IACf,cAAc;IACb,iBAAiB;IACjB,iBAAiB;IACjB,WAAW;IACX,gBAAgB;IAChB,cAAc;IACd,UAAU;IACV,WAAW;IACX,cAAc;IACd,UAAU;IACV,UAAU;IACV,WAAW;IACX,YAAY;IACZ,aAAa;IACb,YAAY;IACZ,WAAW;IACX,WAAW;IACX,gBAAgB;IACjB,WAAW;IACV,cAAc;IACd,iBAAiB;IACjB,WAAW;IACX,aAAa;IACb,YAAY;IACZ,SAAS;IACT,WAAW;IACZ,YAAY;IACX,eAAe;IACf,aAAa;IACb,aAAa;IACb,kBAAkB;IAClB,WAAW;IACX,SAAS;IACT,kBAAkB;IAClB,SAAS;IACV,YAAY;IACX,QAAQ;IACT,YAAY;IACX,YAAY;IACZ,kBAAkB;IAClB,eAAe,EAAE,SAAS;IAC1B,YAAY;IACZ,kBAAkB,EAAE,SAAS;IAC7B,eAAe;IACf,SAAS;IACT,SAAS;IACV,eAAe;IACd,aAAa;IACb,kBAAkB;IAClB,eAAe;IAEf,OAAO;IACP,YAAY;IACZ,UAAU;IACV,UAAU;IACX,mBAAmB;IAClB,mBAAmB;IACnB,OAAO;IACR,mBAAmB;IAClB,cAAc;IACd,aAAa;IACb,WAAW;I
ACX,WAAW;IACX,cAAc;IACd,SAAS;IACT,SAAS;IACT,QAAQ;IACT,UAAU;IACT,eAAe;IACf,gBAAgB;IACjB,sBAAsB;IACrB,iBAAiB;IACjB,QAAQ;IACR,eAAe;IACf,cAAc;IACd,YAAY;IACZ,cAAc;IACf,iBAAiB;IAChB,aAAa;IACb,WAAW;IACX,UAAU;IACV,YAAY;IACZ,aAAa;IACb,eAAe;IACf,SAAS;IAET,QAAQ;IACR,QAAQ;IAER,YAAY;IACZ,eAAe;IACf,iBAAiB;IACjB,YAAY;IACZ,cAAc;IACf,YAAY;CACX,CAAC;AAEF,uFAAuF;AAC1E,QAAA,uBAAuB,GAAG,IAAI,MAAM,CAAC,yBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;AAEpF,kEAAkE;AACrD,QAAA,wBAAwB,GAAG,uBAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE3F,wCAAwC;AAC3B,QAAA,0BAA0B,GAAG;IACzC,eAAe;IACf,eAAe;IACf,gBAAgB;IAChB,iBAAiB;IACjB,6BAA6B;IAC7B,6BAA6B;IAC7B,oBAAoB;IACpB,YAAY;IACZ,mBAAmB;IACnB,gBAAgB;IAChB,kBAAkB;IAClB,uBAAuB;IACvB,sBAAsB;IACtB,eAAe,EAAE,4BAA4B;IAC7C,eAAe,EAAE,qCAAqC;IACtD,sBAAsB;IACtB,qBAAqB;IACrB,mBAAmB,EAAE,WAAW;IAChC,kCAAkC,EAAE,WAAW;IAC/C,yBAAyB,EAAE,cAAc;IACzC,gBAAgB;IAChB,mBAAmB,EAAE,aAAa;IAClC,iBAAiB,EAAE,UAAU;CAC7B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEC,QAAA,uBAAuB,GAAG;IACtC,iBAAiB;IACjB,kBAAkB;IAClB,0BAA0B;IAC1B,2BAA2B;IAC3B,mBAAmB;IACnB,cAAc;IACd,eAAe;IACf,iCAAiC;IACjC,sBAAsB;IACtB,8BAA8B;IAC9B,+BAA+B;IAC/B,kCAAkC;IAClC,mBAAmB;IACnB,gBAAgB;IAChB,mDAAmD,EAAE,WAAW;IAChE,sBAAsB,CAAC,UAAU;CACjC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEZ,wCAAwC;AACxC,qDAAqD;AACxC,QAAA,sBAAsB,GAAG,IAAI,GAAG,CAAC;IAC7C,MAAM;IACN,OAAO;IACP,MAAM;IACN,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,MAAM;IACN,SAAS;IACT,OAAO;IACP,QAAQ;IACR,GAAG;IACH,IAAI;IACJ,QAAQ;IACR,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,QAAQ;IACR,OAAO;IACP,MAAM;IACN,SAAS;IACT,SAAS;IACT,SAAS;IACT,UAAU;IACV,MAAM;IACN,QAAQ;IACR,MAAM;IACN,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,OAAO;IACP,KAAK;IACL,OAAO;IACP,KAAK;CACL,CAAC,CAAC;AAEH,qBAAqB;AACR,QAAA,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACzC,KAAK;IACL,OAAO;IACP,iBAAiB;IACjB,YAAY;IACZ,SAAS;IACT,SAAS;IACT,UAAU;IACV,YAAY;IACZ,UAAU;IACV,aAAa;IACb,cAAc;IACd,oBAAoB;IACpB,WAAW;IACX,KAAK;IACL,SAAS;IACT,aAAa;IACb,SAAS;IACT,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;
IACN,SAAS;IACT,KAAK;IACL,SAAS;IACT,QAAQ;IACR,OAAO;IACP,MAAM;IACN,OAAO;IAEP,oBAAoB;IACpB,QAAQ;IACR,aAAa;IACb,OAAO;IACP,aAAa;IACb,aAAa;IACb,eAAe;IACf,YAAY;IACZ,mBAAmB;IACnB,OAAO;IACP,cAAc;IACd,OAAO;IACP,OAAO;IACP,cAAc;IACd,eAAe;IACf,QAAQ;IACR,UAAU;IACV,aAAa;IACb,SAAS;IACT,SAAS;IACT,eAAe;IACf,UAAU;IACV,UAAU;IACV,UAAU;IACV,YAAY;IACZ,SAAS;IACT,QAAQ;IACR,aAAa;IACb,WAAW;IACX,UAAU;IACV,WAAW;IACX,SAAS;IACT,OAAO;CACP,CAAC,CAAC;AACU,QAAA,wBAAwB,GAAG,IAAI,GAAG,CAAC;IAC/C,OAAO;IACP,IAAI;CACJ,CAAC,CAAC"}
package/dist/defuddle.d.ts
@@ -0,0 +1,136 @@
1
+ import { DefuddleOptions, DefuddleResponse } from './types';
2
+ export declare class Defuddle {
3
+ private readonly doc;
4
+ private options;
5
+ private debug;
6
+ private _schemaOrgData;
7
+ private _schemaOrgExtracted;
8
+ private _metaTags;
9
+ private _metadata;
10
+ private _mobileStyles;
11
+ private _smallImages;
12
+ /**
13
+ * Create a new Defuddle instance
14
+ * @param doc - The document to parse
15
+ * @param options - Options for parsing
16
+ */
17
+ constructor(doc: Document, options?: DefuddleOptions);
18
+ /**
19
+ * Lazily extract and cache schema.org data. Must be called before
20
+ * parse() strips script tags from the document.
21
+ */
22
+ private getSchemaOrgData;
23
+ /**
24
+ * Parse the document and extract its main content
25
+ */
26
+ parse(): DefuddleResponse;
27
+ /**
28
+ * Extract text content from schema.org data (e.g. SocialMediaPosting, Article)
29
+ */
30
+ private _getSchemaText;
31
+ /**
32
+ * Remove dangerous elements and attributes from this.doc.
33
+ * Called after parseInternal so that extractors and schema extraction
34
+ * can still read script tags they depend on.
35
+ */
36
+ private _stripUnsafeElements;
37
+ /**
38
+ * Find the smallest DOM element whose text contains the search phrase
39
+ * and whose word count is at least 80% of the expected count.
40
+ * Shared by _findSchemaContentElement and _findContentBySchemaText.
41
+ */
42
+ private _findElementBySchemaText;
43
+ /**
44
+ * Find a DOM element whose text matches the schema.org text content.
45
+ * Used when the content scorer picked the wrong element from a feed page.
46
+ * Returns the element's inner HTML including sibling media (images, etc.)
47
+ */
48
+ private _findContentBySchemaText;
49
+ private findLargestHiddenContentSelector;
50
+ /**
51
+ * Get the largest available src from an img element,
52
+ * checking srcset for higher-resolution versions.
53
+ */
54
+ private _getLargestImageSrc;
55
+ /**
56
+ * Parse the document asynchronously. Checks for extractors that prefer
57
+ * async (e.g. YouTube transcripts) before sync, then falls back to async
58
+ * extractors if sync parse yields no content.
59
+ */
60
+ parseAsync(): Promise<DefuddleResponse>;
61
+ /**
62
+ * Fetch only async variables (e.g. transcript) without re-parsing.
63
+ * Safe to call after parse() — uses cached schema.org data since
64
+ * parse() strips script tags from the document.
65
+ */
66
+ fetchAsyncVariables(): Promise<{
67
+ [key: string]: string;
68
+ } | null>;
69
+ private tryAsyncExtractor;
70
+ /**
71
+ * Internal parse method that does the actual work
72
+ */
73
+ private parseInternal;
74
+ private countHtmlWords;
75
+ private _log;
76
+ private _evaluateMediaQueries;
77
+ private applyMobileStyles;
78
+ private removeImages;
79
+ private removeHiddenElements;
80
+ private removeBySelector;
81
+ private findSmallImages;
82
+ private removeSmallImages;
83
+ private getElementIdentifier;
84
+ private findMainContent;
85
+ private findTableBasedContent;
86
+ private findContentByScoring;
87
+ private getElementSelector;
88
+ private getComputedStyle;
89
+ /**
90
+ * Resolve relative URLs to absolute within a DOM element
91
+ */
92
+ private resolveRelativeUrls;
93
+ /**
94
+ * Flatten shadow DOM content into a cloned document.
95
+ * Walks both trees in parallel so positional correspondence is exact.
96
+ */
97
+ private flattenShadowRoots;
98
+ /**
99
+ * Resolve React streaming SSR suspense boundaries.
100
+ * React's streaming SSR places content in hidden divs (id="S:0") and
101
+ * template placeholders (id="B:0") with $RC scripts to swap them.
102
+ * Since we don't execute scripts, we perform the swap manually.
103
+ */
104
+ private resolveStreamedContent;
105
+ /**
106
+ * Replace a shadow DOM host element with a div containing its shadow content.
107
+ * Custom elements (tag names with hyphens) would re-initialize when inserted
108
+ * into a live DOM, recreating their shadow roots and hiding the content.
109
+ */
110
+ private replaceShadowHost;
111
+ /**
112
+ * Resolve relative URLs in an HTML string
113
+ */
114
+ private resolveContentUrls;
115
+ private _extractSchemaOrgData;
116
+ private _collectMetaTags;
117
+ private _decodeHTMLEntities;
118
+ /**
119
+ * Build a DefuddleResponse from an extractor result with metadata
120
+ */
121
+ private buildExtractorResponse;
122
+ /**
123
+ * Filter extractor variables to only include custom ones
124
+ * (exclude standard fields that are already mapped to top-level properties)
125
+ */
126
+ private getExtractorVariables;
127
+ /**
128
+ * Content-based pattern removal for elements that can't be detected by
129
+ * CSS selectors (e.g. Tailwind/CSS-in-JS sites with non-semantic class names).
130
+ */
131
+ private removeByContentPattern;
132
+ /**
133
+ * Remove an element's following siblings, and optionally the element itself.
134
+ */
135
+ private removeTrailingSiblings;
136
+ }