@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"x-oembed.js","sourceRoot":"","sources":["../../src/extractors/x-oembed.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAoE;AA4FpE,MAAa,gBAAiB,SAAQ,qBAAa;IAClD,UAAU;QACT,OAAO,KAAK,CAAC;IACd,CAAC;IAED,OAAO;QACN,OAAO;YACN,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,EAAE;SACf,CAAC;IACH,CAAC;IAED,eAAe;QACd,OAAO,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACjD,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,yDAAyD;QACzD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAClD,IAAI,QAAQ,EAAE,CAAC;YACd,OAAO,QAAQ,CAAC;QACjB,CAAC;QAED,mEAAmE;QACnE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;IAC7B,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,MAAM,SAAS,GAAG,0CAA0C,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,mBAAmB,CAAC;QAC5G,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC;QAExC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,0BAA0B,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,IAAI,GAAmB,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnD,8CAA8C;QAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC/C,GAAG,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QAErD,iEAAiE;QACjE,8BAA8B;QAC9B,MAAM,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;QACnD,MAAM,UAAU,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC3D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;aACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,IAAA,mBAAa,EAAC,CAAC,CAAC,MAAM,CAAC;aACtC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU;YAC7B,CAAC,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YACxC,CAAC,CAAC,EAAE,CAAC;QAEN,MAAM,QAAQ,GAAG,UAAU,EAAE,aAAa,CAAC,cAAc,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACrD,MAAM,SAAS,GAAG,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC;QAE7D,MAAM,iBAAiB,GAAG,IAAA,gBAAU,EAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,aAAa,GAAG,IAAA,gBAAU,EAAC,MAAM,CAAC,CAAC;QACzC,MAAM,eAAe,GAAG,IAAA,gBAAU,EAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,gBAAgB,GAAG,IAAA,gBAAU,EAAC,SAAS,CAAC,CAAC;QAE/C,MAAM,WAAW,GAAG;;;;;4CAKsB,iBAAiB,wCAAwC,aAAa;SACzG,QAAQ,CAAC,CAAC,CAAC,YAAY,gBAAgB,wBAAwB,eAAe,MAAM,CAAC,CAAC,CAAC,EAAE;;QAE1F,SAAS,CAAC,CAAC,CAAC,2BAA2B,SAAS,QAAQ,CAAC,CAAC,CAAC,EAAE;;;;GAIlE,CAAC,IAAI,EAAE,CAAC;QAET,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,SAAS,EAAE;gBACV,KAAK,EAAE,WAAW,MAAM,IAAI,IAAI,CAAC,WAAW,EAAE;gBAC9C,MAAM,EAAE,MAAM,IAAI,IAAI,CAAC,WAAW;gBAClC,IAAI,EAAE,aAAa;aACnB;SACD,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,mBAAmB;QAChC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;QACxF,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3D,oDAAoD;YACpD,IAAI,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC;gBACzB,OAAO,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;YACtC,CAAC;YACD,mDAAmD;YACnD,IAAI,IAAI,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACpC,CAAC;YACD,OAAO,IAAI,CAAC;QACb,CAAC;QAAC,MAAM,CAAC;YACR,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,QAAgB,EAAE,EAAU;QACxD,MAAM,MAAM,GAAG,6BAA6B,QAAQ,WAAW,EAAE,EAAE,CAAC;QACpE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACpC,OAAO,EAAE;gBACR,YAAY,EAAE,8DAA8D;aAC5E;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,iCAAiC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACrE,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxB,CAAC;IAEO,kBAAkB,CAAC,IAAuB;QACjD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAQ,CAAC;QACpC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC;QAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAEnD,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,SAAS,EAAE;gBACV,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,aAAa;gBACnB,WAAW,EAAE,OAAO,CAAC,YAAY;aACjC;SACD,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,IAAuB;QAC/C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAE5C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,SAAS,EAAE;gBACV,KAAK,EAAE,WAAW,MAAM,EAAE;gBAC1B,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,aAAa;aACnB;SACD,CAAC;IACH,CAAC;IAEO,WAAW,CAAC,KAAiC;QACpD,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC;QAChD,qEAAqE;QACrE,wDAAwD;QACxD,MAAM,MAAM,GAAG,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;QAE9E,gDAAgD;QAChD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC;YACxC,MAAM,GAAG,OAAO,CAAC;YAEjB,0DAA0D;YAC1D,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACtD,IAAI,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YAC3E,MAAM,aAAa,GAAG,YAAY;gBACjC,CAAC,CAAC,SAAS,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,MAAM,CAAC;gBAC7I,CAAC,CAAC,SAAS,CAAC;YAEb,qCAAqC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAE5E,sCAAsC;YACtC,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;YAEnD,IAAI,YAAY,EAAE,CAAC;gBAClB,SAAS,CAAC,IAAI,CAAC,kBAAkB,UAAU,mBAAmB,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC;gBAC9B,SAAS,CAAC,IAAI,CAAC,MAAM,UAAU,MAAM,CAAC,CAAC;YACxC,CAAC;QACF,CAAC;QAED,sBAAsB;QACtB,IAAI,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC;YACzB,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;gBACxC,SAAS,CAAC,IAAI,CAAC,aAAa,IAAA,gBAAU,EAAC,KAAK,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAC/D,CAAC;QACF,CAAC;QAED,MAAM,MAAM,GAAG,IAAA,gBAAU,EAAC,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1D,MAAM,UAAU,GAAG,IAAA,gBAAU,EAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEjD,OAAO,uEAAuE;YAC7E,gEAAgE,UAAU,wCAAwC,MAAM,sBAAsB;YAC9I,2BAA2B,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ;YACvD,oBAAoB,CAAC;IACvB,CAAC;IAEO,YAAY,CAAC,IAAY,EAAE,OAAiB;QACnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAA,gBAAU,EAAC,IAAI,CAAC,CAAC;QACzB,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACrB,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;gBAAE,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM;gBAAE,OAAO,CAAC,CAAC,CAAC;YACvD,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO;gBAAE,OAAO,CAAC,CAAC;YACtD,OAAO,CAAC,CAAC;QACV,CAAC,CAAC,CAAC;QAEH,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBACzB,MAAM,IAAI,IAAA,gBAAU,EAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;YACtD,CAAC;YACD,MAAM,IAAI,MAAM,CAAC,GAAG,CAAC;YACrB,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC;QACrB,CAAC;QACD,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACvB,MAAM,IAAI,IAAA,gBAAU,EAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,MAAM,CAAC;IACf,CAAC;IAEO,WAAW,CAAC,IAAY,EAAE,SAAiB,EAAE,OAAe,EAAE,MAAwB;QAC7F,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC;YACrC,IAAI,IAAI,IAAI,SAAS,IAAI,MAAM,IAAI,OAAO;gBAAE,SAAS;YAErD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,GAAG,SAAS,CAAC,CAAC;YAEvD,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;YAC/D,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;gBACnD,MAAM,GAAG,GAAG,iBAAiB,IAAA,gBAAU,EAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACtD,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,GAAG,IAAI,EAAE,CAAC,CAAC;gBAC3E,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;iBAAM,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;gBACnD,MAAM,GAAG,GAAG,IAAA,gBAAU,EAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACvC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,GAAG,IAAI,EAAE,CAAC,CAAC;gBAC3E,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;QACF,CAAC;QAED,OAAO,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAEO,aAAa,CACpB,MAAoB,EACpB,SAAgC,EAChC,UAA2D;QAE3D,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,+BAA+B;QAC/B,IAAI,UAAU,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC;YAC9C,KAAK,CAAC,IAAI,CAAC,aAAa,IAAA,gBAAU,EAAC,UAAU,CAAC,UAAU,CAAC,gBAAgB,CAAC,sBAAsB,CAAC,CAAC;QACnG,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAExB,IAAI,KAAK,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;gBAC1C,2CAA2C;gBAC3C,MAAM,KAAK,GAAa,EAAE,CAAC;gBAC3B,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;oBACtE,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC;oBACzE,CAAC,EAAE,CAAC;gBACL,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;gBACzC,SAAS;YACV,CAAC;YAED,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YAChD,IAAI,IAAI,EAAE,CAAC;gBACV,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClB,CAAC;YACD,CAAC,EAAE,CAAC;QACL,CAAC;QAED,OAAO,8BAA8B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,YAAY,CAAC;IACjE,CAAC;IAEO,WAAW,CAAC,KAAiB,EAAE,SAAgC;QACtE,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACpB,KAAK,UAAU,CAAC,CAAC,CAAC;gBACjB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE;oBAAE,OAAO,EAAE,CAAC;gBAClC,OAAO,MAAM,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,MAAM,CAAC;YAC/D,CAAC;YACD,KAAK,YAAY;gBAChB,OAAO,OAAO,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,OAAO,CAAC;YACjE,KAAK,cAAc;gBAClB,OAAO,OAAO,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,OAAO,CAAC;YACjE,KAAK,QAAQ;gBACZ,OAAO,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YACjD,OAAO,CAAC,CAAC,CAAC;gBACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE;oBAAE,OAAO,EAAE,CAAC;gBAClC,OAAO,MAAM,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,SAAS,CAAC,MAAM,CAAC;YAC/D,CAAC;QACF,CAAC;IACF,CAAC;IAEO,iBAAiB,CAAC,KAAiB,EAAE,SAAgC;QAC5E,IAAI,KAAK,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAE/C,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrF,IAAI,CAAC,WAAW;YAAE,OAAO,EAAE,CAAC;QAE5B,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC;QAEjC,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;YACrB,KAAK,OAAO,CAAC,CAAC,CAAC;gBACd,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;gBACpC,IAAI,OAAO,EAAE,CAAC;oBACb,OAAO,uBAAuB,IAAA,gBAAU,EAAC,OAAO,CAAC,wBAAwB,CAAC;gBAC3E,CAAC;gBACD,OAAO,EAAE,CAAC;YACX,CAAC;YACD,KAAK,UAAU,CAAC,CAAC,CAAC;gBACjB,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;gBAC5C,sCAAsC;gBACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;gBACjE,IAAI,SAAS,EAAE,CAAC;oBACf,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;oBAC1B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;oBAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,oBAAoB,IAAA,gBAAU,EAAC,IAAI,CAAC,gBAAgB,IAAA,gBAAU,EAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACrG,OAAO,aAAa,QAAQ,IAAI,IAAA,gBAAU,EAAC,IAAI,CAAC,eAAe,CAAC;gBACjE,CAAC;gBACD,OAAO,cAAc,IAAA,gBAAU,EAAC,QAAQ,CAAC,eAAe,CAAC;YAC1D,CAAC;YACD;gBACC,OAAO,EAAE,CAAC;QACZ,CAAC;IACF,CAAC;IAEO,mBAAmB,CAAC,KAAiB,EAAE,SAAgC;QAC9E,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QACxB,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QAErB,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,iBAAiB,EAAE,CAAC;YAC7C,IAAI,KAAK,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBAC5B,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBACtE,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,CAAC;YACxF,CAAC;QACF,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;YACxC,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;YACrE,IAAI,WAAW,EAAE,KAAK,CAAC,IAAI,KAAK,MAAM,IAAI,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACtE,MAAM,GAAG,GAAG,IAAA,gBAAU,EAAC,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnD,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,GAAG,IAAI,EAAE,CAAC,CAAC;gBAC/E,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACnF,CAAC;QACF,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,EAAE,QAAQ,EAAE,CAAC;YAC1B,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC3C,MAAM,GAAG,GAAG,iBAAiB,IAAA,gBAAU,EAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBACxD,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,GAAG,IAAI,EAAE,CAAC,CAAC;gBACpF,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,CAAC;QACF,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;YACtB,KAAK,MAAM,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;gBACvC,MAAM,GAAG,GAAG,IAAA,gBAAU,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBACrC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,YAAY,GAAG,IAAI,EAAE,CAAC,CAAC;gBACpF,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,CAAC,CAAC;YACvE,CAAC;QACF,CAAC;QAED,OAAO,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;CAED;AAxYD,4CAwYC"}
@@ -0,0 +1,87 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
+ export declare class YoutubeExtractor extends BaseExtractor {
4
+ private videoElement;
5
+ private inlineJsonCache;
6
+ protected schemaOrgData: any;
7
+ constructor(document: Document, url: string, schemaOrgData?: any);
8
+ canExtract(): boolean;
9
+ canExtractAsync(): boolean;
10
+ prefersAsync(): boolean;
11
+ extract(): ExtractorResult;
12
+ extractAsync(): Promise<ExtractorResult>;
13
+ private getCaptionTracks;
14
+ private pickCaptionTrack;
15
+ private getTrackDisplayName;
16
+ private normalizeLanguageLabel;
17
+ private getTranscriptLanguageCodeFromDom;
18
+ private getInlineChapters;
19
+ private getTranscriptContainer;
20
+ private getTranscriptSelectors;
21
+ private buildTranscriptFromContainer;
22
+ private extractTranscriptFromExistingDom;
23
+ private canOpenTranscriptPanel;
24
+ private buildResult;
25
+ private formatDescription;
26
+ private getVideoData;
27
+ private getChannelName;
28
+ private getChannelNameFromDom;
29
+ private getChannelNameFromMicrodata;
30
+ private getChannelNameFromPlayerResponse;
31
+ private parseInlineJson;
32
+ private fetchTranscript;
33
+ private pollFor;
34
+ private waitForTranscriptSegments;
35
+ private waitForTranscriptContainer;
36
+ private waitForElement;
37
+ private isMobileYoutube;
38
+ /**
39
+ * Fallback: open YouTube's transcript panel and read segments from the DOM.
40
+ * Used when fetch-based extraction fails and the transcript is not already rendered.
41
+ */
42
+ private extractTranscriptFromOpenedDom;
43
+ /**
44
+ * Mobile YouTube (m.youtube.com) transcript panel opening flow:
45
+ * 1. Click "...more" to expand description
46
+ * 2. Click "View all" next to Chapters to open the engagement panel
47
+ * 3. Click "Timeline" tab to switch to the transcript view
48
+ * 4. Wait for transcript segments to render
49
+ */
50
+ private openMobileTranscriptPanel;
51
+ private fetchPlayerData;
52
+ private fetchChapters;
53
+ private extractChaptersFromPlayerBar;
54
+ private extractChaptersFromEngagementPanels;
55
+ private parseTimestamp;
56
+ private parseTranscriptXml;
57
+ private decodeEntities;
58
+ private getVideoId;
59
+ /**
60
+ * Group raw transcript segments into readable blocks.
61
+ * If speaker markers (>>) are present, groups by speaker turn.
62
+ * Otherwise, groups by sentence boundaries.
63
+ */
64
+ private groupTranscriptSegments;
65
+ /**
66
+ * Group segments by speaker turns, then by sentences within each turn.
67
+ * Each ">>" or "- " marker starts a new speaker turn (with blank line separation).
68
+ * Within a turn, text is split at sentence boundaries for readability.
69
+ * Tracks alternating speaker identity (0/1).
70
+ */
71
+ private groupBySpeaker;
72
+ /**
73
+ * Split turns that start with a short affirmative response (e.g. "Mhm.", "Yeah.")
74
+ * followed by longer content. The affirmative belongs to the current speaker,
75
+ * but the rest is likely the other speaker (missed diarization in auto-captions).
76
+ */
77
+ private splitAffirmativeTurns;
78
+ private mergeSentenceGroupsWithinTurn;
79
+ private shouldMergeSentenceGroups;
80
+ private isShortStandaloneUtterance;
81
+ /**
82
+ * Group segments by sentence boundaries for transcripts without speaker markers.
83
+ * Accumulates text until a segment ends with sentence-ending punctuation (.!?),
84
+ * or until a very large time gap between segments.
85
+ */
86
+ private groupBySentence;
87
+ }