defuddle 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/dist/constants.d.ts +0 -2
  2. package/dist/constants.js +39 -10
  3. package/dist/constants.js.map +1 -1
  4. package/dist/defuddle.d.ts +1 -20
  5. package/dist/defuddle.js +151 -807
  6. package/dist/defuddle.js.map +1 -1
  7. package/dist/elements/code.js +76 -11
  8. package/dist/elements/code.js.map +1 -1
  9. package/dist/elements/footnotes.js +420 -45
  10. package/dist/elements/footnotes.js.map +1 -1
  11. package/dist/elements/headings.js +5 -0
  12. package/dist/elements/headings.js.map +1 -1
  13. package/dist/elements/math.base.d.ts +1 -0
  14. package/dist/elements/math.base.js +4 -1
  15. package/dist/elements/math.base.js.map +1 -1
  16. package/dist/elements/math.core.d.ts +1 -0
  17. package/dist/elements/math.d.ts +1 -1
  18. package/dist/elements/math.full.d.ts +1 -0
  19. package/dist/elements/math.full.js +90 -0
  20. package/dist/elements/math.full.js.map +1 -0
  21. package/dist/elements/math.js +3 -3
  22. package/dist/extractor-registry.js +20 -0
  23. package/dist/extractor-registry.js.map +1 -1
  24. package/dist/extractors/bbcode-data.d.ts +10 -0
  25. package/dist/extractors/bbcode-data.js +59 -0
  26. package/dist/extractors/bbcode-data.js.map +1 -0
  27. package/dist/extractors/c2-wiki.d.ts +15 -0
  28. package/dist/extractors/c2-wiki.js +143 -0
  29. package/dist/extractors/c2-wiki.js.map +1 -0
  30. package/dist/extractors/reddit.d.ts +1 -0
  31. package/dist/extractors/reddit.js +14 -14
  32. package/dist/extractors/reddit.js.map +1 -1
  33. package/dist/extractors/substack.d.ts +17 -0
  34. package/dist/extractors/substack.js +188 -0
  35. package/dist/extractors/substack.js.map +1 -0
  36. package/dist/extractors/x-article.d.ts +1 -0
  37. package/dist/extractors/x-article.js +27 -2
  38. package/dist/extractors/x-article.js.map +1 -1
  39. package/dist/extractors/x-oembed.js +1 -1
  40. package/dist/extractors/x-oembed.js.map +1 -1
  41. package/dist/extractors/youtube.d.ts +9 -2
  42. package/dist/extractors/youtube.js +161 -29
  43. package/dist/extractors/youtube.js.map +1 -1
  44. package/dist/fetch.js +183 -14
  45. package/dist/fetch.js.map +1 -1
  46. package/dist/index.full.js +1 -1
  47. package/dist/index.js +1 -1
  48. package/dist/markdown.js +27 -2
  49. package/dist/markdown.js.map +1 -1
  50. package/dist/metadata.d.ts +4 -3
  51. package/dist/metadata.js +195 -41
  52. package/dist/metadata.js.map +1 -1
  53. package/dist/node.d.ts +1 -1
  54. package/dist/node.js +3 -6
  55. package/dist/node.js.map +1 -1
  56. package/dist/removals/content-patterns.d.ts +2 -0
  57. package/dist/removals/content-patterns.js +835 -0
  58. package/dist/removals/content-patterns.js.map +1 -0
  59. package/dist/removals/hidden.d.ts +2 -0
  60. package/dist/removals/hidden.js +78 -0
  61. package/dist/removals/hidden.js.map +1 -0
  62. package/dist/removals/metadata-block.d.ts +8 -0
  63. package/dist/removals/metadata-block.js +40 -0
  64. package/dist/removals/metadata-block.js.map +1 -0
  65. package/dist/{scoring.d.ts → removals/scoring.d.ts} +1 -1
  66. package/dist/{scoring.js → removals/scoring.js} +7 -9
  67. package/dist/removals/scoring.js.map +1 -0
  68. package/dist/removals/selectors.d.ts +2 -0
  69. package/dist/removals/selectors.js +118 -0
  70. package/dist/removals/selectors.js.map +1 -0
  71. package/dist/removals/small-images.d.ts +3 -0
  72. package/dist/removals/small-images.js +116 -0
  73. package/dist/removals/small-images.js.map +1 -0
  74. package/dist/standardize.d.ts +2 -1
  75. package/dist/standardize.js +106 -62
  76. package/dist/standardize.js.map +1 -1
  77. package/dist/types/extractors.d.ts +1 -0
  78. package/dist/types.d.ts +5 -0
  79. package/dist/utils/bbcode.d.ts +6 -0
  80. package/dist/utils/bbcode.js +57 -0
  81. package/dist/utils/bbcode.js.map +1 -0
  82. package/dist/utils.js +1 -1
  83. package/dist/utils.js.map +1 -1
  84. package/package.json +1 -1
  85. package/dist/elements/math.core.js +0 -52
  86. package/dist/elements/math.core.js.map +0 -1
  87. package/dist/index.js.map +0 -1
  88. package/dist/scoring.js.map +0 -1
@@ -0,0 +1,143 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.C2WikiExtractor = void 0;
4
+ const _base_1 = require("./_base");
5
+ const dom_1 = require("../utils/dom");
6
+ const C2_API = 'https://c2.com/wiki/remodel/pages/';
7
+ class C2WikiExtractor extends _base_1.BaseExtractor {
8
+ canExtract() {
9
+ return false;
10
+ }
11
+ canExtractAsync() {
12
+ return this.getPageTitle() !== null;
13
+ }
14
+ prefersAsync() {
15
+ return true;
16
+ }
17
+ extract() {
18
+ return { content: '', contentHtml: '' };
19
+ }
20
+ async extractAsync() {
21
+ const title = this.getPageTitle();
22
+ if (!title)
23
+ return { content: '', contentHtml: '' };
24
+ const json = await fetch(C2_API + title).then(res => res.json());
25
+ if (!json || !json.text)
26
+ return { content: '', contentHtml: '' };
27
+ const words = title.replace(/([a-z])([A-Z])/g, '$1 $2');
28
+ const contentHtml = this.renderPage(json);
29
+ return {
30
+ content: contentHtml,
31
+ contentHtml,
32
+ variables: {
33
+ title: words,
34
+ site: 'C2 Wiki',
35
+ ...(json.date ? { published: json.date } : {}),
36
+ },
37
+ };
38
+ }
39
+ getPageTitle() {
40
+ if (this.pageTitle !== undefined)
41
+ return this.pageTitle;
42
+ try {
43
+ const search = new URL(this.url).search;
44
+ const match = search.match(/[?&]([A-Za-z]\w*)/);
45
+ this.pageTitle = match ? match[1] : 'WelcomeVisitors';
46
+ }
47
+ catch {
48
+ this.pageTitle = null;
49
+ }
50
+ return this.pageTitle;
51
+ }
52
+ renderPage(json) {
53
+ const body = this.markup(json.text);
54
+ const footer = json.date ? `<hr><p>Last edit ${(0, dom_1.escapeHtml)(json.date)}</p>` : '';
55
+ return `${body}${footer}`;
56
+ }
57
+ markup(text) {
58
+ const lines = text.replace(/\\\n/g, ' ').split(/\r?\n/);
59
+ const parts = [];
60
+ let openTags = [];
61
+ for (const line of lines) {
62
+ const { html, openTags: nextTags } = this.applyBullets(line, openTags);
63
+ parts.push(this.applyInline(html));
64
+ openTags = nextTags;
65
+ }
66
+ while (openTags.length > 0) {
67
+ parts.push(`</${openTags.pop()}>`);
68
+ }
69
+ return parts.join('\n');
70
+ }
71
+ applyBullets(text, openTags) {
72
+ const newOpenTags = [...openTags];
73
+ let prefix = '';
74
+ const closeToDepth = (depth, tag) => {
75
+ while (newOpenTags.length > depth) {
76
+ prefix += `</${newOpenTags.pop()}>`;
77
+ }
78
+ if (tag && newOpenTags.length < depth) {
79
+ prefix += `<${tag}>`;
80
+ newOpenTags.push(tag);
81
+ }
82
+ else if (tag && newOpenTags.length === depth && newOpenTags[depth - 1] !== tag) {
83
+ prefix += `</${newOpenTags.pop()}><${tag}>`;
84
+ newOpenTags.push(tag);
85
+ }
86
+ };
87
+ if (/^\s*$/.test(text)) {
88
+ const inList = newOpenTags.some(t => t === 'ul' || t === 'ol' || t === 'dl');
89
+ if (inList)
90
+ return { html: '', openTags: newOpenTags };
91
+ closeToDepth(0);
92
+ return { html: prefix + '<p></p>', openTags: newOpenTags };
93
+ }
94
+ if (/^-----*/.test(text)) {
95
+ closeToDepth(0);
96
+ return { html: prefix + '<hr>', openTags: newOpenTags };
97
+ }
98
+ const dlMatch = text.match(/^(\t+)(.+):\t/);
99
+ if (dlMatch) {
100
+ closeToDepth(dlMatch[1].length, 'dl');
101
+ return { html: prefix + `<dt>${dlMatch[2]}<dd>` + text.slice(dlMatch[0].length), openTags: newOpenTags };
102
+ }
103
+ const tabUlMatch = text.match(/^(\t+)\*/);
104
+ if (tabUlMatch) {
105
+ closeToDepth(tabUlMatch[1].length, 'ul');
106
+ return { html: prefix + '<li>' + text.slice(tabUlMatch[0].length), openTags: newOpenTags };
107
+ }
108
+ const starUlMatch = text.match(/^(\*+)/);
109
+ if (starUlMatch) {
110
+ closeToDepth(starUlMatch[1].length, 'ul');
111
+ return { html: prefix + '<li>' + text.slice(starUlMatch[0].length), openTags: newOpenTags };
112
+ }
113
+ const olMatch = text.match(/^(\t+)\d+\.?/);
114
+ if (olMatch) {
115
+ closeToDepth(olMatch[1].length, 'ol');
116
+ return { html: prefix + '<li>' + text.slice(olMatch[0].length), openTags: newOpenTags };
117
+ }
118
+ if (/^\s/.test(text)) {
119
+ closeToDepth(1, 'pre');
120
+ return { html: prefix + text, openTags: newOpenTags };
121
+ }
122
+ closeToDepth(0);
123
+ return { html: prefix + text, openTags: newOpenTags };
124
+ }
125
+ applyInline(text) {
126
+ return text
127
+ .replace(/'''(.*?)'''/g, '<strong>$1</strong>')
128
+ .replace(/''(.*?)''/g, '<em>$1</em>')
129
+ .replace(/\b(https?|ftp|mailto|file|telnet|news):[^\s<>[\]"'()]*[^\s<>[\]"'(),.?]/g, (url) => {
130
+ if ((0, dom_1.isDangerousUrl)(url))
131
+ return (0, dom_1.escapeHtml)(url);
132
+ if (/\.(gif|jpg|jpeg|png)$/i.test(url)) {
133
+ return `<img src="${escapeAttr(url)}">`;
134
+ }
135
+ return `<a href="${escapeAttr(url)}" rel="nofollow" target="_blank">${(0, dom_1.escapeHtml)(url)}</a>`;
136
+ });
137
+ }
138
+ }
139
+ exports.C2WikiExtractor = C2WikiExtractor;
140
+ function escapeAttr(text) {
141
+ return text.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
142
+ }
143
+ //# sourceMappingURL=c2-wiki.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"c2-wiki.js","sourceRoot":"","sources":["../../src/extractors/c2-wiki.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA0D;AAE1D,MAAM,MAAM,GAAG,oCAAoC,CAAC;AAEpD,MAAa,eAAgB,SAAQ,qBAAa;IAGjD,UAAU;QACT,OAAO,KAAK,CAAC;IACd,CAAC;IAED,eAAe;QACd,OAAO,IAAI,CAAC,YAAY,EAAE,KAAK,IAAI,CAAC;IACrC,CAAC;IAED,YAAY;QACX,OAAO,IAAI,CAAC;IACb,CAAC;IAED,OAAO;QACN,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;QAEpD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACjE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;QAEjE,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACxD,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAE1C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,SAAS,EAAE;gBACV,KAAK,EAAE,KAAK;gBACZ,IAAI,EAAE,SAAS;gBACf,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC9C;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC;QACxD,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;YACxC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAChD,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACR,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACvB,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC;IACvB,CAAC;IAEO,UAAU,CAAC,IAAS;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,oBAAoB,IAAA,gBAAU,EAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QAChF,OAAO,GAAG,IAAI,GAAG,MAAM,EAAE,CAAC;IAC3B,CAAC;IAEO,MAAM,CAAC,IAAY;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACxD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3
B,IAAI,QAAQ,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YAC1B,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACvE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;YACnC,QAAQ,GAAG,QAAQ,CAAC;QACrB,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,KAAK,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAEO,YAAY,CAAC,IAAY,EAAE,QAAkB;QACpD,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;QAClC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,MAAM,YAAY,GAAG,CAAC,KAAa,EAAE,GAAY,EAAE,EAAE;YACpD,OAAO,WAAW,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBACnC,MAAM,IAAI,KAAK,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC;YACrC,CAAC;YACD,IAAI,GAAG,IAAI,WAAW,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBACvC,MAAM,IAAI,IAAI,GAAG,GAAG,CAAC;gBACrB,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;iBAAM,IAAI,GAAG,IAAI,WAAW,CAAC,MAAM,KAAK,KAAK,IAAI,WAAW,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBAClF,MAAM,IAAI,KAAK,WAAW,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC;gBAC5C,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;QACF,CAAC,CAAC;QAEF,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;YAC7E,IAAI,MAAM;gBAAE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;YACvD,YAAY,CAAC,CAAC,CAAC,CAAC;YAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC5D,CAAC;QAED,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,CAAC,CAAC,CAAC;YAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACzD,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAC5C,IAAI,OAAO,EAAE,CAAC;YACb,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC1G,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAC1C,IAAI,UAA
U,EAAE,CAAC;YAChB,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACzC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC5F,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,WAAW,EAAE,CAAC;YACjB,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC7F,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3C,IAAI,OAAO,EAAE,CAAC;YACb,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACzF,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,YAAY,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACvD,CAAC;QAED,YAAY,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;IACvD,CAAC;IAEO,WAAW,CAAC,IAAY;QAC/B,OAAO,IAAI;aACT,OAAO,CAAC,cAAc,EAAE,qBAAqB,CAAC;aAC9C,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC;aACpC,OAAO,CACP,0EAA0E,EAC1E,CAAC,GAAG,EAAE,EAAE;YACP,IAAI,IAAA,oBAAc,EAAC,GAAG,CAAC;gBAAE,OAAO,IAAA,gBAAU,EAAC,GAAG,CAAC,CAAC;YAChD,IAAI,wBAAwB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxC,OAAO,aAAa,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;YACzC,CAAC;YACD,OAAO,YAAY,UAAU,CAAC,GAAG,CAAC,oCAAoC,IAAA,gBAAU,EAAC,GAAG,CAAC,MAAM,CAAC;QAC7F,CAAC,CACD,CAAC;IACJ,CAAC;CACD;AAzJD,0CAyJC;AAED,SAAS,UAAU,CAAC,IAAY;IAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAC5D,CAAC"}
@@ -6,6 +6,7 @@ export declare class RedditExtractor extends BaseExtractor {
6
6
  constructor(document: Document, url: string);
7
7
  canExtract(): boolean;
8
8
  canExtractAsync(): boolean;
9
+ prefersAsync(): boolean;
9
10
  private isCommentsPage;
10
11
  extractAsync(): Promise<ExtractorResult>;
11
12
  extract(): ExtractorResult;
@@ -14,11 +14,15 @@ class RedditExtractor extends _base_1.BaseExtractor {
14
14
  return !!this.shredditPost || this.isOldReddit;
15
15
  }
16
16
  canExtractAsync() {
17
- // For new reddit comment pages, extract() returns empty content
18
- // when shreddit-comment elements are missing (server-side fetch),
19
- // causing parseAsync() to fall through to this async path.
20
17
  return this.isCommentsPage() && !this.isOldReddit;
21
18
  }
19
+ prefersAsync() {
20
+ // In server/worker contexts, fetch old.reddit.com for full content including
21
+ // comments. In browser (real window), use the rendered DOM directly since
22
+ // old.reddit.com is CORS-blocked from www.reddit.com.
23
+ const isBrowser = typeof window !== 'undefined' && this.document.defaultView === window;
24
+ return this.isCommentsPage() && !this.isOldReddit && !isBrowser;
25
+ }
22
26
  isCommentsPage() {
23
27
  return /\/r\/.+\/comments\//.test(this.url);
24
28
  }
@@ -46,21 +50,17 @@ class RedditExtractor extends _base_1.BaseExtractor {
46
50
  if (this.isOldReddit) {
47
51
  return this.extractOldReddit(this.document);
48
52
  }
49
- // New reddit server-side HTML includes shreddit-post but not
50
- // shreddit-comment elements (those require JS). Return empty
51
- // so parseAsync() falls through to extractAsync() which fetches
52
- // old.reddit.com with full content.
53
- const hasComments = this.document.querySelectorAll('shreddit-comment').length > 0;
54
- if (this.isCommentsPage() && !hasComments) {
55
- return { content: '', contentHtml: '' };
56
- }
57
- const postContent = this.getPostContent();
58
- const comments = this.options.includeReplies !== false ? this.extractComments() : '';
59
- const contentHtml = this.createContentHtml(postContent, comments);
60
53
  const postTitle = this.document.querySelector('h1')?.textContent?.trim() || '';
61
54
  const subreddit = this.getSubreddit();
62
55
  const postAuthor = this.getPostAuthor();
56
+ const postContent = this.getPostContent();
63
57
  const description = this.createDescription(postContent);
58
+ // Extract any comments already in the DOM (browser renders these via JS;
59
+ // SSR/Node HTML won't have them, so comments will be empty there).
60
+ const comments = this.options.includeReplies !== false ? this.extractComments() : '';
61
+ const contentHtml = this.createContentHtml(postContent, comments);
62
+ // If contentHtml is empty (link/image post with no body and no DOM comments),
63
+ // parseAsync() will fall through to extractAsync() → old.reddit.com fetch.
64
64
  return {
65
65
  content: contentHtml,
66
66
  contentHtml: contentHtml,
@@ -1 +1 @@
1
- {"version":3,"file":"reddit.js","sourceRoot":"","sources":["../../src/extractors/reddit.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAwD;AACxD,gDAAyF;AAEzF,MAAa,eAAgB,SAAQ,qBAAa;IAIjD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC;QAC5D,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;IAC5D,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAChD,CAAC;IAED,eAAe;QACd,gEAAgE;QAChE,kEAAkE;QAClE,2DAA2D;QAC3D,OAAO,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;IACnD,CAAC;IAEO,cAAc;QACrB,OAAO,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,CAAC,QAAQ,GAAG,gBAAgB,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE;YAC/C,OAAO,EAAE;gBACR,YAAY,EAAE,wCAAwC;aACtD;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,SAAS,IAAI,CAAC,OAAO,SAAS,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7G,IAAI,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACnE,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,MAAM,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACnC,CAAC;IAED,OAAO;QACN,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,CAAC;QAED,6DAA6D;QAC7D,6DAA6D;QAC7D,gEAAgE;QAChE,oCAAoC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;QAClF,IAAI,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;QACzC,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAErF,M
AAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAClE,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACxC,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAExD,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,IAAwB;QAChD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,SAAS,EAAE,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACjF,MAAM,UAAU,GAAG,SAAS,EAAE,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;QAChE,MAAM,SAAS,GAAG,SAAS,EAAE,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,UAAU,GAAG,SAAS,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,EAAE,CAAC;YAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC;YAClE,MAAM,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClF,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxE,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,cAAc;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAC1E,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,aA
Aa,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;QAEnF,OAAO,QAAQ,GAAG,SAAS,CAAC;IAC7B,CAAC;IAEO,iBAAiB,CAAC,WAAmB,EAAE,QAAgB;QAC9D,OAAO,IAAA,2BAAgB,EAAC,QAAQ,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;IAC1D,CAAC;IAEO,eAAe;QACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC,CAAC;QAChF,OAAO,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAEO,SAAS;QAChB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QACzD,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC7C,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,aAAa;QACpB,OAAO,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxD,CAAC;IAEO,iBAAiB,CAAC,WAAmB;QAC5C,IAAI,CAAC,WAAW;YAAE,OAAO,EAAE,CAAC;QAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACnD,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC;QAC3D,OAAO,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE;aAChC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aACb,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC9B,CAAC;IAEO,wBAAwB,CAAC,SAAkB,EAAE,QAAgB,CAAC;QACrE,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAEnF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACzD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;YAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACjG,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,CAAC;YACvE,MAAM,QAAQ,GAAG,MAAM,EAAE,YAAY,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,2BAA2B,CAAC,CAAC;YAClE,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEjD,MAAM,CAAC,IAAI,CAAC;gBACX,MAAM;gBACN,IAAI;gBACJ,OAAO,EAAE,IAAI;gBACb,KAAK;gBACL,KAAK,EAAE,K
AAK,IAAI,SAAS;gBACzB,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;YAEH,MAAM,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAI,cAAc,EAAE,CAAC;gBACpB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,wBAAwB,CAAC,cAAc,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YAC1E,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAEO,eAAe,CAAC,QAAmB;QAC1C,MAAM,WAAW,GAAkB,EAAE,CAAC;QAEtC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACpD,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;YACnD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE1D,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC;mBAC7C,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,UAAU,CAAC;mBACvD,EAAE,CAAC;YACP,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE9E,WAAW,CAAC,IAAI,CAAC;gBAChB,MAAM;gBACN,IAAI;gBACJ,OAAO;gBACP,KAAK;gBACL,KAAK,EAAE,GAAG,KAAK,SAAS;gBACxB,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;QACJ,CAAC;QAED,OAAO,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC;IACtC,CAAC;CACD;AAlOD,0CAkOC"}
1
+ {"version":3,"file":"reddit.js","sourceRoot":"","sources":["../../src/extractors/reddit.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAwD;AACxD,gDAAyF;AAEzF,MAAa,eAAgB,SAAQ,qBAAa;IAIjD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC;QAC5D,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;IAC5D,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAChD,CAAC;IAED,eAAe;QACd,OAAO,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;IACnD,CAAC;IAED,YAAY;QACX,6EAA6E;QAC7E,0EAA0E;QAC1E,sDAAsD;QACtD,MAAM,SAAS,GAAG,OAAO,MAAM,KAAK,WAAW,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,KAAK,MAAM,CAAC;QACxF,OAAO,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,SAAS,CAAC;IACjE,CAAC;IAEO,cAAc;QACrB,OAAO,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,CAAC,QAAQ,GAAG,gBAAgB,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE;YAC/C,OAAO,EAAE;gBACR,YAAY,EAAE,wCAAwC;aACtD;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,SAAS,IAAI,CAAC,OAAO,SAAS,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7G,IAAI,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACnE,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,MAAM,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACnC,CAAC;IAED,OAAO;QACN,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACxC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,G
AAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAExD,yEAAyE;QACzE,mEAAmE;QACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACrF,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAClE,8EAA8E;QAC9E,2EAA2E;QAE3E,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,IAAwB;QAChD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,SAAS,EAAE,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACjF,MAAM,UAAU,GAAG,SAAS,EAAE,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;QAChE,MAAM,SAAS,GAAG,SAAS,EAAE,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,UAAU,GAAG,SAAS,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,EAAE,CAAC;YAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC;YAClE,MAAM,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClF,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxE,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,cAAc;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAC1E,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,aAAa,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;QA
EnF,OAAO,QAAQ,GAAG,SAAS,CAAC;IAC7B,CAAC;IAEO,iBAAiB,CAAC,WAAmB,EAAE,QAAgB;QAC9D,OAAO,IAAA,2BAAgB,EAAC,QAAQ,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;IAC1D,CAAC;IAEO,eAAe;QACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC,CAAC;QAChF,OAAO,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAEO,SAAS;QAChB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QACzD,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC7C,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,aAAa;QACpB,OAAO,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxD,CAAC;IAEO,iBAAiB,CAAC,WAAmB;QAC5C,IAAI,CAAC,WAAW;YAAE,OAAO,EAAE,CAAC;QAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACnD,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC;QAC3D,OAAO,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE;aAChC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aACb,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC9B,CAAC;IAEO,wBAAwB,CAAC,SAAkB,EAAE,QAAgB,CAAC;QACrE,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAEnF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACzD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;YAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACjG,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,CAAC;YACvE,MAAM,QAAQ,GAAG,MAAM,EAAE,YAAY,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,2BAA2B,CAAC,CAAC;YAClE,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEjD,MAAM,CAAC,IAAI,CAAC;gBACX,MAAM;gBACN,IAAI;gBACJ,OAAO,EAAE,IAAI;gBACb,KAAK;gBACL,KAAK,EAAE,KAAK,IAAI,SAAS;gBACzB,GAAG,EAAE,SAAS
,CAAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;YAEH,MAAM,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAI,cAAc,EAAE,CAAC;gBACpB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,wBAAwB,CAAC,cAAc,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YAC1E,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAEO,eAAe,CAAC,QAAmB;QAC1C,MAAM,WAAW,GAAkB,EAAE,CAAC;QAEtC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACpD,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;YACnD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE1D,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC;mBAC7C,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,UAAU,CAAC;mBACvD,EAAE,CAAC;YACP,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE9E,WAAW,CAAC,IAAI,CAAC;gBAChB,MAAM;gBACN,IAAI;gBACJ,OAAO;gBACP,KAAK;gBACL,KAAK,EAAE,GAAG,KAAK,SAAS;gBACxB,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;QACJ,CAAC;QAED,OAAO,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC;IACtC,CAAC;CACD;AAlOD,0CAkOC"}
@@ -0,0 +1,17 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
/**
 * Extractor for Substack pages. The implementation handles two page shapes:
 * full posts (either a rendered `div.body.markup` element or post HTML taken
 * from the `window._preloads` script) and Notes (`div.ProseMirror.FeedProseMirror`).
 */
+ export declare class SubstackExtractor extends BaseExtractor {
4
// Note text element found in the document, or null when the page is not a Note.
+ private noteText;
5
// Element following the note whose class contains "imageGrid", or null.
+ private noteImage;
6
// Post object parsed from the `window._preloads` script, or null when unavailable.
+ private postData;
7
// CSS selector that locates the post body in the document, or null for non-post pages.
+ private postContentSelector;
8
// Probes the document and records which of the above were found.
+ constructor(document: Document, url: string, schemaOrgData?: any, options?: any);
9
// True when either a post body selector or a note text element was recorded.
+ canExtract(): boolean;
10
// Returns the post result when a post selector was recorded, otherwise the note result.
+ extract(): ExtractorResult;
11
+ private extractPost;
12
+ private extractNote;
13
// Reads a "Feb 24, 2026"-style date from the byline element and returns it as an ISO-like string.
+ private parseDateFromByline;
14
// Decodes the JSON string embedded in the `window._preloads` script.
+ private extractPreloadData;
15
// Builds an <img> tag for a note's image (og:image first, then the grid's own <img>).
+ private buildImageHtml;
16
// Picks the widest candidate from an <img>'s srcset, falling back to its src.
+ private getLargestSrc;
17
+ }
@@ -0,0 +1,188 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SubstackExtractor = void 0;
4
+ const _base_1 = require("./_base");
5
+ const dom_1 = require("../utils/dom");
6
// Marker attribute set on the wrapper <div> that receives post HTML injected from the preload data.
+ const INJECTED_ATTR = 'data-defuddle-substack-post';
7
/**
 * Extractor for Substack pages: full posts and Notes.
 *
 * The constructor probes the document once and records what it found;
 * `canExtract()` and `extract()` work from that recorded state.
 */
class SubstackExtractor extends _base_1.BaseExtractor {
    /**
     * Probes the document in priority order:
     *   1. a rendered post body (`div.body.markup`),
     *   2. post HTML embedded in the `window._preloads` script, which is
     *      injected into the document under a marker attribute,
     *   3. a Note (`div.ProseMirror.FeedProseMirror`) and its optional image grid.
     *
     * Side effect: in case 2 a wrapper <div> is appended to the document.
     *
     * @param {Document} document - Document to extract from.
     * @param {string} url - URL of the page.
     * @param {*} [schemaOrgData] - Forwarded to the base extractor.
     * @param {*} [options] - Forwarded to the base extractor.
     */
    constructor(document, url, schemaOrgData, options) {
        super(document, url, schemaOrgData, options);
        this.noteText = null;
        this.noteImage = null;
        this.postData = null;
        this.postContentSelector = null;

        // Check for rendered post body first (browser/extension context, after React hydration)
        if (document.querySelector('div.body.markup')) {
            this.postData = this.extractPreloadData(); // metadata only
            this.postContentSelector = 'div.body.markup';
            return;
        }

        // Fall back to window._preloads script (SSR/curl/worker context)
        this.postData = this.extractPreloadData();
        if (this.postData?.body_html) {
            // Inject body_html into the document so the pipeline can process it
            const injectedSelector = `[${INJECTED_ATTR}]`;
            const existing = document.querySelector(injectedSelector);
            // document.body is null for documents parsed without a <body>;
            // fall back to the root element instead of throwing.
            const host = document.body || document.documentElement;
            if (!existing && host) {
                const wrapper = document.createElement('div');
                wrapper.setAttribute(INJECTED_ATTR, '');
                wrapper.appendChild((0, dom_1.parseHTML)(document, this.postData.body_html));
                host.appendChild(wrapper);
            }
            if (existing || host) {
                this.postContentSelector = injectedSelector;
                return;
            }
            // No element to attach the post to: continue with the Notes probe below.
        }

        // Fall back to Notes extraction (ProseMirror editor div)
        this.noteText = document.querySelector('div.ProseMirror.FeedProseMirror');
        if (this.noteText) {
            const feedCommentBody = this.noteText.closest('[class*="feedCommentBody"]:not([class*="feedCommentBodyInner"])');
            const sibling = feedCommentBody?.parentElement?.nextElementSibling;
            const siblingClass = sibling?.getAttribute('class') || '';
            if (sibling && siblingClass.includes('imageGrid')) {
                this.noteImage = sibling;
            }
        }
    }

    /**
     * @returns {boolean} True when the constructor found a post body or a Note.
     */
    canExtract() {
        return this.postContentSelector !== null || this.noteText !== null;
    }

    /**
     * @returns {object} The post result when a post selector was recorded,
     *   otherwise the Note result.
     */
    extract() {
        if (this.postContentSelector) {
            return this.extractPost();
        }
        return this.extractNote();
    }

    /**
     * Builds the result for a post. Content is left empty and the body is
     * referenced through `contentSelector` instead; metadata prefers the
     * preload data and falls back to meta tags / byline text.
     *
     * @returns {object} Result with `contentSelector` and `variables`.
     */
    extractPost() {
        const title = this.postData?.title || this.document.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
        const description = this.postData?.subtitle || this.document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
        const author = this.postData?.publishedBylines?.[0]?.name
            || this.document.querySelector('a[href*="substack.com/@"]')?.textContent?.trim()
            || '';
        const published = this.postData?.post_date
            || this.parseDateFromByline()
            || '';
        return {
            content: '',
            contentHtml: '',
            contentSelector: this.postContentSelector,
            variables: {
                title,
                author,
                site: 'Substack',
                description,
                published,
            },
        };
    }

    /**
     * Builds the result for a Note: the note's outer HTML, followed by an
     * image tag when an image grid was found. The author is the og:title with
     * a trailing "(@handle)" suffix removed.
     *
     * @returns {object} Result with `content`, `contentHtml` and `variables`.
     */
    extractNote() {
        const textHtml = this.noteText.outerHTML;
        const imageHtml = this.buildImageHtml();
        const content = imageHtml ? `${textHtml}\n${imageHtml}` : textHtml;
        const title = this.document.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
        const description = this.document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
        const author = title.replace(/\s*\(@[^)]+\)\s*$/, '').trim();
        return {
            content,
            contentHtml: content,
            variables: {
                title,
                author,
                site: 'Substack',
                description,
            },
        };
    }

    /**
     * Reads a "Feb 24, 2026"-style date from the byline wrapper.
     *
     * @returns {string} `YYYY-MM-DDT00:00:00+00:00`, or '' when no byline or
     *   no matching date is found.
     */
    parseDateFromByline() {
        const byline = this.document.querySelector('[class*="byline-wrapper"]');
        if (!byline)
            return '';
        // textContent runs adjacent words together (e.g. "ZhutovFeb") — insert space at case boundaries
        const text = (byline.textContent || '').trim().replace(/([a-z])([A-Z])/g, '$1 $2');
        // Match "Feb 24, 2026" style (Substack uses abbreviated month names in the UI)
        const ABBREV_MONTHS = 'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec';
        const MONTH_MAP = {
            Jan: '01', Feb: '02', Mar: '03', Apr: '04', May: '05', Jun: '06',
            Jul: '07', Aug: '08', Sep: '09', Oct: '10', Nov: '11', Dec: '12',
        };
        const match = text.match(new RegExp(`\\b(${ABBREV_MONTHS})\\s+(\\d{1,2}),?\\s+(\\d{4})\\b`));
        if (match) {
            const month = MONTH_MAP[match[1]];
            const day = match[2].padStart(2, '0');
            return `${match[3]}-${month}-${day}T00:00:00+00:00`;
        }
        return '';
    }

    /**
     * Finds the inline script that assigns `window._preloads` via
     * `JSON.parse("…")`, decodes the doubly-encoded JSON and returns the post.
     *
     * @returns {object|null} `feedData.initialPost.post` when it carries
     *   `body_html`; otherwise null.
     */
    extractPreloadData() {
        const scripts = Array.from(this.document.querySelectorAll('script'));
        for (const script of scripts) {
            const text = script.textContent || '';
            if (!text.includes('window._preloads') || !text.includes('body_html'))
                continue;
            const jsonParseIdx = text.indexOf('JSON.parse("');
            if (jsonParseIdx === -1)
                continue;
            const startIdx = jsonParseIdx + 'JSON.parse("'.length;
            // Walk to the closing quote of the string literal, skipping escaped characters.
            let i = startIdx;
            while (i < text.length) {
                if (text[i] === '\\') {
                    i += 2;
                }
                else if (text[i] === '"') {
                    break;
                }
                else {
                    i++;
                }
            }
            try {
                const innerStr = text.slice(startIdx, i);
                // First parse decodes the JS string literal, second parses the JSON it contains.
                const jsonString = JSON.parse('"' + innerStr + '"');
                const data = JSON.parse(jsonString);
                const post = data?.feedData?.initialPost?.post;
                if (post?.body_html)
                    return post;
            }
            catch {
                // Best effort: a malformed script is skipped and the next one is tried.
            }
        }
        return null;
    }

    /**
     * Builds the <img> markup for a Note's image: the og:image URL when
     * present, otherwise the widest candidate of the first <img> in the grid.
     * The URL is escaped before being placed in the `src` attribute so that
     * quotes or ampersands in it cannot break out of the attribute.
     *
     * @returns {string} An `<img … />` tag, or '' when there is no image.
     */
    buildImageHtml() {
        if (!this.noteImage)
            return '';
        const escapeAttr = (value) => String(value)
            .replace(/&/g, '&amp;')
            .replace(/"/g, '&quot;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        const ogImage = this.document.querySelector('meta[property="og:image"]')?.getAttribute('content');
        if (ogImage)
            return `<img src="${escapeAttr(ogImage)}" alt="" />`;
        const img = this.noteImage.querySelector('img');
        if (!img)
            return '';
        const src = this.getLargestSrc(img);
        return src ? `<img src="${escapeAttr(src)}" alt="" />` : '';
    }

    /**
     * Picks the URL with the largest `w` descriptor from an image's srcset and
     * removes `,w_<n>` / `,c_<mode>` segments from it; falls back to `src`.
     *
     * @param {Element} img - Image element to inspect.
     * @returns {string} The chosen URL, or '' when none is available.
     */
    getLargestSrc(img) {
        const srcset = img.getAttribute('srcset') || '';
        if (srcset) {
            // Entries are matched by their width descriptor rather than split on commas,
            // because the URLs handled here contain commas themselves (see the
            // `,w_` / `,c_` segments removed below).
            const entryPattern = /(.+?)\s+(\d+(?:\.\d+)?)w/g;
            let bestUrl = '';
            let bestWidth = 0;
            let match;
            let lastIndex = 0;
            while ((match = entryPattern.exec(srcset)) !== null) {
                let url = match[1].trim();
                // Every entry after the first starts with the separating comma.
                if (lastIndex > 0)
                    url = url.replace(/^,\s*/, '');
                lastIndex = entryPattern.lastIndex;
                const width = parseFloat(match[2]);
                if (url && width > bestWidth) {
                    bestWidth = width;
                    bestUrl = url;
                }
            }
            if (bestUrl)
                return bestUrl.replace(/,w_\d+/g, '').replace(/,c_\w+/g, '');
        }
        return img.getAttribute('src') || '';
    }
}
187
+ exports.SubstackExtractor = SubstackExtractor;
188
+ //# sourceMappingURL=substack.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"substack.js","sourceRoot":"","sources":["../../src/extractors/substack.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAyC;AAEzC,MAAM,aAAa,GAAG,6BAA6B,CAAC;AAWpD,MAAa,iBAAkB,SAAQ,qBAAa;IAMnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB,EAAE,OAAa;QAC9E,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;QANtC,aAAQ,GAAmB,IAAI,CAAC;QAChC,cAAS,GAAmB,IAAI,CAAC;QACjC,aAAQ,GAA4B,IAAI,CAAC;QACzC,wBAAmB,GAAkB,IAAI,CAAC;QAKjD,wFAAwF;QACxF,IAAI,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC/C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC,CAAC,gBAAgB;YAC3D,IAAI,CAAC,mBAAmB,GAAG,iBAAiB,CAAC;YAC7C,OAAO;QACR,CAAC;QAED,iEAAiE;QACjE,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC;YAC9B,oEAAoE;YACpE,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,aAAa,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;gBAC9C,OAAO,CAAC,YAAY,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;gBACxC,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;gBAClE,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YACD,IAAI,CAAC,mBAAmB,GAAG,IAAI,aAAa,GAAG,CAAC;YAChD,OAAO;QACR,CAAC;QAED,yDAAyD;QACzD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,CAAC;QAC1E,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,iEAAiE,CAAC,CAAC;YACjH,MAAM,OAAO,GAAG,eAAe,EAAE,aAAa,EAAE,kBAAkB,CAAC;YACnE,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC1D,IAAI,OAAO,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACnD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC;YAC1B,CAAC;QACF,CAAC;IACF,CAAC;IAED,UAAU;QACT,OAAO,IAAI,CAAC,mBAAmB,KAAK,IAAI,IAAI,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC;IACpE,CAAC;IAED,OAAO;QACN,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC;IAEO,WAAW;QAClB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC9H,MAAM,WAAW,GAAG,I
AAI,CAAC,QAAQ,EAAE,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC7I,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI;eACrD,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE;eAC7E,EAAE,CAAC;QACP,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,SAAS;eACtC,IAAI,CAAC,mBAAmB,EAAE;eAC1B,EAAE,CAAC;QAEP,OAAO;YACN,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,IAAI,CAAC,mBAAoB;YAC1C,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,UAAU;gBAChB,WAAW;gBACX,SAAS;aACT;SACD,CAAC;IACH,CAAC;IAEO,WAAW;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAS,CAAC,SAAS,CAAC;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,SAAS,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnE,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACtG,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAClH,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7D,OAAO;YACN,OAAO;YACP,WAAW,EAAE,OAAO;YACpB,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,UAAU;gBAChB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,mBAAmB;QAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,CAAC;QACxE,IAAI,CAAC,MAAM;YAAE,OAAO,EAAE,CAAC;QACvB,gGAAgG;QAChG,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACnF,+EAA+E;QAC/E,MAAM,aAAa,GAAG,iDAAiD,CAAC;QACxE,MAAM,SAAS,GAA2B;YACzC,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI;YAChE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI;SAChE,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,aAAa,kCAAkC,CAAC,CAAC,CAAC;QAC7F,IAAI,KAAK,EAAE,CAAC;YACX,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAClC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAA
C;YACtC,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,GAAG,iBAAiB,CAAC;QACrD,CAAC;QACD,OAAO,EAAE,CAAC;IACX,CAAC;IAEO,kBAAkB;QACzB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;YACtC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC;gBAAE,SAAS;YAEhF,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YAClD,IAAI,YAAY,KAAK,CAAC,CAAC;gBAAE,SAAS;YAElC,MAAM,QAAQ,GAAG,YAAY,GAAG,cAAc,CAAC,MAAM,CAAC;YACtD,IAAI,CAAC,GAAG,QAAQ,CAAC;YACjB,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBACtB,CAAC,IAAI,CAAC,CAAC;gBACR,CAAC;qBAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC5B,MAAM;gBACP,CAAC;qBAAM,CAAC;oBACP,CAAC,EAAE,CAAC;gBACL,CAAC;YACF,CAAC;YAED,IAAI,CAAC;gBACJ,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBACzC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,GAAG,GAAG,CAAW,CAAC;gBAC9D,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;gBACpC,MAAM,IAAI,GAAqB,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,CAAC;gBACjE,IAAI,IAAI,EAAE,SAAS;oBAAE,OAAO,IAAI,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACR,sBAAsB;YACvB,CAAC;QACF,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAClG,IAAI,OAAO;YAAE,OAAO,aAAa,OAAO,aAAa,CAAC;QAEtD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAChD,IAAI,CAAC,GAAG;YAAE,OAAO,EAAE,CAAC;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;QACpC,OAAO,GAAG,CAAC,CAAC,CAAC,aAAa,GAAG,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACjD,CAAC;IAEO,aAAa,CAAC,GAAY;QACjC,MAAM,MAAM,GAAG,GAAG,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,2BAA2B,CAAC;YACjD,IAAI,OAAO,GAAG,EAAE,CAAC;YACjB,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,IAAI,KAAK,CAAC;YACV,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,OAAO,CAAC,KAAK,GAAG,YAAY,C
AAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBACrD,IAAI,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1B,IAAI,SAAS,GAAG,CAAC;oBAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBAClD,SAAS,GAAG,YAAY,CAAC,SAAS,CAAC;gBACnC,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnC,IAAI,GAAG,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;oBAC9B,SAAS,GAAG,KAAK,CAAC;oBAClB,OAAO,GAAG,GAAG,CAAC;gBACf,CAAC;YACF,CAAC;YACD,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC3E,CAAC;QACD,OAAO,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IACtC,CAAC;CACD;AA3LD,8CA2LC"}
@@ -20,5 +20,6 @@ export declare class XArticleExtractor extends BaseExtractor {
20
20
  private convertDraftParagraphs;
21
21
  private convertBoldSpans;
22
22
  private removeDraftAttributes;
23
+ private repairSurrogatePairs;
23
24
  private createDescription;
24
25
  }
@@ -61,8 +61,8 @@ class XArticleExtractor extends _base_1.BaseExtractor {
61
61
  return name || handle || this.getAuthorFromUrl();
62
62
  }
63
63
  getAuthorFromUrl() {
64
- // match username before /article/, excluding system paths like /i/
65
- const match = this.url.match(/\/([a-zA-Z][a-zA-Z0-9_]{0,14})\/article\/\d+/);
64
+ // match username before /article/ or /status/, excluding system paths like /i/
65
+ const match = this.url.match(/\/([a-zA-Z0-9_][a-zA-Z0-9_]{0,14})\/(article|status)\/\d+/);
66
66
  return match ? `@${match[1]}` : this.getAuthorFromOgTitle();
67
67
  }
68
68
  getAuthorFromOgTitle() {
@@ -94,6 +94,7 @@ class XArticleExtractor extends _base_1.BaseExtractor {
94
94
  this.convertBoldSpans(container, ownerDoc);
95
95
  this.convertDraftParagraphs(container, ownerDoc);
96
96
  this.removeDraftAttributes(container);
97
+ this.repairSurrogatePairs(container);
97
98
  }
98
99
  convertEmbeddedTweets(container, ownerDoc) {
99
100
  container.querySelectorAll(SELECTORS.EMBEDDED_TWEET).forEach(tweet => {
@@ -258,6 +259,30 @@ class XArticleExtractor extends _base_1.BaseExtractor {
258
259
  el.removeAttribute('data-offset-key');
259
260
  });
260
261
  }
262
+ repairSurrogatePairs(container) {
263
+ const SHOW_TEXT = 4;
264
+ const ownerDoc = container.ownerDocument || this.document;
265
+ const walker = ownerDoc.createTreeWalker(container, SHOW_TEXT);
266
+ let prev = null;
267
+ let node;
268
+ while ((node = walker.nextNode())) {
269
+ const curr = node;
270
+ if (prev) {
271
+ const prevText = prev.textContent || '';
272
+ const currText = curr.textContent || '';
273
+ if (prevText && currText) {
274
+ const lastCode = prevText.charCodeAt(prevText.length - 1);
275
+ const firstCode = currText.charCodeAt(0);
276
+ // high surrogate followed by low surrogate across a node boundary
277
+ if (lastCode >= 0xD800 && lastCode <= 0xDBFF && firstCode >= 0xDC00 && firstCode <= 0xDFFF) {
278
+ prev.textContent = prevText.slice(0, -1);
279
+ curr.textContent = prevText.slice(-1) + currText;
280
+ }
281
+ }
282
+ }
283
+ prev = curr;
284
+ }
285
+ }
261
286
  createDescription() {
262
287
  const text = this.articleContainer?.textContent?.trim() || '';
263
288
  return text.slice(0, 140) + (text.length > 140 ? '...' : '');
@@ -1 +1 @@
1
- {"version":3,"file":"x-article.js","sourceRoot":"","sources":["../../src/extractors/x-article.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA6C;AAE7C,MAAM,SAAS,GAAG;IACjB,iBAAiB,EAAE,4CAA4C;IAC/D,KAAK,EAAE,uCAAuC;IAC9C,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EAAE,uBAAuB;IACpC,aAAa,EAAE,iCAAiC;IAChD,MAAM,EAAE,gCAAgC;IACxC,gBAAgB,EAAE,qDAAqD;IACvE,UAAU,EAAE,kCAAkC;IAC9C,gBAAgB,EAAE,mBAAmB;IACrC,cAAc,EAAE,6BAA6B;IAC7C,UAAU,EAAE,2BAA2B;IACvC,SAAS,EAAE,2BAA2B;IACtC,UAAU,EAAE,qCAAqC;IACjD,YAAY,EAAE,iCAAiC;CACtC,CAAC;AAEX,MAAa,iBAAkB,SAAQ,qBAAa;IAGnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,gBAAgB,GAAG,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC;IAChC,CAAC;IAED,OAAO;QACN,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE7C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,gBAAgB,EAAE;gBACjB,SAAS,EAAE,IAAI,CAAC,YAAY,EAAE;aAC9B;YACD,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,aAAa;gBACnB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC7D,OAAO,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,oBAAoB,CAAC;IAC7D,CAAC;IAEO,aAAa;QACpB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAErD,MAAM,IAAI,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAE/F,IAAI,IAAI,IAAI,MAAM;YAAE,OAAO,GAAG,IAAI,MAAM,MAAM,GAAG,CAAC;QAClD,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAClD,CAAC;IAEO,gBAAgB;QACvB,mEAAmE;QACnE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC7E,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC7D,CAAC;IAEO,oB
AAoB;QAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACxG,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAClE,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC/C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9B,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAAE,OAAO,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAgB,CAAC;QACnE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAEzB,OAAO,8BAA8B,IAAA,mBAAa,EAAC,KAAK,CAAC,YAAY,CAAC;IACvE,CAAC;IAEO,YAAY,CAAC,SAAsB;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAE1D,gEAAgE;QAChE,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC5C,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACpC,kEAAkE;QAClE,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,sBAAsB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IAEO,qBAAqB,CAAC,SAAsB,EAAE,QAAkB;QACvE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACpE,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,UAAU,CAAC,SAAS,GAAG,gBAAgB,CAAC;YAExC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE3D,qBAAqB;YACrB,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9D,MAAM,SAAS,GAAG,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzD,iCAAiC;YACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC5C,IAAI,CAAC,WAAW,GAAG,MA
AM,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/D,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC;gBAC1B,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YAED,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,SAAsB,EAAE,QAAkB;QACnE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,OAAO;YAE1B,mEAAmE;YACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACzD,IAAI,SAAS,EAAE,CAAC;gBACf,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,wDAAwD;gBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC7C,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAChD,CAAC;YAED,kCAAkC;YAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC/C,IAAI,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,YAAY,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC5C,OAAO,CAAC,SAAS,GAAG,YAAY,QAAQ,EAAE,CAAC;YAC5C,CAAC;YACD,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE5B,qCAAqC;YACrC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,SAAsB,EAAE,QAAkB;QAChE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,SAAS,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,kBAAkB,CAAC,SAAsB,EAAE,QAAkB;QACpE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,+BAA+B;YAC/B,MAAM,
MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO;YAEnD,0EAA0E;YAC1E,IAAI,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC;YAE5E,wBAAwB;YACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC/C,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAClC,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAElC,kCAAkC;YAClC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,mBAAmB,CAAC,SAAsB;QACjD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,sBAAsB,CAAC,SAAsB,EAAE,QAAkB;QACxE,0FAA0F;QAC1F,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,YAAY,GAAG,CAAC,CAAC;QAEvB,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpE,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAEtC,mEAAmE;YACnE,MAAM,WAAW,GAAG,CAAC,IAAU,EAAQ,EAAE;gBACxC,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACjC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;qBAAM,IAAI,IAAI,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;oBAC3C,MAAM,EAAE,GAAG,IAAe,CAAC;oBAC3B,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CA
AC;oBAErC,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBAChD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBAC1C,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;yBAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;wBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;wBACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzD,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;wBAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,CAAC;wBACP,kDAAkD;wBAClD,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;oBACpD,CAAC;gBACF,CAAC;YACF,CAAC,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,SAAsB,EAAE,QAAkB;QAClE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,SAAsB;QACnD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACnE,EAAE,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;CACD;AA/QD,8CA+QC"}
1
+ {"version":3,"file":"x-article.js","sourceRoot":"","sources":["../../src/extractors/x-article.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA6C;AAE7C,MAAM,SAAS,GAAG;IACjB,iBAAiB,EAAE,4CAA4C;IAC/D,KAAK,EAAE,uCAAuC;IAC9C,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EAAE,uBAAuB;IACpC,aAAa,EAAE,iCAAiC;IAChD,MAAM,EAAE,gCAAgC;IACxC,gBAAgB,EAAE,qDAAqD;IACvE,UAAU,EAAE,kCAAkC;IAC9C,gBAAgB,EAAE,mBAAmB;IACrC,cAAc,EAAE,6BAA6B;IAC7C,UAAU,EAAE,2BAA2B;IACvC,SAAS,EAAE,2BAA2B;IACtC,UAAU,EAAE,qCAAqC;IACjD,YAAY,EAAE,iCAAiC;CACtC,CAAC;AAEX,MAAa,iBAAkB,SAAQ,qBAAa;IAGnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,gBAAgB,GAAG,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC;IAChC,CAAC;IAED,OAAO;QACN,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE7C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,gBAAgB,EAAE;gBACjB,SAAS,EAAE,IAAI,CAAC,YAAY,EAAE;aAC9B;YACD,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,aAAa;gBACnB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC7D,OAAO,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,oBAAoB,CAAC;IAC7D,CAAC;IAEO,aAAa;QACpB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAErD,MAAM,IAAI,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAE/F,IAAI,IAAI,IAAI,MAAM;YAAE,OAAO,GAAG,IAAI,MAAM,MAAM,GAAG,CAAC;QAClD,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAClD,CAAC;IAEO,gBAAgB;QACvB,+EAA+E;QAC/E,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAC1F,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC7D,CAAC;IAEO,oB
AAoB;QAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACxG,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAClE,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC/C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9B,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAAE,OAAO,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAgB,CAAC;QACnE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAEzB,OAAO,8BAA8B,IAAA,mBAAa,EAAC,KAAK,CAAC,YAAY,CAAC;IACvE,CAAC;IAEO,YAAY,CAAC,SAAsB;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAE1D,gEAAgE;QAChE,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC5C,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACpC,kEAAkE;QAClE,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,sBAAsB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;QACtC,IAAI,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC;IACtC,CAAC;IAEO,qBAAqB,CAAC,SAAsB,EAAE,QAAkB;QACvE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACpE,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,UAAU,CAAC,SAAS,GAAG,gBAAgB,CAAC;YAExC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE3D,qBAAqB;YACrB,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9D,MAAM,SAAS,GAAG,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzD,iCAAiC;YACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,M
AAM,CAAC,CAAC;gBAC5C,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/D,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC;gBAC1B,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YAED,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,SAAsB,EAAE,QAAkB;QACnE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,OAAO;YAE1B,mEAAmE;YACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACzD,IAAI,SAAS,EAAE,CAAC;gBACf,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,wDAAwD;gBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC7C,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAChD,CAAC;YAED,kCAAkC;YAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC/C,IAAI,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,YAAY,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC5C,OAAO,CAAC,SAAS,GAAG,YAAY,QAAQ,EAAE,CAAC;YAC5C,CAAC;YACD,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE5B,qCAAqC;YACrC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,SAAsB,EAAE,QAAkB;QAChE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,SAAS,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,kBAAkB,CAAC,SAAsB,EAAE,QAAkB;QACpE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,C
AAC,GAAG,CAAC,EAAE;YAC1D,+BAA+B;YAC/B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO;YAEnD,0EAA0E;YAC1E,IAAI,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC;YAE5E,wBAAwB;YACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC/C,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAClC,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAElC,kCAAkC;YAClC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,mBAAmB,CAAC,SAAsB;QACjD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,sBAAsB,CAAC,SAAsB,EAAE,QAAkB;QACxE,0FAA0F;QAC1F,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,YAAY,GAAG,CAAC,CAAC;QAEvB,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpE,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAEtC,mEAAmE;YACnE,MAAM,WAAW,GAAG,CAAC,IAAU,EAAQ,EAAE;gBACxC,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACjC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;qBAAM,IAAI,IAAI,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;oBAC3C,MAAM,EAAE,GAAG,IAAe,CAAC;oBAC3B,MAAM
,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;oBAErC,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBAChD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBAC1C,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;yBAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;wBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;wBACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzD,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;wBAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,CAAC;wBACP,kDAAkD;wBAClD,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;oBACpD,CAAC;gBACF,CAAC;YACF,CAAC,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,SAAsB,EAAE,QAAkB;QAClE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,SAAsB;QACnD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACnE,EAAE,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,oBAAoB,CAAC,SAAkB;QAC9C,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAC1D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAE/D,IAAI,IAAI,GAAgB,IAAI,CAAC;QAC7B,IAAI,IAAiB,CAAC;QACtB,OAAO,CAAC,IAAI,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,GAAG,IAAY,CAAC;YAC1B,IAAI,IAAI,EAAE,CAAC;gBACV,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,IAAI,QAAQ,
IAAI,QAAQ,EAAE,CAAC;oBAC1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAC1D,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;oBACzC,kEAAkE;oBAClE,IAAI,QAAQ,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;wBAC5F,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;wBACzC,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC;oBAClD,CAAC;gBACF,CAAC;YACF,CAAC;YACD,IAAI,GAAG,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAEO,iBAAiB;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;CACD;AA1SD,8CA0SC"}
@@ -76,7 +76,7 @@ class XOembedExtractor extends _base_1.BaseExtractor {
76
76
  };
77
77
  }
78
78
  async tryExtractFxTwitter() {
79
- const match = this.url.match(/\/([a-zA-Z][a-zA-Z0-9_]{0,14})\/(status|article)\/(\d+)/);
79
+ const match = this.url.match(/\/([a-zA-Z0-9_][a-zA-Z0-9_]{0,14})\/(status|article)\/(\d+)/);
80
80
  if (!match)
81
81
  return null;
82
82
  try {