defuddle 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.ts +0 -2
- package/dist/constants.js +39 -10
- package/dist/constants.js.map +1 -1
- package/dist/defuddle.d.ts +1 -20
- package/dist/defuddle.js +151 -807
- package/dist/defuddle.js.map +1 -1
- package/dist/elements/code.js +76 -11
- package/dist/elements/code.js.map +1 -1
- package/dist/elements/footnotes.js +420 -45
- package/dist/elements/footnotes.js.map +1 -1
- package/dist/elements/headings.js +5 -0
- package/dist/elements/headings.js.map +1 -1
- package/dist/elements/math.base.d.ts +1 -0
- package/dist/elements/math.base.js +4 -1
- package/dist/elements/math.base.js.map +1 -1
- package/dist/elements/math.core.d.ts +1 -0
- package/dist/elements/math.d.ts +1 -1
- package/dist/elements/math.full.d.ts +1 -0
- package/dist/elements/math.full.js +90 -0
- package/dist/elements/math.full.js.map +1 -0
- package/dist/elements/math.js +3 -3
- package/dist/extractor-registry.js +20 -0
- package/dist/extractor-registry.js.map +1 -1
- package/dist/extractors/bbcode-data.d.ts +10 -0
- package/dist/extractors/bbcode-data.js +59 -0
- package/dist/extractors/bbcode-data.js.map +1 -0
- package/dist/extractors/c2-wiki.d.ts +15 -0
- package/dist/extractors/c2-wiki.js +143 -0
- package/dist/extractors/c2-wiki.js.map +1 -0
- package/dist/extractors/reddit.d.ts +1 -0
- package/dist/extractors/reddit.js +14 -14
- package/dist/extractors/reddit.js.map +1 -1
- package/dist/extractors/substack.d.ts +17 -0
- package/dist/extractors/substack.js +188 -0
- package/dist/extractors/substack.js.map +1 -0
- package/dist/extractors/x-article.d.ts +1 -0
- package/dist/extractors/x-article.js +27 -2
- package/dist/extractors/x-article.js.map +1 -1
- package/dist/extractors/x-oembed.js +1 -1
- package/dist/extractors/x-oembed.js.map +1 -1
- package/dist/extractors/youtube.d.ts +9 -2
- package/dist/extractors/youtube.js +161 -29
- package/dist/extractors/youtube.js.map +1 -1
- package/dist/fetch.js +183 -14
- package/dist/fetch.js.map +1 -1
- package/dist/index.full.js +1 -1
- package/dist/index.js +1 -1
- package/dist/markdown.js +27 -2
- package/dist/markdown.js.map +1 -1
- package/dist/metadata.d.ts +4 -3
- package/dist/metadata.js +195 -41
- package/dist/metadata.js.map +1 -1
- package/dist/node.d.ts +1 -1
- package/dist/node.js +3 -6
- package/dist/node.js.map +1 -1
- package/dist/removals/content-patterns.d.ts +2 -0
- package/dist/removals/content-patterns.js +835 -0
- package/dist/removals/content-patterns.js.map +1 -0
- package/dist/removals/hidden.d.ts +2 -0
- package/dist/removals/hidden.js +78 -0
- package/dist/removals/hidden.js.map +1 -0
- package/dist/removals/metadata-block.d.ts +8 -0
- package/dist/removals/metadata-block.js +40 -0
- package/dist/removals/metadata-block.js.map +1 -0
- package/dist/{scoring.d.ts → removals/scoring.d.ts} +1 -1
- package/dist/{scoring.js → removals/scoring.js} +7 -9
- package/dist/removals/scoring.js.map +1 -0
- package/dist/removals/selectors.d.ts +2 -0
- package/dist/removals/selectors.js +118 -0
- package/dist/removals/selectors.js.map +1 -0
- package/dist/removals/small-images.d.ts +3 -0
- package/dist/removals/small-images.js +116 -0
- package/dist/removals/small-images.js.map +1 -0
- package/dist/standardize.d.ts +2 -1
- package/dist/standardize.js +106 -62
- package/dist/standardize.js.map +1 -1
- package/dist/types/extractors.d.ts +1 -0
- package/dist/types.d.ts +5 -0
- package/dist/utils/bbcode.d.ts +6 -0
- package/dist/utils/bbcode.js +57 -0
- package/dist/utils/bbcode.js.map +1 -0
- package/dist/utils.js +1 -1
- package/dist/utils.js.map +1 -1
- package/package.json +1 -1
- package/dist/elements/math.core.js +0 -52
- package/dist/elements/math.core.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/scoring.js.map +0 -1
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.C2WikiExtractor = void 0;
|
|
4
|
+
const _base_1 = require("./_base");
|
|
5
|
+
const dom_1 = require("../utils/dom");
|
|
6
|
+
const C2_API = 'https://c2.com/wiki/remodel/pages/';
|
|
7
|
+
class C2WikiExtractor extends _base_1.BaseExtractor {
|
|
8
|
+
canExtract() {
|
|
9
|
+
return false;
|
|
10
|
+
}
|
|
11
|
+
canExtractAsync() {
|
|
12
|
+
return this.getPageTitle() !== null;
|
|
13
|
+
}
|
|
14
|
+
prefersAsync() {
|
|
15
|
+
return true;
|
|
16
|
+
}
|
|
17
|
+
extract() {
|
|
18
|
+
return { content: '', contentHtml: '' };
|
|
19
|
+
}
|
|
20
|
+
async extractAsync() {
|
|
21
|
+
const title = this.getPageTitle();
|
|
22
|
+
if (!title)
|
|
23
|
+
return { content: '', contentHtml: '' };
|
|
24
|
+
const json = await fetch(C2_API + title).then(res => res.json());
|
|
25
|
+
if (!json || !json.text)
|
|
26
|
+
return { content: '', contentHtml: '' };
|
|
27
|
+
const words = title.replace(/([a-z])([A-Z])/g, '$1 $2');
|
|
28
|
+
const contentHtml = this.renderPage(json);
|
|
29
|
+
return {
|
|
30
|
+
content: contentHtml,
|
|
31
|
+
contentHtml,
|
|
32
|
+
variables: {
|
|
33
|
+
title: words,
|
|
34
|
+
site: 'C2 Wiki',
|
|
35
|
+
...(json.date ? { published: json.date } : {}),
|
|
36
|
+
},
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
getPageTitle() {
|
|
40
|
+
if (this.pageTitle !== undefined)
|
|
41
|
+
return this.pageTitle;
|
|
42
|
+
try {
|
|
43
|
+
const search = new URL(this.url).search;
|
|
44
|
+
const match = search.match(/[?&]([A-Za-z]\w*)/);
|
|
45
|
+
this.pageTitle = match ? match[1] : 'WelcomeVisitors';
|
|
46
|
+
}
|
|
47
|
+
catch {
|
|
48
|
+
this.pageTitle = null;
|
|
49
|
+
}
|
|
50
|
+
return this.pageTitle;
|
|
51
|
+
}
|
|
52
|
+
renderPage(json) {
|
|
53
|
+
const body = this.markup(json.text);
|
|
54
|
+
const footer = json.date ? `<hr><p>Last edit ${(0, dom_1.escapeHtml)(json.date)}</p>` : '';
|
|
55
|
+
return `${body}${footer}`;
|
|
56
|
+
}
|
|
57
|
+
markup(text) {
|
|
58
|
+
const lines = text.replace(/\\\n/g, ' ').split(/\r?\n/);
|
|
59
|
+
const parts = [];
|
|
60
|
+
let openTags = [];
|
|
61
|
+
for (const line of lines) {
|
|
62
|
+
const { html, openTags: nextTags } = this.applyBullets(line, openTags);
|
|
63
|
+
parts.push(this.applyInline(html));
|
|
64
|
+
openTags = nextTags;
|
|
65
|
+
}
|
|
66
|
+
while (openTags.length > 0) {
|
|
67
|
+
parts.push(`</${openTags.pop()}>`);
|
|
68
|
+
}
|
|
69
|
+
return parts.join('\n');
|
|
70
|
+
}
|
|
71
|
+
applyBullets(text, openTags) {
|
|
72
|
+
const newOpenTags = [...openTags];
|
|
73
|
+
let prefix = '';
|
|
74
|
+
const closeToDepth = (depth, tag) => {
|
|
75
|
+
while (newOpenTags.length > depth) {
|
|
76
|
+
prefix += `</${newOpenTags.pop()}>`;
|
|
77
|
+
}
|
|
78
|
+
if (tag && newOpenTags.length < depth) {
|
|
79
|
+
prefix += `<${tag}>`;
|
|
80
|
+
newOpenTags.push(tag);
|
|
81
|
+
}
|
|
82
|
+
else if (tag && newOpenTags.length === depth && newOpenTags[depth - 1] !== tag) {
|
|
83
|
+
prefix += `</${newOpenTags.pop()}><${tag}>`;
|
|
84
|
+
newOpenTags.push(tag);
|
|
85
|
+
}
|
|
86
|
+
};
|
|
87
|
+
if (/^\s*$/.test(text)) {
|
|
88
|
+
const inList = newOpenTags.some(t => t === 'ul' || t === 'ol' || t === 'dl');
|
|
89
|
+
if (inList)
|
|
90
|
+
return { html: '', openTags: newOpenTags };
|
|
91
|
+
closeToDepth(0);
|
|
92
|
+
return { html: prefix + '<p></p>', openTags: newOpenTags };
|
|
93
|
+
}
|
|
94
|
+
if (/^-----*/.test(text)) {
|
|
95
|
+
closeToDepth(0);
|
|
96
|
+
return { html: prefix + '<hr>', openTags: newOpenTags };
|
|
97
|
+
}
|
|
98
|
+
const dlMatch = text.match(/^(\t+)(.+):\t/);
|
|
99
|
+
if (dlMatch) {
|
|
100
|
+
closeToDepth(dlMatch[1].length, 'dl');
|
|
101
|
+
return { html: prefix + `<dt>${dlMatch[2]}<dd>` + text.slice(dlMatch[0].length), openTags: newOpenTags };
|
|
102
|
+
}
|
|
103
|
+
const tabUlMatch = text.match(/^(\t+)\*/);
|
|
104
|
+
if (tabUlMatch) {
|
|
105
|
+
closeToDepth(tabUlMatch[1].length, 'ul');
|
|
106
|
+
return { html: prefix + '<li>' + text.slice(tabUlMatch[0].length), openTags: newOpenTags };
|
|
107
|
+
}
|
|
108
|
+
const starUlMatch = text.match(/^(\*+)/);
|
|
109
|
+
if (starUlMatch) {
|
|
110
|
+
closeToDepth(starUlMatch[1].length, 'ul');
|
|
111
|
+
return { html: prefix + '<li>' + text.slice(starUlMatch[0].length), openTags: newOpenTags };
|
|
112
|
+
}
|
|
113
|
+
const olMatch = text.match(/^(\t+)\d+\.?/);
|
|
114
|
+
if (olMatch) {
|
|
115
|
+
closeToDepth(olMatch[1].length, 'ol');
|
|
116
|
+
return { html: prefix + '<li>' + text.slice(olMatch[0].length), openTags: newOpenTags };
|
|
117
|
+
}
|
|
118
|
+
if (/^\s/.test(text)) {
|
|
119
|
+
closeToDepth(1, 'pre');
|
|
120
|
+
return { html: prefix + text, openTags: newOpenTags };
|
|
121
|
+
}
|
|
122
|
+
closeToDepth(0);
|
|
123
|
+
return { html: prefix + text, openTags: newOpenTags };
|
|
124
|
+
}
|
|
125
|
+
applyInline(text) {
|
|
126
|
+
return text
|
|
127
|
+
.replace(/'''(.*?)'''/g, '<strong>$1</strong>')
|
|
128
|
+
.replace(/''(.*?)''/g, '<em>$1</em>')
|
|
129
|
+
.replace(/\b(https?|ftp|mailto|file|telnet|news):[^\s<>[\]"'()]*[^\s<>[\]"'(),.?]/g, (url) => {
|
|
130
|
+
if ((0, dom_1.isDangerousUrl)(url))
|
|
131
|
+
return (0, dom_1.escapeHtml)(url);
|
|
132
|
+
if (/\.(gif|jpg|jpeg|png)$/i.test(url)) {
|
|
133
|
+
return `<img src="${escapeAttr(url)}">`;
|
|
134
|
+
}
|
|
135
|
+
return `<a href="${escapeAttr(url)}" rel="nofollow" target="_blank">${(0, dom_1.escapeHtml)(url)}</a>`;
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
exports.C2WikiExtractor = C2WikiExtractor;
|
|
140
|
+
function escapeAttr(text) {
|
|
141
|
+
return text.replace(/"/g, '&quot;').replace(/'/g, '&#39;');
|
|
142
|
+
}
|
|
143
|
+
//# sourceMappingURL=c2-wiki.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"c2-wiki.js","sourceRoot":"","sources":["../../src/extractors/c2-wiki.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA0D;AAE1D,MAAM,MAAM,GAAG,oCAAoC,CAAC;AAEpD,MAAa,eAAgB,SAAQ,qBAAa;IAGjD,UAAU;QACT,OAAO,KAAK,CAAC;IACd,CAAC;IAED,eAAe;QACd,OAAO,IAAI,CAAC,YAAY,EAAE,KAAK,IAAI,CAAC;IACrC,CAAC;IAED,YAAY;QACX,OAAO,IAAI,CAAC;IACb,CAAC;IAED,OAAO;QACN,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;QAEpD,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACjE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,CAAC;QAEjE,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACxD,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAE1C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,SAAS,EAAE;gBACV,KAAK,EAAE,KAAK;gBACZ,IAAI,EAAE,SAAS;gBACf,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC9C;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC;QACxD,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;YACxC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAChD,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACR,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACvB,CAAC;QACD,OAAO,IAAI,CAAC,SAAS,CAAC;IACvB,CAAC;IAEO,UAAU,CAAC,IAAS;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,oBAAoB,IAAA,gBAAU,EAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QAChF,OAAO,GAAG,IAAI,GAAG,MAAM,EAAE,CAAC;IAC3B,CAAC;IAEO,MAAM,CAAC,IAAY;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACxD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,
IAAI,QAAQ,GAAa,EAAE,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YAC1B,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;YACvE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;YACnC,QAAQ,GAAG,QAAQ,CAAC;QACrB,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,KAAK,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAEO,YAAY,CAAC,IAAY,EAAE,QAAkB;QACpD,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;QAClC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,MAAM,YAAY,GAAG,CAAC,KAAa,EAAE,GAAY,EAAE,EAAE;YACpD,OAAO,WAAW,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBACnC,MAAM,IAAI,KAAK,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC;YACrC,CAAC;YACD,IAAI,GAAG,IAAI,WAAW,CAAC,MAAM,GAAG,KAAK,EAAE,CAAC;gBACvC,MAAM,IAAI,IAAI,GAAG,GAAG,CAAC;gBACrB,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;iBAAM,IAAI,GAAG,IAAI,WAAW,CAAC,MAAM,KAAK,KAAK,IAAI,WAAW,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBAClF,MAAM,IAAI,KAAK,WAAW,CAAC,GAAG,EAAE,KAAK,GAAG,GAAG,CAAC;gBAC5C,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;QACF,CAAC,CAAC;QAEF,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;YAC7E,IAAI,MAAM;gBAAE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;YACvD,YAAY,CAAC,CAAC,CAAC,CAAC;YAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC5D,CAAC;QAED,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,CAAC,CAAC,CAAC;YAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACzD,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAC5C,IAAI,OAAO,EAAE,CAAC;YACb,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC1G,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAC1C,IAAI,UAAU,
EAAE,CAAC;YAChB,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACzC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC5F,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACzC,IAAI,WAAW,EAAE,CAAC;YACjB,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC7F,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3C,IAAI,OAAO,EAAE,CAAC;YACb,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACzF,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,YAAY,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACvD,CAAC;QAED,YAAY,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;IACvD,CAAC;IAEO,WAAW,CAAC,IAAY;QAC/B,OAAO,IAAI;aACT,OAAO,CAAC,cAAc,EAAE,qBAAqB,CAAC;aAC9C,OAAO,CAAC,YAAY,EAAE,aAAa,CAAC;aACpC,OAAO,CACP,0EAA0E,EAC1E,CAAC,GAAG,EAAE,EAAE;YACP,IAAI,IAAA,oBAAc,EAAC,GAAG,CAAC;gBAAE,OAAO,IAAA,gBAAU,EAAC,GAAG,CAAC,CAAC;YAChD,IAAI,wBAAwB,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxC,OAAO,aAAa,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;YACzC,CAAC;YACD,OAAO,YAAY,UAAU,CAAC,GAAG,CAAC,oCAAoC,IAAA,gBAAU,EAAC,GAAG,CAAC,MAAM,CAAC;QAC7F,CAAC,CACD,CAAC;IACJ,CAAC;CACD;AAzJD,0CAyJC;AAED,SAAS,UAAU,CAAC,IAAY;IAC/B,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAC5D,CAAC"}
|
|
@@ -6,6 +6,7 @@ export declare class RedditExtractor extends BaseExtractor {
|
|
|
6
6
|
constructor(document: Document, url: string);
|
|
7
7
|
canExtract(): boolean;
|
|
8
8
|
canExtractAsync(): boolean;
|
|
9
|
+
prefersAsync(): boolean;
|
|
9
10
|
private isCommentsPage;
|
|
10
11
|
extractAsync(): Promise<ExtractorResult>;
|
|
11
12
|
extract(): ExtractorResult;
|
|
@@ -14,11 +14,15 @@ class RedditExtractor extends _base_1.BaseExtractor {
|
|
|
14
14
|
return !!this.shredditPost || this.isOldReddit;
|
|
15
15
|
}
|
|
16
16
|
canExtractAsync() {
|
|
17
|
-
// For new reddit comment pages, extract() returns empty content
|
|
18
|
-
// when shreddit-comment elements are missing (server-side fetch),
|
|
19
|
-
// causing parseAsync() to fall through to this async path.
|
|
20
17
|
return this.isCommentsPage() && !this.isOldReddit;
|
|
21
18
|
}
|
|
19
|
+
prefersAsync() {
|
|
20
|
+
// In server/worker contexts, fetch old.reddit.com for full content including
|
|
21
|
+
// comments. In browser (real window), use the rendered DOM directly since
|
|
22
|
+
// old.reddit.com is CORS-blocked from www.reddit.com.
|
|
23
|
+
const isBrowser = typeof window !== 'undefined' && this.document.defaultView === window;
|
|
24
|
+
return this.isCommentsPage() && !this.isOldReddit && !isBrowser;
|
|
25
|
+
}
|
|
22
26
|
isCommentsPage() {
|
|
23
27
|
return /\/r\/.+\/comments\//.test(this.url);
|
|
24
28
|
}
|
|
@@ -46,21 +50,17 @@ class RedditExtractor extends _base_1.BaseExtractor {
|
|
|
46
50
|
if (this.isOldReddit) {
|
|
47
51
|
return this.extractOldReddit(this.document);
|
|
48
52
|
}
|
|
49
|
-
// New reddit server-side HTML includes shreddit-post but not
|
|
50
|
-
// shreddit-comment elements (those require JS). Return empty
|
|
51
|
-
// so parseAsync() falls through to extractAsync() which fetches
|
|
52
|
-
// old.reddit.com with full content.
|
|
53
|
-
const hasComments = this.document.querySelectorAll('shreddit-comment').length > 0;
|
|
54
|
-
if (this.isCommentsPage() && !hasComments) {
|
|
55
|
-
return { content: '', contentHtml: '' };
|
|
56
|
-
}
|
|
57
|
-
const postContent = this.getPostContent();
|
|
58
|
-
const comments = this.options.includeReplies !== false ? this.extractComments() : '';
|
|
59
|
-
const contentHtml = this.createContentHtml(postContent, comments);
|
|
60
53
|
const postTitle = this.document.querySelector('h1')?.textContent?.trim() || '';
|
|
61
54
|
const subreddit = this.getSubreddit();
|
|
62
55
|
const postAuthor = this.getPostAuthor();
|
|
56
|
+
const postContent = this.getPostContent();
|
|
63
57
|
const description = this.createDescription(postContent);
|
|
58
|
+
// Extract any comments already in the DOM (browser renders these via JS;
|
|
59
|
+
// SSR/Node HTML won't have them, so comments will be empty there).
|
|
60
|
+
const comments = this.options.includeReplies !== false ? this.extractComments() : '';
|
|
61
|
+
const contentHtml = this.createContentHtml(postContent, comments);
|
|
62
|
+
// If contentHtml is empty (link/image post with no body and no DOM comments),
|
|
63
|
+
// parseAsync() will fall through to extractAsync() → old.reddit.com fetch.
|
|
64
64
|
return {
|
|
65
65
|
content: contentHtml,
|
|
66
66
|
contentHtml: contentHtml,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reddit.js","sourceRoot":"","sources":["../../src/extractors/reddit.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAwD;AACxD,gDAAyF;AAEzF,MAAa,eAAgB,SAAQ,qBAAa;IAIjD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC;QAC5D,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;IAC5D,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAChD,CAAC;IAED,eAAe;QACd,
|
|
1
|
+
{"version":3,"file":"reddit.js","sourceRoot":"","sources":["../../src/extractors/reddit.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAwD;AACxD,gDAAyF;AAEzF,MAAa,eAAgB,SAAQ,qBAAa;IAIjD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC;QAC5D,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;IAC5D,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAChD,CAAC;IAED,eAAe;QACd,OAAO,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;IACnD,CAAC;IAED,YAAY;QACX,6EAA6E;QAC7E,0EAA0E;QAC1E,sDAAsD;QACtD,MAAM,SAAS,GAAG,OAAO,MAAM,KAAK,WAAW,IAAI,IAAI,CAAC,QAAQ,CAAC,WAAW,KAAK,MAAM,CAAC;QACxF,OAAO,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,SAAS,CAAC;IACjE,CAAC;IAEO,cAAc;QACrB,OAAO,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,YAAY;QACjB,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,CAAC,QAAQ,GAAG,gBAAgB,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE;YAC/C,OAAO,EAAE;gBACR,YAAY,EAAE,wCAAwC;aACtD;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,SAAS,IAAI,CAAC,OAAO,SAAS,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7G,IAAI,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACnE,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,MAAM,EAAE,CAAC,eAAe,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;QAE5D,OAAO,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACnC,CAAC;IAED,OAAO;QACN,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACxC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAA
G,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QAExD,yEAAyE;QACzE,mEAAmE;QACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACrF,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAClE,8EAA8E;QAC9E,2EAA2E;QAE3E,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,IAAwB;QAChD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,SAAS,EAAE,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACjF,MAAM,UAAU,GAAG,SAAS,EAAE,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;QAChE,MAAM,SAAS,GAAG,SAAS,EAAE,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,UAAU,GAAG,SAAS,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE7D,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,KAAK,EAAE,CAAC;YAC3C,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC;YAClE,MAAM,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAClF,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxE,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAErD,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,MAAM,EAAE,IAAI,CAAC,SAAS,EAAE;gBACxB,SAAS;gBACT,UAAU;aACV;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,UAAU;gBAClB,IAAI,EAAE,KAAK,SAAS,EAAE;gBACtB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,cAAc;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,oBAAoB,CAAC,CAAC;QAC1E,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,aAAa,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;QAEn
F,OAAO,QAAQ,GAAG,SAAS,CAAC;IAC7B,CAAC;IAEO,iBAAiB,CAAC,WAAmB,EAAE,QAAgB;QAC9D,OAAO,IAAA,2BAAgB,EAAC,QAAQ,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;IAC1D,CAAC;IAEO,eAAe;QACtB,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC,CAAC;QAChF,OAAO,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;IAEO,SAAS;QAChB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QACzD,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC7C,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,aAAa;QACpB,OAAO,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxD,CAAC;IAEO,iBAAiB,CAAC,WAAmB;QAC5C,IAAI,CAAC,WAAW;YAAE,OAAO,EAAE,CAAC;QAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QACnD,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC,CAAC;QAC3D,OAAO,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE;aAChC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aACb,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAC9B,CAAC;IAEO,wBAAwB,CAAC,SAAkB,EAAE,QAAgB,CAAC;QACrE,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAEnF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;YACzD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;YAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACjG,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,gCAAgC,CAAC,CAAC;YACvE,MAAM,QAAQ,GAAG,MAAM,EAAE,YAAY,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5E,MAAM,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC,2BAA2B,CAAC,CAAC;YAClE,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEjD,MAAM,CAAC,IAAI,CAAC;gBACX,MAAM;gBACN,IAAI;gBACJ,OAAO,EAAE,IAAI;gBACb,KAAK;gBACL,KAAK,EAAE,KAAK,IAAI,SAAS;gBACzB,GAAG,EAAE,SAAS,C
AAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;YAEH,MAAM,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;YACpE,IAAI,cAAc,EAAE,CAAC;gBACpB,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,wBAAwB,CAAC,cAAc,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;YAC1E,CAAC;QACF,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;IAEO,eAAe,CAAC,QAAmB;QAC1C,MAAM,WAAW,GAAkB,EAAE,CAAC;QAEtC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,CAAC;YAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACpD,MAAM,KAAK,GAAG,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;YACnD,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAA,mBAAa,EAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE1D,MAAM,SAAS,GAAG,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC;mBAC7C,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,YAAY,CAAC,UAAU,CAAC;mBACvD,EAAE,CAAC;YACP,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAE9E,WAAW,CAAC,IAAI,CAAC;gBAChB,MAAM;gBACN,IAAI;gBACJ,OAAO;gBACP,KAAK;gBACL,KAAK,EAAE,GAAG,KAAK,SAAS;gBACxB,GAAG,EAAE,SAAS,CAAC,CAAC,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;aAC7D,CAAC,CAAC;QACJ,CAAC;QAED,OAAO,IAAA,2BAAgB,EAAC,WAAW,CAAC,CAAC;IACtC,CAAC;CACD;AAlOD,0CAkOC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { BaseExtractor } from './_base';
|
|
2
|
+
import { ExtractorResult } from '../types/extractors';
|
|
3
|
+
export declare class SubstackExtractor extends BaseExtractor {
|
|
4
|
+
private noteText;
|
|
5
|
+
private noteImage;
|
|
6
|
+
private postData;
|
|
7
|
+
private postContentSelector;
|
|
8
|
+
constructor(document: Document, url: string, schemaOrgData?: any, options?: any);
|
|
9
|
+
canExtract(): boolean;
|
|
10
|
+
extract(): ExtractorResult;
|
|
11
|
+
private extractPost;
|
|
12
|
+
private extractNote;
|
|
13
|
+
private parseDateFromByline;
|
|
14
|
+
private extractPreloadData;
|
|
15
|
+
private buildImageHtml;
|
|
16
|
+
private getLargestSrc;
|
|
17
|
+
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.SubstackExtractor = void 0;
|
|
4
|
+
const _base_1 = require("./_base");
|
|
5
|
+
const dom_1 = require("../utils/dom");
|
|
6
|
+
const INJECTED_ATTR = 'data-defuddle-substack-post';
|
|
7
|
+
class SubstackExtractor extends _base_1.BaseExtractor {
|
|
8
|
+
constructor(document, url, schemaOrgData, options) {
|
|
9
|
+
super(document, url, schemaOrgData, options);
|
|
10
|
+
this.noteText = null;
|
|
11
|
+
this.noteImage = null;
|
|
12
|
+
this.postData = null;
|
|
13
|
+
this.postContentSelector = null;
|
|
14
|
+
// Check for rendered post body first (browser/extension context, after React hydration)
|
|
15
|
+
if (document.querySelector('div.body.markup')) {
|
|
16
|
+
this.postData = this.extractPreloadData(); // metadata only
|
|
17
|
+
this.postContentSelector = 'div.body.markup';
|
|
18
|
+
return;
|
|
19
|
+
}
|
|
20
|
+
// Fall back to window._preloads script (SSR/curl/worker context)
|
|
21
|
+
this.postData = this.extractPreloadData();
|
|
22
|
+
if (this.postData?.body_html) {
|
|
23
|
+
// Inject body_html into the document so the pipeline can process it
|
|
24
|
+
const existing = document.querySelector(`[${INJECTED_ATTR}]`);
|
|
25
|
+
if (!existing) {
|
|
26
|
+
const wrapper = document.createElement('div');
|
|
27
|
+
wrapper.setAttribute(INJECTED_ATTR, '');
|
|
28
|
+
wrapper.appendChild((0, dom_1.parseHTML)(document, this.postData.body_html));
|
|
29
|
+
document.body.appendChild(wrapper);
|
|
30
|
+
}
|
|
31
|
+
this.postContentSelector = `[${INJECTED_ATTR}]`;
|
|
32
|
+
return;
|
|
33
|
+
}
|
|
34
|
+
// Fall back to Notes extraction (ProseMirror editor div)
|
|
35
|
+
this.noteText = document.querySelector('div.ProseMirror.FeedProseMirror');
|
|
36
|
+
if (this.noteText) {
|
|
37
|
+
const feedCommentBody = this.noteText.closest('[class*="feedCommentBody"]:not([class*="feedCommentBodyInner"])');
|
|
38
|
+
const sibling = feedCommentBody?.parentElement?.nextElementSibling;
|
|
39
|
+
const siblingClass = sibling?.getAttribute('class') || '';
|
|
40
|
+
if (sibling && siblingClass.includes('imageGrid')) {
|
|
41
|
+
this.noteImage = sibling;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
canExtract() {
|
|
46
|
+
return this.postContentSelector !== null || this.noteText !== null;
|
|
47
|
+
}
|
|
48
|
+
extract() {
|
|
49
|
+
if (this.postContentSelector) {
|
|
50
|
+
return this.extractPost();
|
|
51
|
+
}
|
|
52
|
+
return this.extractNote();
|
|
53
|
+
}
|
|
54
|
+
extractPost() {
|
|
55
|
+
const title = this.postData?.title || this.document.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
|
|
56
|
+
const description = this.postData?.subtitle || this.document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
|
|
57
|
+
const author = this.postData?.publishedBylines?.[0]?.name
|
|
58
|
+
|| this.document.querySelector('a[href*="substack.com/@"]')?.textContent?.trim()
|
|
59
|
+
|| '';
|
|
60
|
+
const published = this.postData?.post_date
|
|
61
|
+
|| this.parseDateFromByline()
|
|
62
|
+
|| '';
|
|
63
|
+
return {
|
|
64
|
+
content: '',
|
|
65
|
+
contentHtml: '',
|
|
66
|
+
contentSelector: this.postContentSelector,
|
|
67
|
+
variables: {
|
|
68
|
+
title,
|
|
69
|
+
author,
|
|
70
|
+
site: 'Substack',
|
|
71
|
+
description,
|
|
72
|
+
published,
|
|
73
|
+
},
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
extractNote() {
|
|
77
|
+
const textHtml = this.noteText.outerHTML;
|
|
78
|
+
const imageHtml = this.buildImageHtml();
|
|
79
|
+
const content = imageHtml ? `${textHtml}\n${imageHtml}` : textHtml;
|
|
80
|
+
const title = this.document.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
|
|
81
|
+
const description = this.document.querySelector('meta[property="og:description"]')?.getAttribute('content') || '';
|
|
82
|
+
const author = title.replace(/\s*\(@[^)]+\)\s*$/, '').trim();
|
|
83
|
+
return {
|
|
84
|
+
content,
|
|
85
|
+
contentHtml: content,
|
|
86
|
+
variables: {
|
|
87
|
+
title,
|
|
88
|
+
author,
|
|
89
|
+
site: 'Substack',
|
|
90
|
+
description,
|
|
91
|
+
},
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
parseDateFromByline() {
|
|
95
|
+
const byline = this.document.querySelector('[class*="byline-wrapper"]');
|
|
96
|
+
if (!byline)
|
|
97
|
+
return '';
|
|
98
|
+
// textContent runs adjacent words together (e.g. "ZhutovFeb") — insert space at case boundaries
|
|
99
|
+
const text = (byline.textContent || '').trim().replace(/([a-z])([A-Z])/g, '$1 $2');
|
|
100
|
+
// Match "Feb 24, 2026" style (Substack uses abbreviated month names in the UI)
|
|
101
|
+
const ABBREV_MONTHS = 'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec';
|
|
102
|
+
const MONTH_MAP = {
|
|
103
|
+
Jan: '01', Feb: '02', Mar: '03', Apr: '04', May: '05', Jun: '06',
|
|
104
|
+
Jul: '07', Aug: '08', Sep: '09', Oct: '10', Nov: '11', Dec: '12',
|
|
105
|
+
};
|
|
106
|
+
const match = text.match(new RegExp(`\\b(${ABBREV_MONTHS})\\s+(\\d{1,2}),?\\s+(\\d{4})\\b`));
|
|
107
|
+
if (match) {
|
|
108
|
+
const month = MONTH_MAP[match[1]];
|
|
109
|
+
const day = match[2].padStart(2, '0');
|
|
110
|
+
return `${match[3]}-${month}-${day}T00:00:00+00:00`;
|
|
111
|
+
}
|
|
112
|
+
return '';
|
|
113
|
+
}
|
|
114
|
+
extractPreloadData() {
|
|
115
|
+
const scripts = Array.from(this.document.querySelectorAll('script'));
|
|
116
|
+
for (const script of scripts) {
|
|
117
|
+
const text = script.textContent || '';
|
|
118
|
+
if (!text.includes('window._preloads') || !text.includes('body_html'))
|
|
119
|
+
continue;
|
|
120
|
+
const jsonParseIdx = text.indexOf('JSON.parse("');
|
|
121
|
+
if (jsonParseIdx === -1)
|
|
122
|
+
continue;
|
|
123
|
+
const startIdx = jsonParseIdx + 'JSON.parse("'.length;
|
|
124
|
+
let i = startIdx;
|
|
125
|
+
while (i < text.length) {
|
|
126
|
+
if (text[i] === '\\') {
|
|
127
|
+
i += 2;
|
|
128
|
+
}
|
|
129
|
+
else if (text[i] === '"') {
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
else {
|
|
133
|
+
i++;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
try {
|
|
137
|
+
const innerStr = text.slice(startIdx, i);
|
|
138
|
+
const jsonString = JSON.parse('"' + innerStr + '"');
|
|
139
|
+
const data = JSON.parse(jsonString);
|
|
140
|
+
const post = data?.feedData?.initialPost?.post;
|
|
141
|
+
if (post?.body_html)
|
|
142
|
+
return post;
|
|
143
|
+
}
|
|
144
|
+
catch {
|
|
145
|
+
// ignore parse errors
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
150
|
+
buildImageHtml() {
|
|
151
|
+
if (!this.noteImage)
|
|
152
|
+
return '';
|
|
153
|
+
const ogImage = this.document.querySelector('meta[property="og:image"]')?.getAttribute('content');
|
|
154
|
+
if (ogImage)
|
|
155
|
+
return `<img src="${ogImage}" alt="" />`;
|
|
156
|
+
const img = this.noteImage.querySelector('img');
|
|
157
|
+
if (!img)
|
|
158
|
+
return '';
|
|
159
|
+
const src = this.getLargestSrc(img);
|
|
160
|
+
return src ? `<img src="${src}" alt="" />` : '';
|
|
161
|
+
}
|
|
162
|
+
getLargestSrc(img) {
|
|
163
|
+
const srcset = img.getAttribute('srcset') || '';
|
|
164
|
+
if (srcset) {
|
|
165
|
+
const entryPattern = /(.+?)\s+(\d+(?:\.\d+)?)w/g;
|
|
166
|
+
let bestUrl = '';
|
|
167
|
+
let bestWidth = 0;
|
|
168
|
+
let match;
|
|
169
|
+
let lastIndex = 0;
|
|
170
|
+
while ((match = entryPattern.exec(srcset)) !== null) {
|
|
171
|
+
let url = match[1].trim();
|
|
172
|
+
if (lastIndex > 0)
|
|
173
|
+
url = url.replace(/^,\s*/, '');
|
|
174
|
+
lastIndex = entryPattern.lastIndex;
|
|
175
|
+
const width = parseFloat(match[2]);
|
|
176
|
+
if (url && width > bestWidth) {
|
|
177
|
+
bestWidth = width;
|
|
178
|
+
bestUrl = url;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
if (bestUrl)
|
|
182
|
+
return bestUrl.replace(/,w_\d+/g, '').replace(/,c_\w+/g, '');
|
|
183
|
+
}
|
|
184
|
+
return img.getAttribute('src') || '';
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
exports.SubstackExtractor = SubstackExtractor;
|
|
188
|
+
//# sourceMappingURL=substack.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"substack.js","sourceRoot":"","sources":["../../src/extractors/substack.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAAyC;AAEzC,MAAM,aAAa,GAAG,6BAA6B,CAAC;AAWpD,MAAa,iBAAkB,SAAQ,qBAAa;IAMnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB,EAAE,OAAa;QAC9E,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;QANtC,aAAQ,GAAmB,IAAI,CAAC;QAChC,cAAS,GAAmB,IAAI,CAAC;QACjC,aAAQ,GAA4B,IAAI,CAAC;QACzC,wBAAmB,GAAkB,IAAI,CAAC;QAKjD,wFAAwF;QACxF,IAAI,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC/C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC,CAAC,gBAAgB;YAC3D,IAAI,CAAC,mBAAmB,GAAG,iBAAiB,CAAC;YAC7C,OAAO;QACR,CAAC;QAED,iEAAiE;QACjE,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1C,IAAI,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC;YAC9B,oEAAoE;YACpE,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,IAAI,aAAa,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;gBAC9C,OAAO,CAAC,YAAY,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;gBACxC,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;gBAClE,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YACD,IAAI,CAAC,mBAAmB,GAAG,IAAI,aAAa,GAAG,CAAC;YAChD,OAAO;QACR,CAAC;QAED,yDAAyD;QACzD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,CAAC;QAC1E,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,iEAAiE,CAAC,CAAC;YACjH,MAAM,OAAO,GAAG,eAAe,EAAE,aAAa,EAAE,kBAAkB,CAAC;YACnE,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC1D,IAAI,OAAO,IAAI,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACnD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC;YAC1B,CAAC;QACF,CAAC;IACF,CAAC;IAED,UAAU;QACT,OAAO,IAAI,CAAC,mBAAmB,KAAK,IAAI,IAAI,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC;IACpE,CAAC;IAED,OAAO;QACN,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC;IAEO,WAAW;QAClB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC9H,MAAM,WAAW,GAAG,IAA
I,CAAC,QAAQ,EAAE,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAC7I,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI;eACrD,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE;eAC7E,EAAE,CAAC;QACP,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,SAAS;eACtC,IAAI,CAAC,mBAAmB,EAAE;eAC1B,EAAE,CAAC;QAEP,OAAO;YACN,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,IAAI,CAAC,mBAAoB;YAC1C,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,UAAU;gBAChB,WAAW;gBACX,SAAS;aACT;SACD,CAAC;IACH,CAAC;IAEO,WAAW;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAS,CAAC,SAAS,CAAC;QAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,SAAS,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEnE,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACtG,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,iCAAiC,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QAClH,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAE7D,OAAO;YACN,OAAO;YACP,WAAW,EAAE,OAAO;YACpB,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,UAAU;gBAChB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,mBAAmB;QAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,CAAC;QACxE,IAAI,CAAC,MAAM;YAAE,OAAO,EAAE,CAAC;QACvB,gGAAgG;QAChG,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;QACnF,+EAA+E;QAC/E,MAAM,aAAa,GAAG,iDAAiD,CAAC;QACxE,MAAM,SAAS,GAA2B;YACzC,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI;YAChE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI;SAChE,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,aAAa,kCAAkC,CAAC,CAAC,CAAC;QAC7F,IAAI,KAAK,EAAE,CAAC;YACX,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAClC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;
YACtC,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,GAAG,iBAAiB,CAAC;QACrD,CAAC;QACD,OAAO,EAAE,CAAC;IACX,CAAC;IAEO,kBAAkB;QACzB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;YACtC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC;gBAAE,SAAS;YAEhF,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YAClD,IAAI,YAAY,KAAK,CAAC,CAAC;gBAAE,SAAS;YAElC,MAAM,QAAQ,GAAG,YAAY,GAAG,cAAc,CAAC,MAAM,CAAC;YACtD,IAAI,CAAC,GAAG,QAAQ,CAAC;YACjB,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBACtB,CAAC,IAAI,CAAC,CAAC;gBACR,CAAC;qBAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC5B,MAAM;gBACP,CAAC;qBAAM,CAAC;oBACP,CAAC,EAAE,CAAC;gBACL,CAAC;YACF,CAAC;YAED,IAAI,CAAC;gBACJ,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBACzC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,GAAG,GAAG,CAAW,CAAC;gBAC9D,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;gBACpC,MAAM,IAAI,GAAqB,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,CAAC;gBACjE,IAAI,IAAI,EAAE,SAAS;oBAAE,OAAO,IAAI,CAAC;YAClC,CAAC;YAAC,MAAM,CAAC;gBACR,sBAAsB;YACvB,CAAC;QACF,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAClG,IAAI,OAAO;YAAE,OAAO,aAAa,OAAO,aAAa,CAAC;QAEtD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAChD,IAAI,CAAC,GAAG;YAAE,OAAO,EAAE,CAAC;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;QACpC,OAAO,GAAG,CAAC,CAAC,CAAC,aAAa,GAAG,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;IACjD,CAAC;IAEO,aAAa,CAAC,GAAY;QACjC,MAAM,MAAM,GAAG,GAAG,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChD,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,2BAA2B,CAAC;YACjD,IAAI,OAAO,GAAG,EAAE,CAAC;YACjB,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,IAAI,KAAK,CAAC;YACV,IAAI,SAAS,GAAG,CAAC,CAAC;YAClB,OAAO,CAAC,KAAK,GAAG,YAAY,CAA
C,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBACrD,IAAI,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC1B,IAAI,SAAS,GAAG,CAAC;oBAAE,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBAClD,SAAS,GAAG,YAAY,CAAC,SAAS,CAAC;gBACnC,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnC,IAAI,GAAG,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;oBAC9B,SAAS,GAAG,KAAK,CAAC;oBAClB,OAAO,GAAG,GAAG,CAAC;gBACf,CAAC;YACF,CAAC;YACD,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC3E,CAAC;QACD,OAAO,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IACtC,CAAC;CACD;AA3LD,8CA2LC"}
|
|
@@ -61,8 +61,8 @@ class XArticleExtractor extends _base_1.BaseExtractor {
|
|
|
61
61
|
return name || handle || this.getAuthorFromUrl();
|
|
62
62
|
}
|
|
63
63
|
getAuthorFromUrl() {
|
|
64
|
-
// match username before /article/, excluding system paths like /i/
|
|
65
|
-
const match = this.url.match(/\/([a-zA-
|
|
64
|
+
// match username before /article/ or /status/, excluding system paths like /i/
|
|
65
|
+
const match = this.url.match(/\/([a-zA-Z0-9_][a-zA-Z0-9_]{0,14})\/(article|status)\/\d+/);
|
|
66
66
|
return match ? `@${match[1]}` : this.getAuthorFromOgTitle();
|
|
67
67
|
}
|
|
68
68
|
getAuthorFromOgTitle() {
|
|
@@ -94,6 +94,7 @@ class XArticleExtractor extends _base_1.BaseExtractor {
|
|
|
94
94
|
this.convertBoldSpans(container, ownerDoc);
|
|
95
95
|
this.convertDraftParagraphs(container, ownerDoc);
|
|
96
96
|
this.removeDraftAttributes(container);
|
|
97
|
+
this.repairSurrogatePairs(container);
|
|
97
98
|
}
|
|
98
99
|
convertEmbeddedTweets(container, ownerDoc) {
|
|
99
100
|
container.querySelectorAll(SELECTORS.EMBEDDED_TWEET).forEach(tweet => {
|
|
@@ -258,6 +259,30 @@ class XArticleExtractor extends _base_1.BaseExtractor {
|
|
|
258
259
|
el.removeAttribute('data-offset-key');
|
|
259
260
|
});
|
|
260
261
|
}
|
|
262
|
+
repairSurrogatePairs(container) {
|
|
263
|
+
const SHOW_TEXT = 4;
|
|
264
|
+
const ownerDoc = container.ownerDocument || this.document;
|
|
265
|
+
const walker = ownerDoc.createTreeWalker(container, SHOW_TEXT);
|
|
266
|
+
let prev = null;
|
|
267
|
+
let node;
|
|
268
|
+
while ((node = walker.nextNode())) {
|
|
269
|
+
const curr = node;
|
|
270
|
+
if (prev) {
|
|
271
|
+
const prevText = prev.textContent || '';
|
|
272
|
+
const currText = curr.textContent || '';
|
|
273
|
+
if (prevText && currText) {
|
|
274
|
+
const lastCode = prevText.charCodeAt(prevText.length - 1);
|
|
275
|
+
const firstCode = currText.charCodeAt(0);
|
|
276
|
+
// high surrogate followed by low surrogate across a node boundary
|
|
277
|
+
if (lastCode >= 0xD800 && lastCode <= 0xDBFF && firstCode >= 0xDC00 && firstCode <= 0xDFFF) {
|
|
278
|
+
prev.textContent = prevText.slice(0, -1);
|
|
279
|
+
curr.textContent = prevText.slice(-1) + currText;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
prev = curr;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
261
286
|
createDescription() {
|
|
262
287
|
const text = this.articleContainer?.textContent?.trim() || '';
|
|
263
288
|
return text.slice(0, 140) + (text.length > 140 ? '...' : '');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"x-article.js","sourceRoot":"","sources":["../../src/extractors/x-article.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA6C;AAE7C,MAAM,SAAS,GAAG;IACjB,iBAAiB,EAAE,4CAA4C;IAC/D,KAAK,EAAE,uCAAuC;IAC9C,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EAAE,uBAAuB;IACpC,aAAa,EAAE,iCAAiC;IAChD,MAAM,EAAE,gCAAgC;IACxC,gBAAgB,EAAE,qDAAqD;IACvE,UAAU,EAAE,kCAAkC;IAC9C,gBAAgB,EAAE,mBAAmB;IACrC,cAAc,EAAE,6BAA6B;IAC7C,UAAU,EAAE,2BAA2B;IACvC,SAAS,EAAE,2BAA2B;IACtC,UAAU,EAAE,qCAAqC;IACjD,YAAY,EAAE,iCAAiC;CACtC,CAAC;AAEX,MAAa,iBAAkB,SAAQ,qBAAa;IAGnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,gBAAgB,GAAG,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC;IAChC,CAAC;IAED,OAAO;QACN,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE7C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,gBAAgB,EAAE;gBACjB,SAAS,EAAE,IAAI,CAAC,YAAY,EAAE;aAC9B;YACD,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,aAAa;gBACnB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC7D,OAAO,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,oBAAoB,CAAC;IAC7D,CAAC;IAEO,aAAa;QACpB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAErD,MAAM,IAAI,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAE/F,IAAI,IAAI,IAAI,MAAM;YAAE,OAAO,GAAG,IAAI,MAAM,MAAM,GAAG,CAAC;QAClD,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAClD,CAAC;IAEO,gBAAgB;QACvB,mEAAmE;QACnE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC7E,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC7D,CAAC;IAEO,oBAA
oB;QAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACxG,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAClE,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC/C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9B,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAAE,OAAO,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAgB,CAAC;QACnE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAEzB,OAAO,8BAA8B,IAAA,mBAAa,EAAC,KAAK,CAAC,YAAY,CAAC;IACvE,CAAC;IAEO,YAAY,CAAC,SAAsB;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAE1D,gEAAgE;QAChE,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC5C,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACpC,kEAAkE;QAClE,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,sBAAsB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IAEO,qBAAqB,CAAC,SAAsB,EAAE,QAAkB;QACvE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACpE,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,UAAU,CAAC,SAAS,GAAG,gBAAgB,CAAC;YAExC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE3D,qBAAqB;YACrB,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9D,MAAM,SAAS,GAAG,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzD,iCAAiC;YACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC5C,IAAI,CAAC,WAAW,GAAG,MAAM
,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/D,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC;gBAC1B,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YAED,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,SAAsB,EAAE,QAAkB;QACnE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,OAAO;YAE1B,mEAAmE;YACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACzD,IAAI,SAAS,EAAE,CAAC;gBACf,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,wDAAwD;gBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC7C,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAChD,CAAC;YAED,kCAAkC;YAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC/C,IAAI,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,YAAY,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC5C,OAAO,CAAC,SAAS,GAAG,YAAY,QAAQ,EAAE,CAAC;YAC5C,CAAC;YACD,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE5B,qCAAqC;YACrC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,SAAsB,EAAE,QAAkB;QAChE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,SAAS,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,kBAAkB,CAAC,SAAsB,EAAE,QAAkB;QACpE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,+BAA+B;YAC/B,MAAM,MA
AM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO;YAEnD,0EAA0E;YAC1E,IAAI,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC;YAE5E,wBAAwB;YACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC/C,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAClC,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAElC,kCAAkC;YAClC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,mBAAmB,CAAC,SAAsB;QACjD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,sBAAsB,CAAC,SAAsB,EAAE,QAAkB;QACxE,0FAA0F;QAC1F,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,YAAY,GAAG,CAAC,CAAC;QAEvB,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpE,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAEtC,mEAAmE;YACnE,MAAM,WAAW,GAAG,CAAC,IAAU,EAAQ,EAAE;gBACxC,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACjC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;qBAAM,IAAI,IAAI,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;oBAC3C,MAAM,EAAE,GAAG,IAAe,CAAC;oBAC3B,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC
;oBAErC,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBAChD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBAC1C,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;yBAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;wBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;wBACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzD,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;wBAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,CAAC;wBACP,kDAAkD;wBAClD,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;oBACpD,CAAC;gBACF,CAAC;YACF,CAAC,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,SAAsB,EAAE,QAAkB;QAClE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,SAAsB;QACnD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACnE,EAAE,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;CACD;AA/QD,8CA+QC"}
|
|
1
|
+
{"version":3,"file":"x-article.js","sourceRoot":"","sources":["../../src/extractors/x-article.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,sCAA6C;AAE7C,MAAM,SAAS,GAAG;IACjB,iBAAiB,EAAE,4CAA4C;IAC/D,KAAK,EAAE,uCAAuC;IAC9C,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EAAE,uBAAuB;IACpC,aAAa,EAAE,iCAAiC;IAChD,MAAM,EAAE,gCAAgC;IACxC,gBAAgB,EAAE,qDAAqD;IACvE,UAAU,EAAE,kCAAkC;IAC9C,gBAAgB,EAAE,mBAAmB;IACrC,cAAc,EAAE,6BAA6B;IAC7C,UAAU,EAAE,2BAA2B;IACvC,SAAS,EAAE,2BAA2B;IACtC,UAAU,EAAE,qCAAqC;IACjD,YAAY,EAAE,iCAAiC;CACtC,CAAC;AAEX,MAAa,iBAAkB,SAAQ,qBAAa;IAGnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,gBAAgB,GAAG,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC;IAChC,CAAC;IAED,OAAO;QACN,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE7C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,gBAAgB,EAAE;gBACjB,SAAS,EAAE,IAAI,CAAC,YAAY,EAAE;aAC9B;YACD,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,aAAa;gBACnB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC7D,OAAO,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,oBAAoB,CAAC;IAC7D,CAAC;IAEO,aAAa;QACpB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAErD,MAAM,IAAI,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAE/F,IAAI,IAAI,IAAI,MAAM;YAAE,OAAO,GAAG,IAAI,MAAM,MAAM,GAAG,CAAC;QAClD,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAClD,CAAC;IAEO,gBAAgB;QACvB,+EAA+E;QAC/E,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAC1F,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC7D,CAAC;IAEO,oBAA
oB;QAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACxG,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAClE,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC/C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9B,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAAE,OAAO,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAgB,CAAC;QACnE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAEzB,OAAO,8BAA8B,IAAA,mBAAa,EAAC,KAAK,CAAC,YAAY,CAAC;IACvE,CAAC;IAEO,YAAY,CAAC,SAAsB;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAE1D,gEAAgE;QAChE,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC5C,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACpC,kEAAkE;QAClE,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,sBAAsB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;QACtC,IAAI,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC;IACtC,CAAC;IAEO,qBAAqB,CAAC,SAAsB,EAAE,QAAkB;QACvE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACpE,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,UAAU,CAAC,SAAS,GAAG,gBAAgB,CAAC;YAExC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE3D,qBAAqB;YACrB,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9D,MAAM,SAAS,GAAG,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzD,iCAAiC;YACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAA
M,CAAC,CAAC;gBAC5C,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/D,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC;gBAC1B,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YAED,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,SAAsB,EAAE,QAAkB;QACnE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,OAAO;YAE1B,mEAAmE;YACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACzD,IAAI,SAAS,EAAE,CAAC;gBACf,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,wDAAwD;gBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC7C,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAChD,CAAC;YAED,kCAAkC;YAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC/C,IAAI,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,YAAY,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC5C,OAAO,CAAC,SAAS,GAAG,YAAY,QAAQ,EAAE,CAAC;YAC5C,CAAC;YACD,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE5B,qCAAqC;YACrC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,SAAsB,EAAE,QAAkB;QAChE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,SAAS,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,kBAAkB,CAAC,SAAsB,EAAE,QAAkB;QACpE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAA
C,GAAG,CAAC,EAAE;YAC1D,+BAA+B;YAC/B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO;YAEnD,0EAA0E;YAC1E,IAAI,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC;YAE5E,wBAAwB;YACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC/C,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAClC,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAElC,kCAAkC;YAClC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,mBAAmB,CAAC,SAAsB;QACjD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,sBAAsB,CAAC,SAAsB,EAAE,QAAkB;QACxE,0FAA0F;QAC1F,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,YAAY,GAAG,CAAC,CAAC;QAEvB,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpE,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAEtC,mEAAmE;YACnE,MAAM,WAAW,GAAG,CAAC,IAAU,EAAQ,EAAE;gBACxC,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACjC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;qBAAM,IAAI,IAAI,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;oBAC3C,MAAM,EAAE,GAAG,IAAe,CAAC;oBAC3B,MAAM,G
AAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;oBAErC,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;wBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBAChD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBAC1C,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;yBAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;wBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;wBACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzD,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;wBAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,CAAC;wBACP,kDAAkD;wBAClD,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;oBACpD,CAAC;gBACF,CAAC;YACF,CAAC,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,SAAsB,EAAE,QAAkB;QAClE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,SAAsB;QACnD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACnE,EAAE,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,oBAAoB,CAAC,SAAkB;QAC9C,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAC1D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;QAE/D,IAAI,IAAI,GAAgB,IAAI,CAAC;QAC7B,IAAI,IAAiB,CAAC;QACtB,OAAO,CAAC,IAAI,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,GAAG,IAAY,CAAC;YAC1B,IAAI,IAAI,EAAE,CAAC;gBACV,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,IAAI,QAAQ,IA
AI,QAAQ,EAAE,CAAC;oBAC1B,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAC1D,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;oBACzC,kEAAkE;oBAClE,IAAI,QAAQ,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;wBAC5F,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;wBACzC,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC;oBAClD,CAAC;gBACF,CAAC;YACF,CAAC;YACD,IAAI,GAAG,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAEO,iBAAiB;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;CACD;AA1SD,8CA0SC"}
|
|
@@ -76,7 +76,7 @@ class XOembedExtractor extends _base_1.BaseExtractor {
|
|
|
76
76
|
};
|
|
77
77
|
}
|
|
78
78
|
async tryExtractFxTwitter() {
|
|
79
|
-
const match = this.url.match(/\/([a-zA-
|
|
79
|
+
const match = this.url.match(/\/([a-zA-Z0-9_][a-zA-Z0-9_]{0,14})\/(status|article)\/(\d+)/);
|
|
80
80
|
if (!match)
|
|
81
81
|
return null;
|
|
82
82
|
try {
|