defuddle 0.6.6 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -1
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +139 -0
- package/dist/cli.js.map +1 -0
- package/dist/constants.js +7 -1
- package/dist/constants.js.map +1 -1
- package/dist/defuddle.js +28 -6
- package/dist/defuddle.js.map +1 -1
- package/dist/elements/code.js +7 -0
- package/dist/elements/code.js.map +1 -1
- package/dist/elements/footnotes.js +5 -1
- package/dist/elements/footnotes.js.map +1 -1
- package/dist/elements/images.js +35 -22
- package/dist/elements/images.js.map +1 -1
- package/dist/elements/math.base.js +1 -1
- package/dist/elements/math.base.js.map +1 -1
- package/dist/extractor-registry.d.ts +0 -2
- package/dist/extractor-registry.js +17 -15
- package/dist/extractor-registry.js.map +1 -1
- package/dist/extractors/_conversation.js +1 -1
- package/dist/extractors/_conversation.js.map +1 -1
- package/dist/extractors/chatgpt.d.ts +1 -0
- package/dist/extractors/chatgpt.js +5 -1
- package/dist/extractors/chatgpt.js.map +1 -1
- package/dist/extractors/claude.js +6 -3
- package/dist/extractors/claude.js.map +1 -1
- package/dist/extractors/gemini.js +1 -1
- package/dist/extractors/gemini.js.map +1 -1
- package/dist/extractors/reddit.js +1 -1
- package/dist/extractors/reddit.js.map +1 -1
- package/dist/extractors/twitter.js +5 -7
- package/dist/extractors/twitter.js.map +1 -1
- package/dist/extractors/x-article.d.ts +24 -0
- package/dist/extractors/x-article.js +266 -0
- package/dist/extractors/x-article.js.map +1 -0
- package/dist/extractors/youtube.d.ts +5 -0
- package/dist/extractors/youtube.js +86 -2
- package/dist/extractors/youtube.js.map +1 -1
- package/dist/index.full.d.ts +6 -1
- package/dist/index.full.js +1 -1
- package/dist/index.js +1 -1
- package/dist/markdown.d.ts +2 -0
- package/dist/markdown.js +32 -4
- package/dist/markdown.js.map +1 -1
- package/dist/metadata.d.ts +4 -0
- package/dist/metadata.js +104 -23
- package/dist/metadata.js.map +1 -1
- package/dist/node.js +1 -6
- package/dist/node.js.map +1 -1
- package/dist/standardize.js +21 -5
- package/dist/standardize.js.map +1 -1
- package/package.json +17 -5
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { BaseExtractor } from './_base';
|
|
2
|
+
import { ExtractorResult } from '../types/extractors';
|
|
3
|
+
/**
 * Extractor for long-form articles published on X (Twitter).
 *
 * Locates the article's rich-text container in the page, normalises the
 * Draft.js-style markup into plain semantic HTML and reports title, author,
 * site and description variables.
 */
export declare class XArticleExtractor extends BaseExtractor {
    /** Rich-text root of the article, looked up once in the constructor; null when absent. */
    private articleContainer;
    /**
     * @param document - Page to extract from.
     * @param url - URL of the page; used for the author and article-id fallbacks.
     * @param schemaOrgData - Optional schema.org data, forwarded to the base class.
     */
    constructor(document: Document, url: string, schemaOrgData?: any);
    /** True when the article container was found in the document. */
    canExtract(): boolean;
    /** Builds the cleaned article HTML together with its metadata variables. */
    extract(): ExtractorResult;
    /** Reads the article title element, with a fixed fallback title. */
    private extractTitle;
    /** Reads author name/handle from microdata, falling back to the URL. */
    private extractAuthor;
    /** Derives "@handle" from a ".../<handle>/article/<id>" URL. */
    private getAuthorFromUrl;
    /** Derives the author from an og:title of the form "<name> on X: ...". */
    private getAuthorFromOgTitle;
    /** Numeric id following "article/" in the URL, or an empty string. */
    private getArticleId;
    /** Clones the container, cleans it and wraps it in an <article> element. */
    private extractContent;
    /** Runs every markup conversion on the cloned container, in order. */
    private cleanContent;
    /** Replaces embedded tweets with <blockquote> elements. */
    private convertEmbeddedTweets;
    /** Replaces code-block widgets with plain <pre><code> elements. */
    private convertCodeBlocks;
    /** Flattens h1-h6 elements to text-only headings. */
    private convertHeaders;
    /** Replaces anchors wrapping article photos with a bare <img>. */
    private unwrapLinkedImages;
    /** Rewrites article photo URLs to request the "large" variant. */
    private upgradeImageQuality;
    /** Converts draft paragraph blocks to <p>, keeping strong/a/code. */
    private convertDraftParagraphs;
    /** Converts inline bold-styled spans to <strong>. */
    private convertBoldSpans;
    /** Strips data-offset-key attributes. */
    private removeDraftAttributes;
    /** First 140 characters of the article text, with "..." when truncated. */
    private createDescription;
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.XArticleExtractor = void 0;
|
|
4
|
+
const _base_1 = require("./_base");
|
|
5
|
+
/**
 * CSS selectors used to locate the parts of an X (Twitter) article page.
 * Most rely on the site's `data-testid` hooks; the rest target microdata
 * attributes and Draft.js class names.
 */
const SELECTORS = {
    // --- article shell ---
    ARTICLE_CONTAINER: '[data-testid="twitterArticleRichTextView"]',
    TITLE: '[data-testid="twitter-article-title"]',

    // --- author microdata ---
    AUTHOR: '[itemprop="author"]',
    AUTHOR_NAME: 'meta[itemprop="name"]',
    AUTHOR_HANDLE: 'meta[itemprop="additionalName"]',

    // --- media ---
    IMAGES: '[data-testid="tweetPhoto"] img',

    // --- Draft.js text blocks and inline styling ---
    DRAFT_PARAGRAPHS: '.longform-unstyled, .public-DraftStyleDefault-block',
    BOLD_SPANS: 'span[style*="font-weight: bold"]',
    DRAFT_ATTRIBUTES: '[data-offset-key]',

    // --- embedded tweets ---
    EMBEDDED_TWEET: '[data-testid="simpleTweet"]',
    TWEET_TEXT: '[data-testid="tweetText"]',
    USER_NAME: '[data-testid="User-Name"]',

    // --- rich blocks ---
    CODE_BLOCK: '[data-testid="markdown-code-block"]',
    HEADER_BLOCK: '[data-testid="longform-header"]',
};
|
|
21
|
+
/**
 * Extractor for long-form articles published on X (Twitter).
 *
 * The article body lives in a Draft.js-style rich-text container. This class
 * clones that container, rewrites its widgets (embedded tweets, code blocks,
 * headings, photos, paragraphs, bold spans) into plain semantic HTML and
 * returns the result together with title/author/description variables.
 *
 * `this.document` and `this.url` are read throughout; they are presumably
 * assigned by the BaseExtractor constructor, which is not visible here.
 */
class XArticleExtractor extends _base_1.BaseExtractor {
    /**
     * @param document Page to extract from.
     * @param url URL of the page (used for author / article-id fallbacks).
     * @param schemaOrgData Optional schema.org data, forwarded to the base class.
     */
    constructor(document, url, schemaOrgData) {
        super(document, url, schemaOrgData);
        // Looked up once; every later step works from this node (or a clone of it).
        this.articleContainer = document.querySelector(SELECTORS.ARTICLE_CONTAINER);
    }

    /** @returns true when the article rich-text container exists in the page. */
    canExtract() {
        return !!this.articleContainer;
    }

    /**
     * Builds the extraction result.
     * @returns Cleaned HTML (as both `content` and `contentHtml`), the article
     *   id under `extractedContent`, and the metadata `variables`.
     */
    extract() {
        const title = this.extractTitle();
        const author = this.extractAuthor();
        const contentHtml = this.extractContent();
        const description = this.createDescription();
        return {
            content: contentHtml,
            contentHtml,
            extractedContent: {
                articleId: this.getArticleId(),
            },
            variables: {
                title,
                author,
                site: 'X (Twitter)',
                description,
            }
        };
    }

    /** @returns Trimmed text of the title element, or a fixed fallback title. */
    extractTitle() {
        const titleEl = this.document.querySelector(SELECTORS.TITLE);
        return titleEl?.textContent?.trim() || 'Untitled X Article';
    }

    /**
     * Reads the author from the page's microdata.
     * @returns "Name (@handle)" when both are present, otherwise whichever one
     *   exists, otherwise the URL / og:title based fallback.
     */
    extractAuthor() {
        const authorContainer = this.document.querySelector(SELECTORS.AUTHOR);
        if (!authorContainer)
            return this.getAuthorFromUrl();
        const name = authorContainer.querySelector(SELECTORS.AUTHOR_NAME)?.getAttribute('content');
        const handle = authorContainer.querySelector(SELECTORS.AUTHOR_HANDLE)?.getAttribute('content');
        if (name && handle)
            return `${name} (@${handle})`;
        return name || handle || this.getAuthorFromUrl();
    }

    /**
     * Derives "@handle" from a URL of the form ".../<handle>/article/<id>".
     * The system path "/i/" is not a user handle, so it is skipped and the
     * og:title fallback is used instead.
     * @returns "@handle", or the og:title based fallback.
     */
    getAuthorFromUrl() {
        const match = this.url.match(/\/([a-zA-Z][a-zA-Z0-9_]{0,14})\/article\/\d+/);
        const handle = match ? match[1] : '';
        // "i" satisfies the handle pattern but is a system path, not an author.
        if (handle && handle.toLowerCase() !== 'i')
            return `@${handle}`;
        return this.getAuthorFromOgTitle();
    }

    /**
     * Derives the author from the og:title meta tag.
     * @returns The name preceding " on X:", or 'Unknown' when it cannot be found.
     */
    getAuthorFromOgTitle() {
        const ogTitle = this.document.querySelector('meta[property="og:title"]')?.getAttribute('content') || '';
        // Match patterns like "(4) Heinrich on X: ..." or "Heinrich on X: ..."
        const match = ogTitle.match(/^(?:\(\d+\)\s+)?(.+?)\s+on\s+X\s*:/);
        return match ? match[1].trim() : 'Unknown';
    }

    /** @returns The digits following "article/" in the URL, or ''. */
    getArticleId() {
        const match = this.url.match(/article\/(\d+)/);
        return match ? match[1] : '';
    }

    /**
     * Produces the cleaned article HTML. Works on a deep clone so the live
     * document is left untouched.
     * @returns `<article class="x-article">…</article>`, or '' without a container.
     */
    extractContent() {
        if (!this.articleContainer)
            return '';
        const clone = this.articleContainer.cloneNode(true);
        this.cleanContent(clone);
        return `<article class="x-article">${clone.innerHTML}</article>`;
    }

    /**
     * Runs all markup conversions on `container`, in place.
     * @param container Cloned article root.
     */
    cleanContent(container) {
        const ownerDoc = container.ownerDocument || this.document;
        // convert complex elements first (before other transformations)
        this.convertEmbeddedTweets(container, ownerDoc);
        this.convertCodeBlocks(container, ownerDoc);
        this.convertHeaders(container, ownerDoc);
        this.unwrapLinkedImages(container, ownerDoc);
        this.upgradeImageQuality(container);
        // convert bold spans BEFORE paragraphs so formatting is preserved
        this.convertBoldSpans(container, ownerDoc);
        this.convertDraftParagraphs(container, ownerDoc);
        this.removeDraftAttributes(container);
    }

    /**
     * Replaces each embedded tweet with
     * `<blockquote class="embedded-tweet"><cite>…</cite><p>…</p></blockquote>`.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    convertEmbeddedTweets(container, ownerDoc) {
        container.querySelectorAll(SELECTORS.EMBEDDED_TWEET).forEach(tweet => {
            const blockquote = ownerDoc.createElement('blockquote');
            blockquote.className = 'embedded-tweet';
            // extract author info: first link is read as the display name, second as the handle
            const userNameEl = tweet.querySelector(SELECTORS.USER_NAME);
            const authorLinks = userNameEl?.querySelectorAll('a');
            const fullName = authorLinks?.[0]?.textContent?.trim() || '';
            const handle = authorLinks?.[1]?.textContent?.trim() || '';
            // extract tweet text
            const tweetTextEl = tweet.querySelector(SELECTORS.TWEET_TEXT);
            const tweetText = tweetTextEl?.textContent?.trim() || '';
            // build clean blockquote content; joining only the non-empty parts
            // avoids a stray leading space when just the handle is available
            const citation = [fullName, handle].filter(Boolean).join(' ');
            if (citation) {
                const cite = ownerDoc.createElement('cite');
                cite.textContent = citation;
                blockquote.appendChild(cite);
            }
            if (tweetText) {
                const p = ownerDoc.createElement('p');
                p.textContent = tweetText;
                blockquote.appendChild(p);
            }
            tweet.replaceWith(blockquote);
        });
    }

    /**
     * Replaces each code-block widget with a plain `<pre><code>` pair, carrying
     * the language over as `data-lang` and a `language-*` class.
     * Blocks without both a `<pre>` and a `<code>` are left unchanged.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    convertCodeBlocks(container, ownerDoc) {
        container.querySelectorAll(SELECTORS.CODE_BLOCK).forEach(block => {
            const pre = block.querySelector('pre');
            const code = block.querySelector('code');
            if (!pre || !code)
                return;
            // extract language from class (e.g., "language-bash") or from span
            let language = '';
            const langClass = code.className.match(/language-(\w+)/);
            if (langClass) {
                language = langClass[1];
            }
            else {
                // fallback: look for a language label in the block header.
                // Spans inside <pre> are skipped: they belong to the code
                // itself, not to a label.
                const langSpan = Array.from(block.querySelectorAll('span'))
                    .find(span => !pre.contains(span));
                language = langSpan?.textContent?.trim() || '';
            }
            // create clean pre/code structure
            const newPre = ownerDoc.createElement('pre');
            const newCode = ownerDoc.createElement('code');
            if (language) {
                newCode.setAttribute('data-lang', language);
                newCode.className = `language-${language}`;
            }
            newCode.textContent = code.textContent || '';
            newPre.appendChild(newCode);
            // replace the entire block container
            block.replaceWith(newPre);
        });
    }

    /**
     * Flattens every h1-h6 to a heading of the same level that contains only
     * its trimmed text. Headings with no text are left unchanged.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    convertHeaders(container, ownerDoc) {
        // X articles use h2/h3 elements but content may be nested in spans/divs
        container.querySelectorAll('h1, h2, h3, h4, h5, h6').forEach(header => {
            const level = header.tagName.toLowerCase();
            const text = header.textContent?.trim() || '';
            if (!text)
                return;
            const newHeader = ownerDoc.createElement(level);
            newHeader.textContent = text;
            header.replaceWith(newHeader);
        });
    }

    /**
     * Returns `src` with its `name` query parameter set to `large`.
     * An existing `name` value is replaced whether it is the first (`?name=`)
     * or a later (`&name=`) parameter, so the parameter is never duplicated.
     * An empty `src` is returned unchanged.
     * @param src Image URL.
     * @returns The upgraded URL.
     */
    toLargeImageUrl(src) {
        if (!src)
            return src;
        if (/[?&]name=/.test(src)) {
            return src.replace(/([?&])name=[^&#]*/, '$1name=large');
        }
        return `${src}${src.includes('?') ? '&' : '?'}name=large`;
    }

    /**
     * Replaces each anchor that wraps an article photo with a bare `<img>`
     * carrying only `src` (upgraded to the large variant) and `alt`.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    unwrapLinkedImages(container, ownerDoc) {
        // find all tweetPhoto images and extract them from any ancestor anchors
        container.querySelectorAll(SELECTORS.IMAGES).forEach(img => {
            // find closest anchor ancestor; ignore anchors outside the container
            // (this also skips an anchor already replaced for a sibling image)
            const anchor = img.closest('a');
            if (!anchor || !container.contains(anchor))
                return;
            const src = this.toLargeImageUrl(img.getAttribute('src') || '');
            const alt = img.getAttribute('alt')?.replace(/\s+/g, ' ').trim() || 'Image';
            const cleanImg = ownerDoc.createElement('img');
            cleanImg.setAttribute('src', src);
            cleanImg.setAttribute('alt', alt);
            // replace anchor with clean image
            anchor.replaceWith(cleanImg);
        });
    }

    /**
     * Upgrades the `src` of every article photo to the large variant.
     * Images without a `src` are skipped.
     * @param container Cloned article root.
     */
    upgradeImageQuality(container) {
        container.querySelectorAll(SELECTORS.IMAGES).forEach(img => {
            const src = img.getAttribute('src');
            if (!src)
                return;
            img.setAttribute('src', this.toLargeImageUrl(src));
        });
    }

    /**
     * Converts each draft paragraph block to a `<p>`. Text is kept;
     * `<strong>`, `<a>` (href only) and `<code>` are rebuilt from their text;
     * every other element is transparently descended into.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    convertDraftParagraphs(container, ownerDoc) {
        // node type constants (avoid using Node global which isn't available in all environments)
        const TEXT_NODE = 3;
        const ELEMENT_NODE = 1;
        container.querySelectorAll(SELECTORS.DRAFT_PARAGRAPHS).forEach(div => {
            const p = ownerDoc.createElement('p');
            // preserve formatting (strong, links, code) by processing children
            const processNode = (node) => {
                if (node.nodeType === TEXT_NODE) {
                    p.appendChild(ownerDoc.createTextNode(node.textContent || ''));
                    return;
                }
                if (node.nodeType !== ELEMENT_NODE)
                    return;
                const tag = node.tagName.toLowerCase();
                if (tag === 'strong' || tag === 'code') {
                    const inline = ownerDoc.createElement(tag);
                    inline.textContent = node.textContent || '';
                    p.appendChild(inline);
                }
                else if (tag === 'a') {
                    const link = ownerDoc.createElement('a');
                    link.setAttribute('href', node.getAttribute('href') || '');
                    link.textContent = node.textContent || '';
                    p.appendChild(link);
                }
                else {
                    // recurse into other elements (spans, divs, etc.)
                    node.childNodes.forEach(child => processNode(child));
                }
            };
            div.childNodes.forEach(child => processNode(child));
            div.replaceWith(p);
        });
    }

    /**
     * Replaces inline bold-styled spans with `<strong>` holding the same text.
     * @param container Cloned article root.
     * @param ownerDoc Document used to create the replacement elements.
     */
    convertBoldSpans(container, ownerDoc) {
        container.querySelectorAll(SELECTORS.BOLD_SPANS).forEach(span => {
            const strong = ownerDoc.createElement('strong');
            strong.textContent = span.textContent || '';
            span.replaceWith(strong);
        });
    }

    /**
     * Strips the `data-offset-key` attribute from every element carrying it.
     * @param container Cloned article root.
     */
    removeDraftAttributes(container) {
        container.querySelectorAll(SELECTORS.DRAFT_ATTRIBUTES).forEach(el => {
            el.removeAttribute('data-offset-key');
        });
    }

    /**
     * Builds a short description from the original (uncleaned) container text.
     * @returns The first 140 characters, followed by '...' when truncated.
     */
    createDescription() {
        const text = this.articleContainer?.textContent?.trim() || '';
        return text.slice(0, 140) + (text.length > 140 ? '...' : '');
    }
}
|
|
265
|
+
exports.XArticleExtractor = XArticleExtractor;
|
|
266
|
+
//# sourceMappingURL=x-article.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"x-article.js","sourceRoot":"","sources":["../../src/extractors/x-article.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAGxC,MAAM,SAAS,GAAG;IACjB,iBAAiB,EAAE,4CAA4C;IAC/D,KAAK,EAAE,uCAAuC;IAC9C,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EAAE,uBAAuB;IACpC,aAAa,EAAE,iCAAiC;IAChD,MAAM,EAAE,gCAAgC;IACxC,gBAAgB,EAAE,qDAAqD;IACvE,UAAU,EAAE,kCAAkC;IAC9C,gBAAgB,EAAE,mBAAmB;IACrC,cAAc,EAAE,6BAA6B;IAC7C,UAAU,EAAE,2BAA2B;IACvC,SAAS,EAAE,2BAA2B;IACtC,UAAU,EAAE,qCAAqC;IACjD,YAAY,EAAE,iCAAiC;CACtC,CAAC;AAEX,MAAa,iBAAkB,SAAQ,qBAAa;IAGnD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,gBAAgB,GAAG,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC7E,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC;IAChC,CAAC;IAED,OAAO;QACN,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE7C,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW;YACX,gBAAgB,EAAE;gBACjB,SAAS,EAAE,IAAI,CAAC,YAAY,EAAE;aAC9B;YACD,SAAS,EAAE;gBACV,KAAK;gBACL,MAAM;gBACN,IAAI,EAAE,aAAa;gBACnB,WAAW;aACX;SACD,CAAC;IACH,CAAC;IAEO,YAAY;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC7D,OAAO,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,oBAAoB,CAAC;IAC7D,CAAC;IAEO,aAAa;QACpB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC,gBAAgB,EAAE,CAAC;QAErD,MAAM,IAAI,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,eAAe,CAAC,aAAa,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAE/F,IAAI,IAAI,IAAI,MAAM;YAAE,OAAO,GAAG,IAAI,MAAM,MAAM,GAAG,CAAC;QAClD,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;IAClD,CAAC;IAEO,gBAAgB;QACvB,mEAAmE;QACnE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC7E,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC7D,CAAC;IAEO,oBAAoB;QAC3B,MAAM
,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACxG,uEAAuE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAClE,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5C,CAAC;IAEO,YAAY;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC/C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9B,CAAC;IAEO,cAAc;QACrB,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAAE,OAAO,EAAE,CAAC;QAEtC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,IAAI,CAAgB,CAAC;QACnE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QAEzB,OAAO,8BAA8B,KAAK,CAAC,SAAS,YAAY,CAAC;IAClE,CAAC;IAEO,YAAY,CAAC,SAAsB;QAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC;QAE1D,gEAAgE;QAChE,IAAI,CAAC,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,iBAAiB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC5C,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACzC,IAAI,CAAC,kBAAkB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC;QACpC,kEAAkE;QAClE,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,sBAAsB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IAEO,qBAAqB,CAAC,SAAsB,EAAE,QAAkB;QACvE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACpE,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,UAAU,CAAC,SAAS,GAAG,gBAAgB,CAAC;YAExC,sBAAsB;YACtB,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YAC5D,MAAM,WAAW,GAAG,UAAU,EAAE,gBAAgB,CAAC,GAAG,CAAC,CAAC;YACtD,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAE3D,qBAAqB;YACrB,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YAC9D,MAAM,SAAS,GAAG,WAAW,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAEzD,iCAAiC;YACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC5C,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,QAA
Q,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAC/D,UAAU,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACtC,CAAC,CAAC,WAAW,GAAG,SAAS,CAAC;gBAC1B,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YAED,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB,CAAC,SAAsB,EAAE,QAAkB;QACnE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YACzC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,OAAO;YAE1B,mEAAmE;YACnE,IAAI,QAAQ,GAAG,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;YACzD,IAAI,SAAS,EAAE,CAAC;gBACf,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;iBAAM,CAAC;gBACP,wDAAwD;gBACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;gBAC7C,QAAQ,GAAG,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAChD,CAAC;YAED,kCAAkC;YAClC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC/C,IAAI,QAAQ,EAAE,CAAC;gBACd,OAAO,CAAC,YAAY,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;gBAC5C,OAAO,CAAC,SAAS,GAAG,YAAY,QAAQ,EAAE,CAAC;YAC5C,CAAC;YACD,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE5B,qCAAqC;YACrC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,cAAc,CAAC,SAAsB,EAAE,QAAkB;QAChE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC9C,IAAI,CAAC,IAAI;gBAAE,OAAO;YAElB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,SAAS,CAAC,WAAW,GAAG,IAAI,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,kBAAkB,CAAC,SAAsB,EAAE,QAAkB;QACpE,wEAAwE;QACxE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,+BAA+B;YAC/B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,C
AAC,GAAG,CAAC,CAAC;YAChC,IAAI,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO;YAEnD,0EAA0E;YAC1E,IAAI,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC;YAE5E,wBAAwB;YACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC/C,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,GAAG,GAAG,GAAG,aAAa,CAAC;YAC3B,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YAC/C,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAClC,QAAQ,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAElC,kCAAkC;YAClC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,mBAAmB,CAAC,SAAsB;QACjD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YAC1D,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,CAAC,GAAG;gBAAE,OAAO;YAEjB,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;YAClE,CAAC;iBAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,GAAG,aAAa,CAAC,CAAC;YAC9C,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,sBAAsB,CAAC,SAAsB,EAAE,QAAkB;QACxE,0FAA0F;QAC1F,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,YAAY,GAAG,CAAC,CAAC;QAEvB,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE;YACpE,MAAM,CAAC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAEtC,mEAAmE;YACnE,MAAM,WAAW,GAAG,CAAC,IAAU,EAAQ,EAAE;gBACxC,IAAI,IAAI,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;oBACjC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;gBAChE,CAAC;qBAAM,IAAI,IAAI,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;oBAC3C,MAAM,EAAE,GAAG,IAAe,CAAC;oBAC3B,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;oBAErC,IAAI,GAAG,KAAK,Q
AAQ,EAAE,CAAC;wBACtB,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;wBAChD,MAAM,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBAC1C,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACvB,CAAC;yBAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;wBACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;wBACzC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzD,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;wBAC3B,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;wBAC5C,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC;wBACxC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,CAAC;wBACP,kDAAkD;wBAClD,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;oBACpD,CAAC;gBACF,CAAC;YACF,CAAC,CAAC;YAEF,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC;YACpD,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,SAAsB,EAAE,QAAkB;QAClE,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;YAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,SAAsB;QACnD,SAAS,CAAC,gBAAgB,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;YACnE,EAAE,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACJ,CAAC;IAEO,iBAAiB;QACxB,MAAM,IAAI,GAAG,IAAI,CAAC,gBAAgB,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC9D,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,CAAC;CACD;AA/QD,8CA+QC"}
|
|
@@ -8,5 +8,10 @@ export declare class YoutubeExtractor extends BaseExtractor {
|
|
|
8
8
|
extract(): ExtractorResult;
|
|
9
9
|
private formatDescription;
|
|
10
10
|
private getVideoData;
|
|
11
|
+
private getChannelName;
|
|
12
|
+
private getChannelNameFromDom;
|
|
13
|
+
private getChannelNameFromMicrodata;
|
|
14
|
+
private getChannelNameFromPlayerResponse;
|
|
15
|
+
private parseInlineJson;
|
|
11
16
|
private getVideoId;
|
|
12
17
|
}
|
|
@@ -13,6 +13,7 @@ class YoutubeExtractor extends _base_1.BaseExtractor {
|
|
|
13
13
|
}
|
|
14
14
|
extract() {
|
|
15
15
|
const videoData = this.getVideoData();
|
|
16
|
+
const channelName = this.getChannelName(videoData);
|
|
16
17
|
const description = videoData.description || '';
|
|
17
18
|
const formattedDescription = this.formatDescription(description);
|
|
18
19
|
const contentHtml = `<iframe width="560" height="315" src="https://www.youtube.com/embed/${this.getVideoId()}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe><br>${formattedDescription}`;
|
|
@@ -21,11 +22,11 @@ class YoutubeExtractor extends _base_1.BaseExtractor {
|
|
|
21
22
|
contentHtml: contentHtml,
|
|
22
23
|
extractedContent: {
|
|
23
24
|
videoId: this.getVideoId(),
|
|
24
|
-
author:
|
|
25
|
+
author: channelName,
|
|
25
26
|
},
|
|
26
27
|
variables: {
|
|
27
28
|
title: videoData.name || '',
|
|
28
|
-
author:
|
|
29
|
+
author: channelName,
|
|
29
30
|
site: 'YouTube',
|
|
30
31
|
image: Array.isArray(videoData.thumbnailUrl) ? videoData.thumbnailUrl[0] || '' : '',
|
|
31
32
|
published: videoData.uploadDate,
|
|
@@ -44,6 +45,89 @@ class YoutubeExtractor extends _base_1.BaseExtractor {
|
|
|
44
45
|
: this.schemaOrgData['@type'] === 'VideoObject' ? this.schemaOrgData : null;
|
|
45
46
|
return videoData || {};
|
|
46
47
|
}
|
|
48
|
+
getChannelName(videoData) {
|
|
49
|
+
const fromDom = this.getChannelNameFromDom();
|
|
50
|
+
if (fromDom) {
|
|
51
|
+
return fromDom;
|
|
52
|
+
}
|
|
53
|
+
const fromPlayer = this.getChannelNameFromPlayerResponse();
|
|
54
|
+
if (fromPlayer) {
|
|
55
|
+
return fromPlayer;
|
|
56
|
+
}
|
|
57
|
+
return videoData?.author || '';
|
|
58
|
+
}
|
|
59
|
+
getChannelNameFromDom() {
|
|
60
|
+
const ownerSelectors = [
|
|
61
|
+
'ytd-video-owner-renderer #channel-name a[href^="/@"]',
|
|
62
|
+
'#owner-name a[href^="/@"]'
|
|
63
|
+
];
|
|
64
|
+
for (const selector of ownerSelectors) {
|
|
65
|
+
const element = this.document.querySelector(selector);
|
|
66
|
+
const value = element?.textContent?.trim();
|
|
67
|
+
if (value) {
|
|
68
|
+
return value;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return this.getChannelNameFromMicrodata();
|
|
72
|
+
}
|
|
73
|
+
getChannelNameFromMicrodata() {
|
|
74
|
+
const authorRoot = this.document.querySelector('[itemprop="author"]');
|
|
75
|
+
if (!authorRoot)
|
|
76
|
+
return '';
|
|
77
|
+
const metaName = authorRoot.querySelector('meta[itemprop="name"]');
|
|
78
|
+
if (metaName?.getAttribute('content')) {
|
|
79
|
+
return metaName.getAttribute('content').trim();
|
|
80
|
+
}
|
|
81
|
+
const linkName = authorRoot.querySelector('link[itemprop="name"]');
|
|
82
|
+
if (linkName?.getAttribute('content')) {
|
|
83
|
+
return linkName.getAttribute('content').trim();
|
|
84
|
+
}
|
|
85
|
+
const text = authorRoot.querySelector('[itemprop="name"], a, span');
|
|
86
|
+
return text?.textContent?.trim() || '';
|
|
87
|
+
}
|
|
88
|
+
getChannelNameFromPlayerResponse() {
|
|
89
|
+
const data = this.parseInlineJson('ytInitialPlayerResponse');
|
|
90
|
+
if (!data)
|
|
91
|
+
return '';
|
|
92
|
+
const fromVideoDetails = data?.videoDetails?.author || data?.videoDetails?.ownerChannelName;
|
|
93
|
+
if (fromVideoDetails) {
|
|
94
|
+
return fromVideoDetails;
|
|
95
|
+
}
|
|
96
|
+
const fromMicroformat = data?.microformat?.playerMicroformatRenderer?.ownerChannelName;
|
|
97
|
+
return fromMicroformat || '';
|
|
98
|
+
}
|
|
99
|
+
parseInlineJson(globalName) {
|
|
100
|
+
const scripts = Array.from(this.document.querySelectorAll('script'));
|
|
101
|
+
for (const script of scripts) {
|
|
102
|
+
const text = script.textContent || '';
|
|
103
|
+
if (!text.includes(globalName))
|
|
104
|
+
continue;
|
|
105
|
+
const startIndex = text.indexOf('{', text.indexOf(globalName));
|
|
106
|
+
if (startIndex === -1)
|
|
107
|
+
continue;
|
|
108
|
+
let depth = 0;
|
|
109
|
+
for (let i = startIndex; i < text.length; i++) {
|
|
110
|
+
const char = text[i];
|
|
111
|
+
if (char === '{') {
|
|
112
|
+
depth += 1;
|
|
113
|
+
}
|
|
114
|
+
else if (char === '}') {
|
|
115
|
+
depth -= 1;
|
|
116
|
+
if (depth === 0) {
|
|
117
|
+
const jsonText = text.slice(startIndex, i + 1);
|
|
118
|
+
try {
|
|
119
|
+
return JSON.parse(jsonText);
|
|
120
|
+
}
|
|
121
|
+
catch (error) {
|
|
122
|
+
console.error('YoutubeExtractor: failed to parse inline JSON', error);
|
|
123
|
+
break;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
return null;
|
|
130
|
+
}
|
|
47
131
|
getVideoId() {
|
|
48
132
|
const url = new URL(this.url);
|
|
49
133
|
if (url.hostname === 'youtu.be') {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../src/extractors/youtube.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAGxC,MAAa,gBAAiB,SAAQ,qBAAa;IAIlD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACpD,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACpC,CAAC;IAED,UAAU;QACT,OAAO,IAAI,CAAC;IACb,CAAC;IAED,OAAO;QACN,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,EAAE,CAAC;QAChD,MAAM,oBAAoB,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QACjE,MAAM,WAAW,GAAG,uEAAuE,IAAI,CAAC,UAAU,EAAE,4OAA4O,oBAAoB,EAAE,CAAC;QAE/W,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE;gBAC1B,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../src/extractors/youtube.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAGxC,MAAa,gBAAiB,SAAQ,qBAAa;IAIlD,YAAY,QAAkB,EAAE,GAAW,EAAE,aAAmB;QAC/D,KAAK,CAAC,QAAQ,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;QACpC,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QACpD,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACpC,CAAC;IAED,UAAU;QACT,OAAO,IAAI,CAAC;IACb,CAAC;IAED,OAAO;QACN,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,EAAE,CAAC;QAChD,MAAM,oBAAoB,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QACjE,MAAM,WAAW,GAAG,uEAAuE,IAAI,CAAC,UAAU,EAAE,4OAA4O,oBAAoB,EAAE,CAAC;QAE/W,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE;gBAC1B,MAAM,EAAE,WAAW;aACnB;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,SAAS,CAAC,IAAI,IAAI,EAAE;gBAC3B,MAAM,EAAE,WAAW;gBACnB,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE;gBACnF,SAAS,EAAE,SAAS,CAAC,UAAU;gBAC/B,WAAW,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE;aAC7C;SACD,CAAC;IACH,CAAC;IAEO,iBAAiB,CAAC,WAAmB;QAC5C,OAAO,MAAM,WAAW,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC;IACvD,CAAC;IAEO,YAAY;QACnB,IAAI,CAAC,IAAI,CAAC,aAAa;YAAE,OAAO,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC;YAClD,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,aAAa,CAAC;YAClE,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,KAAK,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;QAE7E,OAAO,SAAS,IAAI,EAAE,CAAC;IACxB,CAAC;IAEO,cAAc,CAAC,SAAc;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC7C,IAAI,OAAO,EAAE,CAAC;YACb,OAAO,OAAO,CAAC;QAChB,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,gCAAgC,EAAE,CAAC;QAC3D,IAAI,UAAU,EAAE,CAAC;YAChB,OAAO,UAAU,CAAC;QACnB,CAAC;QAED,OAAO,SAAS,EAAE,MAAM,IAAI,EAAE,CAAC;IAChC,CAAC;IAEO,qBAAqB;QAC5B,MAAM,cAAc,GAAG;YACtB,sDAAsD;YACtD,2B
AA2B;SAC3B,CAAC;QAEF,KAAK,MAAM,QAAQ,IAAI,cAAc,EAAE,CAAC;YACvC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YACtD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;YAC3C,IAAI,KAAK,EAAE,CAAC;gBACX,OAAO,KAAK,CAAC;YACd,CAAC;QACF,CAAC;QAED,OAAO,IAAI,CAAC,2BAA2B,EAAE,CAAC;IAC3C,CAAC;IAEO,2BAA2B;QAClC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QACtE,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,MAAM,QAAQ,GAAG,UAAU,CAAC,aAAa,CAAC,uBAAuB,CAAC,CAAC;QACnE,IAAI,QAAQ,EAAE,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YACvC,OAAO,QAAQ,CAAC,YAAY,CAAC,SAAS,CAAE,CAAC,IAAI,EAAE,CAAC;QACjD,CAAC;QAED,MAAM,QAAQ,GAAG,UAAU,CAAC,aAAa,CAAC,uBAAuB,CAAC,CAAC;QACnE,IAAI,QAAQ,EAAE,YAAY,CAAC,SAAS,CAAC,EAAE,CAAC;YACvC,OAAO,QAAQ,CAAC,YAAY,CAAC,SAAS,CAAE,CAAC,IAAI,EAAE,CAAC;QACjD,CAAC;QAED,MAAM,IAAI,GAAG,UAAU,CAAC,aAAa,CAAC,4BAA4B,CAAC,CAAC;QACpE,OAAO,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACxC,CAAC;IAEO,gCAAgC;QACvC,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,CAAC,yBAAyB,CAAC,CAAC;QAC7D,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QAErB,MAAM,gBAAgB,GAAG,IAAI,EAAE,YAAY,EAAE,MAAM,IAAI,IAAI,EAAE,YAAY,EAAE,gBAAgB,CAAC;QAC5F,IAAI,gBAAgB,EAAE,CAAC;YACtB,OAAO,gBAAgB,CAAC;QACzB,CAAC;QAED,MAAM,eAAe,GAAG,IAAI,EAAE,WAAW,EAAE,yBAAyB,EAAE,gBAAgB,CAAC;QACvF,OAAO,eAAe,IAAI,EAAE,CAAC;IAC9B,CAAC;IAEO,eAAe,CAAC,UAAkB;QACzC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC;YACtC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAAE,SAAS;YAEzC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;YAC/D,IAAI,UAAU,KAAK,CAAC,CAAC;gBAAE,SAAS;YAEhC,IAAI,KAAK,GAAG,CAAC,CAAC;YACd,KAAK,IAAI,CAAC,GAAG,UAAU,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;gBACrB,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;oBAClB,KAAK,IAAI,CAAC,CAAC;gBACZ,CAAC;qBAAM,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;oBACzB,KAAK,IAAI,CAAC,CAAC;oBACX,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;wBAC
jB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;wBAC/C,IAAI,CAAC;4BACJ,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;wBAC7B,CAAC;wBAAC,OAAO,KAAK,EAAE,CAAC;4BAChB,OAAO,CAAC,KAAK,CAAC,+CAA+C,EAAE,KAAK,CAAC,CAAC;4BACtE,MAAM;wBACP,CAAC;oBACF,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAEO,UAAU;QACjB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,GAAG,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YACjC,OAAO,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,IAAI,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IACvD,CAAC;CACD;AA1JD,4CA0JC"}
|
package/dist/index.full.d.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
import { Defuddle } from './defuddle';
|
|
2
1
|
import { DefuddleOptions, DefuddleResponse } from './types';
|
|
3
2
|
export type { DefuddleOptions, DefuddleResponse };
|
|
3
|
+
/**
 * Default export of the full bundle entry point.
 * NOTE(review): the private `defuddle` member suggests this class wraps an
 * inner instance — confirm against the implementation, which is not visible here.
 */
declare class Defuddle {
    /** Private member; its type and role are not visible in this declaration. */
    private defuddle;
    /** Private member; presumably the options passed to the constructor — confirm. */
    private options;
    /**
     * @param doc - Document to process.
     * @param options - Optional settings.
     */
    constructor(doc: Document, options?: DefuddleOptions);
    /** @returns The parse result for the document. */
    parse(): DefuddleResponse;
}
|
|
4
9
|
export default Defuddle;
|