defuddle 0.3.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ import { BaseExtractor } from './extractors/_base';
2
+ type ExtractorConstructor = new (document: Document, url: string, schemaOrgData?: any) => BaseExtractor;
3
+ interface ExtractorMapping {
4
+ patterns: (string | RegExp)[];
5
+ extractor: ExtractorConstructor;
6
+ }
7
+ export declare class ExtractorRegistry {
8
+ private static mappings;
9
+ private static domainCache;
10
+ static initialize(): void;
11
+ static register(mapping: ExtractorMapping): void;
12
+ static findExtractor(document: Document, url: string, schemaOrgData?: any): BaseExtractor | null;
13
+ static clearCache(): void;
14
+ }
15
+ export {};
@@ -0,0 +1,9 @@
1
+ import { ExtractorResult } from '../types/extractors';
2
+ export declare abstract class BaseExtractor {
3
+ protected document: Document;
4
+ protected url: string;
5
+ protected schemaOrgData?: any;
6
+ constructor(document: Document, url: string, schemaOrgData?: any);
7
+ abstract canExtract(): boolean;
8
+ abstract extract(): ExtractorResult;
9
+ }
@@ -0,0 +1,9 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ConversationMessage, ConversationMetadata, Footnote, ExtractorResult } from '../types/extractors';
3
+ export declare abstract class ConversationExtractor extends BaseExtractor {
4
+ protected abstract extractMessages(): ConversationMessage[];
5
+ protected abstract getMetadata(): ConversationMetadata;
6
+ protected getFootnotes(): Footnote[];
7
+ extract(): ExtractorResult;
8
+ protected createContentHtml(messages: ConversationMessage[], footnotes: Footnote[]): string;
9
+ }
@@ -0,0 +1,13 @@
1
+ import { ConversationExtractor } from './_conversation';
2
+ import { ConversationMessage, ConversationMetadata, Footnote } from '../types/extractors';
3
+ export declare class ChatGPTExtractor extends ConversationExtractor {
4
+ private articles;
5
+ private footnotes;
6
+ private footnoteCounter;
7
+ constructor(document: Document, url: string);
8
+ canExtract(): boolean;
9
+ protected extractMessages(): ConversationMessage[];
10
+ protected getFootnotes(): Footnote[];
11
+ protected getMetadata(): ConversationMetadata;
12
+ private getTitle;
13
+ }
@@ -0,0 +1,10 @@
1
+ import { ConversationExtractor } from './_conversation';
2
+ import { ConversationMessage, ConversationMetadata } from '../types/extractors';
3
+ export declare class ClaudeExtractor extends ConversationExtractor {
4
+ private articles;
5
+ constructor(document: Document, url: string);
6
+ canExtract(): boolean;
7
+ protected extractMessages(): ConversationMessage[];
8
+ protected getMetadata(): ConversationMetadata;
9
+ private getTitle;
10
+ }
@@ -0,0 +1,21 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
+ export declare class HackerNewsExtractor extends BaseExtractor {
4
+ private mainPost;
5
+ private isCommentPage;
6
+ private mainComment;
7
+ constructor(document: Document, url: string);
8
+ private detectCommentPage;
9
+ private findMainComment;
10
+ canExtract(): boolean;
11
+ extract(): ExtractorResult;
12
+ private createContentHtml;
13
+ private getPostContent;
14
+ private extractComments;
15
+ private processComments;
16
+ private getPostId;
17
+ private getPostTitle;
18
+ private getPostAuthor;
19
+ private createDescription;
20
+ private getPostDate;
21
+ }
@@ -0,0 +1,16 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
+ export declare class RedditExtractor extends BaseExtractor {
4
+ private shredditPost;
5
+ constructor(document: Document, url: string);
6
+ canExtract(): boolean;
7
+ extract(): ExtractorResult;
8
+ private getPostContent;
9
+ private createContentHtml;
10
+ private extractComments;
11
+ private getPostId;
12
+ private getSubreddit;
13
+ private getPostAuthor;
14
+ private createDescription;
15
+ private processComments;
16
+ }
@@ -0,0 +1,16 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
+ export declare class TwitterExtractor extends BaseExtractor {
4
+ private mainTweet;
5
+ private threadTweets;
6
+ constructor(document: Document, url: string);
7
+ canExtract(): boolean;
8
+ extract(): ExtractorResult;
9
+ private formatTweetText;
10
+ private extractTweet;
11
+ private extractUserInfo;
12
+ private extractImages;
13
+ private getTweetId;
14
+ private getTweetAuthor;
15
+ private createDescription;
16
+ }
@@ -0,0 +1,12 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
+ export declare class YoutubeExtractor extends BaseExtractor {
4
+ private videoElement;
5
+ protected schemaOrgData: any;
6
+ constructor(document: Document, url: string, schemaOrgData?: any);
7
+ canExtract(): boolean;
8
+ extract(): ExtractorResult;
9
+ private formatDescription;
10
+ private getVideoData;
11
+ private getVideoId;
12
+ }