@ioodev/nodescraper 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ // Type definitions for @ioodev/nodescraper 1.1.1
2
+
3
+ export interface NodeScraperOptions { // Settings bag accepted by the NodeScraper constructor and by the static scrape()/scrapeAll() helpers; every field is optional.
4
+ /** Request timeout in milliseconds. Default: 10000. */
5
+ timeout?: number;
6
+ /** User-Agent header sent with the request. Defaults to a realistic browser-like UA. */
7
+ userAgent?: string;
8
+ /** Extra headers merged into the request. */
9
+ headers?: Record<string, string>; // NOTE(review): precedence versus userAgent on a conflicting key is not visible here — confirm
10
+ /** Maximum number of redirects to follow. Default: 5. */
11
+ maxRedirects?: number;
12
+ /** Protocols accepted by the URL validator. Default: ['http:', 'https:']. */
13
+ allowedProtocols?: string[]; // the documented default uses the trailing-colon form ('http:'), so custom entries presumably must too — confirm
14
+ /** If true, init() rejects instead of swallowing errors. Default: false. */
15
+ throwOnError?: boolean; // when false, a failure is presumably surfaced through NodeScraper.getError() — confirm
16
+ }
17
+
18
+ export interface OpenGraphData { // Open Graph metadata map; appears in ScraperSnapshot.openGraph and in the return type of NodeScraper.open_graph().
19
+ 'og:site_name': string | null; // the six named keys below are required by the type; each is a string or null
20
+ 'og:type': string | null;
21
+ 'og:title': string | null;
22
+ 'og:description': string | null;
23
+ 'og:url': string | null;
24
+ 'og:image': string | null; // NOTE(review): null presumably means the tag was absent from the page — confirm
25
+ [key: string]: string | null; // index signature: any other string key is also typed as string | null, so misspelled keys are not caught by the compiler
26
+ }
27
+
28
+ export interface TwitterCardData { // Twitter Card metadata map; appears in ScraperSnapshot.twitterCard and in the return type of NodeScraper.twitter_card().
29
+ 'twitter:card': string | null; // the five named keys below are required by the type; each is a string or null
30
+ 'twitter:title': string | null;
31
+ 'twitter:description': string | null;
32
+ 'twitter:url': string | null;
33
+ 'twitter:image': string | null; // NOTE(review): null presumably means the tag was absent from the page — confirm
34
+ [key: string]: string | null; // index signature: any other string key is also typed as string | null, so misspelled keys are not caught by the compiler
35
+ }
36
+
37
+ export interface ImageDetails { // Element type of the array returned by NodeScraper.image_details(); every field is required but nullable.
38
+ url: string | null; // presumably the image source as written in the markup — confirm against the implementation
39
+ absolute_url: string | null; // presumably `url` resolved against the page URL; null when that is not possible — confirm
40
+ alt_text: string | null;
41
+ title: string | null;
42
+ }
43
+
44
+ export interface LinkDetails { // Element type of the array returned by NodeScraper.link_details(); all fields are required.
45
+ url: string; // non-nullable, unlike ImageDetails.url
46
+ absolute_url: string | null; // the only nullable field here; presumably null when `url` cannot be resolved — confirm
47
+ protocol: string;
48
+ text: string;
49
+ title: string; // typed as plain string (not nullable); NOTE(review): value used when the attribute is missing is not visible here
50
+ target: string;
51
+ rel: string[];
52
+ is_nofollow: boolean; // the four is_* flags are presumably derived from the `rel` tokens — verify against the implementation
53
+ is_ugc: boolean;
54
+ is_noopener: boolean;
55
+ is_noreferrer: boolean;
56
+ }
57
+
58
+ export interface FilterParams { // Argument object for NodeScraper.filter(); only `element` is required.
59
+ /** Tag name to match, e.g. "div". */
60
+ element: string;
61
+ /** Exact attribute values to match. */
62
+ attributes?: Record<string, string>;
63
+ /** Return all matches instead of just the first. Default: false. */
64
+ multiple?: boolean;
65
+ /** Tag/class/id selectors to extract from each match. */
66
+ extract?: string[];
67
+ /** Return inner HTML instead of trimmed text. Default: true. */
68
+ returnHtml?: boolean; // per the documented default, omitting this yields HTML rather than text
69
+ }
70
+
71
+ export interface ScraperSnapshot { // Plain-data summary of a scraped page; this is the non-null return type of NodeScraper.toJSON().
72
+ url: string;
73
+ statusCode: number | null;
74
+ title: string | null; // the scalar metadata fields below are all required but nullable
75
+ description: string | null;
76
+ canonical: string | null;
77
+ lang: string | null;
78
+ charset: string | null;
79
+ robots: string | null;
80
+ keywords: string[] | null;
81
+ author: string | null;
82
+ image: string | null;
83
+ favicon: string | null;
84
+ openGraph: OpenGraphData | null;
85
+ twitterCard: TwitterCardData | null;
86
+ headings: { h1: string[]; h2: string[]; h3: string[] }; // only h1–h3 are included (the class also declares h4()–h6()); arrays are non-nullable here
87
+ linkCount: number; // counts only; the link and image lists themselves are not part of the snapshot
88
+ imageCount: number;
89
+ }
90
+
91
+ export default class NodeScraper { // Default export: scraper bound to one URL, with accessor methods for page metadata and content. Most accessors are typed `... | null`.
92
+ url: string;
93
+ rawHtml: string | null;
94
+ statusCode: number | null;
95
+ error: Error | null;
96
+ // --- construction and lifecycle ---
97
+ constructor(url: string, options?: NodeScraperOptions);
98
+
99
+ init(): Promise<this>; // resolves to this same instance; per NodeScraperOptions.throwOnError it rejects only when that option is true
100
+ loadHTML(html: string): this; // returns the instance, so calls can be chained; presumably bypasses the network fetch — confirm
101
+ isLoaded(): boolean;
102
+ getError(): Error | null;
103
+ getStatusCode(): number | null;
104
+ // --- document metadata; NOTE(review): null presumably means "not loaded" or "not present" — confirm ---
105
+ title(): string | null;
106
+ charset(): string | null;
107
+ viewport(): string[] | null; // the viewport value is exposed in three shapes: string[], string, and key/value object
108
+ viewport_string(): string | null;
109
+ viewport_object(): Record<string, string> | null;
110
+ canonical(): string | null;
111
+ content_type(): string | null;
112
+ csrf_token(): string | null;
113
+ author(): string | null;
114
+ description(): string | null;
115
+ image(): string | null;
116
+ lang(): string | null;
117
+ robots(): string | null;
118
+ favicon(): string | null;
119
+ keywords(): string[] | null;
120
+ keyword_string(): string | null;
121
+ meta(name: string, attr?: 'name' | 'property'): string | null; // `attr` is restricted to these two literals; its default is not visible in this declaration
122
+ // --- structured metadata ---
123
+ open_graph(prop?: string | null): OpenGraphData | string | null; // union return: presumably the full map when `prop` is omitted/null and one value when given — callers must narrow
124
+ twitter_card(prop?: string | null): TwitterCardData | string | null; // same union shape as open_graph(); callers must narrow
125
+ jsonLd(): Record<string, unknown>[] | null; // values are `unknown`, so each entry must be validated before use
126
+ // --- headings and body content ---
127
+ h1(): string[] | null;
128
+ h2(): string[] | null;
129
+ h3(): string[] | null;
130
+ h4(): string[] | null;
131
+ h5(): string[] | null;
132
+ h6(): string[] | null;
133
+ p(): string[] | null;
134
+ text(): string | null;
135
+ html(): string | null;
136
+ // --- lists ---
137
+ ul(): string[] | null;
138
+ ol(): string[] | null;
139
+ // --- images ---
140
+ images(): (string | undefined)[] | null; // individual entries may be undefined, so filter before treating them as strings
141
+ image_details(): ImageDetails[] | null;
142
+ // --- links ---
143
+ links(): string[] | null; // unlike images(), entries are typed as plain string
144
+ link_details(): LinkDetails[] | null;
145
+ // --- generic element query ---
146
+ filter(params: FilterParams): unknown; // result shape is not described by the types; callers must narrow it themselves
147
+ // --- serialization ---
148
+ toJSON(): ScraperSnapshot | null; // the name matches the hook JSON.stringify() calls on objects; may be null
149
+ // --- static helpers ---
150
+ static scrape(url: string, options?: NodeScraperOptions): Promise<NodeScraper>; // presumably constructs an instance and runs init() — confirm
151
+ static scrapeAll(urls: string[], options?: NodeScraperOptions): Promise<NodeScraper[]>; // one options object is shared by all URLs; NOTE(review): result ordering and concurrency are not visible here
152
+ }