scrapebadger 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index-qYk-iloT.d.cts → index-DnmHcsYR.d.cts} +7 -1
- package/dist/{index-qYk-iloT.d.ts → index-DnmHcsYR.d.ts} +7 -1
- package/dist/index.d.cts +161 -3
- package/dist/index.d.ts +161 -3
- package/dist/index.js +135 -22
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +135 -23
- package/dist/index.mjs.map +1 -1
- package/dist/twitter/index.d.cts +1 -1
- package/dist/twitter/index.d.ts +1 -1
- package/dist/twitter/index.js +32 -22
- package/dist/twitter/index.js.map +1 -1
- package/dist/twitter/index.mjs +32 -22
- package/dist/twitter/index.mjs.map +1 -1
- package/package.json +1 -1
|
@@ -1412,6 +1412,12 @@ declare class ListsClient {
|
|
|
1412
1412
|
declare class CommunitiesClient {
|
|
1413
1413
|
private readonly client;
|
|
1414
1414
|
constructor(client: BaseClient);
|
|
1415
|
+
/**
|
|
1416
|
+
* Parse a community member from the API response.
|
|
1417
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1418
|
+
* and nested format (user object with id, username).
|
|
1419
|
+
*/
|
|
1420
|
+
private parseCommunityMember;
|
|
1415
1421
|
/**
|
|
1416
1422
|
* Get details for a specific community.
|
|
1417
1423
|
*
|
|
@@ -1792,4 +1798,4 @@ declare class TwitterClient {
|
|
|
1792
1798
|
constructor(client: BaseClient);
|
|
1793
1799
|
}
|
|
1794
1800
|
|
|
1795
|
-
export { type ApiResponse as A, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
1801
|
+
export { type ApiResponse as A, BaseClient as B, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
@@ -1412,6 +1412,12 @@ declare class ListsClient {
|
|
|
1412
1412
|
declare class CommunitiesClient {
|
|
1413
1413
|
private readonly client;
|
|
1414
1414
|
constructor(client: BaseClient);
|
|
1415
|
+
/**
|
|
1416
|
+
* Parse a community member from the API response.
|
|
1417
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1418
|
+
* and nested format (user object with id, username).
|
|
1419
|
+
*/
|
|
1420
|
+
private parseCommunityMember;
|
|
1415
1421
|
/**
|
|
1416
1422
|
* Get details for a specific community.
|
|
1417
1423
|
*
|
|
@@ -1792,4 +1798,4 @@ declare class TwitterClient {
|
|
|
1792
1798
|
constructor(client: BaseClient);
|
|
1793
1799
|
}
|
|
1794
1800
|
|
|
1795
|
-
export { type ApiResponse as A, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
1801
|
+
export { type ApiResponse as A, BaseClient as B, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,161 @@
|
|
|
1
|
-
import { T as TwitterClient, S as ScrapeBadgerConfig } from './index-qYk-iloT.cjs';
|
|
2
|
-
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-qYk-iloT.cjs';
|
|
1
|
+
import { B as BaseClient, T as TwitterClient, S as ScrapeBadgerConfig } from './index-DnmHcsYR.cjs';
|
|
2
|
+
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-DnmHcsYR.cjs';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* TypeScript types for web scraping API responses.
|
|
6
|
+
*/
|
|
7
|
+
/** Options accepted by WebClient.scrape() and WebClient.reuseSession(); every field is optional. */
interface ScrapeOptions {
|
|
8
|
+
/** Whether to render JavaScript */
|
|
9
|
+
renderJs?: boolean;
|
|
10
|
+
/** Output format (html, markdown, text, json) */
|
|
11
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
12
|
+
/** Country code for proxy (e.g. "US") */
|
|
13
|
+
proxyCountry?: string;
|
|
14
|
+
/** Proxy type (datacenter, residential, mobile, isp) */
|
|
15
|
+
proxyType?: "datacenter" | "residential" | "mobile" | "isp";
|
|
16
|
+
/** Reuse an existing session */
|
|
17
|
+
sessionId?: string;
|
|
18
|
+
/** Force a specific engine */
|
|
19
|
+
engine?: string;
|
|
20
|
+
/** Maximum credit cost */
|
|
21
|
+
maxCost?: number;
|
|
22
|
+
/** Custom HTTP headers */
|
|
23
|
+
headers?: Record<string, string>;
|
|
24
|
+
/** CSS selector to wait for */
|
|
25
|
+
waitFor?: string;
|
|
26
|
+
/** Request timeout in seconds */
|
|
27
|
+
timeout?: number;
|
|
28
|
+
/** JavaScript actions to execute */
|
|
29
|
+
jsScenario?: Array<Record<string, unknown>>;
|
|
30
|
+
}
|
|
31
|
+
interface ScreenshotOptions {
|
|
32
|
+
/** Capture full page (not just viewport) */
|
|
33
|
+
fullPage?: boolean;
|
|
34
|
+
/** Viewport width in pixels */
|
|
35
|
+
viewportWidth?: number;
|
|
36
|
+
/** Viewport height in pixels */
|
|
37
|
+
viewportHeight?: number;
|
|
38
|
+
/** Image format (png, jpeg) */
|
|
39
|
+
imageFormat?: "png" | "jpeg";
|
|
40
|
+
/** CSS selector to wait for */
|
|
41
|
+
waitFor?: string;
|
|
42
|
+
/** Request timeout in seconds */
|
|
43
|
+
timeout?: number;
|
|
44
|
+
}
|
|
45
|
+
interface ExtractOptions {
|
|
46
|
+
/** Extraction schema (CSS/XPath selectors) */
|
|
47
|
+
schema?: Record<string, unknown>;
|
|
48
|
+
/** Whether to render JavaScript */
|
|
49
|
+
renderJs?: boolean;
|
|
50
|
+
/** CSS selector to wait for */
|
|
51
|
+
waitFor?: string;
|
|
52
|
+
/** Request timeout in seconds */
|
|
53
|
+
timeout?: number;
|
|
54
|
+
}
|
|
55
|
+
interface BatchOptions {
|
|
56
|
+
/** Whether to render JavaScript */
|
|
57
|
+
renderJs?: boolean;
|
|
58
|
+
/** Output format */
|
|
59
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
60
|
+
/** Maximum concurrent requests */
|
|
61
|
+
maxConcurrency?: number;
|
|
62
|
+
/** Force a specific engine */
|
|
63
|
+
engine?: string;
|
|
64
|
+
/** Request timeout in seconds */
|
|
65
|
+
timeout?: number;
|
|
66
|
+
}
|
|
67
|
+
interface ScrapeResult {
|
|
68
|
+
content: string;
|
|
69
|
+
status_code: number;
|
|
70
|
+
url: string;
|
|
71
|
+
engine_used?: string;
|
|
72
|
+
credits_used: number;
|
|
73
|
+
processing_time_ms?: number;
|
|
74
|
+
anti_bot_detected: boolean;
|
|
75
|
+
anti_bot_provider?: string;
|
|
76
|
+
captcha_solved: boolean;
|
|
77
|
+
session_id?: string;
|
|
78
|
+
session_reused: boolean;
|
|
79
|
+
}
|
|
80
|
+
interface ScreenshotResult {
|
|
81
|
+
image_data: string;
|
|
82
|
+
format: string;
|
|
83
|
+
url: string;
|
|
84
|
+
credits_used: number;
|
|
85
|
+
}
|
|
86
|
+
interface ExtractResult {
|
|
87
|
+
data: Record<string, unknown>;
|
|
88
|
+
url: string;
|
|
89
|
+
credits_used: number;
|
|
90
|
+
}
|
|
91
|
+
interface BatchResult {
|
|
92
|
+
results: ScrapeResult[];
|
|
93
|
+
total: number;
|
|
94
|
+
successful: number;
|
|
95
|
+
failed: number;
|
|
96
|
+
}
|
|
97
|
+
interface SessionInfo {
|
|
98
|
+
session_id: string;
|
|
99
|
+
domain: string;
|
|
100
|
+
reused: boolean;
|
|
101
|
+
fingerprint_id?: string;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Web scraping API client for ScrapeBadger SDK.
|
|
106
|
+
*/
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Client for web scraping operations.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const client = new ScrapeBadger({ apiKey: "key" });
|
|
114
|
+
*
|
|
115
|
+
* // Simple scrape
|
|
116
|
+
* const result = await client.web.scrape("https://example.com");
|
|
117
|
+
* console.log(result.content);
|
|
118
|
+
*
|
|
119
|
+
* // Screenshot
|
|
120
|
+
* const screenshot = await client.web.screenshot("https://example.com");
|
|
121
|
+
*
|
|
122
|
+
* // Extract structured data
|
|
123
|
+
* const data = await client.web.extract("https://example.com", {
|
|
124
|
+
* schema: { title: "css:h1" }
|
|
125
|
+
* });
|
|
126
|
+
*
|
|
127
|
+
* // Batch scrape
|
|
128
|
+
* const batch = await client.web.batch(["https://a.com", "https://b.com"]);
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
declare class WebClient {
|
|
132
|
+
private readonly client;
|
|
133
|
+
constructor(client: BaseClient);
|
|
134
|
+
/**
|
|
135
|
+
* Scrape a web page.
|
|
136
|
+
*/
|
|
137
|
+
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
138
|
+
/**
|
|
139
|
+
* Take a screenshot of a web page.
|
|
140
|
+
*/
|
|
141
|
+
screenshot(url: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
|
|
142
|
+
/**
|
|
143
|
+
* Extract structured data from a web page.
|
|
144
|
+
*/
|
|
145
|
+
extract(url: string, options?: ExtractOptions): Promise<ExtractResult>;
|
|
146
|
+
/**
|
|
147
|
+
* Scrape multiple URLs in a batch.
|
|
148
|
+
*/
|
|
149
|
+
batch(urls: string[], options?: BatchOptions): Promise<BatchResult>;
|
|
150
|
+
/**
|
|
151
|
+
* Create a new scraping session for a domain.
|
|
152
|
+
*/
|
|
153
|
+
createSession(domain: string, persist?: boolean): Promise<SessionInfo>;
|
|
154
|
+
/**
|
|
155
|
+
* Scrape using an existing session.
|
|
156
|
+
*/
|
|
157
|
+
reuseSession(url: string, sessionId: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
158
|
+
}
|
|
3
159
|
|
|
4
160
|
/**
|
|
5
161
|
* Main ScrapeBadger client.
|
|
@@ -43,6 +199,8 @@ declare class ScrapeBadger {
|
|
|
43
199
|
private readonly baseClient;
|
|
44
200
|
/** Twitter API client */
|
|
45
201
|
readonly twitter: TwitterClient;
|
|
202
|
+
/** Web scraping API client */
|
|
203
|
+
readonly web: WebClient;
|
|
46
204
|
/**
|
|
47
205
|
* Create a new ScrapeBadger client.
|
|
48
206
|
*
|
|
@@ -153,4 +311,4 @@ declare class AccountRestrictedError extends ScrapeBadgerError {
|
|
|
153
311
|
constructor(message?: string, reason?: string);
|
|
154
312
|
}
|
|
155
313
|
|
|
156
|
-
export { AccountRestrictedError, AuthenticationError, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, ServerError, TimeoutError, TwitterClient, ValidationError };
|
|
314
|
+
export { AccountRestrictedError, AuthenticationError, type BatchOptions, type BatchResult, type ExtractOptions, type ExtractResult, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, type ScrapeOptions, type ScrapeResult, type ScreenshotOptions, type ScreenshotResult, ServerError, type SessionInfo, TimeoutError, TwitterClient, ValidationError, WebClient };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,161 @@
|
|
|
1
|
-
import { T as TwitterClient, S as ScrapeBadgerConfig } from './index-qYk-iloT.js';
|
|
2
|
-
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-qYk-iloT.js';
|
|
1
|
+
import { B as BaseClient, T as TwitterClient, S as ScrapeBadgerConfig } from './index-DnmHcsYR.js';
|
|
2
|
+
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-DnmHcsYR.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* TypeScript types for web scraping API responses.
|
|
6
|
+
*/
|
|
7
|
+
/** Options accepted by WebClient.scrape() and WebClient.reuseSession(); every field is optional. */
interface ScrapeOptions {
|
|
8
|
+
/** Whether to render JavaScript */
|
|
9
|
+
renderJs?: boolean;
|
|
10
|
+
/** Output format (html, markdown, text, json) */
|
|
11
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
12
|
+
/** Country code for proxy (e.g. "US") */
|
|
13
|
+
proxyCountry?: string;
|
|
14
|
+
/** Proxy type (datacenter, residential, mobile, isp) */
|
|
15
|
+
proxyType?: "datacenter" | "residential" | "mobile" | "isp";
|
|
16
|
+
/** Reuse an existing session */
|
|
17
|
+
sessionId?: string;
|
|
18
|
+
/** Force a specific engine */
|
|
19
|
+
engine?: string;
|
|
20
|
+
/** Maximum credit cost */
|
|
21
|
+
maxCost?: number;
|
|
22
|
+
/** Custom HTTP headers */
|
|
23
|
+
headers?: Record<string, string>;
|
|
24
|
+
/** CSS selector to wait for */
|
|
25
|
+
waitFor?: string;
|
|
26
|
+
/** Request timeout in seconds */
|
|
27
|
+
timeout?: number;
|
|
28
|
+
/** JavaScript actions to execute */
|
|
29
|
+
jsScenario?: Array<Record<string, unknown>>;
|
|
30
|
+
}
|
|
31
|
+
interface ScreenshotOptions {
|
|
32
|
+
/** Capture full page (not just viewport) */
|
|
33
|
+
fullPage?: boolean;
|
|
34
|
+
/** Viewport width in pixels */
|
|
35
|
+
viewportWidth?: number;
|
|
36
|
+
/** Viewport height in pixels */
|
|
37
|
+
viewportHeight?: number;
|
|
38
|
+
/** Image format (png, jpeg) */
|
|
39
|
+
imageFormat?: "png" | "jpeg";
|
|
40
|
+
/** CSS selector to wait for */
|
|
41
|
+
waitFor?: string;
|
|
42
|
+
/** Request timeout in seconds */
|
|
43
|
+
timeout?: number;
|
|
44
|
+
}
|
|
45
|
+
interface ExtractOptions {
|
|
46
|
+
/** Extraction schema (CSS/XPath selectors) */
|
|
47
|
+
schema?: Record<string, unknown>;
|
|
48
|
+
/** Whether to render JavaScript */
|
|
49
|
+
renderJs?: boolean;
|
|
50
|
+
/** CSS selector to wait for */
|
|
51
|
+
waitFor?: string;
|
|
52
|
+
/** Request timeout in seconds */
|
|
53
|
+
timeout?: number;
|
|
54
|
+
}
|
|
55
|
+
interface BatchOptions {
|
|
56
|
+
/** Whether to render JavaScript */
|
|
57
|
+
renderJs?: boolean;
|
|
58
|
+
/** Output format */
|
|
59
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
60
|
+
/** Maximum concurrent requests */
|
|
61
|
+
maxConcurrency?: number;
|
|
62
|
+
/** Force a specific engine */
|
|
63
|
+
engine?: string;
|
|
64
|
+
/** Request timeout in seconds */
|
|
65
|
+
timeout?: number;
|
|
66
|
+
}
|
|
67
|
+
interface ScrapeResult {
|
|
68
|
+
content: string;
|
|
69
|
+
status_code: number;
|
|
70
|
+
url: string;
|
|
71
|
+
engine_used?: string;
|
|
72
|
+
credits_used: number;
|
|
73
|
+
processing_time_ms?: number;
|
|
74
|
+
anti_bot_detected: boolean;
|
|
75
|
+
anti_bot_provider?: string;
|
|
76
|
+
captcha_solved: boolean;
|
|
77
|
+
session_id?: string;
|
|
78
|
+
session_reused: boolean;
|
|
79
|
+
}
|
|
80
|
+
interface ScreenshotResult {
|
|
81
|
+
image_data: string;
|
|
82
|
+
format: string;
|
|
83
|
+
url: string;
|
|
84
|
+
credits_used: number;
|
|
85
|
+
}
|
|
86
|
+
interface ExtractResult {
|
|
87
|
+
data: Record<string, unknown>;
|
|
88
|
+
url: string;
|
|
89
|
+
credits_used: number;
|
|
90
|
+
}
|
|
91
|
+
interface BatchResult {
|
|
92
|
+
results: ScrapeResult[];
|
|
93
|
+
total: number;
|
|
94
|
+
successful: number;
|
|
95
|
+
failed: number;
|
|
96
|
+
}
|
|
97
|
+
interface SessionInfo {
|
|
98
|
+
session_id: string;
|
|
99
|
+
domain: string;
|
|
100
|
+
reused: boolean;
|
|
101
|
+
fingerprint_id?: string;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Web scraping API client for ScrapeBadger SDK.
|
|
106
|
+
*/
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Client for web scraping operations.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const client = new ScrapeBadger({ apiKey: "key" });
|
|
114
|
+
*
|
|
115
|
+
* // Simple scrape
|
|
116
|
+
* const result = await client.web.scrape("https://example.com");
|
|
117
|
+
* console.log(result.content);
|
|
118
|
+
*
|
|
119
|
+
* // Screenshot
|
|
120
|
+
* const screenshot = await client.web.screenshot("https://example.com");
|
|
121
|
+
*
|
|
122
|
+
* // Extract structured data
|
|
123
|
+
* const data = await client.web.extract("https://example.com", {
|
|
124
|
+
* schema: { title: "css:h1" }
|
|
125
|
+
* });
|
|
126
|
+
*
|
|
127
|
+
* // Batch scrape
|
|
128
|
+
* const batch = await client.web.batch(["https://a.com", "https://b.com"]);
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
declare class WebClient {
|
|
132
|
+
private readonly client;
|
|
133
|
+
constructor(client: BaseClient);
|
|
134
|
+
/**
|
|
135
|
+
* Scrape a web page.
|
|
136
|
+
*/
|
|
137
|
+
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
138
|
+
/**
|
|
139
|
+
* Take a screenshot of a web page.
|
|
140
|
+
*/
|
|
141
|
+
screenshot(url: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
|
|
142
|
+
/**
|
|
143
|
+
* Extract structured data from a web page.
|
|
144
|
+
*/
|
|
145
|
+
extract(url: string, options?: ExtractOptions): Promise<ExtractResult>;
|
|
146
|
+
/**
|
|
147
|
+
* Scrape multiple URLs in a batch.
|
|
148
|
+
*/
|
|
149
|
+
batch(urls: string[], options?: BatchOptions): Promise<BatchResult>;
|
|
150
|
+
/**
|
|
151
|
+
* Create a new scraping session for a domain.
|
|
152
|
+
*/
|
|
153
|
+
createSession(domain: string, persist?: boolean): Promise<SessionInfo>;
|
|
154
|
+
/**
|
|
155
|
+
* Scrape using an existing session.
|
|
156
|
+
*/
|
|
157
|
+
reuseSession(url: string, sessionId: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
158
|
+
}
|
|
3
159
|
|
|
4
160
|
/**
|
|
5
161
|
* Main ScrapeBadger client.
|
|
@@ -43,6 +199,8 @@ declare class ScrapeBadger {
|
|
|
43
199
|
private readonly baseClient;
|
|
44
200
|
/** Twitter API client */
|
|
45
201
|
readonly twitter: TwitterClient;
|
|
202
|
+
/** Web scraping API client */
|
|
203
|
+
readonly web: WebClient;
|
|
46
204
|
/**
|
|
47
205
|
* Create a new ScrapeBadger client.
|
|
48
206
|
*
|
|
@@ -153,4 +311,4 @@ declare class AccountRestrictedError extends ScrapeBadgerError {
|
|
|
153
311
|
constructor(message?: string, reason?: string);
|
|
154
312
|
}
|
|
155
313
|
|
|
156
|
-
export { AccountRestrictedError, AuthenticationError, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, ServerError, TimeoutError, TwitterClient, ValidationError };
|
|
314
|
+
export { AccountRestrictedError, AuthenticationError, type BatchOptions, type BatchResult, type ExtractOptions, type ExtractResult, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, type ScrapeOptions, type ScrapeResult, type ScreenshotOptions, type ScreenshotResult, ServerError, type SessionInfo, TimeoutError, TwitterClient, ValidationError, WebClient };
|
package/dist/index.js
CHANGED
|
@@ -1061,6 +1061,32 @@ var CommunitiesClient = class {
|
|
|
1061
1061
|
constructor(client) {
|
|
1062
1062
|
this.client = client;
|
|
1063
1063
|
}
|
|
1064
|
+
/**
|
|
1065
|
+
* Parse a community member from the API response.
|
|
1066
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1067
|
+
* and nested format (user object with id, username).
|
|
1068
|
+
*/
|
|
1069
|
+
/**
 * Normalize one community-member entry into the nested `{ user, role, joined_at }` shape.
 *
 * @param item One entry of the response `data` array, in either the flat or the nested shape.
 * @returns The item itself when it already has a truthy `user`; otherwise a new
 *          object assembled from the item's top-level fields.
 */
parseCommunityMember(item) {
|
|
1070
|
+
// Nested format: the entry already carries a truthy `user`, so it is passed through unchanged.
if ("user" in item && item.user) {
|
|
1071
|
+
return item;
|
|
1072
|
+
}
|
|
1073
|
+
// Flat format: lift the top-level user fields into a nested `user` object.
return {
|
|
1074
|
+
user: {
|
|
1075
|
+
// Missing identifiers and names fall back to an empty string.
id: item.user_id ?? "",
|
|
1076
|
+
username: item.username ?? "",
|
|
1077
|
+
name: item.name ?? "",
|
|
1078
|
+
profile_image_url: item.profile_image_url,
|
|
1079
|
+
verified: item.verified ?? false,
|
|
1080
|
+
is_blue_verified: item.is_blue_verified,
|
|
1081
|
+
// The four counters are always set to 0; nothing is read from the item for them.
// NOTE(review): if the flat payload ever includes these counts they are discarded here - confirm against the API.
followers_count: 0,
|
|
1082
|
+
following_count: 0,
|
|
1083
|
+
tweet_count: 0,
|
|
1084
|
+
listed_count: 0
|
|
1085
|
+
},
|
|
1086
|
+
// NOTE(review): the moderators mapping removed in this same diff hard-coded role "moderator";
// here the role comes only from item.role - confirm the moderators endpoint always returns it.
role: item.role,
|
|
1087
|
+
joined_at: item.joined_at
|
|
1088
|
+
};
|
|
1089
|
+
}
|
|
1064
1090
|
/**
|
|
1065
1091
|
* Get details for a specific community.
|
|
1066
1092
|
*
|
|
@@ -1149,17 +1175,9 @@ var CommunitiesClient = class {
|
|
|
1149
1175
|
const response = await this.client.request(`/v1/twitter/communities/${communityId}/members`, {
|
|
1150
1176
|
params: { count: options.count ?? 20, cursor: options.cursor }
|
|
1151
1177
|
});
|
|
1152
|
-
const data = (response.data ?? []).map(
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
}
|
|
1156
|
-
const userItem = item;
|
|
1157
|
-
return {
|
|
1158
|
-
user: item,
|
|
1159
|
-
role: userItem.role,
|
|
1160
|
-
joined_at: userItem.joined_at
|
|
1161
|
-
};
|
|
1162
|
-
});
|
|
1178
|
+
const data = (response.data ?? []).map(
|
|
1179
|
+
(item) => this.parseCommunityMember(item)
|
|
1180
|
+
);
|
|
1163
1181
|
return createPaginatedResponse(data, response.next_cursor);
|
|
1164
1182
|
}
|
|
1165
1183
|
/**
|
|
@@ -1173,17 +1191,9 @@ var CommunitiesClient = class {
|
|
|
1173
1191
|
const response = await this.client.request(`/v1/twitter/communities/${communityId}/moderators`, {
|
|
1174
1192
|
params: { count: options.count ?? 20, cursor: options.cursor }
|
|
1175
1193
|
});
|
|
1176
|
-
const data = (response.data ?? []).map(
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
}
|
|
1180
|
-
const userItem = item;
|
|
1181
|
-
return {
|
|
1182
|
-
user: item,
|
|
1183
|
-
role: "moderator",
|
|
1184
|
-
joined_at: userItem.joined_at
|
|
1185
|
-
};
|
|
1186
|
-
});
|
|
1194
|
+
const data = (response.data ?? []).map(
|
|
1195
|
+
(item) => this.parseCommunityMember(item)
|
|
1196
|
+
);
|
|
1187
1197
|
return createPaginatedResponse(data, response.next_cursor);
|
|
1188
1198
|
}
|
|
1189
1199
|
/**
|
|
@@ -1429,11 +1439,112 @@ var TwitterClient = class {
|
|
|
1429
1439
|
}
|
|
1430
1440
|
};
|
|
1431
1441
|
|
|
1442
|
+
// src/web/client.ts
|
|
1443
|
+
/**
 * Option tables for the web endpoints. Each row is
 * [camelCase option name, snake_case request field, forwarding rule]:
 *
 * - "flag":  boolean switch; forwarded (coerced to a boolean) when it is `false` or truthy,
 *            so an explicit `false` reaches the API instead of being dropped.
 * - "value": numeric setting; forwarded whenever it is not `undefined`, including `0` and
 *            values that happen to equal a server-side default.
 * - "text":  string / enum / object; forwarded only when truthy, so "" still means "not set".
 */
const WEB_SCRAPE_FIELDS = [
  ["renderJs", "render_js", "flag"],
  ["outputFormat", "output_format", "text"],
  ["proxyCountry", "proxy_country", "text"],
  ["proxyType", "proxy_type", "text"],
  ["sessionId", "session_id", "text"],
  ["engine", "engine", "text"],
  ["maxCost", "max_cost", "value"],
  ["headers", "headers", "text"],
  ["waitFor", "wait_for", "text"],
  ["timeout", "timeout", "value"],
  ["jsScenario", "js_scenario", "text"]
];
const WEB_SCREENSHOT_FIELDS = [
  ["fullPage", "full_page", "flag"],
  ["viewportWidth", "viewport_width", "value"],
  ["viewportHeight", "viewport_height", "value"],
  ["imageFormat", "image_format", "text"],
  ["waitFor", "wait_for", "text"],
  ["timeout", "timeout", "value"]
];
const WEB_EXTRACT_FIELDS = [
  ["schema", "extraction_schema", "text"],
  ["renderJs", "render_js", "flag"],
  ["waitFor", "wait_for", "text"],
  ["timeout", "timeout", "value"]
];
const WEB_BATCH_FIELDS = [
  ["renderJs", "render_js", "flag"],
  ["outputFormat", "output_format", "text"],
  ["maxConcurrency", "max_concurrency", "value"],
  ["engine", "engine", "text"],
  ["timeout", "timeout", "value"]
];

/**
 * Build a request body from the mandatory fields plus the supplied options.
 *
 * @param required Fields that are always sent (e.g. `{ url }`).
 * @param options  Caller-supplied camelCase options.
 * @param fields   One of the WEB_*_FIELDS tables above.
 * @returns A new plain object; neither `required` nor `options` is mutated.
 */
function buildWebRequestBody(required, options, fields) {
  const body = { ...required };
  for (const [optionName, wireName, rule] of fields) {
    const value = options[optionName];
    if (rule === "flag") {
      if (value === false || value) body[wireName] = Boolean(value);
    } else if (rule === "value") {
      if (value !== void 0) body[wireName] = value;
    } else if (value) {
      body[wireName] = value;
    }
  }
  return body;
}

/**
 * Client for the web scraping endpoints (`/v1/web/*`).
 *
 * Every method issues a single POST through the injected base client and
 * returns whatever `client.request` resolves to.
 */
var WebClient = class {
  client;
  /**
   * @param client Object exposing `request(path, init)`; used for every call.
   */
  constructor(client) {
    this.client = client;
  }
  /**
   * Scrape a web page.
   *
   * @param url     Page to scrape.
   * @param options Optional scrape settings; only supplied settings are sent.
   * @returns The promise returned by `client.request` for `/v1/web/scrape`.
   */
  async scrape(url, options = {}) {
    return this.client.request("/v1/web/scrape", {
      method: "POST",
      body: buildWebRequestBody({ url }, options, WEB_SCRAPE_FIELDS)
    });
  }
  /**
   * Take a screenshot of a web page.
   *
   * @param url     Page to capture.
   * @param options Optional capture settings; explicit viewport sizes and
   *                image formats are always forwarded as given.
   * @returns The promise returned by `client.request` for `/v1/web/screenshot`.
   */
  async screenshot(url, options = {}) {
    return this.client.request("/v1/web/screenshot", {
      method: "POST",
      body: buildWebRequestBody({ url }, options, WEB_SCREENSHOT_FIELDS)
    });
  }
  /**
   * Extract structured data from a web page.
   *
   * @param url     Page to extract from.
   * @param options Optional settings; `schema` is sent as `extraction_schema`.
   * @returns The promise returned by `client.request` for `/v1/web/extract`.
   */
  async extract(url, options = {}) {
    return this.client.request("/v1/web/extract", {
      method: "POST",
      body: buildWebRequestBody({ url }, options, WEB_EXTRACT_FIELDS)
    });
  }
  /**
   * Scrape multiple URLs in a batch.
   *
   * @param urls    URLs to scrape; sent as-is in the `urls` field.
   * @param options Optional batch settings.
   * @returns The promise returned by `client.request` for `/v1/web/batch`.
   */
  async batch(urls, options = {}) {
    return this.client.request("/v1/web/batch", {
      method: "POST",
      body: buildWebRequestBody({ urls }, options, WEB_BATCH_FIELDS)
    });
  }
  /**
   * Create a new scraping session for a domain.
   *
   * @param domain  Domain the session is created for.
   * @param persist Sent as `persist_session`; defaults to `true`.
   * @returns The promise returned by `client.request` for `/v1/web/sessions`.
   */
  async createSession(domain, persist = true) {
    return this.client.request("/v1/web/sessions", {
      method: "POST",
      body: {
        domain,
        new_session: true,
        persist_session: persist
      }
    });
  }
  /**
   * Scrape using an existing session.
   *
   * @param url       Page to scrape.
   * @param sessionId Session to reuse; overrides any `sessionId` inside `options`.
   * @param options   Optional scrape settings, as for `scrape`.
   * @returns The promise returned by `scrape`.
   */
  async reuseSession(url, sessionId, options = {}) {
    return this.scrape(url, { ...options, sessionId });
  }
};
|
|
1540
|
+
|
|
1432
1541
|
// src/client.ts
|
|
1433
1542
|
var ScrapeBadger = class {
|
|
1434
1543
|
baseClient;
|
|
1435
1544
|
/** Twitter API client */
|
|
1436
1545
|
twitter;
|
|
1546
|
+
/** Web scraping API client */
|
|
1547
|
+
web;
|
|
1437
1548
|
/**
|
|
1438
1549
|
* Create a new ScrapeBadger client.
|
|
1439
1550
|
*
|
|
@@ -1469,6 +1580,7 @@ var ScrapeBadger = class {
|
|
|
1469
1580
|
const resolvedConfig = resolveConfig({ ...config, apiKey });
|
|
1470
1581
|
this.baseClient = new BaseClient(resolvedConfig);
|
|
1471
1582
|
this.twitter = new TwitterClient(this.baseClient);
|
|
1583
|
+
this.web = new WebClient(this.baseClient);
|
|
1472
1584
|
}
|
|
1473
1585
|
};
|
|
1474
1586
|
|
|
@@ -1489,6 +1601,7 @@ exports.TweetsClient = TweetsClient;
|
|
|
1489
1601
|
exports.TwitterClient = TwitterClient;
|
|
1490
1602
|
exports.UsersClient = UsersClient;
|
|
1491
1603
|
exports.ValidationError = ValidationError;
|
|
1604
|
+
exports.WebClient = WebClient;
|
|
1492
1605
|
exports.collectAll = collectAll;
|
|
1493
1606
|
//# sourceMappingURL=index.js.map
|
|
1494
1607
|
//# sourceMappingURL=index.js.map
|