scrapebadger 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index-qYk-iloT.d.cts → index-9Mu-b1MB.d.cts} +11 -5
- package/dist/{index-qYk-iloT.d.ts → index-9Mu-b1MB.d.ts} +11 -5
- package/dist/index.d.cts +161 -3
- package/dist/index.d.ts +161 -3
- package/dist/index.js +131 -22
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +131 -23
- package/dist/index.mjs.map +1 -1
- package/dist/twitter/index.d.cts +1 -1
- package/dist/twitter/index.d.ts +1 -1
- package/dist/twitter/index.js +28 -22
- package/dist/twitter/index.js.map +1 -1
- package/dist/twitter/index.mjs +28 -22
- package/dist/twitter/index.mjs.map +1 -1
- package/package.json +1 -1
|
@@ -351,13 +351,13 @@ interface User {
|
|
|
351
351
|
/** Banner image URL */
|
|
352
352
|
profile_banner_url?: string;
|
|
353
353
|
/** Number of followers */
|
|
354
|
-
followers_count: number;
|
|
354
|
+
followers_count?: number;
|
|
355
355
|
/** Number of accounts followed */
|
|
356
|
-
following_count: number;
|
|
356
|
+
following_count?: number;
|
|
357
357
|
/** Total tweets posted */
|
|
358
|
-
tweet_count: number;
|
|
358
|
+
tweet_count?: number;
|
|
359
359
|
/** Number of lists the user is on */
|
|
360
|
-
listed_count: number;
|
|
360
|
+
listed_count?: number;
|
|
361
361
|
/** Number of tweets liked */
|
|
362
362
|
favourites_count?: number;
|
|
363
363
|
/** Number of media posts */
|
|
@@ -1412,6 +1412,12 @@ declare class ListsClient {
|
|
|
1412
1412
|
declare class CommunitiesClient {
|
|
1413
1413
|
private readonly client;
|
|
1414
1414
|
constructor(client: BaseClient);
|
|
1415
|
+
/**
|
|
1416
|
+
* Parse a community member from the API response.
|
|
1417
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1418
|
+
* and nested format (user object with id, username).
|
|
1419
|
+
*/
|
|
1420
|
+
private parseCommunityMember;
|
|
1415
1421
|
/**
|
|
1416
1422
|
* Get details for a specific community.
|
|
1417
1423
|
*
|
|
@@ -1792,4 +1798,4 @@ declare class TwitterClient {
|
|
|
1792
1798
|
constructor(client: BaseClient);
|
|
1793
1799
|
}
|
|
1794
1800
|
|
|
1795
|
-
export { type ApiResponse as A, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
1801
|
+
export { type ApiResponse as A, BaseClient as B, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
@@ -351,13 +351,13 @@ interface User {
|
|
|
351
351
|
/** Banner image URL */
|
|
352
352
|
profile_banner_url?: string;
|
|
353
353
|
/** Number of followers */
|
|
354
|
-
followers_count: number;
|
|
354
|
+
followers_count?: number;
|
|
355
355
|
/** Number of accounts followed */
|
|
356
|
-
following_count: number;
|
|
356
|
+
following_count?: number;
|
|
357
357
|
/** Total tweets posted */
|
|
358
|
-
tweet_count: number;
|
|
358
|
+
tweet_count?: number;
|
|
359
359
|
/** Number of lists the user is on */
|
|
360
|
-
listed_count: number;
|
|
360
|
+
listed_count?: number;
|
|
361
361
|
/** Number of tweets liked */
|
|
362
362
|
favourites_count?: number;
|
|
363
363
|
/** Number of media posts */
|
|
@@ -1412,6 +1412,12 @@ declare class ListsClient {
|
|
|
1412
1412
|
declare class CommunitiesClient {
|
|
1413
1413
|
private readonly client;
|
|
1414
1414
|
constructor(client: BaseClient);
|
|
1415
|
+
/**
|
|
1416
|
+
* Parse a community member from the API response.
|
|
1417
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1418
|
+
* and nested format (user object with id, username).
|
|
1419
|
+
*/
|
|
1420
|
+
private parseCommunityMember;
|
|
1415
1421
|
/**
|
|
1416
1422
|
* Get details for a specific community.
|
|
1417
1423
|
*
|
|
@@ -1792,4 +1798,4 @@ declare class TwitterClient {
|
|
|
1792
1798
|
constructor(client: BaseClient);
|
|
1793
1799
|
}
|
|
1794
1800
|
|
|
1795
|
-
export { type ApiResponse as A, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
|
1801
|
+
export { type ApiResponse as A, BaseClient as B, CommunitiesClient as C, GeoClient as G, type Hashtag as H, type IteratorOptions as I, ListsClient as L, type Media as M, type PaginatedResponse as P, type QueryType as Q, type ResolvedConfig as R, type ScrapeBadgerConfig as S, TwitterClient as T, UsersClient as U, type PaginationOptions as a, TweetsClient as b, collectAll as c, TrendsClient as d, type GeoSearchOptions as e, type TrendCategory as f, type CommunityTweetType as g, type PollOption as h, type Poll as i, type Url as j, type UserMention as k, type TweetPlace as l, type Tweet as m, type User as n, type UserAbout as o, type UserIds as p, type List as q, type CommunityBanner as r, type CommunityRule as s, type Community as t, type CommunityMember as u, type Trend as v, type Location as w, type PlaceTrends as x, type Place as y, type ListResponse as z };
|
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,161 @@
|
|
|
1
|
-
import { T as TwitterClient, S as ScrapeBadgerConfig } from './index-qYk-iloT.cjs';
|
|
2
|
-
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-qYk-iloT.cjs';
|
|
1
|
+
import { B as BaseClient, T as TwitterClient, S as ScrapeBadgerConfig } from './index-9Mu-b1MB.cjs';
|
|
2
|
+
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-9Mu-b1MB.cjs';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* TypeScript types for web scraping API responses.
|
|
6
|
+
*/
|
|
7
|
+
interface ScrapeOptions {
|
|
8
|
+
/** Whether to render JavaScript */
|
|
9
|
+
renderJs?: boolean;
|
|
10
|
+
/** Output format (html, markdown, text, json) */
|
|
11
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
12
|
+
/** Country code for proxy (e.g. "US") */
|
|
13
|
+
proxyCountry?: string;
|
|
14
|
+
/** Proxy type (datacenter, residential) */
|
|
15
|
+
proxyType?: "datacenter" | "residential" | "mobile" | "isp";
|
|
16
|
+
/** Reuse an existing session */
|
|
17
|
+
sessionId?: string;
|
|
18
|
+
/** Force a specific engine */
|
|
19
|
+
engine?: string;
|
|
20
|
+
/** Maximum credit cost */
|
|
21
|
+
maxCost?: number;
|
|
22
|
+
/** Custom HTTP headers */
|
|
23
|
+
headers?: Record<string, string>;
|
|
24
|
+
/** CSS selector to wait for */
|
|
25
|
+
waitFor?: string;
|
|
26
|
+
/** Request timeout in seconds */
|
|
27
|
+
timeout?: number;
|
|
28
|
+
/** JavaScript actions to execute */
|
|
29
|
+
jsScenario?: Array<Record<string, unknown>>;
|
|
30
|
+
}
|
|
31
|
+
interface ScreenshotOptions {
|
|
32
|
+
/** Capture full page (not just viewport) */
|
|
33
|
+
fullPage?: boolean;
|
|
34
|
+
/** Viewport width in pixels */
|
|
35
|
+
viewportWidth?: number;
|
|
36
|
+
/** Viewport height in pixels */
|
|
37
|
+
viewportHeight?: number;
|
|
38
|
+
/** Image format (png, jpeg) */
|
|
39
|
+
imageFormat?: "png" | "jpeg";
|
|
40
|
+
/** CSS selector to wait for */
|
|
41
|
+
waitFor?: string;
|
|
42
|
+
/** Request timeout in seconds */
|
|
43
|
+
timeout?: number;
|
|
44
|
+
}
|
|
45
|
+
interface ExtractOptions {
|
|
46
|
+
/** Extraction schema (CSS/XPath selectors) */
|
|
47
|
+
schema?: Record<string, unknown>;
|
|
48
|
+
/** Whether to render JavaScript */
|
|
49
|
+
renderJs?: boolean;
|
|
50
|
+
/** CSS selector to wait for */
|
|
51
|
+
waitFor?: string;
|
|
52
|
+
/** Request timeout in seconds */
|
|
53
|
+
timeout?: number;
|
|
54
|
+
}
|
|
55
|
+
interface BatchOptions {
|
|
56
|
+
/** Whether to render JavaScript */
|
|
57
|
+
renderJs?: boolean;
|
|
58
|
+
/** Output format */
|
|
59
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
60
|
+
/** Maximum concurrent requests */
|
|
61
|
+
maxConcurrency?: number;
|
|
62
|
+
/** Force a specific engine */
|
|
63
|
+
engine?: string;
|
|
64
|
+
/** Request timeout in seconds */
|
|
65
|
+
timeout?: number;
|
|
66
|
+
}
|
|
67
|
+
interface ScrapeResult {
|
|
68
|
+
content: string;
|
|
69
|
+
status_code: number;
|
|
70
|
+
url: string;
|
|
71
|
+
engine_used?: string;
|
|
72
|
+
credits_used: number;
|
|
73
|
+
processing_time_ms?: number;
|
|
74
|
+
anti_bot_detected: boolean;
|
|
75
|
+
anti_bot_provider?: string;
|
|
76
|
+
captcha_solved: boolean;
|
|
77
|
+
session_id?: string;
|
|
78
|
+
session_reused: boolean;
|
|
79
|
+
}
|
|
80
|
+
interface ScreenshotResult {
|
|
81
|
+
image_data: string;
|
|
82
|
+
format: string;
|
|
83
|
+
url: string;
|
|
84
|
+
credits_used: number;
|
|
85
|
+
}
|
|
86
|
+
interface ExtractResult {
|
|
87
|
+
data: Record<string, unknown>;
|
|
88
|
+
url: string;
|
|
89
|
+
credits_used: number;
|
|
90
|
+
}
|
|
91
|
+
interface BatchResult {
|
|
92
|
+
results: ScrapeResult[];
|
|
93
|
+
total: number;
|
|
94
|
+
successful: number;
|
|
95
|
+
failed: number;
|
|
96
|
+
}
|
|
97
|
+
interface SessionInfo {
|
|
98
|
+
session_id: string;
|
|
99
|
+
domain: string;
|
|
100
|
+
reused: boolean;
|
|
101
|
+
fingerprint_id?: string;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Web scraping API client for ScrapeBadger SDK.
|
|
106
|
+
*/
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Client for web scraping operations.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const client = new ScrapeBadger({ apiKey: "key" });
|
|
114
|
+
*
|
|
115
|
+
* // Simple scrape
|
|
116
|
+
* const result = await client.web.scrape("https://example.com");
|
|
117
|
+
* console.log(result.content);
|
|
118
|
+
*
|
|
119
|
+
* // Screenshot
|
|
120
|
+
* const screenshot = await client.web.screenshot("https://example.com");
|
|
121
|
+
*
|
|
122
|
+
* // Extract structured data
|
|
123
|
+
* const data = await client.web.extract("https://example.com", {
|
|
124
|
+
* schema: { title: "css:h1" }
|
|
125
|
+
* });
|
|
126
|
+
*
|
|
127
|
+
* // Batch scrape
|
|
128
|
+
* const batch = await client.web.batch(["https://a.com", "https://b.com"]);
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
declare class WebClient {
|
|
132
|
+
private readonly client;
|
|
133
|
+
constructor(client: BaseClient);
|
|
134
|
+
/**
|
|
135
|
+
* Scrape a web page.
|
|
136
|
+
*/
|
|
137
|
+
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
138
|
+
/**
|
|
139
|
+
* Take a screenshot of a web page.
|
|
140
|
+
*/
|
|
141
|
+
screenshot(url: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
|
|
142
|
+
/**
|
|
143
|
+
* Extract structured data from a web page.
|
|
144
|
+
*/
|
|
145
|
+
extract(url: string, options?: ExtractOptions): Promise<ExtractResult>;
|
|
146
|
+
/**
|
|
147
|
+
* Scrape multiple URLs in a batch.
|
|
148
|
+
*/
|
|
149
|
+
batch(urls: string[], options?: BatchOptions): Promise<BatchResult>;
|
|
150
|
+
/**
|
|
151
|
+
* Create a new scraping session for a domain.
|
|
152
|
+
*/
|
|
153
|
+
createSession(domain: string, persist?: boolean): Promise<SessionInfo>;
|
|
154
|
+
/**
|
|
155
|
+
* Scrape using an existing session.
|
|
156
|
+
*/
|
|
157
|
+
reuseSession(url: string, sessionId: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
158
|
+
}
|
|
3
159
|
|
|
4
160
|
/**
|
|
5
161
|
* Main ScrapeBadger client.
|
|
@@ -43,6 +199,8 @@ declare class ScrapeBadger {
|
|
|
43
199
|
private readonly baseClient;
|
|
44
200
|
/** Twitter API client */
|
|
45
201
|
readonly twitter: TwitterClient;
|
|
202
|
+
/** Web scraping API client */
|
|
203
|
+
readonly web: WebClient;
|
|
46
204
|
/**
|
|
47
205
|
* Create a new ScrapeBadger client.
|
|
48
206
|
*
|
|
@@ -153,4 +311,4 @@ declare class AccountRestrictedError extends ScrapeBadgerError {
|
|
|
153
311
|
constructor(message?: string, reason?: string);
|
|
154
312
|
}
|
|
155
313
|
|
|
156
|
-
export { AccountRestrictedError, AuthenticationError, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, ServerError, TimeoutError, TwitterClient, ValidationError };
|
|
314
|
+
export { AccountRestrictedError, AuthenticationError, type BatchOptions, type BatchResult, type ExtractOptions, type ExtractResult, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, type ScrapeOptions, type ScrapeResult, type ScreenshotOptions, type ScreenshotResult, ServerError, type SessionInfo, TimeoutError, TwitterClient, ValidationError, WebClient };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,161 @@
|
|
|
1
|
-
import { T as TwitterClient, S as ScrapeBadgerConfig } from './index-qYk-iloT.js';
|
|
2
|
-
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-qYk-iloT.js';
|
|
1
|
+
import { B as BaseClient, T as TwitterClient, S as ScrapeBadgerConfig } from './index-9Mu-b1MB.js';
|
|
2
|
+
export { A as ApiResponse, C as CommunitiesClient, t as Community, r as CommunityBanner, u as CommunityMember, s as CommunityRule, g as CommunityTweetType, G as GeoClient, e as GeoSearchOptions, H as Hashtag, I as IteratorOptions, q as List, z as ListResponse, L as ListsClient, w as Location, M as Media, P as PaginatedResponse, a as PaginationOptions, y as Place, x as PlaceTrends, i as Poll, h as PollOption, Q as QueryType, R as ResolvedConfig, v as Trend, f as TrendCategory, d as TrendsClient, m as Tweet, l as TweetPlace, b as TweetsClient, j as Url, n as User, o as UserAbout, p as UserIds, k as UserMention, U as UsersClient, c as collectAll } from './index-9Mu-b1MB.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* TypeScript types for web scraping API responses.
|
|
6
|
+
*/
|
|
7
|
+
interface ScrapeOptions {
|
|
8
|
+
/** Whether to render JavaScript */
|
|
9
|
+
renderJs?: boolean;
|
|
10
|
+
/** Output format (html, markdown, text, json) */
|
|
11
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
12
|
+
/** Country code for proxy (e.g. "US") */
|
|
13
|
+
proxyCountry?: string;
|
|
14
|
+
/** Proxy type (datacenter, residential) */
|
|
15
|
+
proxyType?: "datacenter" | "residential" | "mobile" | "isp";
|
|
16
|
+
/** Reuse an existing session */
|
|
17
|
+
sessionId?: string;
|
|
18
|
+
/** Force a specific engine */
|
|
19
|
+
engine?: string;
|
|
20
|
+
/** Maximum credit cost */
|
|
21
|
+
maxCost?: number;
|
|
22
|
+
/** Custom HTTP headers */
|
|
23
|
+
headers?: Record<string, string>;
|
|
24
|
+
/** CSS selector to wait for */
|
|
25
|
+
waitFor?: string;
|
|
26
|
+
/** Request timeout in seconds */
|
|
27
|
+
timeout?: number;
|
|
28
|
+
/** JavaScript actions to execute */
|
|
29
|
+
jsScenario?: Array<Record<string, unknown>>;
|
|
30
|
+
}
|
|
31
|
+
interface ScreenshotOptions {
|
|
32
|
+
/** Capture full page (not just viewport) */
|
|
33
|
+
fullPage?: boolean;
|
|
34
|
+
/** Viewport width in pixels */
|
|
35
|
+
viewportWidth?: number;
|
|
36
|
+
/** Viewport height in pixels */
|
|
37
|
+
viewportHeight?: number;
|
|
38
|
+
/** Image format (png, jpeg) */
|
|
39
|
+
imageFormat?: "png" | "jpeg";
|
|
40
|
+
/** CSS selector to wait for */
|
|
41
|
+
waitFor?: string;
|
|
42
|
+
/** Request timeout in seconds */
|
|
43
|
+
timeout?: number;
|
|
44
|
+
}
|
|
45
|
+
interface ExtractOptions {
|
|
46
|
+
/** Extraction schema (CSS/XPath selectors) */
|
|
47
|
+
schema?: Record<string, unknown>;
|
|
48
|
+
/** Whether to render JavaScript */
|
|
49
|
+
renderJs?: boolean;
|
|
50
|
+
/** CSS selector to wait for */
|
|
51
|
+
waitFor?: string;
|
|
52
|
+
/** Request timeout in seconds */
|
|
53
|
+
timeout?: number;
|
|
54
|
+
}
|
|
55
|
+
interface BatchOptions {
|
|
56
|
+
/** Whether to render JavaScript */
|
|
57
|
+
renderJs?: boolean;
|
|
58
|
+
/** Output format */
|
|
59
|
+
outputFormat?: "html" | "markdown" | "text" | "json";
|
|
60
|
+
/** Maximum concurrent requests */
|
|
61
|
+
maxConcurrency?: number;
|
|
62
|
+
/** Force a specific engine */
|
|
63
|
+
engine?: string;
|
|
64
|
+
/** Request timeout in seconds */
|
|
65
|
+
timeout?: number;
|
|
66
|
+
}
|
|
67
|
+
interface ScrapeResult {
|
|
68
|
+
content: string;
|
|
69
|
+
status_code: number;
|
|
70
|
+
url: string;
|
|
71
|
+
engine_used?: string;
|
|
72
|
+
credits_used: number;
|
|
73
|
+
processing_time_ms?: number;
|
|
74
|
+
anti_bot_detected: boolean;
|
|
75
|
+
anti_bot_provider?: string;
|
|
76
|
+
captcha_solved: boolean;
|
|
77
|
+
session_id?: string;
|
|
78
|
+
session_reused: boolean;
|
|
79
|
+
}
|
|
80
|
+
interface ScreenshotResult {
|
|
81
|
+
image_data: string;
|
|
82
|
+
format: string;
|
|
83
|
+
url: string;
|
|
84
|
+
credits_used: number;
|
|
85
|
+
}
|
|
86
|
+
interface ExtractResult {
|
|
87
|
+
data: Record<string, unknown>;
|
|
88
|
+
url: string;
|
|
89
|
+
credits_used: number;
|
|
90
|
+
}
|
|
91
|
+
interface BatchResult {
|
|
92
|
+
results: ScrapeResult[];
|
|
93
|
+
total: number;
|
|
94
|
+
successful: number;
|
|
95
|
+
failed: number;
|
|
96
|
+
}
|
|
97
|
+
interface SessionInfo {
|
|
98
|
+
session_id: string;
|
|
99
|
+
domain: string;
|
|
100
|
+
reused: boolean;
|
|
101
|
+
fingerprint_id?: string;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Web scraping API client for ScrapeBadger SDK.
|
|
106
|
+
*/
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Client for web scraping operations.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const client = new ScrapeBadger({ apiKey: "key" });
|
|
114
|
+
*
|
|
115
|
+
* // Simple scrape
|
|
116
|
+
* const result = await client.web.scrape("https://example.com");
|
|
117
|
+
* console.log(result.content);
|
|
118
|
+
*
|
|
119
|
+
* // Screenshot
|
|
120
|
+
* const screenshot = await client.web.screenshot("https://example.com");
|
|
121
|
+
*
|
|
122
|
+
* // Extract structured data
|
|
123
|
+
* const data = await client.web.extract("https://example.com", {
|
|
124
|
+
* schema: { title: "css:h1" }
|
|
125
|
+
* });
|
|
126
|
+
*
|
|
127
|
+
* // Batch scrape
|
|
128
|
+
* const batch = await client.web.batch(["https://a.com", "https://b.com"]);
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
declare class WebClient {
|
|
132
|
+
private readonly client;
|
|
133
|
+
constructor(client: BaseClient);
|
|
134
|
+
/**
|
|
135
|
+
* Scrape a web page.
|
|
136
|
+
*/
|
|
137
|
+
scrape(url: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
138
|
+
/**
|
|
139
|
+
* Take a screenshot of a web page.
|
|
140
|
+
*/
|
|
141
|
+
screenshot(url: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
|
|
142
|
+
/**
|
|
143
|
+
* Extract structured data from a web page.
|
|
144
|
+
*/
|
|
145
|
+
extract(url: string, options?: ExtractOptions): Promise<ExtractResult>;
|
|
146
|
+
/**
|
|
147
|
+
* Scrape multiple URLs in a batch.
|
|
148
|
+
*/
|
|
149
|
+
batch(urls: string[], options?: BatchOptions): Promise<BatchResult>;
|
|
150
|
+
/**
|
|
151
|
+
* Create a new scraping session for a domain.
|
|
152
|
+
*/
|
|
153
|
+
createSession(domain: string, persist?: boolean): Promise<SessionInfo>;
|
|
154
|
+
/**
|
|
155
|
+
* Scrape using an existing session.
|
|
156
|
+
*/
|
|
157
|
+
reuseSession(url: string, sessionId: string, options?: ScrapeOptions): Promise<ScrapeResult>;
|
|
158
|
+
}
|
|
3
159
|
|
|
4
160
|
/**
|
|
5
161
|
* Main ScrapeBadger client.
|
|
@@ -43,6 +199,8 @@ declare class ScrapeBadger {
|
|
|
43
199
|
private readonly baseClient;
|
|
44
200
|
/** Twitter API client */
|
|
45
201
|
readonly twitter: TwitterClient;
|
|
202
|
+
/** Web scraping API client */
|
|
203
|
+
readonly web: WebClient;
|
|
46
204
|
/**
|
|
47
205
|
* Create a new ScrapeBadger client.
|
|
48
206
|
*
|
|
@@ -153,4 +311,4 @@ declare class AccountRestrictedError extends ScrapeBadgerError {
|
|
|
153
311
|
constructor(message?: string, reason?: string);
|
|
154
312
|
}
|
|
155
313
|
|
|
156
|
-
export { AccountRestrictedError, AuthenticationError, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, ServerError, TimeoutError, TwitterClient, ValidationError };
|
|
314
|
+
export { AccountRestrictedError, AuthenticationError, type BatchOptions, type BatchResult, type ExtractOptions, type ExtractResult, InsufficientCreditsError, NotFoundError, RateLimitError, ScrapeBadger, ScrapeBadgerConfig, ScrapeBadgerError, type ScrapeOptions, type ScrapeResult, type ScreenshotOptions, type ScreenshotResult, ServerError, type SessionInfo, TimeoutError, TwitterClient, ValidationError, WebClient };
|
package/dist/index.js
CHANGED
|
@@ -1061,6 +1061,28 @@ var CommunitiesClient = class {
|
|
|
1061
1061
|
constructor(client) {
|
|
1062
1062
|
this.client = client;
|
|
1063
1063
|
}
|
|
1064
|
+
/**
|
|
1065
|
+
* Parse a community member from the API response.
|
|
1066
|
+
* Handles both flat format (user_id, username, role at top level)
|
|
1067
|
+
* and nested format (user object with id, username).
|
|
1068
|
+
*/
|
|
1069
|
+
parseCommunityMember(item) {
|
|
1070
|
+
if ("user" in item && item.user) {
|
|
1071
|
+
return item;
|
|
1072
|
+
}
|
|
1073
|
+
return {
|
|
1074
|
+
user: {
|
|
1075
|
+
id: item.user_id ?? "",
|
|
1076
|
+
username: item.username ?? "",
|
|
1077
|
+
name: item.name ?? "",
|
|
1078
|
+
profile_image_url: item.profile_image_url,
|
|
1079
|
+
verified: item.verified ?? false,
|
|
1080
|
+
is_blue_verified: item.is_blue_verified
|
|
1081
|
+
},
|
|
1082
|
+
role: item.role,
|
|
1083
|
+
joined_at: item.joined_at
|
|
1084
|
+
};
|
|
1085
|
+
}
|
|
1064
1086
|
/**
|
|
1065
1087
|
* Get details for a specific community.
|
|
1066
1088
|
*
|
|
@@ -1149,17 +1171,9 @@ var CommunitiesClient = class {
|
|
|
1149
1171
|
const response = await this.client.request(`/v1/twitter/communities/${communityId}/members`, {
|
|
1150
1172
|
params: { count: options.count ?? 20, cursor: options.cursor }
|
|
1151
1173
|
});
|
|
1152
|
-
const data = (response.data ?? []).map(
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
}
|
|
1156
|
-
const userItem = item;
|
|
1157
|
-
return {
|
|
1158
|
-
user: item,
|
|
1159
|
-
role: userItem.role,
|
|
1160
|
-
joined_at: userItem.joined_at
|
|
1161
|
-
};
|
|
1162
|
-
});
|
|
1174
|
+
const data = (response.data ?? []).map(
|
|
1175
|
+
(item) => this.parseCommunityMember(item)
|
|
1176
|
+
);
|
|
1163
1177
|
return createPaginatedResponse(data, response.next_cursor);
|
|
1164
1178
|
}
|
|
1165
1179
|
/**
|
|
@@ -1173,17 +1187,9 @@ var CommunitiesClient = class {
|
|
|
1173
1187
|
const response = await this.client.request(`/v1/twitter/communities/${communityId}/moderators`, {
|
|
1174
1188
|
params: { count: options.count ?? 20, cursor: options.cursor }
|
|
1175
1189
|
});
|
|
1176
|
-
const data = (response.data ?? []).map(
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
}
|
|
1180
|
-
const userItem = item;
|
|
1181
|
-
return {
|
|
1182
|
-
user: item,
|
|
1183
|
-
role: "moderator",
|
|
1184
|
-
joined_at: userItem.joined_at
|
|
1185
|
-
};
|
|
1186
|
-
});
|
|
1190
|
+
const data = (response.data ?? []).map(
|
|
1191
|
+
(item) => this.parseCommunityMember(item)
|
|
1192
|
+
);
|
|
1187
1193
|
return createPaginatedResponse(data, response.next_cursor);
|
|
1188
1194
|
}
|
|
1189
1195
|
/**
|
|
@@ -1429,11 +1435,112 @@ var TwitterClient = class {
|
|
|
1429
1435
|
}
|
|
1430
1436
|
};
|
|
1431
1437
|
|
|
1438
|
+
// src/web/client.ts
|
|
1439
|
+
var WebClient = class {
|
|
1440
|
+
client;
|
|
1441
|
+
constructor(client) {
|
|
1442
|
+
this.client = client;
|
|
1443
|
+
}
|
|
1444
|
+
/**
|
|
1445
|
+
* Scrape a web page.
|
|
1446
|
+
*/
|
|
1447
|
+
async scrape(url, options = {}) {
|
|
1448
|
+
const body = { url };
|
|
1449
|
+
if (options.renderJs) body.render_js = true;
|
|
1450
|
+
if (options.outputFormat && options.outputFormat !== "html")
|
|
1451
|
+
body.output_format = options.outputFormat;
|
|
1452
|
+
if (options.proxyCountry) body.proxy_country = options.proxyCountry;
|
|
1453
|
+
if (options.proxyType) body.proxy_type = options.proxyType;
|
|
1454
|
+
if (options.sessionId) body.session_id = options.sessionId;
|
|
1455
|
+
if (options.engine) body.engine = options.engine;
|
|
1456
|
+
if (options.maxCost !== void 0) body.max_cost = options.maxCost;
|
|
1457
|
+
if (options.headers) body.headers = options.headers;
|
|
1458
|
+
if (options.waitFor) body.wait_for = options.waitFor;
|
|
1459
|
+
if (options.timeout !== void 0) body.timeout = options.timeout;
|
|
1460
|
+
if (options.jsScenario) body.js_scenario = options.jsScenario;
|
|
1461
|
+
return this.client.request("/v1/web/scrape", {
|
|
1462
|
+
method: "POST",
|
|
1463
|
+
body
|
|
1464
|
+
});
|
|
1465
|
+
}
|
|
1466
|
+
/**
|
|
1467
|
+
* Take a screenshot of a web page.
|
|
1468
|
+
*/
|
|
1469
|
+
async screenshot(url, options = {}) {
|
|
1470
|
+
const body = { url };
|
|
1471
|
+
if (options.fullPage) body.full_page = true;
|
|
1472
|
+
if (options.viewportWidth && options.viewportWidth !== 1280)
|
|
1473
|
+
body.viewport_width = options.viewportWidth;
|
|
1474
|
+
if (options.viewportHeight && options.viewportHeight !== 720)
|
|
1475
|
+
body.viewport_height = options.viewportHeight;
|
|
1476
|
+
if (options.imageFormat && options.imageFormat !== "png")
|
|
1477
|
+
body.image_format = options.imageFormat;
|
|
1478
|
+
if (options.waitFor) body.wait_for = options.waitFor;
|
|
1479
|
+
if (options.timeout !== void 0) body.timeout = options.timeout;
|
|
1480
|
+
return this.client.request("/v1/web/screenshot", {
|
|
1481
|
+
method: "POST",
|
|
1482
|
+
body
|
|
1483
|
+
});
|
|
1484
|
+
}
|
|
1485
|
+
/**
|
|
1486
|
+
* Extract structured data from a web page.
|
|
1487
|
+
*/
|
|
1488
|
+
async extract(url, options = {}) {
|
|
1489
|
+
const body = { url };
|
|
1490
|
+
if (options.schema) body.extraction_schema = options.schema;
|
|
1491
|
+
if (options.renderJs) body.render_js = true;
|
|
1492
|
+
if (options.waitFor) body.wait_for = options.waitFor;
|
|
1493
|
+
if (options.timeout !== void 0) body.timeout = options.timeout;
|
|
1494
|
+
return this.client.request("/v1/web/extract", {
|
|
1495
|
+
method: "POST",
|
|
1496
|
+
body
|
|
1497
|
+
});
|
|
1498
|
+
}
|
|
1499
|
+
/**
|
|
1500
|
+
* Scrape multiple URLs in a batch.
|
|
1501
|
+
*/
|
|
1502
|
+
async batch(urls, options = {}) {
|
|
1503
|
+
const body = { urls };
|
|
1504
|
+
if (options.renderJs) body.render_js = true;
|
|
1505
|
+
if (options.outputFormat && options.outputFormat !== "html")
|
|
1506
|
+
body.output_format = options.outputFormat;
|
|
1507
|
+
if (options.maxConcurrency && options.maxConcurrency !== 5)
|
|
1508
|
+
body.max_concurrency = options.maxConcurrency;
|
|
1509
|
+
if (options.engine) body.engine = options.engine;
|
|
1510
|
+
if (options.timeout !== void 0) body.timeout = options.timeout;
|
|
1511
|
+
return this.client.request("/v1/web/batch", {
|
|
1512
|
+
method: "POST",
|
|
1513
|
+
body
|
|
1514
|
+
});
|
|
1515
|
+
}
|
|
1516
|
+
/**
|
|
1517
|
+
* Create a new scraping session for a domain.
|
|
1518
|
+
*/
|
|
1519
|
+
async createSession(domain, persist = true) {
|
|
1520
|
+
return this.client.request("/v1/web/sessions", {
|
|
1521
|
+
method: "POST",
|
|
1522
|
+
body: {
|
|
1523
|
+
domain,
|
|
1524
|
+
new_session: true,
|
|
1525
|
+
persist_session: persist
|
|
1526
|
+
}
|
|
1527
|
+
});
|
|
1528
|
+
}
|
|
1529
|
+
/**
|
|
1530
|
+
* Scrape using an existing session.
|
|
1531
|
+
*/
|
|
1532
|
+
async reuseSession(url, sessionId, options = {}) {
|
|
1533
|
+
return this.scrape(url, { ...options, sessionId });
|
|
1534
|
+
}
|
|
1535
|
+
};
|
|
1536
|
+
|
|
1432
1537
|
// src/client.ts
|
|
1433
1538
|
var ScrapeBadger = class {
|
|
1434
1539
|
baseClient;
|
|
1435
1540
|
/** Twitter API client */
|
|
1436
1541
|
twitter;
|
|
1542
|
+
/** Web scraping API client */
|
|
1543
|
+
web;
|
|
1437
1544
|
/**
|
|
1438
1545
|
* Create a new ScrapeBadger client.
|
|
1439
1546
|
*
|
|
@@ -1469,6 +1576,7 @@ var ScrapeBadger = class {
|
|
|
1469
1576
|
const resolvedConfig = resolveConfig({ ...config, apiKey });
|
|
1470
1577
|
this.baseClient = new BaseClient(resolvedConfig);
|
|
1471
1578
|
this.twitter = new TwitterClient(this.baseClient);
|
|
1579
|
+
this.web = new WebClient(this.baseClient);
|
|
1472
1580
|
}
|
|
1473
1581
|
};
|
|
1474
1582
|
|
|
@@ -1489,6 +1597,7 @@ exports.TweetsClient = TweetsClient;
|
|
|
1489
1597
|
exports.TwitterClient = TwitterClient;
|
|
1490
1598
|
exports.UsersClient = UsersClient;
|
|
1491
1599
|
exports.ValidationError = ValidationError;
|
|
1600
|
+
exports.WebClient = WebClient;
|
|
1492
1601
|
exports.collectAll = collectAll;
|
|
1493
1602
|
//# sourceMappingURL=index.js.map
|
|
1494
1603
|
//# sourceMappingURL=index.js.map
|