@graphext/cuery 0.7.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/mod.d.ts CHANGED
@@ -13,6 +13,8 @@ export * from './src/tools/personas.js';
13
13
  export * from './src/tools/search.js';
14
14
  export * from './src/tools/topics.js';
15
15
  export * from './src/helpers/utils.js';
16
+ export * from './src/helpers/async.js';
17
+ export * from './src/helpers/urls.js';
16
18
  export * from './src/tools/brands.js';
17
19
  export * from './src/tools/translate.js';
18
20
  export * from './src/tools/sentiment.js';
@@ -20,9 +22,10 @@ export * from './src/tools/summarize.js';
20
22
  export * from './src/tools/sources.js';
21
23
  export * from './src/tools/entities.js';
22
24
  export * from './src/tools/prompts.js';
25
+ export * from './src/tools/scorer.js';
23
26
  export * from './src/helpers/seedKeywords.js';
24
27
  export * from './src/tools/generic.js';
25
- export * from './src/api.js';
28
+ export * from './src/apis/hasdata/index.js';
26
29
  export * from './src/apis/chatgptScraper/index.js';
27
30
  export * from './src/apis/googleAds/keywordPlanner.js';
28
31
  export * from './src/schemas/index.js';
package/esm/mod.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,cAAc,CAAC;AAC7B,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
1
+ {"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
package/esm/mod.js CHANGED
@@ -15,6 +15,8 @@ export * from './src/tools/personas.js';
15
15
  export * from './src/tools/search.js';
16
16
  export * from './src/tools/topics.js';
17
17
  export * from './src/helpers/utils.js';
18
+ export * from './src/helpers/async.js';
19
+ export * from './src/helpers/urls.js';
18
20
  export * from './src/tools/brands.js';
19
21
  export * from './src/tools/translate.js';
20
22
  export * from './src/tools/sentiment.js';
@@ -22,9 +24,10 @@ export * from './src/tools/summarize.js';
22
24
  export * from './src/tools/sources.js';
23
25
  export * from './src/tools/entities.js';
24
26
  export * from './src/tools/prompts.js';
27
+ export * from './src/tools/scorer.js';
25
28
  export * from './src/helpers/seedKeywords.js';
26
29
  export * from './src/tools/generic.js';
27
- export * from './src/api.js';
30
+ export * from './src/apis/hasdata/index.js';
28
31
  export * from './src/apis/chatgptScraper/index.js';
29
32
  export * from './src/apis/googleAds/keywordPlanner.js';
30
33
  export * from './src/schemas/index.js';
@@ -0,0 +1,6 @@
1
+ export * from './helpers.js';
2
+ export * from './aio.js';
3
+ export * from './aim.js';
4
+ export * from './serp.js';
5
+ export * from './scrape.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAC;AAC7B,cAAc,UAAU,CAAC;AACzB,cAAc,UAAU,CAAC;AACzB,cAAc,WAAW,CAAC;AAC1B,cAAc,aAAa,CAAC"}
@@ -0,0 +1,5 @@
1
+ export * from './helpers.js';
2
+ export * from './aio.js';
3
+ export * from './aim.js';
4
+ export * from './serp.js';
5
+ export * from './scrape.js';
@@ -0,0 +1,73 @@
1
/** Proxy pool to request for a scrape; forwarded to the API as `proxyType`. */
type ProxyType = 'datacenter' | 'residential';
/**
 * Output representations a caller may ask for.
 * The request builder additionally always sends `'json'` to the API.
 */
type OutputFormat = 'markdown' | 'text' | 'html';
/**
 * One step of a `jsScenario`. Steps are forwarded to the API unchanged.
 * NOTE(review): the meaning of each field is defined by the HasData API — confirm against its docs.
 */
interface JSScenarioAction {
    click?: string;
    fill?: [string, string];
    wait?: number;
    waitFor?: string;
    scroll?: string;
    evaluate?: string;
}
/**
 * Options shared by the single, parallel and batch scrape entry points.
 * Only `formats` is required; other fields are copied into the request body when set.
 */
export interface ScrapeOptions {
    formats: OutputFormat[];
    proxyType?: ProxyType;
    proxyCountry?: string;
    extractLinks?: boolean;
    wait?: number;
    waitFor?: string;
    blockResources?: boolean;
    blockAds?: boolean;
    blockUrls?: string[];
    jsRendering?: boolean;
    jsScenario?: JSScenarioAction[];
    headers?: Record<string, string>;
}
/**
 * Result of scraping one URL.
 * Every field is optional: failed scrapes are reported as an empty object.
 */
export interface ScrapeResponse {
    url?: string;
    markdown?: string;
    text?: string;
    html?: string;
    links?: string[];
}
/** Response returned when a batch scrape job is submitted. */
export interface BatchJobResponse {
    jobId: string;
    status: string;
}
/**
 * Status document of a batch job.
 * `data.status` is the value polled for completion; `'done'`, `'stopped'`,
 * `'finished'` and `'failed'` are treated as terminal.
 */
export interface BatchJobStatus {
    jobId: string;
    status: string;
    data: {
        status: string;
        requestsCount: number;
        responsesCount: number;
    };
}
/**
 * In batch jobs, results are only links to json files containing the actual scrape results.
 */
export interface BatchResultItem {
    query: Record<string, unknown>;
    result: {
        id: string;
        status: string;
        json?: string;
    };
}
/** One page of batch job results. */
export interface BatchResults {
    page: number;
    limit: number;
    total: number;
    results: BatchResultItem[];
}
/**
 * Scrape a single URL.
 * Request/response errors are logged and reported as an empty object rather than thrown.
 */
export declare function scrapeWeb(url: string, options: ScrapeOptions): Promise<ScrapeResponse>;
/**
 * Scrape several URLs with `scrapeWeb`, running at most `maxConcurrency` at a time
 * (defaults to 29).
 */
export declare function scrapeWebBatch(urls: string[], options: ScrapeOptions, maxConcurrency?: number): Promise<ScrapeResponse[]>;
/** Submit a batch scrape job to HasData API.
 * IMPORTANT: results are not returned in original order! You need to match them by jobId and query.url.
 */
export declare function submitBatchScrapeJob(urls: string[], options: ScrapeOptions): Promise<BatchJobResponse>;
/** Fetch the current status of a batch job. Errors are logged and rethrown. */
export declare function getBatchJobStatus(jobId: string): Promise<BatchJobStatus>;
/**
 * Poll a batch job until it reaches a terminal status.
 * `pollInterval` (default 5000) and `maxWaitTime` (default 300000) are milliseconds;
 * rejects if the job is still running after `maxWaitTime`.
 */
export declare function waitForBatchCompletion(jobId: string, pollInterval?: number, maxWaitTime?: number): Promise<BatchJobStatus>;
/** Fetch one page of batch results (`page` defaults to 0, `limit` to 100). */
export declare function getBatchJobPage(jobId: string, page?: number, limit?: number): Promise<BatchResults>;
/**
 * Submit a batch job, wait for it to finish, then download every result page.
 * Rejects unless the job ends with status `'done'`; individual results that
 * cannot be downloaded are returned as empty objects.
 */
export declare function runBatchScrape(urls: string[], options: ScrapeOptions, pageSize?: number, pollInterval?: number, maxWaitTime?: number): Promise<ScrapeResponse[]>;
export {};
73
+ //# sourceMappingURL=scrape.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/scrape.ts"],"names":[],"mappings":"AAiBA,KAAK,SAAS,GAAG,YAAY,GAAG,aAAa,CAAC;AAE9C,KAAK,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;AAEjD,UAAU,gBAAgB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;IAC7B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,UAAU,CAAC,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,cAAc;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE;QACL,MAAM,EAAE,MAAM,CAAC;QACf,aAAa,EAAE,MAAM,CAAC;QACtB,cAAc,EAAE,MAAM,CAAC;KACvB,CAAA;CACD;AAED;;EAEE;AAEF,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,MAAM,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,MAAM,CAAC;KACd,CAAC;CACF;AAED,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;CAChC;AAkJD,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC,CAwC5F;AAED,wBAAsB,cAAc,CACnC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,EAAE,aAAa,EACtB,cAAc,GAAE,MAA4B,GAC1C,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAQhC;AAED;;EAEE;AACF,wBAAsB,oBAAoB,CACzC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,E
ACnB,OAAO,EAAE,aAAa,GACpB,OAAO,CAAC,gBAAgB,CAAC,CA8B3B;AAED,wBAAsB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAqB9E;AAED,wBAAsB,sBAAsB,CAC3C,KAAK,EAAE,MAAM,EACb,YAAY,GAAE,MAAa,EAC3B,WAAW,GAAE,MAAe,GAC1B,OAAO,CAAC,cAAc,CAAC,CAsBzB;AAED,wBAAsB,eAAe,CACpC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,EAChB,KAAK,GAAE,MAAY,GACjB,OAAO,CAAC,YAAY,CAAC,CAuBvB;AAED,wBAAsB,cAAc,CACnC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,EAAE,aAAa,EACtB,QAAQ,GAAE,MAAY,EACtB,YAAY,GAAE,MAAa,EAC3B,WAAW,GAAE,MAAe,GAC1B,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CA8DhC"}
@@ -0,0 +1,310 @@
1
+ /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
2
+ import * as dntShim from "../../../_dnt.shims.js";
3
+ import { mapParallel, withRetries } from '../../helpers/async.js';
4
/** Default upper bound on HasData requests kept in flight at once. */
const HASDATA_CONCURRENCY = 29;

/**
 * Default retry policy handed to `withRetries` for every HasData request.
 * NOTE(review): field semantics are defined by `withRetries` (helpers/async.js);
 * the delays are presumably milliseconds and `statusCodes` the retryable HTTP
 * statuses — confirm against that helper.
 */
const HASDATA_RETRY_CONFIG = {
    maxRetries: 3,
    initialDelay: 1000,
    maxDelay: 8000,
    backoffMultiplier: 2,
    statusCodes: [429, 500],
};
12
/**
 * Normalise scraped markdown.
 *
 * Optionally strips images, then converts non-breaking spaces to plain spaces,
 * collapses runs of spaces/tabs, trims every line and squeezes consecutive
 * blank lines into one.
 *
 * @param markdown Raw markdown; any falsy value yields `''`.
 * @param excludeImages When true (default), removes `![alt](url)` images and
 *   standalone "Image" lines left behind by plain-text conversion.
 * @returns The cleaned markdown, trimmed at both ends.
 */
function cleanMarkdown(markdown, excludeImages = true) {
    if (!markdown) {
        return '';
    }
    let text = markdown;
    if (excludeImages) {
        // Remove markdown images: ![alt text](url)
        text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, '');
        // Remove standalone "Image" lines. The terminating newline is only
        // looked at, not consumed, so it can also open the next match;
        // otherwise every second line in a run of "Image" lines survives.
        text = text.replace(/\n\s*Image\s*(?=\n)/g, '');
        // Clean up multiple consecutive newlines
        text = text.replace(/\n{3,}/g, '\n\n').trim();
    }
    text = text.replace(/\u00a0/g, ' ').replace(/[ \t]+/g, ' ');
    const cleaned = [];
    for (const line of text.split('\n').map(raw => raw.trim())) {
        // Keep a blank line only when it directly follows a non-blank one.
        const previousHasContent = cleaned.length > 0 && cleaned[cleaned.length - 1];
        if (line || previousHasContent) {
            cleaned.push(line);
        }
    }
    return cleaned.join('\n').trim();
}
35
/**
 * `fetch` wrapper used for all HasData calls.
 *
 * The request is executed through `withRetries` with `retryConfig`. If the
 * final response is not OK, a status-specific message is logged with
 * `console.error` and thrown as an `Error`.
 *
 * NOTE(review): `signal` is written after spreading `options`, so it always
 * replaces any `options.signal` supplied by the caller — confirm this is intended.
 *
 * @param url Absolute URL to request.
 * @param options `fetch` init object (method, headers, body, ...).
 * @param retryConfig Retry policy passed to `withRetries`.
 * @returns The successful `Response`.
 * @throws Error when the final response has a non-OK status.
 */
async function fetchWithRetry(url, options, retryConfig = HASDATA_RETRY_CONFIG) {
    const attempt = async () => fetch(url, {
        ...options,
        signal: dntShim.dntGlobalThis.abortSignal
    });
    const response = await withRetries(attempt, retryConfig);
    if (response.ok) {
        return response;
    }
    // Best effort: the response body is extra context only, so a failure to
    // read it must not mask the HTTP error itself.
    const readDetails = async () => {
        try {
            const body = await response.text();
            return ` - ${body}`;
        }
        catch {
            return '';
        }
    };
    const status = response.status;
    let errorMessage;
    switch (status) {
        case 400:
            errorMessage = `HasData API error (400): Bad Request${await readDetails()}`;
            break;
        case 401:
            errorMessage = 'HasData API error (401): Invalid API key';
            break;
        case 403:
            errorMessage = 'HasData API error (403): API credits exhausted';
            break;
        case 404:
            errorMessage = 'HasData API error (404): Resource not found';
            break;
        case 422:
            errorMessage = `HasData API error (422): Unprocessable Entity${await readDetails()}`;
            break;
        case 429:
            errorMessage = 'HasData API error (429): Rate limit exceeded';
            break;
        default:
            errorMessage = `HasData API error: ${status} ${response.statusText}`;
    }
    console.error(errorMessage);
    throw new Error(errorMessage);
}
83
/**
 * Copy the supported scrape options onto a request payload.
 *
 * `body` is mutated in place and also returned. `outputFormat` is always set
 * to a copy of `options.formats` with `'json'` appended when absent; a missing
 * `formats` is treated as an empty list instead of throwing.
 *
 * String/array/object options are copied only when truthy; boolean and numeric
 * options are copied whenever they are not null/undefined, so `false` and `0`
 * are preserved.
 *
 * @param body Payload to extend (e.g. `{ url }`).
 * @param options Scrape options supplied by the caller.
 * @returns The same `body` object.
 */
function configureRequestBody(body, options) {
    const formats = [...(options.formats || [])];
    if (!formats.includes('json')) {
        formats.push('json');
    }
    body.outputFormat = formats;
    // Listed in the order the properties are written to `body`, which is also
    // the key order of the serialised request.
    const optionKeys = [
        'proxyType', 'proxyCountry', 'extractLinks', 'wait', 'waitFor',
        'blockResources', 'blockAds', 'blockUrls', 'jsRendering',
        'jsScenario', 'headers',
    ];
    // Options for which falsy values (false, 0) are meaningful and must be sent.
    const keepWhenFalsy = new Set([
        'extractLinks', 'wait', 'blockResources', 'blockAds', 'jsRendering',
    ]);
    for (const key of optionKeys) {
        const value = options[key];
        const isSet = keepWhenFalsy.has(key) ? value != null : Boolean(value);
        if (isSet) {
            body[key] = value;
        }
    }
    return body;
}
124
/**
 * Read the HasData API key from the `HASDATA_API_KEY` environment variable
 * (via the dnt Deno shim).
 *
 * @returns The API key.
 * @throws Error when the variable is unset or empty.
 */
function getApiKey() {
    const key = dntShim.Deno.env.get('HASDATA_API_KEY');
    if (key) {
        return key;
    }
    throw new Error('HASDATA_API_KEY environment variable is required');
}
131
/**
 * Scrape a single URL through the HasData web scraping endpoint.
 *
 * A missing API key throws. Any request or parsing error is logged and
 * reported as an empty object instead of being thrown.
 *
 * @param url Page to scrape.
 * @param options Scrape options; see `configureRequestBody` for how they are sent.
 * @returns `{ url, markdown?, text?, html?, links? }`, or `{}` on error.
 *   `links` is only included when `options.extractLinks` is truthy.
 */
export async function scrapeWeb(url, options) {
    const apiKey = getApiKey();
    const requestBody = configureRequestBody({ url: url }, options);
    try {
        const response = await fetchWithRetry('https://api.hasdata.com/scrape/web', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': apiKey
            },
            body: JSON.stringify(requestBody)
        });
        const payload = await response.json();
        // Collected as entries so the result keeps the key order
        // url, markdown, text, html, links.
        const fields = [['url', url]];
        if (payload.markdown) {
            fields.push(['markdown', cleanMarkdown(payload.markdown)]);
        }
        if (payload.text) {
            fields.push(['text', payload.text]);
        }
        // The API returns the HTML under `content`.
        if (payload.content) {
            fields.push(['html', payload.content]);
        }
        if (options.extractLinks && payload.links) {
            fields.push(['links', payload.links]);
        }
        return Object.fromEntries(fields);
    }
    catch (error) {
        console.error('HasData Web Scraping API error:', error);
        return {}; // Return an empty object on error
    }
}
166
/**
 * Scrape many URLs by calling `scrapeWeb` for each through `mapParallel`.
 *
 * @param urls Pages to scrape.
 * @param options Scrape options applied to every URL.
 * @param maxConcurrency Concurrency limit passed to `mapParallel`
 *   (defaults to `HASDATA_CONCURRENCY`).
 * @returns Whatever `mapParallel` resolves to for the per-URL results.
 */
export async function scrapeWebBatch(urls, options, maxConcurrency = HASDATA_CONCURRENCY) {
    const scrapeOne = async (url) => scrapeWeb(url, options);
    return mapParallel(urls, maxConcurrency, scrapeOne);
}
171
/** Submit a batch scrape job to HasData API.
 * IMPORTANT: results are not returned in original order! You need to match them by jobId and query.url.
 *
 * Each URL becomes one request payload built with `configureRequestBody`.
 * Errors are logged and rethrown.
 */
export async function submitBatchScrapeJob(urls, options) {
    const apiKey = getApiKey();
    const requests = urls.map((url) => configureRequestBody({ url: url }, options));
    try {
        const response = await fetchWithRetry('https://api.hasdata.com/scrape/batch/web/', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': apiKey
            },
            body: JSON.stringify({ requests: requests })
        });
        return await response.json();
    }
    catch (error) {
        console.error('HasData Batch Scrape submission error:', error);
        throw error;
    }
}
199
/**
 * Fetch the current status document of a batch scrape job.
 *
 * @param jobId Identifier returned by `submitBatchScrapeJob`.
 * @returns The parsed JSON status.
 * @throws Rethrows any request/parsing error after logging it.
 */
export async function getBatchJobStatus(jobId) {
    const apiKey = getApiKey();
    const requestInit = {
        method: 'GET',
        headers: {
            'x-api-key': apiKey
        }
    };
    try {
        const response = await fetchWithRetry(`https://api.hasdata.com/scrape/batch/web/${jobId}`, requestInit);
        return await response.json();
    }
    catch (error) {
        console.error('HasData Batch Job status error:', error);
        throw error;
    }
}
217
/**
 * Poll a batch job until it reaches a terminal status.
 *
 * Progress is logged after every non-terminal poll. The pause between polls
 * is capped at the time left before `maxWaitTime`, so the timeout is not
 * overshot by a full `pollInterval`.
 *
 * @param jobId Identifier returned by `submitBatchScrapeJob`.
 * @param pollInterval Milliseconds to wait between polls (default 5000).
 * @param maxWaitTime Maximum total milliseconds to wait (default 300000).
 * @returns The status document once `data.status` is `'done'`, `'stopped'`,
 *   `'finished'` or `'failed'`.
 * @throws Error if the job is still running after `maxWaitTime`; also
 *   propagates errors from `getBatchJobStatus`.
 */
export async function waitForBatchCompletion(jobId, pollInterval = 5000, maxWaitTime = 300000) {
    const endStates = ['done', 'stopped', 'finished', 'failed'];
    const startTime = Date.now();
    while (true) {
        const status = await getBatchJobStatus(jobId);
        if (endStates.includes(status.data.status)) {
            return status;
        }
        const total = status.data.requestsCount;
        const completed = status.data.responsesCount;
        console.log(`Batch job ${jobId} in progress: ${completed}/${total} completed.`);
        const remaining = maxWaitTime - (Date.now() - startTime);
        if (remaining <= 0) {
            throw new Error(`Batch job ${jobId} did not complete within ${maxWaitTime}ms`);
        }
        // Never sleep past the deadline: the last poll happens at (about) maxWaitTime.
        const delay = Math.min(pollInterval, remaining);
        await new Promise(resolve => setTimeout(resolve, delay));
    }
}
237
/**
 * Fetch one page of results for a batch scrape job.
 *
 * @param jobId Identifier returned by `submitBatchScrapeJob`.
 * @param page Page index sent as the `page` query parameter (default 0).
 * @param limit Page size sent as the `limit` query parameter (default 100).
 * @returns The parsed JSON page.
 * @throws Rethrows any request/parsing error after logging it.
 */
export async function getBatchJobPage(jobId, page = 0, limit = 100) {
    const apiKey = getApiKey();
    const resultsUrl = new URL(`https://api.hasdata.com/scrape/batch/web/${jobId}/results`);
    resultsUrl.searchParams.set('page', page.toString());
    resultsUrl.searchParams.set('limit', limit.toString());
    const target = resultsUrl.toString();
    console.log(`Fetching batch job results from: ${target}`);
    try {
        const response = await fetchWithRetry(target, {
            method: 'GET',
            headers: {
                'x-api-key': apiKey
            }
        });
        return await response.json();
    }
    catch (error) {
        console.error('HasData Batch Job results error:', error);
        throw error;
    }
}
257
/**
 * Run a complete batch scrape: submit the job, wait for it, download all results.
 *
 * Result pages are fetched one after another; within a page the per-item
 * result files are downloaded through `mapParallel` with `HASDATA_CONCURRENCY`.
 * Only the formats listed in `options.formats` are copied into each result.
 * Items that are not `'ok'`, have no result link, or fail to download are
 * returned as empty objects.
 *
 * @param urls Pages to scrape.
 * @param options Scrape options applied to every URL.
 * @param pageSize Number of results requested per page (default 100).
 * @param pollInterval Milliseconds between status polls (default 5000).
 * @param maxWaitTime Maximum milliseconds to wait for the job (default 300000).
 * @returns All downloaded results, in the order the API pages list them.
 * @throws Error when the job ends in any status other than `'done'`; also
 *   propagates submission, polling and page-fetch errors.
 */
export async function runBatchScrape(urls, options, pageSize = 100, pollInterval = 5000, maxWaitTime = 300000) {
    const { jobId } = await submitBatchScrapeJob(urls, options);
    const status = await waitForBatchCompletion(jobId, pollInterval, maxWaitTime);
    if (status.data.status !== 'done') {
        throw new Error(`Batch job failed with status:\n${JSON.stringify(status, null, 2)}`);
    }
    console.log(`Batch job ${jobId} finished successfully.`);

    // Download one result file and convert it to a scrape response.
    const downloadItem = async (item) => {
        const downloadable = item.result.status === 'ok' && item.result.json;
        if (!downloadable) {
            return {};
        }
        try {
            const response = await fetchWithRetry(item.result.json, { method: 'GET' });
            const payload = await response.json();
            const requested = (format) => options.formats.includes(format);
            // Collected as entries so the result keeps the key order
            // url, markdown, text, html, links.
            const fields = [['url', item.query.url]];
            if (requested('markdown') && payload.markdown) {
                fields.push(['markdown', cleanMarkdown(payload.markdown)]);
            }
            if (requested('text') && payload.text) {
                fields.push(['text', payload.text]);
            }
            // The API returns the HTML under `content`.
            if (requested('html') && payload.content) {
                fields.push(['html', payload.content]);
            }
            if (options.extractLinks && payload.links) {
                fields.push(['links', payload.links]);
            }
            return Object.fromEntries(fields);
        }
        catch (error) {
            console.error(`Failed to fetch result for ${item.query.url}:`, error);
            return {};
        }
    };

    const aggregatedResults = [];
    for (let currentPage = 0;; currentPage += 1) {
        const pageResults = await getBatchJobPage(jobId, currentPage, pageSize);
        console.log(`Fetched page ${pageResults.page} with ${pageResults.results.length} results.`);
        const scrapeResponses = await mapParallel(pageResults.results, HASDATA_CONCURRENCY, downloadItem);
        aggregatedResults.push(...scrapeResponses);
        // Stop on a short page or once the reported total has been covered.
        const shortPage = pageResults.results.length < pageSize;
        const coveredTotal = (pageResults.page + 1) * pageResults.limit >= pageResults.total;
        if (shortPage || coveredTotal) {
            break;
        }
    }
    return aggregatedResults;
}
@@ -0,0 +1,152 @@
1
+ import { type AIOverview, type AIOParsed } from './helpers.js';
2
/** Kind of search to run; used as `SerpRequestOptions.type`. */
type SerpSearchType = 'all' | 'images' | 'videos' | 'news' | 'shopping' | 'local';
/**
 * Options for a SERP request. `location`, `country` and `language` are required.
 * NOTE(review): field semantics are defined by the HasData SERP API — confirm against its docs.
 */
export interface SerpRequestOptions {
    location: string;
    country: string;
    language: string;
    contentLanguage?: string;
    domain?: string;
    filters?: string | string[];
    safeSearch?: 'active' | 'off' | boolean;
    filterResults?: boolean;
    preventAutoCorrect?: boolean;
    offset?: number;
    resultsPerPage?: number;
    type?: SerpSearchType;
    device?: 'desktop' | 'mobile' | 'tablet';
    placeId?: string;
    lsig?: string;
    entityId?: string;
    encodedLocation?: string;
    searchId?: string;
}
/** Entry of `SerpSiteLinks.inline`. */
interface SerpInlineSiteLink {
    title?: string;
    link?: string;
}
/** Entry of `SerpSiteLinks.list`; like an inline sitelink plus a snippet. */
interface SerpListSiteLink {
    title?: string;
    link?: string;
    snippet?: string;
}
/** Content of `SerpRichSnippet.top`. */
interface SerpRichSnippetTop {
    extensions?: string[];
    detectedExtensions?: Record<string, string | number>;
}
/** Rich snippet attached to an organic result. */
interface SerpRichSnippet {
    top?: SerpRichSnippetTop;
}
/** Sitelinks attached to an organic result, in inline and/or list form. */
interface SerpSiteLinks {
    inline?: SerpInlineSiteLink[];
    list?: SerpListSiteLink[];
}
/** One entry of `SerpResponse.organicResults`. */
export interface SerpOrganicResult {
    position?: number;
    title?: string;
    link?: string;
    url?: string;
    displayedLink?: string;
    source?: string;
    snippet?: string;
    /** Spelled as declared ("Highlited"); presumably mirrors the API field name — verify before renaming. */
    snippetHighlitedWords?: string[];
    images?: string[];
    richSnippet?: SerpRichSnippet;
    sitelinks?: SerpSiteLinks;
}
/** Value of `SerpResponse.requestMetadata`. */
export interface SerpRequestMetadata {
    id?: string;
    status?: string;
    html?: string;
    url?: string;
}
/** Value of `SerpResponse.searchInformation`. */
export interface SerpSearchInformation {
    totalResults?: string;
    formattedTotalResults?: string;
    timeTaken?: number;
    searchTime?: number;
}
/** One entry of `SerpLocalResults.places`. */
export interface SerpLocalPlace {
    position?: number;
    title?: string;
    rating?: number;
    reviews?: number;
    reviewsOriginal?: string;
    address?: string;
    hours?: string;
    placeId?: string;
    description?: string;
}
/** Value of `SerpResponse.localResults`. */
export interface SerpLocalResults {
    places?: SerpLocalPlace[];
    moreLocationsLink?: string;
}
/** One entry of `SerpResponse.relatedSearches`. */
export interface SerpRelatedSearch {
    query?: string;
    link?: string;
}
/** One entry of `SerpResponse.relatedQuestions`; may carry its own AI overview. */
export interface SerpRelatedQuestion {
    question?: string;
    snippet?: string;
    link?: string;
    title?: string;
    displayedLink?: string;
    date?: string;
    list?: string[];
    table?: string[][];
    aiOverview?: AIOverview;
}
/** One entry of `SerpResponse.perspectives`. */
export interface SerpPerspective {
    index?: number;
    author?: string;
    source?: string;
    duration?: string;
    extensions?: string[];
    thumbnail?: string;
    title?: string;
    link?: string;
    date?: string;
    snippet?: string;
}
/** One entry of `SerpResponse.immersiveProducts`. */
export interface SerpImmersiveProduct {
    position?: number;
    category?: string;
    title?: string;
    productId?: string;
    productLink?: string;
    price?: string;
    extractedPrice?: number;
    source?: string;
    reviews?: number;
    rating?: number;
    delivery?: string;
    extensions?: string[];
    thumbnail?: string;
}
/** Value of `SerpResponse.pagination`. */
export interface SerpPagination {
    next?: string;
    pages?: Record<string, string>[];
}
/**
 * Result of a SERP request. Every section is optional; sections without a
 * dedicated interface are typed as loose records.
 */
export interface SerpResponse {
    requestMetadata?: SerpRequestMetadata;
    searchMetadata?: Record<string, unknown>;
    searchParameters?: Record<string, unknown>;
    searchInformation?: SerpSearchInformation;
    organicResults?: SerpOrganicResult[];
    adsResults?: Record<string, unknown>[];
    localResults?: SerpLocalResults;
    knowledgeGraph?: Record<string, unknown>;
    relatedSearches?: SerpRelatedSearch[];
    topStories?: Record<string, unknown>[];
    peopleAlsoAsk?: Record<string, unknown>[];
    relatedQuestions?: SerpRelatedQuestion[];
    imagesResults?: Record<string, unknown>[];
    videosResults?: Record<string, unknown>[];
    perspectives?: SerpPerspective[];
    immersiveProducts?: SerpImmersiveProduct[];
    pagination?: SerpPagination;
    aiOverview?: AIOParsed;
}
/** Run one SERP query with the given options. */
export declare function fetchSerp(query: string, options: SerpRequestOptions): Promise<SerpResponse>;
/**
 * Run several SERP queries with shared options.
 * NOTE(review): `maxConcurrency` presumably caps parallel requests and the
 * results presumably follow the order of `queries` — confirm in serp.ts.
 */
export declare function fetchSerpBatch(queries: string[], options: SerpRequestOptions, maxConcurrency?: number): Promise<SerpResponse[]>;
export {};
152
+ //# sourceMappingURL=serp.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serp.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/serp.ts"],"names":[],"mappings":"AAGA,OAAO,EAIN,KAAK,UAAU,EACf,KAAK,SAAS,EACd,MAAM,cAAc,CAAC;AAEtB,KAAK,cAAc,GAAG,KAAK,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,GAAG,OAAO,CAAC;AAUlF,MAAM,WAAW,kBAAkB;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,QAAQ,GAAG,KAAK,GAAG,OAAO,CAAC;IACxC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,IAAI,CAAC,EAAE,cAAc,CAAC;IACtB,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,kBAAkB;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAED,UAAU,gBAAgB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,kBAAkB;IAC3B,UAAU,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC;CACrD;AAED,UAAU,eAAe;IACxB,GAAG,CAAC,EAAE,kBAAkB,CAAC;CACzB;AAED,UAAU,aAAa;IACtB,MAAM,CAAC,EAAE,KAAK,CAAC,kBAAkB,CAAC,CAAC;IACnC,IAAI,CAAC,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qBAAqB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACtC,MAAM,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,WAAW,CAAC,EAAE,eAAe,CAAC;IAC9B,SAAS,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED,MAAM,WAAW,mBAAmB;IACnC,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,qBAAqB;IACrC,YAAY,CAAC,EAA
E,MAAM,CAAC;IACtB,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAChC,MAAM,CAAC,EAAE,KAAK,CAAC,cAAc,CAAC,CAAC;IAC/B,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,iBAAiB;IACjC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,mBAAmB;IACnC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACrB,KAAK,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7B,UAAU,CAAC,EAAE,UAAU,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACtC;AAED,MAAM,WAAW,YAAY;IAC5B,eAAe,CAAC,EAAE,mBAAmB,CAAC;IACtC,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,gBAAgB,
CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3C,iBAAiB,CAAC,EAAE,qBAAqB,CAAC;IAC1C,cAAc,CAAC,EAAE,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC5C,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACzC,eAAe,CAAC,EAAE,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAC3C,UAAU,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC5C,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,gBAAgB,CAAC,EAAE,KAAK,CAAC,mBAAmB,CAAC,CAAC;IAC9C,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,YAAY,CAAC,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;IACtC,iBAAiB,CAAC,EAAE,KAAK,CAAC,oBAAoB,CAAC,CAAC;IAChD,UAAU,CAAC,EAAE,cAAc,CAAC;IAC5B,UAAU,CAAC,EAAE,SAAS,CAAC;CACvB;AA0ID,wBAAsB,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,YAAY,CAAC,CAMjG;AAED,wBAAsB,cAAc,CACnC,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,EACtB,OAAO,EAAE,kBAAkB,EAC3B,cAAc,GAAE,MAA4B,GAC1C,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAY9B"}