@graphext/cuery 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/esm/mod.d.ts +3 -0
  2. package/esm/mod.d.ts.map +1 -1
  3. package/esm/mod.js +3 -0
  4. package/esm/src/apis/hasdata/aim.d.ts +4 -0
  5. package/esm/src/apis/hasdata/aim.d.ts.map +1 -0
  6. package/esm/src/apis/hasdata/aim.js +32 -0
  7. package/esm/src/apis/hasdata/aio.d.ts +4 -0
  8. package/esm/src/apis/hasdata/aio.d.ts.map +1 -0
  9. package/esm/src/apis/hasdata/aio.js +42 -0
  10. package/esm/src/apis/hasdata/helpers.d.ts +55 -0
  11. package/esm/src/apis/hasdata/helpers.d.ts.map +1 -0
  12. package/esm/src/apis/hasdata/helpers.js +182 -0
  13. package/esm/src/apis/hasdata/index.d.ts +6 -0
  14. package/esm/src/apis/hasdata/index.d.ts.map +1 -0
  15. package/esm/src/apis/hasdata/index.js +5 -0
  16. package/esm/src/apis/hasdata/scrape.d.ts +73 -0
  17. package/esm/src/apis/hasdata/scrape.d.ts.map +1 -0
  18. package/esm/src/apis/hasdata/scrape.js +310 -0
  19. package/esm/src/apis/hasdata/serp.d.ts +152 -0
  20. package/esm/src/apis/hasdata/serp.d.ts.map +1 -0
  21. package/esm/src/apis/hasdata/serp.js +133 -0
  22. package/esm/src/schemas/index.d.ts +6 -2
  23. package/esm/src/schemas/index.d.ts.map +1 -1
  24. package/package.json +1 -1
  25. package/script/mod.d.ts +3 -0
  26. package/script/mod.d.ts.map +1 -1
  27. package/script/mod.js +3 -0
  28. package/script/src/apis/hasdata/aim.d.ts +4 -0
  29. package/script/src/apis/hasdata/aim.d.ts.map +1 -0
  30. package/script/src/apis/hasdata/aim.js +36 -0
  31. package/script/src/apis/hasdata/aio.d.ts +4 -0
  32. package/script/src/apis/hasdata/aio.d.ts.map +1 -0
  33. package/script/src/apis/hasdata/aio.js +46 -0
  34. package/script/src/apis/hasdata/helpers.d.ts +55 -0
  35. package/script/src/apis/hasdata/helpers.d.ts.map +1 -0
  36. package/script/src/apis/hasdata/helpers.js +222 -0
  37. package/script/src/apis/hasdata/index.d.ts +6 -0
  38. package/script/src/apis/hasdata/index.d.ts.map +1 -0
  39. package/script/src/apis/hasdata/index.js +21 -0
  40. package/script/src/apis/hasdata/scrape.d.ts +73 -0
  41. package/script/src/apis/hasdata/scrape.d.ts.map +1 -0
  42. package/script/src/apis/hasdata/scrape.js +352 -0
  43. package/script/src/apis/hasdata/serp.d.ts +152 -0
  44. package/script/src/apis/hasdata/serp.d.ts.map +1 -0
  45. package/script/src/apis/hasdata/serp.js +137 -0
  46. package/script/src/schemas/index.d.ts +6 -2
  47. package/script/src/schemas/index.d.ts.map +1 -1
package/esm/mod.d.ts CHANGED
@@ -13,6 +13,8 @@ export * from './src/tools/personas.js';
13
13
  export * from './src/tools/search.js';
14
14
  export * from './src/tools/topics.js';
15
15
  export * from './src/helpers/utils.js';
16
+ export * from './src/helpers/async.js';
17
+ export * from './src/helpers/urls.js';
16
18
  export * from './src/tools/brands.js';
17
19
  export * from './src/tools/translate.js';
18
20
  export * from './src/tools/sentiment.js';
@@ -23,6 +25,7 @@ export * from './src/tools/prompts.js';
23
25
  export * from './src/tools/scorer.js';
24
26
  export * from './src/helpers/seedKeywords.js';
25
27
  export * from './src/tools/generic.js';
28
+ export * from './src/apis/hasdata/index.js';
26
29
  export * from './src/apis/chatgptScraper/index.js';
27
30
  export * from './src/apis/googleAds/keywordPlanner.js';
28
31
  export * from './src/schemas/index.js';
package/esm/mod.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
1
+ {"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,WAAW,EACX,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,aAAa,EACb,KAAK,YAAY,EACjB,KAAK,SAAS,GACd,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,oCAAoC,CAAC;AACnD,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
package/esm/mod.js CHANGED
@@ -15,6 +15,8 @@ export * from './src/tools/personas.js';
15
15
  export * from './src/tools/search.js';
16
16
  export * from './src/tools/topics.js';
17
17
  export * from './src/helpers/utils.js';
18
+ export * from './src/helpers/async.js';
19
+ export * from './src/helpers/urls.js';
18
20
  export * from './src/tools/brands.js';
19
21
  export * from './src/tools/translate.js';
20
22
  export * from './src/tools/sentiment.js';
@@ -25,6 +27,7 @@ export * from './src/tools/prompts.js';
25
27
  export * from './src/tools/scorer.js';
26
28
  export * from './src/helpers/seedKeywords.js';
27
29
  export * from './src/tools/generic.js';
30
+ export * from './src/apis/hasdata/index.js';
28
31
  export * from './src/apis/chatgptScraper/index.js';
29
32
  export * from './src/apis/googleAds/keywordPlanner.js';
30
33
  export * from './src/schemas/index.js';
package/esm/src/apis/hasdata/aim.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ import type { AIOParsed } from './helpers.js';
2
+ export declare function fetchAIM(prompt: string, country?: string | null, language?: string | null, location?: string | null): Promise<AIOParsed>;
3
+ export declare function fetchAIMBatch(prompts: Array<string>, country?: string | null, language?: string | null, location?: string | null, maxConcurrency?: number): Promise<Array<AIOParsed>>;
4
+ //# sourceMappingURL=aim.d.ts.map
package/esm/src/apis/hasdata/aim.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aim.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/aim.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACX,SAAS,EACT,MAAM,cAAc,CAAC;AAOtB,wBAAsB,QAAQ,CAC7B,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,MAAM,GAAG,IAAW,EAC7B,QAAQ,GAAE,MAAM,GAAG,IAAW,EAC9B,QAAQ,GAAE,MAAM,GAAG,IAAW,GAC5B,OAAO,CAAC,SAAS,CAAC,CA4BpB;AAED,wBAAsB,aAAa,CAClC,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,EACtB,OAAO,GAAE,MAAM,GAAG,IAAW,EAC7B,QAAQ,GAAE,MAAM,GAAG,IAAW,EAC9B,QAAQ,GAAE,MAAM,GAAG,IAAW,EAC9B,cAAc,GAAE,MAA4B,GAC1C,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAM3B"}
package/esm/src/apis/hasdata/aim.js ADDED
@@ -0,0 +1,32 @@
1
+ /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
2
+ import { mapParallel } from '../../helpers/async.js';
3
+ import { fetchHasDataWithRetry, HASDATA_CONCURRENCY, parseAIM } from './helpers.js';
4
+ export async function fetchAIM(prompt, country = null, language = null, location = null) {
5
+ const aimEndpoint = 'https://api.hasdata.com/scrape/google/ai-mode';
6
+ const params = { q: prompt };
7
+ if (location) {
8
+ params.location = location;
9
+ }
10
+ if (country) {
11
+ params.gl = country.toLowerCase();
12
+ }
13
+ if (language) {
14
+ params.hl = language.toLowerCase();
15
+ }
16
+ const url = new URL(aimEndpoint);
17
+ for (const [key, value] of Object.entries(params)) {
18
+ url.searchParams.set(key, value);
19
+ }
20
+ try {
21
+ const response = await fetchHasDataWithRetry(url.toString());
22
+ const content = await response.json();
23
+ return parseAIM(content);
24
+ }
25
+ catch (error) {
26
+ console.error('HasData AI Mode API error:', error);
27
+ return { answer: '', sources: [] };
28
+ }
29
+ }
30
+ export async function fetchAIMBatch(prompts, country = null, language = null, location = null, maxConcurrency = HASDATA_CONCURRENCY) {
31
+ return mapParallel(prompts, maxConcurrency, async (prompt) => fetchAIM(prompt, country, language, location));
32
+ }
package/esm/src/apis/hasdata/aio.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ import { type AIOParsed } from './helpers.js';
2
+ export declare function fetchAIO(prompt: string, country?: string | null, language?: string | null): Promise<AIOParsed>;
3
+ export declare function fetchAIOBatch(prompts: Array<string>, country?: string | null, language?: string | null, maxConcurrency?: number): Promise<Array<AIOParsed>>;
4
+ //# sourceMappingURL=aio.d.ts.map
package/esm/src/apis/hasdata/aio.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aio.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/aio.ts"],"names":[],"mappings":"AAGA,OAAO,EAIN,KAAK,SAAS,EAEd,MAAM,cAAc,CAAC;AAStB,wBAAsB,QAAQ,CAC7B,MAAM,EAAE,MAAM,EACd,OAAO,GAAE,MAAM,GAAG,IAAW,EAC7B,QAAQ,GAAE,MAAM,GAAG,IAAW,GAC5B,OAAO,CAAC,SAAS,CAAC,CAiCpB;AAED,wBAAsB,aAAa,CAClC,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,EACtB,OAAO,GAAE,MAAM,GAAG,IAAW,EAC7B,QAAQ,GAAE,MAAM,GAAG,IAAW,EAC9B,cAAc,GAAE,MAA4B,GAC1C,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAM3B"}
package/esm/src/apis/hasdata/aio.js ADDED
@@ -0,0 +1,42 @@
1
+ /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
2
+ import { mapParallel } from '../../helpers/async.js';
3
+ import { fetchHasDataWithRetry, HASDATA_CONCURRENCY, parseAIO } from './helpers.js';
4
+ function aioRequestUrl(aio) {
5
+ if (aio.pageToken && aio.hasdataLink) {
6
+ return aio.hasdataLink;
7
+ }
8
+ return null;
9
+ }
10
+ export async function fetchAIO(prompt, country = null, language = null) {
11
+ const serpEndpoint = 'https://api.hasdata.com/scrape/google/serp';
12
+ const params = { q: prompt };
13
+ if (country) {
14
+ params.gl = country.toLowerCase();
15
+ }
16
+ if (language) {
17
+ params.hl = language.toLowerCase();
18
+ }
19
+ const url = new URL(serpEndpoint);
20
+ for (const [key, value] of Object.entries(params)) {
21
+ url.searchParams.set(key, value);
22
+ }
23
+ try {
24
+ let response = await fetchHasDataWithRetry(url.toString());
25
+ let content = await response.json();
26
+ let aio = content.aiOverview || {};
27
+ const aioUrlString = aioRequestUrl(aio);
28
+ if (aioUrlString) {
29
+ response = await fetchHasDataWithRetry(aioUrlString);
30
+ content = await response.json();
31
+ aio = content.aiOverview || {};
32
+ }
33
+ return parseAIO(aio);
34
+ }
35
+ catch (error) {
36
+ console.error('HasData API error:', error);
37
+ return { answer: '', sources: [] };
38
+ }
39
+ }
40
+ export async function fetchAIOBatch(prompts, country = null, language = null, maxConcurrency = HASDATA_CONCURRENCY) {
41
+ return mapParallel(prompts, maxConcurrency, async (prompt) => fetchAIO(prompt, country, language));
42
+ }
package/esm/src/apis/hasdata/helpers.d.ts ADDED
@@ -0,0 +1,55 @@
1
+ import { type RetryConfig } from '../../helpers/async.js';
2
+ import type { Source } from '../../schemas/sources.schema.js';
3
+ export declare const HASDATA_CONCURRENCY = 29;
4
+ export declare const HASDATA_RETRY_CONFIG: RetryConfig;
5
+ export declare function getHasDataApiKey(): string;
6
+ export declare function fetchHasDataWithRetry(url: string, retryConfig?: RetryConfig): Promise<Response>;
7
+ interface ListItem {
8
+ title?: string;
9
+ snippet?: string;
10
+ list?: Array<ListItem>;
11
+ }
12
+ interface TextBlock {
13
+ type?: string;
14
+ snippet?: string;
15
+ snippetHighlightedWords?: Array<string>;
16
+ referenceIndexes?: Array<number>;
17
+ list?: Array<ListItem>;
18
+ rows?: Array<Array<string>>;
19
+ thumbnail?: string;
20
+ language?: string;
21
+ }
22
+ interface Reference {
23
+ index?: number;
24
+ title?: string;
25
+ link?: string;
26
+ url?: string;
27
+ snippet?: string;
28
+ source?: string;
29
+ }
30
+ export interface AIOverview {
31
+ textBlocks?: Array<TextBlock>;
32
+ references?: Array<Reference>;
33
+ aiOverview?: AIOverview;
34
+ pageToken?: string;
35
+ hasdataLink?: string;
36
+ }
37
+ interface RequestMetadata {
38
+ id?: string;
39
+ status?: string;
40
+ html?: string;
41
+ url?: string;
42
+ }
43
+ export interface AIMode {
44
+ requestMetadata?: RequestMetadata;
45
+ textBlocks?: Array<TextBlock>;
46
+ references?: Array<Reference>;
47
+ }
48
+ export interface AIOParsed {
49
+ answer: string;
50
+ sources: Array<Source>;
51
+ }
52
+ export declare function parseAIO(aio: AIOverview): AIOParsed;
53
+ export declare function parseAIM(aim: AIMode): AIOParsed;
54
+ export {};
55
+ //# sourceMappingURL=helpers.d.ts.map
package/esm/src/apis/hasdata/helpers.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAwJD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
package/esm/src/apis/hasdata/helpers.js ADDED
@@ -0,0 +1,182 @@
1
+ import * as dntShim from "../../../_dnt.shims.js";
2
+ import { withRetries } from '../../helpers/async.js';
3
+ import { extractDomain } from '../../helpers/urls.js';
4
+ export const HASDATA_CONCURRENCY = 29;
5
+ export const HASDATA_RETRY_CONFIG = {
6
+ maxRetries: 3,
7
+ initialDelay: 1000,
8
+ maxDelay: 8000,
9
+ backoffMultiplier: 2,
10
+ statusCodes: [429, 500]
11
+ };
12
+ export function getHasDataApiKey() {
13
+ const apiKey = dntShim.Deno.env.get('HASDATA_API_KEY');
14
+ if (!apiKey) {
15
+ throw new Error('HASDATA_API_KEY environment variable is required');
16
+ }
17
+ return apiKey;
18
+ }
19
+ export async function fetchHasDataWithRetry(url, retryConfig = HASDATA_RETRY_CONFIG) {
20
+ const headers = {
21
+ 'x-api-key': getHasDataApiKey()
22
+ };
23
+ const response = await withRetries(async () => fetch(url, {
24
+ headers,
25
+ signal: dntShim.dntGlobalThis.abortSignal
26
+ }), retryConfig);
27
+ if (!response.ok) {
28
+ const status = response.status;
29
+ let errorMessage;
30
+ if (status === 401) {
31
+ errorMessage = 'HasData API error (401): Invalid API key';
32
+ }
33
+ else if (status === 403) {
34
+ errorMessage = 'HasData API error (403): API credits exhausted';
35
+ }
36
+ else if (status === 404) {
37
+ errorMessage = 'HasData API error (404): Page not found';
38
+ }
39
+ else {
40
+ errorMessage = `HasData API error: ${status} ${response.statusText}`;
41
+ }
42
+ console.error(errorMessage);
43
+ throw new Error(errorMessage);
44
+ }
45
+ return response;
46
+ }
47
+ function removeCSSChunks(text) {
48
+ if (!text) {
49
+ return '';
50
+ }
51
+ // Remove CSS blocks that start with :root (anchored pattern - safe)
52
+ text = text.replace(/:root\{[^}]*\}(?:@supports[^}]*\{[^}]*\{[^}]*\}\})?(?:\.[a-zA-Z0-9_-]+\{[^}]*\})*\.?/g, '');
53
+ // Remove standalone @supports blocks (less common but safe anchor)
54
+ text = text.replace(/@supports[^\{]*\{(?:[^{}]|\{[^}]*\})*\}/g, '');
55
+ // Only remove class blocks if they appear in suspicious patterns (3+ consecutive)
56
+ text = text.replace(/(?:\.[a-zA-Z0-9_-]+\{[^}]*\}){3,}/g, '');
57
+ return text;
58
+ }
59
+ function cleanText(text) {
60
+ if (!text) {
61
+ return '';
62
+ }
63
+ text = removeCSSChunks(text);
64
+ text = text.replace(/\u00a0/g, ' ');
65
+ text = text.replace(/[ \t]+/g, ' ');
66
+ const lines = text.split('\n').map(line => line.trim());
67
+ const cleaned = [];
68
+ for (const line of lines) {
69
+ if (line || (cleaned.length > 0 && cleaned[cleaned.length - 1])) {
70
+ cleaned.push(line);
71
+ }
72
+ }
73
+ return cleaned.join('\n').trim();
74
+ }
75
+ function* iterListItems(items, indent = 0) {
76
+ const prefix = ' '.repeat(indent) + '- ';
77
+ for (const obj of items) {
78
+ const title = obj.title || '';
79
+ const snippet = obj.snippet || '';
80
+ let line;
81
+ if (title && snippet && title.endsWith(':')) {
82
+ line = `${title} ${snippet}`.trim();
83
+ }
84
+ else {
85
+ line = [title, snippet].filter(p => p).join(' ').trim();
86
+ }
87
+ if (line) {
88
+ yield prefix + cleanText(line);
89
+ }
90
+ if (obj.list && Array.isArray(obj.list)) {
91
+ yield* iterListItems(obj.list, indent + 1);
92
+ }
93
+ }
94
+ }
95
+ function formatTable(block) {
96
+ const rows = block.rows || [];
97
+ if (rows.length === 0) {
98
+ return '';
99
+ }
100
+ const out = [];
101
+ const header = rows[0].map(cell => removeCSSChunks(cell));
102
+ out.push('| ' + header.join(' | ') + ' |');
103
+ out.push('| ' + header.map(() => '---').join(' | ') + ' |');
104
+ for (let i = 1; i < rows.length; i++) {
105
+ const cleanedRow = rows[i].map(cell => removeCSSChunks(cell));
106
+ out.push('| ' + cleanedRow.join(' | ') + ' |');
107
+ }
108
+ return out.join('\n');
109
+ }
110
+ function formatCode(block) {
111
+ const lang = block.language || '';
112
+ const snippet = block.snippet || '';
113
+ if (!snippet) {
114
+ return '';
115
+ }
116
+ const header = `[Code${lang ? ': ' + lang : ''}]`;
117
+ return `${header}\n${snippet.trim()}`;
118
+ }
119
+ function parseAIResult(data, { allowNestedOverview = true } = {}) {
120
+ const textBlocks = data.textBlocks || (allowNestedOverview ? data.aiOverview?.textBlocks : []) || [];
121
+ const parts = [];
122
+ const handlers = {
123
+ paragraph: (b) => cleanText(b.snippet || ''),
124
+ list: (b) => Array.from(iterListItems(b.list || [])).join('\n'),
125
+ table: formatTable,
126
+ code: formatCode
127
+ };
128
+ for (const block of textBlocks) {
129
+ const btype = block.type || (block.snippet ? 'paragraph' : null);
130
+ if (!btype || btype === 'carousel') {
131
+ continue;
132
+ }
133
+ const handler = handlers[btype];
134
+ if (handler) {
135
+ const rendered = handler(block);
136
+ if (rendered) {
137
+ parts.push(rendered);
138
+ }
139
+ }
140
+ else {
141
+ const snippet = block.snippet || '';
142
+ if (snippet) {
143
+ parts.push(cleanText(snippet));
144
+ }
145
+ }
146
+ }
147
+ const deduped = [];
148
+ for (const p of parts) {
149
+ if (deduped.length === 0 || deduped[deduped.length - 1] !== p) {
150
+ deduped.push(p);
151
+ }
152
+ }
153
+ let answer = cleanText(deduped.join('\n\n'));
154
+ if (answer.length > 16000) {
155
+ console.warn('Warning: AI answer truncated to 16000 characters');
156
+ answer = answer.slice(0, 16000);
157
+ }
158
+ const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
159
+ const sources = [];
160
+ for (const r of refs) {
161
+ const link = r.link || r.url;
162
+ const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
163
+ if (link) {
164
+ sources.push({
165
+ title,
166
+ url: link,
167
+ domain: extractDomain(link)
168
+ });
169
+ }
170
+ }
171
+ return { answer, sources };
172
+ }
173
+ export function parseAIO(aio) {
174
+ return parseAIResult(aio, {
175
+ allowNestedOverview: true
176
+ });
177
+ }
178
+ export function parseAIM(aim) {
179
+ return parseAIResult(aim, {
180
+ allowNestedOverview: false
181
+ });
182
+ }
package/esm/src/apis/hasdata/index.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ export * from './helpers.js';
2
+ export * from './aio.js';
3
+ export * from './aim.js';
4
+ export * from './serp.js';
5
+ export * from './scrape.js';
6
+ //# sourceMappingURL=index.d.ts.map
package/esm/src/apis/hasdata/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/index.ts"],"names":[],"mappings":"AAAA,cAAc,cAAc,CAAC;AAC7B,cAAc,UAAU,CAAC;AACzB,cAAc,UAAU,CAAC;AACzB,cAAc,WAAW,CAAC;AAC1B,cAAc,aAAa,CAAC"}
package/esm/src/apis/hasdata/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export * from './helpers.js';
2
+ export * from './aio.js';
3
+ export * from './aim.js';
4
+ export * from './serp.js';
5
+ export * from './scrape.js';
package/esm/src/apis/hasdata/scrape.d.ts ADDED
@@ -0,0 +1,73 @@
1
+ type ProxyType = 'datacenter' | 'residential';
2
+ type OutputFormat = 'markdown' | 'text' | 'html';
3
+ interface JSScenarioAction {
4
+ click?: string;
5
+ fill?: [string, string];
6
+ wait?: number;
7
+ waitFor?: string;
8
+ scroll?: string;
9
+ evaluate?: string;
10
+ }
11
+ export interface ScrapeOptions {
12
+ formats: Array<OutputFormat>;
13
+ proxyType?: ProxyType;
14
+ proxyCountry?: string;
15
+ extractLinks?: boolean;
16
+ wait?: number;
17
+ waitFor?: string;
18
+ blockResources?: boolean;
19
+ blockAds?: boolean;
20
+ blockUrls?: Array<string>;
21
+ jsRendering?: boolean;
22
+ jsScenario?: Array<JSScenarioAction>;
23
+ headers?: Record<string, string>;
24
+ }
25
+ export interface ScrapeResponse {
26
+ url?: string;
27
+ markdown?: string;
28
+ text?: string;
29
+ html?: string;
30
+ links?: Array<string>;
31
+ }
32
+ export interface BatchJobResponse {
33
+ jobId: string;
34
+ status: string;
35
+ }
36
+ export interface BatchJobStatus {
37
+ jobId: string;
38
+ status: string;
39
+ data: {
40
+ status: string;
41
+ requestsCount: number;
42
+ responsesCount: number;
43
+ };
44
+ }
45
+ /**
46
+ * In batch jobs, results are only links to json files containing the actual scrape results.
47
+ */
48
+ export interface BatchResultItem {
49
+ query: Record<string, unknown>;
50
+ result: {
51
+ id: string;
52
+ status: string;
53
+ json?: string;
54
+ };
55
+ }
56
+ export interface BatchResults {
57
+ page: number;
58
+ limit: number;
59
+ total: number;
60
+ results: Array<BatchResultItem>;
61
+ }
62
+ export declare function scrapeWeb(url: string, options: ScrapeOptions): Promise<ScrapeResponse>;
63
+ export declare function scrapeWebBatch(urls: Array<string>, options: ScrapeOptions, maxConcurrency?: number): Promise<Array<ScrapeResponse>>;
64
+ /** Submit a batch scrape job to HasData API.
65
+ * IMPORTANT: results are not returned in original order! You need to match them by jobId and query.url.
66
+ */
67
+ export declare function submitBatchScrapeJob(urls: Array<string>, options: ScrapeOptions): Promise<BatchJobResponse>;
68
+ export declare function getBatchJobStatus(jobId: string): Promise<BatchJobStatus>;
69
+ export declare function waitForBatchCompletion(jobId: string, pollInterval?: number, maxWaitTime?: number): Promise<BatchJobStatus>;
70
+ export declare function getBatchJobPage(jobId: string, page?: number, limit?: number): Promise<BatchResults>;
71
+ export declare function runBatchScrape(urls: Array<string>, options: ScrapeOptions, pageSize?: number, pollInterval?: number, maxWaitTime?: number): Promise<Array<ScrapeResponse>>;
72
+ export {};
73
+ //# sourceMappingURL=scrape.d.ts.map
package/esm/src/apis/hasdata/scrape.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/scrape.ts"],"names":[],"mappings":"AAiBA,KAAK,SAAS,GAAG,YAAY,GAAG,aAAa,CAAC;AAE9C,KAAK,YAAY,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,CAAC;AAEjD,UAAU,gBAAgB;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;IAC7B,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,UAAU,CAAC,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,cAAc;IAC9B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE;QACL,MAAM,EAAE,MAAM,CAAC;QACf,aAAa,EAAE,MAAM,CAAC;QACtB,cAAc,EAAE,MAAM,CAAC;KACvB,CAAA;CACD;AAED;;EAEE;AAEF,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,MAAM,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,CAAC,EAAE,MAAM,CAAC;KACd,CAAC;CACF;AAED,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;CAChC;AAkJD,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,cAAc,CAAC,CAwC5F;AAED,wBAAsB,cAAc,CACnC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,EAAE,aAAa,EACtB,cAAc,GAAE,MAA4B,GAC1C,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAQhC;AAED;;EAEE;AACF,wBAAsB,oBAAoB,CACzC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,E
ACnB,OAAO,EAAE,aAAa,GACpB,OAAO,CAAC,gBAAgB,CAAC,CA8B3B;AAED,wBAAsB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAqB9E;AAED,wBAAsB,sBAAsB,CAC3C,KAAK,EAAE,MAAM,EACb,YAAY,GAAE,MAAa,EAC3B,WAAW,GAAE,MAAe,GAC1B,OAAO,CAAC,cAAc,CAAC,CAsBzB;AAED,wBAAsB,eAAe,CACpC,KAAK,EAAE,MAAM,EACb,IAAI,GAAE,MAAU,EAChB,KAAK,GAAE,MAAY,GACjB,OAAO,CAAC,YAAY,CAAC,CAuBvB;AAED,wBAAsB,cAAc,CACnC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,EAAE,aAAa,EACtB,QAAQ,GAAE,MAAY,EACtB,YAAY,GAAE,MAAa,EAC3B,WAAW,GAAE,MAAe,GAC1B,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CA8DhC"}