@graphext/cuery 0.9.4 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/browser.d.ts +1 -1
- package/esm/browser.js +1 -1
- package/esm/mod.d.ts +3 -3
- package/esm/mod.d.ts.map +1 -1
- package/esm/mod.js +3 -3
- package/esm/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
- package/{script/src/apis/brightdata → esm/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
- package/esm/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
- package/esm/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
- package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
- package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/brightdata.js +182 -0
- package/esm/src/apis/brightdata/llmScraper/index.d.ts +14 -0
- package/esm/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/index.js +97 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.js +171 -0
- package/{script/src/apis/chatgptScraper/scraper.d.ts → esm/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
- package/esm/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/scrape.js +184 -0
- package/esm/src/schemas/search.schema.d.ts +2 -2
- package/esm/src/schemas/search.schema.d.ts.map +1 -1
- package/esm/src/schemas/sources.schema.d.ts +1 -4
- package/esm/src/schemas/sources.schema.d.ts.map +1 -1
- package/esm/src/tools/sentiment.d.ts.map +1 -1
- package/esm/src/tools/sentiment.js +3 -2
- package/package.json +1 -1
- package/script/browser.d.ts +1 -1
- package/script/browser.js +1 -1
- package/script/mod.d.ts +3 -3
- package/script/mod.d.ts.map +1 -1
- package/script/mod.js +6 -6
- package/script/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
- package/{esm/src/apis/brightdata → script/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
- package/script/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
- package/script/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
- package/script/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
- package/script/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/brightdata.js +219 -0
- package/script/src/apis/brightdata/llmScraper/index.d.ts +14 -0
- package/script/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/index.js +140 -0
- package/script/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
- package/script/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/oxy.js +208 -0
- package/{esm/src/apis/chatgptScraper/scraper.d.ts → script/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
- package/script/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/scrape.js +224 -0
- package/script/src/schemas/search.schema.d.ts +2 -2
- package/script/src/schemas/search.schema.d.ts.map +1 -1
- package/script/src/schemas/sources.schema.d.ts +1 -4
- package/script/src/schemas/sources.schema.d.ts.map +1 -1
- package/script/src/tools/sentiment.d.ts.map +1 -1
- package/script/src/tools/sentiment.js +3 -2
- package/esm/src/apis/brightdata/index.d.ts.map +0 -1
- package/esm/src/apis/brightdata/scrape.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/brightdata.d.ts +0 -3
- package/esm/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/brightdata.js +0 -172
- package/esm/src/apis/chatgptScraper/index.d.ts +0 -10
- package/esm/src/apis/chatgptScraper/index.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/index.js +0 -41
- package/esm/src/apis/chatgptScraper/oxy.d.ts +0 -3
- package/esm/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/oxy.js +0 -156
- package/esm/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/scraper.js +0 -98
- package/script/src/apis/brightdata/index.d.ts.map +0 -1
- package/script/src/apis/brightdata/scrape.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/brightdata.d.ts +0 -3
- package/script/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/brightdata.js +0 -208
- package/script/src/apis/chatgptScraper/index.d.ts +0 -10
- package/script/src/apis/chatgptScraper/index.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/index.js +0 -81
- package/script/src/apis/chatgptScraper/oxy.d.ts +0 -3
- package/script/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/oxy.js +0 -192
- package/script/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/scraper.js +0 -139
- /package/esm/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
- /package/esm/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
- /package/script/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
- /package/script/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/scrape.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAC;AAOjE,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AA+BD,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,EACvG,aAAa,GAAE,KAAK,CAAC;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAM,GAC3E,KAAK,CAAC,MAAM,CAAC,CAoFf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,WAAW,CAU5G;AAMD,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,UAAU,CAkExE"}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
|
|
2
|
+
/**
|
|
3
|
+
* LLM Scraper - Core types and orchestration logic.
|
|
4
|
+
*
|
|
5
|
+
* Uses composition: providers supply functions, this module orchestrates them.
|
|
6
|
+
*/
|
|
7
|
+
import * as dntShim from "../../../../_dnt.shims.js";
|
|
8
|
+
import { mapParallel } from '../../../helpers/async.js';
|
|
9
|
+
import { extractDomain } from '../../../helpers/urls.js';
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Shared Utilities
|
|
12
|
+
// ============================================================================
|
|
13
|
+
/**
 * Read the `abortSignal` property from the shimmed global object.
 *
 * @returns Whatever is currently stored on `dntShim.dntGlobalThis.abortSignal`
 *          (`undefined` when nothing has been assigned there).
 */
export function getAbortSignal() {
    const { abortSignal } = dntShim.dntGlobalThis;
    return abortSignal;
}
|
|
16
|
+
/**
 * Normalise a scraped answer string.
 *
 * Steps, in order:
 *  1. drop Markdown image tags (`![alt](src)`),
 *  2. drop lines that consist only of the placeholder word `Image`,
 *  3. collapse runs of three or more newlines into a single blank line,
 *  4. trim leading/trailing whitespace.
 *
 * @param answer Raw answer text.
 * @returns The cleaned text.
 */
export function cleanAnswer(answer) {
    return answer
        .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
        // The closing newline is only asserted (lookahead), not consumed, so it
        // can serve as the opening newline of the next match. Consuming it left
        // every second line of a run of consecutive `Image` lines in place.
        .replace(/\n\s*Image\s*(?=\n)/g, '')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
|
|
23
|
+
/**
 * Derive a merge key from a URL by dropping its query string and fragment.
 *
 * For ordinary URLs the key is `origin + pathname`. URLs with an opaque origin
 * (e.g. `mailto:`, `tel:`, `data:`) serialise their origin as the literal
 * string "null"; for those the scheme (and host, when present) is used instead
 * so that different schemes with the same path do not share a key.
 *
 * @param url Absolute URL string.
 * @returns The merge key, or the raw input when it cannot be parsed as a URL.
 */
function urlMergeKey(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    if (parsed.origin !== 'null') {
        return parsed.origin + parsed.pathname;
    }
    // Opaque origin: rebuild a prefix from the scheme (plus host if any).
    const prefix = parsed.host ? `${parsed.protocol}//${parsed.host}` : parsed.protocol;
    return prefix + parsed.pathname;
}
|
|
36
|
+
/**
 * Tell whether `candidate` adds a fragment or a query string that `current`
 * lacks.
 *
 * @param current   URL currently stored.
 * @param candidate URL being considered as a replacement.
 * @returns `true` when `candidate` has a non-empty hash while `current` has
 *          none, or a non-empty search while `current` has none; `false`
 *          otherwise, including when either string fails to parse as a URL.
 */
function hasExtraUrlInfo(current, candidate) {
    let currentUrl;
    let candidateUrl;
    try {
        currentUrl = new URL(current);
        candidateUrl = new URL(candidate);
    }
    catch {
        return false;
    }
    if (candidateUrl.hash !== '' && currentUrl.hash === '') {
        return true;
    }
    return candidateUrl.search !== '' && currentUrl.search === '';
}
|
|
52
|
+
/**
 * Merge attached links and citations into a single de-duplicated source list.
 *
 * Entries are grouped by `urlMergeKey(url)`. Attached links are processed
 * first, in ascending `position` (links without a position go last), and are
 * always marked as cited; each distinct position is recorded on the source.
 * Citations are processed afterwards: a citation matching an existing source
 * overrides its title when the citation has one, ORs in its `cited` flag, and
 * may replace the stored URL when it carries an extra fragment/query. A
 * citation with no match is appended as a new source.
 *
 * @param citations     Citation records (`url`, optional `title`,
 *                      `description`, `text`, `cited`).
 * @param linksAttached Attached-link records (optional `url`, `text`,
 *                      `position`). Defaults to an empty list.
 * @returns Sources in first-seen order, each with `positions` (if any) sorted
 *          ascending.
 */
export function buildSources(citations, linksAttached = []) {
    const sources = [];
    const sourcesByKey = new Map();
    // Append a brand-new source and index it under its merge key.
    const addSource = (key, source) => {
        sources.push(source);
        sourcesByKey.set(key, source);
        return source;
    };
    const positionOf = (link) => link.position ?? Number.MAX_SAFE_INTEGER;

    // Pass 1: attached links, ordered by position.
    const orderedLinks = [...linksAttached].sort((a, b) => positionOf(a) - positionOf(b));
    for (const link of orderedLinks) {
        if (!link.url) {
            continue;
        }
        const linkTitle = link.text ?? '';
        const key = urlMergeKey(link.url);
        let source = sourcesByKey.get(key);
        if (source) {
            // Only fill in a title that is still missing.
            if (!source.title && linkTitle) {
                source.title = linkTitle;
            }
            source.cited = source.cited || true;
            // Keep the most informative URL (with fragment/params)
            if (hasExtraUrlInfo(source.url, link.url)) {
                source.url = link.url;
            }
        }
        else {
            source = addSource(key, {
                title: linkTitle,
                url: link.url,
                domain: extractDomain(link.url),
                cited: true,
            });
        }
        if (link.position != null) {
            source.positions ??= [];
            if (!source.positions.includes(link.position)) {
                source.positions.push(link.position);
            }
        }
    }

    // Pass 2: citations; their titles take precedence over link text.
    for (const citation of citations) {
        if (!citation.url) {
            continue;
        }
        const key = urlMergeKey(citation.url);
        const known = sourcesByKey.get(key);
        const title = citation.title || citation.description || citation.text || '';
        if (!known) {
            addSource(key, {
                title,
                url: citation.url,
                domain: extractDomain(citation.url),
                cited: citation.cited,
            });
            continue;
        }
        if (title) {
            known.title = title;
        }
        known.cited = known.cited || citation.cited;
        // Append extra fragment/params from citation
        if (hasExtraUrlInfo(known.url, citation.url)) {
            known.url = citation.url;
        }
    }

    // Positions were pushed in link order; normalise to ascending numeric order.
    for (const source of sources) {
        source.positions?.sort((a, b) => a - b);
    }
    return sources;
}
|
|
126
|
+
/**
 * Build a blank placeholder result for a job that produced no usable output,
 * so callers can keep one result row per input.
 *
 * @param providerName Label used as the `[prefix]` of the logged message.
 * @param errorMessage Optional message; when truthy it is written to
 *                     `console.error` together with `context`.
 * @param context      Optional extra value to log (an empty string is logged
 *                     when it is null/undefined).
 * @returns A fresh object with empty `prompt`, `answer`,
 *          `answer_text_markdown` and an empty `sources` array.
 */
export function emptyModelResult(providerName, errorMessage, context) {
    const placeholder = {
        prompt: '',
        answer: '',
        answer_text_markdown: '',
        sources: [],
    };
    if (!errorMessage) {
        return placeholder;
    }
    console.error(`[${providerName}] ${errorMessage}`, context ?? '');
    return placeholder;
}
|
|
141
|
+
// ============================================================================
|
|
142
|
+
// Scraper Factory
|
|
143
|
+
// ============================================================================
|
|
144
|
+
/**
 * Compose a batch scraper from the functions supplied by a provider.
 *
 * @param provider Object providing `name`, `maxConcurrency`,
 *                 `maxPromptsPerRequest`, `triggerJob`, `monitorJob`,
 *                 `downloadJob` and `transformResponse`.
 * @returns An object exposing the provider limits plus `triggerLLMBatch`,
 *          `downloadLLMSnapshots` and `scrapeLLMBatch`.
 */
export function createLLMScraper(provider) {
    const { name, maxConcurrency, maxPromptsPerRequest, triggerJob, monitorJob, downloadJob, transformResponse, } = provider;

    /**
     * Trigger one job per prompt, running at most `maxConcurrency` triggers
     * at a time, and return the resulting job ids.
     */
    async function triggerLLMBatch({ prompts, useSearch = false, countryISOCode = null, }) {
        const jobIds = await mapParallel(prompts, maxConcurrency, (prompt) => triggerJob(prompt, useSearch, countryISOCode));
        console.log(`[${name}] Triggered ${jobIds.length} jobs for ${prompts.length} prompts`);
        return jobIds;
    }

    /**
     * Wait for a single job, download it and transform it. Every failure mode
     * that the provider signals with a falsy value yields an empty placeholder
     * result instead of dropping the row.
     */
    async function resolveJob(jobId) {
        if (!jobId) {
            return emptyModelResult(name, 'No job ID provided');
        }
        const isReady = await monitorJob(jobId);
        if (!isReady) {
            return emptyModelResult(name, 'Job not ready or failed', jobId);
        }
        const raw = await downloadJob(jobId);
        if (!raw) {
            return emptyModelResult(name, 'Failed to download job', jobId);
        }
        return transformResponse(raw) ?? emptyModelResult(name, 'Failed to transform response', jobId);
    }

    /**
     * Resolve every job id to a result, one result per input id.
     *
     * Jobs are resolved with the same bounded concurrency used for triggering
     * rather than strictly one after another, so a slow job no longer delays
     * the monitoring/download of all jobs behind it.
     * NOTE(review): relies on `mapParallel` returning results in input order,
     * as `triggerLLMBatch` already appears to assume — confirm against the helper.
     */
    async function downloadLLMSnapshots(jobIds) {
        return mapParallel(jobIds, maxConcurrency, (jobId) => resolveJob(jobId));
    }

    /** Trigger all prompts, then resolve the resulting jobs. */
    async function scrapeLLMBatch(options) {
        const jobIds = await triggerLLMBatch(options);
        return downloadLLMSnapshots(jobIds);
    }

    return {
        maxConcurrency,
        maxPromptsPerRequest,
        scrapeLLMBatch,
        triggerLLMBatch,
        downloadLLMSnapshots,
    };
}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import type { z } from '../../deps/jsr.io/@zod/zod/4.3.6/src/index.js';
|
|
2
|
-
import type { Source
|
|
2
|
+
import type { Source } from './sources.schema.js';
|
|
3
3
|
export type ContextSize = 'low' | 'medium' | 'high';
|
|
4
4
|
export type ReasoningEffort = 'low' | 'medium' | 'high';
|
|
5
5
|
export interface SearchResult {
|
|
6
6
|
answer: string;
|
|
7
|
+
answer_text_markdown?: string;
|
|
7
8
|
sources: Array<Source>;
|
|
8
9
|
searchQueries?: Array<string>;
|
|
9
|
-
searchSources?: Array<SearchSource>;
|
|
10
10
|
}
|
|
11
11
|
export type SearchOptions = {
|
|
12
12
|
prompt: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;4FACwF;IACxF,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,YAAY,GAAG,oBAAoB,CAAC;CACjD,CAAC;AAEF,MAAM,MAAM,sBAAsB,CAAC,CAAC,IAAI,aAAa,GAAG;IACvD,cAAc,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,GAAG;IAChE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC"}
|
|
@@ -3,6 +3,7 @@ export interface Source {
|
|
|
3
3
|
url: string;
|
|
4
4
|
domain: string;
|
|
5
5
|
cited?: boolean;
|
|
6
|
+
snippet?: string;
|
|
6
7
|
positions?: Array<number>;
|
|
7
8
|
}
|
|
8
9
|
/**
|
|
@@ -22,8 +23,4 @@ export interface CategorizedSource extends EnrichedSource {
|
|
|
22
23
|
category: string | null;
|
|
23
24
|
subcategory: string | null;
|
|
24
25
|
}
|
|
25
|
-
export interface SearchSource extends Source {
|
|
26
|
-
rank: number;
|
|
27
|
-
datePublished: string | null;
|
|
28
|
-
}
|
|
29
26
|
//# sourceMappingURL=sources.schema.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sources.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/sources.schema.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAe,SAAQ,MAAM;IAC7C,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC/B,oBAAoB,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACxD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B
|
|
1
|
+
{"version":3,"file":"sources.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/sources.schema.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAe,SAAQ,MAAM;IAC7C,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC/B,oBAAoB,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACxD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sentiment.d.ts","sourceRoot":"","sources":["../../../src/src/tools/sentiment.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAW,MAAM,4BAA4B,CAAC;AACxE,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAsB,MAAM,gCAAgC,CAAC;AACzG,OAAO,EAAE,KAAK,WAAW,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AA6D7C,MAAM,WAAW,wBAAwB;IACxC,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,yCAAyC;IACzC,KAAK,CAAC,EAAE,YAAY,GAAG,IAAI,CAAC;CAC5B;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;IAC5F,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAE1B,MAAM,EAAE,wBAAwB,YAAK,EAAE,WAAW,EAAE,WAAW;
|
|
1
|
+
{"version":3,"file":"sentiment.d.ts","sourceRoot":"","sources":["../../../src/src/tools/sentiment.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACtD,OAAO,KAAK,EAAE,YAAY,EAAW,MAAM,4BAA4B,CAAC;AACxE,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAsB,MAAM,gCAAgC,CAAC;AACzG,OAAO,EAAE,KAAK,WAAW,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACpD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AA6D7C,MAAM,WAAW,wBAAwB;IACxC,uDAAuD;IACvD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,yCAAyC;IACzC,KAAK,CAAC,EAAE,YAAY,GAAG,IAAI,CAAC;CAC5B;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;IAC5F,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAE1B,MAAM,EAAE,wBAAwB,YAAK,EAAE,WAAW,EAAE,WAAW;cAwBxD,MAAM;;;;;;;;;;;;IAIzB,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,OAAO,EAAE;cAQ7B,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,OAAO;cAIrC,aAAa,CAAC,MAAM,EAAE,YAAY,GAAG,KAAK,CAAC,WAAW,CAAC;IAI1E;;OAEG;IACY,MAAM,CACpB,KAAK,EAAE,MAAM,GAAG,IAAI,EACpB,OAAO,GAAE,OAAO,CAAC,WAAW,CAAM,GAChC,OAAO,CAAC,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,CAAC;CA+BlD;AAMD;;GAEG;AACH,eAAO,MAAM,yBAAyB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAI5D,CAAC;AAWF;;GAEG;AACH,MAAM,WAAW,iCAAiC;IACjD,iDAAiD;IACjD,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,qBAAa,2BAA4B,SAAQ,UAAU;gBAC9C,MAAM,EAAE,iCAAiC,YAAK,EAAE,WAAW,EAAE,WAAW;CAOpF;AAED,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACvF,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gCAAgC,CAAC"}
|
|
@@ -73,8 +73,9 @@ export class SentimentExtractor extends Tool {
|
|
|
73
73
|
? ` or its products/services (${portfolio})`
|
|
74
74
|
: '';
|
|
75
75
|
return dedent(`
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
When an aspect relates specifically to "${brand.shortName}"${portfolioText}, set the "context" field to "${brand.shortName}".
|
|
77
|
+
If it relates to a different brand/entity, use that brand/entity as the "context" exactly as implied by the input, or null if unclear.
|
|
78
|
+
If additional instructions provide brand/entity names to use, apply those names exactly when setting the "context" field.
|
|
78
79
|
Keep aspect names and quoted text exactly as they appear in the original input.
|
|
79
80
|
Respond in language code ${brand.language}.
|
|
80
81
|
`);
|
package/package.json
CHANGED
package/script/browser.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Browser-safe exports for @graphext/cuery
|
|
3
3
|
*
|
|
4
4
|
* This module only exports types and pure functions that can safely run in the browser.
|
|
5
|
-
* It excludes server-only modules like
|
|
5
|
+
* It excludes server-only modules like llmScraper, googleAds, and API functions
|
|
6
6
|
* that depend on Node.js or Deno-specific APIs.
|
|
7
7
|
*
|
|
8
8
|
* @module
|
package/script/browser.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Browser-safe exports for @graphext/cuery
|
|
4
4
|
*
|
|
5
5
|
* This module only exports types and pure functions that can safely run in the browser.
|
|
6
|
-
* It excludes server-only modules like
|
|
6
|
+
* It excludes server-only modules like llmScraper, googleAds, and API functions
|
|
7
7
|
* that depend on Node.js or Deno-specific APIs.
|
|
8
8
|
*
|
|
9
9
|
* @module
|
package/script/mod.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
export * from './src/llm.js';
|
|
7
7
|
export { BatchResponse } from './src/response.js';
|
|
8
|
-
export {
|
|
8
|
+
export { calculateCost, getModelInfo, getModelPricing, getProvider, getProviderForModel, type ModelInfo, type ModelPricing, } from './src/providers/index.js';
|
|
9
9
|
export * from './src/tools/keywords.js';
|
|
10
10
|
export * from './src/tools/classifier.js';
|
|
11
11
|
export * from './src/tools/funnel.js';
|
|
@@ -26,8 +26,8 @@ export * from './src/tools/scorer.js';
|
|
|
26
26
|
export * from './src/helpers/seedKeywords.js';
|
|
27
27
|
export * from './src/tools/generic.js';
|
|
28
28
|
export * from './src/apis/hasdata/index.js';
|
|
29
|
-
export * from './src/apis/brightdata/index.js';
|
|
30
|
-
export * from './src/apis/
|
|
29
|
+
export * from './src/apis/brightdata/contentScraper/index.js';
|
|
30
|
+
export * from './src/apis/brightdata/llmScraper/index.js';
|
|
31
31
|
export * from './src/apis/googleAds/keywordPlanner.js';
|
|
32
32
|
export * from './src/schemas/index.js';
|
|
33
33
|
//# sourceMappingURL=mod.d.ts.map
|
package/script/mod.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,
|
|
1
|
+
{"version":3,"file":"mod.d.ts","sourceRoot":"","sources":["../src/mod.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,cAAc,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EACN,aAAa,EACb,YAAY,EACZ,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,KAAK,SAAS,EACd,KAAK,YAAY,GACjB,MAAM,0BAA0B,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AACxC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,wBAAwB,CAAC;AACvC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,uBAAuB,CAAC;AACtC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,0BAA0B,CAAC;AACzC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,cAAc,wBAAwB,CAAC;AACvC,cAAc,uBAAuB,CAAC;AACtC,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,+CAA+C,CAAC;AAC9D,cAAc,2CAA2C,CAAC;AAC1D,cAAc,wCAAwC,CAAC;AACvD,cAAc,wBAAwB,CAAC"}
|
package/script/mod.js
CHANGED
|
@@ -19,17 +19,17 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
19
19
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
20
20
|
};
|
|
21
21
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
-
exports.
|
|
22
|
+
exports.getProviderForModel = exports.getProvider = exports.getModelPricing = exports.getModelInfo = exports.calculateCost = exports.BatchResponse = void 0;
|
|
23
23
|
// Core LLM interface and types
|
|
24
24
|
__exportStar(require("./src/llm.js"), exports);
|
|
25
25
|
var response_js_1 = require("./src/response.js");
|
|
26
26
|
Object.defineProperty(exports, "BatchResponse", { enumerable: true, get: function () { return response_js_1.BatchResponse; } });
|
|
27
27
|
var index_js_1 = require("./src/providers/index.js");
|
|
28
|
+
Object.defineProperty(exports, "calculateCost", { enumerable: true, get: function () { return index_js_1.calculateCost; } });
|
|
29
|
+
Object.defineProperty(exports, "getModelInfo", { enumerable: true, get: function () { return index_js_1.getModelInfo; } });
|
|
30
|
+
Object.defineProperty(exports, "getModelPricing", { enumerable: true, get: function () { return index_js_1.getModelPricing; } });
|
|
28
31
|
Object.defineProperty(exports, "getProvider", { enumerable: true, get: function () { return index_js_1.getProvider; } });
|
|
29
32
|
Object.defineProperty(exports, "getProviderForModel", { enumerable: true, get: function () { return index_js_1.getProviderForModel; } });
|
|
30
|
-
Object.defineProperty(exports, "getModelPricing", { enumerable: true, get: function () { return index_js_1.getModelPricing; } });
|
|
31
|
-
Object.defineProperty(exports, "getModelInfo", { enumerable: true, get: function () { return index_js_1.getModelInfo; } });
|
|
32
|
-
Object.defineProperty(exports, "calculateCost", { enumerable: true, get: function () { return index_js_1.calculateCost; } });
|
|
33
33
|
// Tools
|
|
34
34
|
__exportStar(require("./src/tools/keywords.js"), exports);
|
|
35
35
|
__exportStar(require("./src/tools/classifier.js"), exports);
|
|
@@ -51,7 +51,7 @@ __exportStar(require("./src/tools/scorer.js"), exports);
|
|
|
51
51
|
__exportStar(require("./src/helpers/seedKeywords.js"), exports);
|
|
52
52
|
__exportStar(require("./src/tools/generic.js"), exports);
|
|
53
53
|
__exportStar(require("./src/apis/hasdata/index.js"), exports);
|
|
54
|
-
__exportStar(require("./src/apis/brightdata/index.js"), exports);
|
|
55
|
-
__exportStar(require("./src/apis/
|
|
54
|
+
__exportStar(require("./src/apis/brightdata/contentScraper/index.js"), exports);
|
|
55
|
+
__exportStar(require("./src/apis/brightdata/llmScraper/index.js"), exports);
|
|
56
56
|
__exportStar(require("./src/apis/googleAds/keywordPlanner.js"), exports);
|
|
57
57
|
__exportStar(require("./src/schemas/index.js"), exports);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/contentScraper/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/contentScraper/scrape.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAavF,MAAM,WAAW,uBAAuB;IACpC,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,8DAA8D;IAC9D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kCAAkC;IAClC,WAAW,CAAC,EAAE,WAAW,CAAC;CAC7B;AAED,MAAM,WAAW,wBAAwB;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AA4ED;;;GAGG;AACH,wBAAsB,gBAAgB,CAClC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,uBAA4B,GACtC,OAAO,CAAC,wBAAwB,CAAC,CAYnC;AAED;;;GAGG;AACH,wBAAsB,qBAAqB,CACvC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,GAAE,uBAA4B,EACrC,cAAc,GAAE,MAA+B,GAChD,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAQ1C"}
|
|
@@ -35,8 +35,8 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.scrapeBrightData = scrapeBrightData;
|
|
37
37
|
exports.scrapeBrightDataBatch = scrapeBrightDataBatch;
|
|
38
|
-
const dntShim = __importStar(require("
|
|
39
|
-
const async_js_1 = require("
|
|
38
|
+
const dntShim = __importStar(require("../../../../_dnt.shims.js"));
|
|
39
|
+
const async_js_1 = require("../../../helpers/async.js");
|
|
40
40
|
const BRIGHTDATA_CONCURRENCY = 10;
|
|
41
41
|
const BRIGHTDATA_RETRY_CONFIG = {
|
|
42
42
|
maxRetries: 3,
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { type ProviderFunctions } from './scrape.js';
|
|
2
|
+
/**
 * Settings accepted (as partial overrides) by `createBrightdataProvider`.
 *
 * NOTE(review): the field descriptions below are inferred from the field names
 * and the default values visible in the compiled provider module; confirm
 * against the provider implementation.
 */
interface BrightdataProviderConfig {
    /** Base URL of the HTTP API, e.g. `https://api.brightdata.com`. */
    apiBase: string;
    /** Identifier of the dataset to query. */
    datasetId: string;
    /** Names of the fields requested in the output. */
    outputFields: Array<string>;
    /** Optional additional field names. */
    extraFields?: Array<string>;
    /** Target site URL. */
    targetUrl: string;
    /** Optional hook producing extra key/value inputs from the request parameters. */
    extraInputs?: (params: {
        prompt: string;
        useSearch: boolean;
        countryISOCode: string | null;
    }) => Record<string, unknown>;
    /** Display name of the provider. */
    providerName: string;
    /** Concurrency limit advertised by the provider. */
    maxConcurrency: number;
    /** Number of prompts allowed per request. */
    maxPromptsPerRequest: number;
}
|
|
17
|
+
export declare function createBrightdataProvider(overrides?: Partial<BrightdataProviderConfig>): ProviderFunctions;
|
|
18
|
+
export declare const brightdataProvider: ProviderFunctions;
|
|
19
|
+
export {};
|
|
20
|
+
//# sourceMappingURL=brightdata.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"brightdata.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/brightdata.ts"],"names":[],"mappings":"AAcA,OAAO,EAA6C,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AA4BhG,UAAU,wBAAwB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,WAAW,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,CACb,MAAM,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,OAAO,CAAC;QAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,KACzE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;CAC7B;AA2DD,wBAAgB,wBAAwB,CACvC,SAAS,GAAE,OAAO,CAAC,wBAAwB,CAAM,GAC/C,iBAAiB,CAgJnB;AAMD,eAAO,MAAM,kBAAkB,EAAE,iBAA8C,CAAC"}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Compiler-emitted CommonJS interop helper. Reuses `this.__importStar` when one
// already exists; otherwise builds it with the IIFE below.
// The resulting function returns `mod` unchanged when `mod.__esModule` is
// truthy. Otherwise it returns a new object that has a binding for every own
// property name of `mod` except "default", plus `default` set to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
// Self-replacing function: the first call rebinds `ownKeys` to
// Object.getOwnPropertyNames (or to the for-in/hasOwnProperty fallback) and
// then delegates to the chosen implementation.
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
// Bind every own key except "default"; null/undefined modules yield no bindings.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.brightdataProvider = void 0;
|
|
37
|
+
exports.createBrightdataProvider = createBrightdataProvider;
|
|
38
|
+
/* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
|
|
39
|
+
/**
|
|
40
|
+
* Brightdata LLM Scraper Provider.
|
|
41
|
+
*
|
|
42
|
+
* API Flow:
|
|
43
|
+
* 1. Trigger: POST to /datasets/v3/trigger → returns snapshot_id
|
|
44
|
+
* 2. Monitor: GET /datasets/v3/progress/{snapshot_id} until ready
|
|
45
|
+
* 3. Download: GET /datasets/v3/snapshot/{snapshot_id}
|
|
46
|
+
*/
|
|
47
|
+
const dntShim = __importStar(require("../../../../_dnt.shims.js"));
|
|
48
|
+
const async_js_1 = require("../../../helpers/async.js");
|
|
49
|
+
const scrape_js_1 = require("./scrape.js");
|
|
50
|
+
// Baseline configuration for createBrightdataProvider(); caller overrides are
// spread on top of this object, so any key given there replaces the default.
const DEFAULT_BRIGHTDATA_PROVIDER_CONFIG = {
|
|
51
|
+
// Prefix for the /datasets/v3/{trigger,progress,snapshot} request URLs.
apiBase: 'https://api.brightdata.com',
|
|
52
|
+
// Sent as the `dataset_id` query parameter of the trigger request.
datasetId: 'gd_m7aof0k82r803d5bjm',
|
|
53
|
+
// Merged with the optional `extraFields`, de-duplicated, joined with '|' and
// sent as `custom_output_fields` in the trigger request body.
outputFields: [
|
|
54
|
+
'url',
|
|
55
|
+
'prompt',
|
|
56
|
+
'answer_text',
|
|
57
|
+
'answer_text_markdown',
|
|
58
|
+
'citations',
|
|
59
|
+
'links_attached',
|
|
60
|
+
'country',
|
|
61
|
+
'index',
|
|
62
|
+
],
|
|
63
|
+
// Sent as the `url` field of the trigger input record.
// NOTE(review): plain http:// scheme — confirm this is what the dataset expects.
targetUrl: 'http://chatgpt.com/',
|
|
64
|
+
// Used as the provider's `name` and as the "[...]" prefix of log messages.
providerName: 'Brightdata',
|
|
65
|
+
// Returned unchanged on the provider object as `maxConcurrency`.
maxConcurrency: 50,
|
|
66
|
+
// Returned unchanged on the provider object as `maxPromptsPerRequest`.
maxPromptsPerRequest: 1,
|
|
67
|
+
};
|
|
68
|
+
// Options passed to withRetries() for the trigger POST.
// NOTE(review): the exact meaning of maxRetries/initialDelay/statusCodes is
// defined by helpers/async.js, which is not visible here — presumably
// initialDelay is in milliseconds; confirm.
const TRIGGER_RETRY = {
|
|
69
|
+
maxRetries: 3,
|
|
70
|
+
initialDelay: 0,
|
|
71
|
+
statusCodes: [429, 500, 502, 503, 504],
|
|
72
|
+
};
|
|
73
|
+
// Options passed to withRetries() for the snapshot download GET. Unlike the
// other two lists, the status list here includes 202.
const DOWNLOAD_RETRY = {
|
|
74
|
+
maxRetries: 5,
|
|
75
|
+
initialDelay: 2000,
|
|
76
|
+
statusCodes: [202, 500, 502, 503, 504],
|
|
77
|
+
};
|
|
78
|
+
// Options passed to withRetries() for each progress poll.
const MONITOR_RETRY = {
|
|
79
|
+
maxRetries: 4,
|
|
80
|
+
initialDelay: 1000,
|
|
81
|
+
statusCodes: [408, 425, 429, 500, 502, 503, 504],
|
|
82
|
+
};
|
|
83
|
+
// Lookup set of the monitor status codes: monitorJob keeps polling after a
// non-OK response whose status is in this set and gives up on any other.
// The `?? []` yields an empty set if statusCodes were ever absent.
const MONITOR_RETRIABLE = new Set(MONITOR_RETRY.statusCodes ?? []);
|
|
84
|
+
// Upper bound on the total time monitorJob keeps polling, in milliseconds.
const MAX_WAIT_MS = 600_000; // 10 minutes
|
|
85
|
+
// Value passed to sleep() between two progress polls in monitorJob.
const POLL_INTERVAL_MS = 5_000;
|
|
86
|
+
// ============================================================================
|
|
87
|
+
// API Key
|
|
88
|
+
// ============================================================================
|
|
89
|
+
/**
 * Reads the Brightdata API key from the `BRIGHTDATA_API_KEY` environment
 * variable (through the Deno shim).
 *
 * @returns The configured key.
 * @throws {Error} When the variable is unset or empty.
 */
function getApiKey() {
    const key = dntShim.Deno.env.get('BRIGHTDATA_API_KEY');
    if (key) {
        return key;
    }
    throw new Error('BRIGHTDATA_API_KEY environment variable is required');
}
|
|
96
|
+
// ============================================================================
|
|
97
|
+
// Provider Functions
|
|
98
|
+
// ============================================================================
|
|
99
|
+
function createBrightdataProvider(overrides = {}) {
|
|
100
|
+
const config = { ...DEFAULT_BRIGHTDATA_PROVIDER_CONFIG, ...overrides };
|
|
101
|
+
const customOutputFields = [...new Set([...(config.outputFields ?? []), ...(config.extraFields ?? [])])].join('|');
|
|
102
|
+
async function triggerJob(prompt, useSearch, countryISOCode) {
|
|
103
|
+
const apiKey = getApiKey();
|
|
104
|
+
const url = `${config.apiBase}/datasets/v3/trigger?dataset_id=${config.datasetId}&include_errors=true`;
|
|
105
|
+
const input = {
|
|
106
|
+
url: config.targetUrl,
|
|
107
|
+
prompt,
|
|
108
|
+
country: countryISOCode || '',
|
|
109
|
+
index: 0,
|
|
110
|
+
};
|
|
111
|
+
Object.assign(input, config.extraInputs?.({ prompt, useSearch, countryISOCode }) ?? {});
|
|
112
|
+
const body = {
|
|
113
|
+
custom_output_fields: customOutputFields,
|
|
114
|
+
input: [input],
|
|
115
|
+
};
|
|
116
|
+
try {
|
|
117
|
+
const response = await (0, async_js_1.withRetries)(() => fetch(url, {
|
|
118
|
+
method: 'POST',
|
|
119
|
+
headers: {
|
|
120
|
+
'Authorization': `Bearer ${apiKey}`,
|
|
121
|
+
'Content-Type': 'application/json',
|
|
122
|
+
},
|
|
123
|
+
body: JSON.stringify(body),
|
|
124
|
+
signal: (0, scrape_js_1.getAbortSignal)(),
|
|
125
|
+
}), TRIGGER_RETRY);
|
|
126
|
+
if (!response.ok) {
|
|
127
|
+
console.error(`[${config.providerName}] Trigger error: ${response.status}`);
|
|
128
|
+
return null;
|
|
129
|
+
}
|
|
130
|
+
const data = await response.json();
|
|
131
|
+
return data?.snapshot_id || null;
|
|
132
|
+
}
|
|
133
|
+
catch (error) {
|
|
134
|
+
console.error(`[${config.providerName}] Trigger failed:`, error);
|
|
135
|
+
return null;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
async function monitorJob(snapshotId) {
|
|
139
|
+
const apiKey = getApiKey();
|
|
140
|
+
const url = `${config.apiBase}/datasets/v3/progress/${snapshotId}`;
|
|
141
|
+
const startTime = Date.now();
|
|
142
|
+
const abortSignal = (0, scrape_js_1.getAbortSignal)();
|
|
143
|
+
while (Date.now() - startTime < MAX_WAIT_MS) {
|
|
144
|
+
if (abortSignal?.aborted)
|
|
145
|
+
return false;
|
|
146
|
+
try {
|
|
147
|
+
const response = await (0, async_js_1.withRetries)(() => fetch(url, {
|
|
148
|
+
headers: { 'Authorization': `Bearer ${apiKey}` },
|
|
149
|
+
signal: abortSignal,
|
|
150
|
+
}), MONITOR_RETRY);
|
|
151
|
+
if (!response.ok) {
|
|
152
|
+
if (!MONITOR_RETRIABLE.has(response.status))
|
|
153
|
+
return false;
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
const status = await response.json();
|
|
157
|
+
if (status.status === 'ready' || status.status === 'complete')
|
|
158
|
+
return true;
|
|
159
|
+
if (status.status === 'failed' || status.status === 'error')
|
|
160
|
+
return false;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
catch (error) {
|
|
164
|
+
console.error(`[${config.providerName}] Monitor error:`, error);
|
|
165
|
+
}
|
|
166
|
+
await (0, async_js_1.sleep)(POLL_INTERVAL_MS, abortSignal);
|
|
167
|
+
}
|
|
168
|
+
console.error(`[${config.providerName}] Monitor timeout after ${MAX_WAIT_MS / 1000}s`);
|
|
169
|
+
return false;
|
|
170
|
+
}
|
|
171
|
+
async function downloadJob(snapshotId) {
|
|
172
|
+
const apiKey = getApiKey();
|
|
173
|
+
const url = `${config.apiBase}/datasets/v3/snapshot/${snapshotId}?format=json`;
|
|
174
|
+
try {
|
|
175
|
+
const response = await (0, async_js_1.withRetries)(() => fetch(url, {
|
|
176
|
+
headers: { 'Authorization': `Bearer ${apiKey}` },
|
|
177
|
+
signal: (0, scrape_js_1.getAbortSignal)(),
|
|
178
|
+
}), DOWNLOAD_RETRY);
|
|
179
|
+
if (!response.ok) {
|
|
180
|
+
console.error(`[${config.providerName}] Download error: ${response.status}`);
|
|
181
|
+
return null;
|
|
182
|
+
}
|
|
183
|
+
const data = await response.json();
|
|
184
|
+
return Array.isArray(data) ? data : null;
|
|
185
|
+
}
|
|
186
|
+
catch (error) {
|
|
187
|
+
console.error(`[${config.providerName}] Download failed:`, error);
|
|
188
|
+
return null;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
function transformResponse(raw) {
|
|
192
|
+
const responses = raw;
|
|
193
|
+
if (!responses || responses.length === 0)
|
|
194
|
+
return null;
|
|
195
|
+
const response = responses[0];
|
|
196
|
+
const answerText = (0, scrape_js_1.cleanAnswer)(response.answer_text || '');
|
|
197
|
+
const answerTextMarkdown = (0, scrape_js_1.cleanAnswer)(response.answer_text_markdown || '');
|
|
198
|
+
return {
|
|
199
|
+
prompt: response.prompt,
|
|
200
|
+
answer: answerText,
|
|
201
|
+
answer_text_markdown: answerTextMarkdown,
|
|
202
|
+
sources: (0, scrape_js_1.buildSources)(response.citations ?? [], response.links_attached ?? []),
|
|
203
|
+
searchQueries: response.web_search_query || [],
|
|
204
|
+
};
|
|
205
|
+
}
|
|
206
|
+
return {
|
|
207
|
+
name: config.providerName,
|
|
208
|
+
maxConcurrency: config.maxConcurrency,
|
|
209
|
+
maxPromptsPerRequest: config.maxPromptsPerRequest,
|
|
210
|
+
triggerJob,
|
|
211
|
+
monitorJob,
|
|
212
|
+
downloadJob,
|
|
213
|
+
transformResponse,
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
// ============================================================================
|
|
217
|
+
// Export
|
|
218
|
+
// ============================================================================
|
|
219
|
+
// Default provider instance: built at module load with no overrides, i.e.
// purely from DEFAULT_BRIGHTDATA_PROVIDER_CONFIG.
exports.brightdataProvider = createBrightdataProvider();
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ModelResult } from '../../../schemas/models.schema.js';
|
|
2
|
+
import { type BatchOptions } from './scrape.js';
|
|
3
|
+
export type { BatchOptions };
|
|
4
|
+
/**
 * Identifier of a scrape job, or `null`.
 * NOTE(review): presumably `null` marks a job that could not be triggered — confirm in index.ts.
 */
export type JobId = string | null;
|
|
5
|
+
/** Names of the scraper targets accepted by this module. */
export type ScraperTarget = 'chatgpt' | 'aim';
|
|
6
|
+
/**
 * Returns a concurrency limit as a number.
 * @param target Optional scraper target; the default used when omitted is not visible in this declaration.
 */
export declare function getMaxConcurrency(target?: ScraperTarget): number;
|
|
7
|
+
/**
 * Returns a prompts-per-request limit as a number.
 * @param target Optional scraper target; the default used when omitted is not visible in this declaration.
 */
export declare function getMaxPromptsPerRequest(target?: ScraperTarget): number;
|
|
8
|
+
/**
 * GPT variant of the batch scrape: takes batch options and resolves to an array of model results.
 * NOTE(review): presumably triggers, monitors and downloads in one call — confirm in index.ts.
 */
export declare function scrapeGPTBatch(options: BatchOptions): Promise<Array<ModelResult>>;
|
|
9
|
+
/** GPT variant of the batch trigger: resolves to an array whose entries are job id strings or `null`. */
export declare function triggerGPTBatch(options: BatchOptions): Promise<Array<string | null>>;
|
|
10
|
+
/**
 * GPT variant of the snapshot download: resolves to an array of model results.
 * @param jobIds Job ids (entries may be `null`), in the shape `triggerGPTBatch` resolves to.
 */
export declare function downloadGPTSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
|
|
11
|
+
/** AIM variant of the batch scrape; same signature as `scrapeGPTBatch`. */
export declare function scrapeAIMBatch(options: BatchOptions): Promise<Array<ModelResult>>;
|
|
12
|
+
/** AIM variant of the batch trigger; same signature as `triggerGPTBatch`. */
export declare function triggerAIMBatch(options: BatchOptions): Promise<Array<string | null>>;
|
|
13
|
+
/** AIM variant of the snapshot download; same signature as `downloadGPTSnapshots`. */
export declare function downloadAIMSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
|
|
14
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/index.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAE,KAAK,YAAY,EAAqC,MAAM,aAAa,CAAC;AAKnF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAClC,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,CAAC;AAuE9C,wBAAgB,iBAAiB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAE3E;AAED,wBAAgB,uBAAuB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAEjF;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG"}
|