@graphext/cuery 0.9.5 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/browser.d.ts +1 -1
- package/esm/browser.js +1 -1
- package/esm/mod.d.ts +3 -3
- package/esm/mod.d.ts.map +1 -1
- package/esm/mod.js +3 -3
- package/esm/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
- package/{script/src/apis/brightdata → esm/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
- package/esm/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
- package/esm/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
- package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
- package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/brightdata.js +182 -0
- package/esm/src/apis/brightdata/llmScraper/index.d.ts +14 -0
- package/esm/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/index.js +97 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/oxy.js +171 -0
- package/{script/src/apis/chatgptScraper/scraper.d.ts → esm/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
- package/esm/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
- package/esm/src/apis/brightdata/llmScraper/scrape.js +184 -0
- package/esm/src/apis/hasdata/helpers.d.ts.map +1 -1
- package/esm/src/apis/hasdata/helpers.js +56 -18
- package/esm/src/schemas/search.schema.d.ts +2 -2
- package/esm/src/schemas/search.schema.d.ts.map +1 -1
- package/esm/src/schemas/sources.schema.d.ts +1 -4
- package/esm/src/schemas/sources.schema.d.ts.map +1 -1
- package/package.json +1 -1
- package/script/browser.d.ts +1 -1
- package/script/browser.js +1 -1
- package/script/mod.d.ts +3 -3
- package/script/mod.d.ts.map +1 -1
- package/script/mod.js +6 -6
- package/script/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
- package/{esm/src/apis/brightdata → script/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
- package/script/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
- package/script/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
- package/script/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
- package/script/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/brightdata.js +219 -0
- package/script/src/apis/brightdata/llmScraper/index.d.ts +14 -0
- package/script/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/index.js +140 -0
- package/script/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
- package/script/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/oxy.js +208 -0
- package/{esm/src/apis/chatgptScraper/scraper.d.ts → script/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
- package/script/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
- package/script/src/apis/brightdata/llmScraper/scrape.js +224 -0
- package/script/src/apis/hasdata/helpers.d.ts.map +1 -1
- package/script/src/apis/hasdata/helpers.js +56 -18
- package/script/src/schemas/search.schema.d.ts +2 -2
- package/script/src/schemas/search.schema.d.ts.map +1 -1
- package/script/src/schemas/sources.schema.d.ts +1 -4
- package/script/src/schemas/sources.schema.d.ts.map +1 -1
- package/esm/src/apis/brightdata/index.d.ts.map +0 -1
- package/esm/src/apis/brightdata/scrape.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/brightdata.d.ts +0 -3
- package/esm/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/brightdata.js +0 -172
- package/esm/src/apis/chatgptScraper/index.d.ts +0 -10
- package/esm/src/apis/chatgptScraper/index.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/index.js +0 -41
- package/esm/src/apis/chatgptScraper/oxy.d.ts +0 -3
- package/esm/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/oxy.js +0 -156
- package/esm/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
- package/esm/src/apis/chatgptScraper/scraper.js +0 -98
- package/script/src/apis/brightdata/index.d.ts.map +0 -1
- package/script/src/apis/brightdata/scrape.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/brightdata.d.ts +0 -3
- package/script/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/brightdata.js +0 -208
- package/script/src/apis/chatgptScraper/index.d.ts +0 -10
- package/script/src/apis/chatgptScraper/index.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/index.js +0 -81
- package/script/src/apis/chatgptScraper/oxy.d.ts +0 -3
- package/script/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/oxy.js +0 -192
- package/script/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
- package/script/src/apis/chatgptScraper/scraper.js +0 -139
- /package/esm/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
- /package/esm/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
- /package/script/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
- /package/script/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.oxylabsProvider = void 0;
|
|
37
|
-
/* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
|
|
38
|
-
/**
|
|
39
|
-
* Oxylabs GPT Scraper Provider.
|
|
40
|
-
*
|
|
41
|
-
* API Flow (Async Push-Pull):
|
|
42
|
-
* 1. Trigger: POST to /v1/queries → returns job id
|
|
43
|
-
* 2. Monitor: GET /v1/queries/{id} until status is 'done'
|
|
44
|
-
* 3. Download: GET /v1/queries/{id}/results
|
|
45
|
-
*/
|
|
46
|
-
const dntShim = __importStar(require("../../../_dnt.shims.js"));
|
|
47
|
-
const async_js_1 = require("../../helpers/async.js");
|
|
48
|
-
const scraper_js_1 = require("./scraper.js");
|
|
49
|
-
// ============================================================================
|
|
50
|
-
// Constants
|
|
51
|
-
// ============================================================================
|
|
52
|
-
const API_BASE = 'https://data.oxylabs.io/v1';
|
|
53
|
-
const RETRY_CONFIG = {
|
|
54
|
-
maxRetries: 3,
|
|
55
|
-
initialDelay: 1000,
|
|
56
|
-
statusCodes: [429, 500, 502, 503, 504, 524, 612, 613]
|
|
57
|
-
};
|
|
58
|
-
const MAX_WAIT_MS = 600_000; // 10 minutes
|
|
59
|
-
const POLL_INTERVAL_MS = 5_000;
|
|
60
|
-
// ============================================================================
|
|
61
|
-
// Auth
|
|
62
|
-
// ============================================================================
|
|
63
|
-
function getAuthHeader() {
|
|
64
|
-
const username = dntShim.Deno.env.get('OXYLABS_USERNAME');
|
|
65
|
-
const password = dntShim.Deno.env.get('OXYLABS_PASSWORD');
|
|
66
|
-
if (!username || !password) {
|
|
67
|
-
throw new Error('OXYLABS_USERNAME and OXYLABS_PASSWORD environment variables are required');
|
|
68
|
-
}
|
|
69
|
-
return `Basic ${btoa(`${username}:${password}`)}`;
|
|
70
|
-
}
|
|
71
|
-
// ============================================================================
|
|
72
|
-
// Provider Functions
|
|
73
|
-
// ============================================================================
|
|
74
|
-
async function triggerJob(prompt, useSearch, countryISOCode) {
|
|
75
|
-
const authHeader = getAuthHeader();
|
|
76
|
-
const url = `${API_BASE}/queries`;
|
|
77
|
-
const body = {
|
|
78
|
-
source: 'chatgpt',
|
|
79
|
-
prompt,
|
|
80
|
-
parse: true,
|
|
81
|
-
search: true // Oxylabs requires search: true (cannot be false or blank)
|
|
82
|
-
};
|
|
83
|
-
if (countryISOCode) {
|
|
84
|
-
body.geo_location = countryISOCode;
|
|
85
|
-
}
|
|
86
|
-
try {
|
|
87
|
-
const response = await (0, async_js_1.withRetries)(() => fetch(url, {
|
|
88
|
-
method: 'POST',
|
|
89
|
-
headers: {
|
|
90
|
-
'Authorization': authHeader,
|
|
91
|
-
'Content-Type': 'application/json'
|
|
92
|
-
},
|
|
93
|
-
body: JSON.stringify(body),
|
|
94
|
-
signal: (0, scraper_js_1.getAbortSignal)()
|
|
95
|
-
}), RETRY_CONFIG);
|
|
96
|
-
if (!response.ok) {
|
|
97
|
-
console.error(`[Oxylabs] Trigger error: ${response.status}`);
|
|
98
|
-
return null;
|
|
99
|
-
}
|
|
100
|
-
const data = await response.json();
|
|
101
|
-
return data?.id || null;
|
|
102
|
-
}
|
|
103
|
-
catch (error) {
|
|
104
|
-
console.error('[Oxylabs] Trigger failed:', error);
|
|
105
|
-
return null;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
async function monitorJob(jobId) {
|
|
109
|
-
const authHeader = getAuthHeader();
|
|
110
|
-
const url = `${API_BASE}/queries/${jobId}`;
|
|
111
|
-
const startTime = Date.now();
|
|
112
|
-
const abortSignal = (0, scraper_js_1.getAbortSignal)();
|
|
113
|
-
while (Date.now() - startTime < MAX_WAIT_MS) {
|
|
114
|
-
if (abortSignal?.aborted)
|
|
115
|
-
return false;
|
|
116
|
-
try {
|
|
117
|
-
const response = await fetch(url, {
|
|
118
|
-
headers: { 'Authorization': authHeader },
|
|
119
|
-
signal: abortSignal
|
|
120
|
-
});
|
|
121
|
-
// 204 = job not completed yet, continue polling
|
|
122
|
-
if (response.status === 204) {
|
|
123
|
-
await (0, async_js_1.sleep)(POLL_INTERVAL_MS, abortSignal);
|
|
124
|
-
continue;
|
|
125
|
-
}
|
|
126
|
-
if (response.ok) {
|
|
127
|
-
const status = await response.json();
|
|
128
|
-
if (status.status === 'done')
|
|
129
|
-
return true;
|
|
130
|
-
if (status.status === 'faulted' || status.status === 'failed')
|
|
131
|
-
return false;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
catch (error) {
|
|
135
|
-
console.error('[Oxylabs] Monitor error:', error);
|
|
136
|
-
}
|
|
137
|
-
await (0, async_js_1.sleep)(POLL_INTERVAL_MS, abortSignal);
|
|
138
|
-
}
|
|
139
|
-
console.error(`[Oxylabs] Monitor timeout after ${MAX_WAIT_MS / 1000}s`);
|
|
140
|
-
return false;
|
|
141
|
-
}
|
|
142
|
-
async function downloadJob(jobId) {
|
|
143
|
-
const authHeader = getAuthHeader();
|
|
144
|
-
const url = `${API_BASE}/queries/${jobId}/results`;
|
|
145
|
-
try {
|
|
146
|
-
const response = await (0, async_js_1.withRetries)(() => fetch(url, {
|
|
147
|
-
headers: { 'Authorization': authHeader },
|
|
148
|
-
signal: (0, scraper_js_1.getAbortSignal)()
|
|
149
|
-
}), RETRY_CONFIG);
|
|
150
|
-
if (!response.ok) {
|
|
151
|
-
console.error(`[Oxylabs] Download error: ${response.status}`);
|
|
152
|
-
return null;
|
|
153
|
-
}
|
|
154
|
-
return await response.json();
|
|
155
|
-
}
|
|
156
|
-
catch (error) {
|
|
157
|
-
console.error('[Oxylabs] Download failed:', error);
|
|
158
|
-
return null;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
function transformResponse(raw) {
|
|
162
|
-
const response = raw;
|
|
163
|
-
const content = response?.results?.[0]?.content;
|
|
164
|
-
if (!content)
|
|
165
|
-
return null;
|
|
166
|
-
let answer = content.markdown_text || content.response_text || '';
|
|
167
|
-
answer = (0, scraper_js_1.cleanAnswer)(answer);
|
|
168
|
-
// Map section='citations' to cited=true (like Brightdata's cited field)
|
|
169
|
-
const citations = (content.citations ?? []).map(c => ({
|
|
170
|
-
...c,
|
|
171
|
-
cited: c.section === 'citations'
|
|
172
|
-
}));
|
|
173
|
-
return {
|
|
174
|
-
prompt: content.prompt || '',
|
|
175
|
-
answer,
|
|
176
|
-
sources: (0, scraper_js_1.buildSources)(citations),
|
|
177
|
-
searchQueries: [],
|
|
178
|
-
searchSources: []
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
// ============================================================================
|
|
182
|
-
// Export
|
|
183
|
-
// ============================================================================
|
|
184
|
-
exports.oxylabsProvider = {
|
|
185
|
-
name: 'Oxylabs',
|
|
186
|
-
maxConcurrency: 10,
|
|
187
|
-
maxPromptsPerRequest: 1,
|
|
188
|
-
triggerJob,
|
|
189
|
-
monitorJob,
|
|
190
|
-
downloadJob,
|
|
191
|
-
transformResponse
|
|
192
|
-
};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"scraper.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/chatgptScraper/scraper.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gCAAgC,CAAC;AAClE,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAO5E,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,EACvG,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,GAC3C,KAAK,CAAC,MAAM,CAAC,CAQf;AAED,wBAAgB,kBAAkB,CACjC,OAAO,EAAE,KAAK,CAAC;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,GACxG,KAAK,CAAC,YAAY,CAAC,CAQrB;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,WAAW,CAS5G;AAMD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,iBAAiB,GAAG,UAAU,CAkErE"}
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.getAbortSignal = getAbortSignal;
|
|
37
|
-
exports.cleanAnswer = cleanAnswer;
|
|
38
|
-
exports.buildSources = buildSources;
|
|
39
|
-
exports.buildSearchSources = buildSearchSources;
|
|
40
|
-
exports.emptyModelResult = emptyModelResult;
|
|
41
|
-
exports.createScraper = createScraper;
|
|
42
|
-
/* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
|
|
43
|
-
/**
|
|
44
|
-
* GPT Scraper - Core types and orchestration logic.
|
|
45
|
-
*
|
|
46
|
-
* Uses composition: providers supply functions, this module orchestrates them.
|
|
47
|
-
*/
|
|
48
|
-
const dntShim = __importStar(require("../../../_dnt.shims.js"));
|
|
49
|
-
const async_js_1 = require("../../helpers/async.js");
|
|
50
|
-
const urls_js_1 = require("../../helpers/urls.js");
|
|
51
|
-
// ============================================================================
|
|
52
|
-
// Shared Utilities
|
|
53
|
-
// ============================================================================
|
|
54
|
-
function getAbortSignal() {
|
|
55
|
-
return dntShim.dntGlobalThis.abortSignal;
|
|
56
|
-
}
|
|
57
|
-
function cleanAnswer(answer) {
|
|
58
|
-
return answer
|
|
59
|
-
.replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
|
|
60
|
-
.replace(/\n\s*Image\s*\n/g, '\n')
|
|
61
|
-
.replace(/\n{3,}/g, '\n\n')
|
|
62
|
-
.trim();
|
|
63
|
-
}
|
|
64
|
-
function buildSources(citations, linkPositions) {
|
|
65
|
-
return citations.map(c => ({
|
|
66
|
-
title: c.title || c.description || c.text || '',
|
|
67
|
-
url: c.url,
|
|
68
|
-
domain: (0, urls_js_1.extractDomain)(c.url),
|
|
69
|
-
cited: c.cited,
|
|
70
|
-
positions: linkPositions?.[c.url]
|
|
71
|
-
}));
|
|
72
|
-
}
|
|
73
|
-
function buildSearchSources(sources) {
|
|
74
|
-
return sources.map(s => ({
|
|
75
|
-
title: s.title || s.snippet || '',
|
|
76
|
-
url: s.url || '',
|
|
77
|
-
domain: s.url ? (0, urls_js_1.extractDomain)(s.url) : '',
|
|
78
|
-
rank: s.rank || 0,
|
|
79
|
-
datePublished: s.date_published || null
|
|
80
|
-
}));
|
|
81
|
-
}
|
|
82
|
-
/**
|
|
83
|
-
* Creates an empty model result for failed jobs.
|
|
84
|
-
* This ensures we always return the same number of rows as input.
|
|
85
|
-
*/
|
|
86
|
-
function emptyModelResult(providerName, errorMessage, context) {
|
|
87
|
-
if (errorMessage) {
|
|
88
|
-
console.error(`[${providerName}] ${errorMessage}`, context ?? '');
|
|
89
|
-
}
|
|
90
|
-
return {
|
|
91
|
-
prompt: '',
|
|
92
|
-
answer: '',
|
|
93
|
-
sources: []
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
// ============================================================================
|
|
97
|
-
// Scraper Factory
|
|
98
|
-
// ============================================================================
|
|
99
|
-
function createScraper(provider) {
|
|
100
|
-
const { name, maxConcurrency, maxPromptsPerRequest, triggerJob, monitorJob, downloadJob, transformResponse } = provider;
|
|
101
|
-
async function triggerGPTBatch({ prompts, useSearch = false, countryISOCode = null }) {
|
|
102
|
-
const jobIds = await (0, async_js_1.mapParallel)(prompts, maxConcurrency, (prompt) => triggerJob(prompt, useSearch, countryISOCode));
|
|
103
|
-
console.log(`[${name}] Triggered ${jobIds.length} jobs for ${prompts.length} prompts`);
|
|
104
|
-
return jobIds;
|
|
105
|
-
}
|
|
106
|
-
async function downloadGPTSnapshots(jobIds) {
|
|
107
|
-
const results = [];
|
|
108
|
-
for (const jobId of jobIds) {
|
|
109
|
-
if (!jobId) {
|
|
110
|
-
results.push(emptyModelResult(name, 'No job ID provided'));
|
|
111
|
-
continue;
|
|
112
|
-
}
|
|
113
|
-
const isReady = await monitorJob(jobId);
|
|
114
|
-
if (!isReady) {
|
|
115
|
-
results.push(emptyModelResult(name, 'Job not ready or failed', jobId));
|
|
116
|
-
continue;
|
|
117
|
-
}
|
|
118
|
-
const raw = await downloadJob(jobId);
|
|
119
|
-
if (!raw) {
|
|
120
|
-
results.push(emptyModelResult(name, 'Failed to download job', jobId));
|
|
121
|
-
continue;
|
|
122
|
-
}
|
|
123
|
-
const result = transformResponse(raw);
|
|
124
|
-
results.push(result ?? emptyModelResult(name, 'Failed to transform response', jobId));
|
|
125
|
-
}
|
|
126
|
-
return results;
|
|
127
|
-
}
|
|
128
|
-
async function scrapeGPTBatch(options) {
|
|
129
|
-
const jobIds = await triggerGPTBatch(options);
|
|
130
|
-
return downloadGPTSnapshots(jobIds);
|
|
131
|
-
}
|
|
132
|
-
return {
|
|
133
|
-
maxConcurrency,
|
|
134
|
-
maxPromptsPerRequest,
|
|
135
|
-
scrapeGPTBatch,
|
|
136
|
-
triggerGPTBatch,
|
|
137
|
-
downloadGPTSnapshots
|
|
138
|
-
};
|
|
139
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|