@graphext/cuery 0.9.5 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/esm/browser.d.ts +1 -1
  2. package/esm/browser.js +1 -1
  3. package/esm/mod.d.ts +3 -3
  4. package/esm/mod.d.ts.map +1 -1
  5. package/esm/mod.js +3 -3
  6. package/esm/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
  7. package/{script/src/apis/brightdata → esm/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
  8. package/esm/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
  9. package/esm/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
  10. package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
  11. package/esm/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
  12. package/esm/src/apis/brightdata/llmScraper/brightdata.js +182 -0
  13. package/esm/src/apis/brightdata/llmScraper/index.d.ts +14 -0
  14. package/esm/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
  15. package/esm/src/apis/brightdata/llmScraper/index.js +97 -0
  16. package/esm/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
  17. package/esm/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
  18. package/esm/src/apis/brightdata/llmScraper/oxy.js +171 -0
  19. package/{script/src/apis/chatgptScraper/scraper.d.ts → esm/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
  20. package/esm/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
  21. package/esm/src/apis/brightdata/llmScraper/scrape.js +184 -0
  22. package/esm/src/apis/hasdata/helpers.d.ts.map +1 -1
  23. package/esm/src/apis/hasdata/helpers.js +56 -18
  24. package/esm/src/schemas/search.schema.d.ts +2 -2
  25. package/esm/src/schemas/search.schema.d.ts.map +1 -1
  26. package/esm/src/schemas/sources.schema.d.ts +1 -4
  27. package/esm/src/schemas/sources.schema.d.ts.map +1 -1
  28. package/package.json +1 -1
  29. package/script/browser.d.ts +1 -1
  30. package/script/browser.js +1 -1
  31. package/script/mod.d.ts +3 -3
  32. package/script/mod.d.ts.map +1 -1
  33. package/script/mod.js +6 -6
  34. package/script/src/apis/brightdata/contentScraper/index.d.ts.map +1 -0
  35. package/{esm/src/apis/brightdata → script/src/apis/brightdata/contentScraper}/scrape.d.ts +1 -1
  36. package/script/src/apis/brightdata/contentScraper/scrape.d.ts.map +1 -0
  37. package/script/src/apis/brightdata/{scrape.js → contentScraper/scrape.js} +2 -2
  38. package/script/src/apis/brightdata/llmScraper/brightdata.d.ts +20 -0
  39. package/script/src/apis/brightdata/llmScraper/brightdata.d.ts.map +1 -0
  40. package/script/src/apis/brightdata/llmScraper/brightdata.js +219 -0
  41. package/script/src/apis/brightdata/llmScraper/index.d.ts +14 -0
  42. package/script/src/apis/brightdata/llmScraper/index.d.ts.map +1 -0
  43. package/script/src/apis/brightdata/llmScraper/index.js +140 -0
  44. package/script/src/apis/brightdata/llmScraper/oxy.d.ts +16 -0
  45. package/script/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -0
  46. package/script/src/apis/brightdata/llmScraper/oxy.js +208 -0
  47. package/{esm/src/apis/chatgptScraper/scraper.d.ts → script/src/apis/brightdata/llmScraper/scrape.d.ts} +12 -15
  48. package/script/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -0
  49. package/script/src/apis/brightdata/llmScraper/scrape.js +224 -0
  50. package/script/src/apis/hasdata/helpers.d.ts.map +1 -1
  51. package/script/src/apis/hasdata/helpers.js +56 -18
  52. package/script/src/schemas/search.schema.d.ts +2 -2
  53. package/script/src/schemas/search.schema.d.ts.map +1 -1
  54. package/script/src/schemas/sources.schema.d.ts +1 -4
  55. package/script/src/schemas/sources.schema.d.ts.map +1 -1
  56. package/esm/src/apis/brightdata/index.d.ts.map +0 -1
  57. package/esm/src/apis/brightdata/scrape.d.ts.map +0 -1
  58. package/esm/src/apis/chatgptScraper/brightdata.d.ts +0 -3
  59. package/esm/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
  60. package/esm/src/apis/chatgptScraper/brightdata.js +0 -172
  61. package/esm/src/apis/chatgptScraper/index.d.ts +0 -10
  62. package/esm/src/apis/chatgptScraper/index.d.ts.map +0 -1
  63. package/esm/src/apis/chatgptScraper/index.js +0 -41
  64. package/esm/src/apis/chatgptScraper/oxy.d.ts +0 -3
  65. package/esm/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
  66. package/esm/src/apis/chatgptScraper/oxy.js +0 -156
  67. package/esm/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
  68. package/esm/src/apis/chatgptScraper/scraper.js +0 -98
  69. package/script/src/apis/brightdata/index.d.ts.map +0 -1
  70. package/script/src/apis/brightdata/scrape.d.ts.map +0 -1
  71. package/script/src/apis/chatgptScraper/brightdata.d.ts +0 -3
  72. package/script/src/apis/chatgptScraper/brightdata.d.ts.map +0 -1
  73. package/script/src/apis/chatgptScraper/brightdata.js +0 -208
  74. package/script/src/apis/chatgptScraper/index.d.ts +0 -10
  75. package/script/src/apis/chatgptScraper/index.d.ts.map +0 -1
  76. package/script/src/apis/chatgptScraper/index.js +0 -81
  77. package/script/src/apis/chatgptScraper/oxy.d.ts +0 -3
  78. package/script/src/apis/chatgptScraper/oxy.d.ts.map +0 -1
  79. package/script/src/apis/chatgptScraper/oxy.js +0 -192
  80. package/script/src/apis/chatgptScraper/scraper.d.ts.map +0 -1
  81. package/script/src/apis/chatgptScraper/scraper.js +0 -139
  82. /package/esm/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
  83. /package/esm/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
  84. /package/script/src/apis/brightdata/{index.d.ts → contentScraper/index.d.ts} +0 -0
  85. /package/script/src/apis/brightdata/{index.js → contentScraper/index.js} +0 -0
@@ -0,0 +1,224 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.getAbortSignal = getAbortSignal;
37
+ exports.cleanAnswer = cleanAnswer;
38
+ exports.buildSources = buildSources;
39
+ exports.emptyModelResult = emptyModelResult;
40
+ exports.createLLMScraper = createLLMScraper;
41
+ /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
42
+ /**
43
+ * LLM Scraper - Core types and orchestration logic.
44
+ *
45
+ * Uses composition: providers supply functions, this module orchestrates them.
46
+ */
47
+ const dntShim = __importStar(require("../../../../_dnt.shims.js"));
48
+ const async_js_1 = require("../../../helpers/async.js");
49
+ const urls_js_1 = require("../../../helpers/urls.js");
50
+ // ============================================================================
51
+ // Shared Utilities
52
+ // ============================================================================
53
/**
 * Looks up the `abortSignal` property on the dnt global-this shim.
 *
 * @returns the value stored at `dntGlobalThis.abortSignal`, or `undefined`
 *          when nothing has been set there.
 */
function getAbortSignal() {
    const { abortSignal } = dntShim.dntGlobalThis;
    return abortSignal;
}
56
/**
 * Tidies a scraped markdown answer.
 *
 * Steps, in order:
 *  1. remove markdown image embeds (`![alt](src)`),
 *  2. remove lines that contain only the word "Image" (plus optional whitespace),
 *  3. collapse runs of three or more newlines into a single blank line,
 *  4. trim leading/trailing whitespace.
 *
 * @param answer raw answer text.
 * @returns the cleaned answer text.
 */
function cleanAnswer(answer) {
    return answer
        // The alt text is not needed, so no capture group.
        .replace(/!\[[^\]]*\]\([^)]+\)/g, '')
        // Use a lookahead for the closing newline instead of consuming it:
        // global matches never overlap, so consuming it would leave the
        // second of two consecutive "Image" lines without its leading "\n"
        // and therefore unmatched.
        .replace(/\n\s*Image\s*(?=\n)/g, '')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
63
/**
 * Builds the key used to decide whether two URLs refer to the same source:
 * the URL's origin followed by its pathname (query string and fragment are
 * left out). A string that cannot be parsed as a URL is used as its own key.
 */
function urlMergeKey(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    return `${parsed.origin}${parsed.pathname}`;
}
76
/**
 * Tells whether `candidate` adds a fragment or a query string that `current`
 * lacks. Only the "absent in current, present in candidate" direction counts;
 * differing values on both sides do not. If either string fails to parse as a
 * URL the answer is `false`.
 */
function hasExtraUrlInfo(current, candidate) {
    let known;
    let incoming;
    try {
        known = new URL(current);
        incoming = new URL(candidate);
    }
    catch {
        return false;
    }
    if (known.hash === '' && incoming.hash !== '') {
        return true;
    }
    return known.search === '' && incoming.search !== '';
}
92
/**
 * Merges attached links and citations into one de-duplicated source list.
 *
 * Entries are keyed by `urlMergeKey(url)`. Attached links are handled first,
 * ordered by `position` (links without a position go last), and are always
 * marked as cited; their positions are collected on the source. Citations are
 * handled next: a citation matching an existing entry overrides its title
 * (when the citation has one), ORs in its `cited` flag, and may upgrade the
 * stored URL to one carrying a fragment/query; otherwise it becomes a new
 * entry. Collected positions are sorted ascending before returning.
 *
 * @param citations     citation records (`url`, `title`/`description`/`text`, `cited`).
 * @param linksAttached link records (`url`, `text`, `position`); defaults to none.
 * @returns the sources in first-seen order.
 */
function buildSources(citations, linksAttached = []) {
    const ordered = [];
    const byKey = new Map();
    // Creates a new source and indexes it under its merge key.
    const register = (key, title, url, cited) => {
        const entry = {
            title,
            url,
            domain: (0, urls_js_1.extractDomain)(url),
            cited,
        };
        ordered.push(entry);
        byKey.set(key, entry);
        return entry;
    };
    // Work on a copy so the caller's array is not reordered.
    const linksByPosition = linksAttached.slice().sort((left, right) => {
        const leftRank = left.position ?? Number.MAX_SAFE_INTEGER;
        const rightRank = right.position ?? Number.MAX_SAFE_INTEGER;
        return leftRank - rightRank;
    });
    for (const link of linksByPosition) {
        if (!link.url) {
            continue;
        }
        const key = urlMergeKey(link.url);
        const label = link.text ?? '';
        let entry = byKey.get(key);
        if (entry) {
            // Only fill in a title that is still missing.
            if (label && !entry.title) {
                entry.title = label;
            }
            entry.cited = entry.cited || true;
            // Prefer the URL variant that carries a fragment/query.
            if (hasExtraUrlInfo(entry.url, link.url)) {
                entry.url = link.url;
            }
        }
        else {
            entry = register(key, label, link.url, true);
        }
        if (link.position == null) {
            continue;
        }
        entry.positions ??= [];
        if (!entry.positions.includes(link.position)) {
            entry.positions.push(link.position);
        }
    }
    for (const citation of citations) {
        if (!citation.url) {
            continue;
        }
        const key = urlMergeKey(citation.url);
        const known = byKey.get(key);
        const title = citation.title || citation.description || citation.text || '';
        if (!known) {
            register(key, title, citation.url, citation.cited);
            continue;
        }
        // A citation title, when present, wins over whatever was stored.
        if (title) {
            known.title = title;
        }
        known.cited = known.cited || citation.cited;
        if (hasExtraUrlInfo(known.url, citation.url)) {
            known.url = citation.url;
        }
    }
    ordered.forEach((entry) => {
        entry.positions?.sort((a, b) => a - b);
    });
    return ordered;
}
166
/**
 * Produces a blank model result (empty prompt, answer, markdown answer and
 * sources), used as a placeholder for a job that yielded nothing.
 *
 * When `errorMessage` is truthy it is written to `console.error`, prefixed
 * with the provider name in brackets and followed by `context` (or an empty
 * string when no context is given).
 *
 * @param providerName label used as the log prefix.
 * @param errorMessage optional reason to log.
 * @param context      optional extra value appended to the log call.
 * @returns a freshly created empty result object.
 */
function emptyModelResult(providerName, errorMessage, context) {
    const blank = {
        prompt: '',
        answer: '',
        answer_text_markdown: '',
        sources: [],
    };
    if (!errorMessage) {
        return blank;
    }
    console.error(`[${providerName}] ${errorMessage}`, context ?? '');
    return blank;
}
181
+ // ============================================================================
182
+ // Scraper Factory
183
+ // ============================================================================
184
/**
 * Assembles a scraper from the functions a provider supplies.
 *
 * The provider's `triggerJob`, `monitorJob`, `downloadJob` and
 * `transformResponse` are captured once, at creation time, and are invoked as
 * plain functions.
 *
 * @param provider object with `name`, `maxConcurrency`, `maxPromptsPerRequest`
 *                 and the four job functions listed above.
 * @returns an object exposing the provider limits plus `scrapeLLMBatch`,
 *          `triggerLLMBatch` and `downloadLLMSnapshots`.
 */
function createLLMScraper(provider) {
    const { name, maxConcurrency, maxPromptsPerRequest, triggerJob, monitorJob, downloadJob, transformResponse, } = provider;
    // Turns a single job id into a result; every failure path yields an empty result.
    const resolveJob = async (jobId) => {
        if (!jobId) {
            return emptyModelResult(name, 'No job ID provided');
        }
        const ready = await monitorJob(jobId);
        if (!ready) {
            return emptyModelResult(name, 'Job not ready or failed', jobId);
        }
        const payload = await downloadJob(jobId);
        if (!payload) {
            return emptyModelResult(name, 'Failed to download job', jobId);
        }
        return transformResponse(payload) ?? emptyModelResult(name, 'Failed to transform response', jobId);
    };
    /** Starts one job per prompt via `mapParallel`, passing `maxConcurrency`. */
    async function triggerLLMBatch({ prompts, useSearch = false, countryISOCode = null, }) {
        const startJob = (prompt) => triggerJob(prompt, useSearch, countryISOCode);
        const jobIds = await (0, async_js_1.mapParallel)(prompts, maxConcurrency, startJob);
        console.log(`[${name}] Triggered ${jobIds.length} jobs for ${prompts.length} prompts`);
        return jobIds;
    }
    /** Resolves the job ids one after another, producing one result per id. */
    async function downloadLLMSnapshots(jobIds) {
        const collected = [];
        for (const jobId of jobIds) {
            collected.push(await resolveJob(jobId));
        }
        return collected;
    }
    /** Triggers the batch, then downloads the snapshots of the resulting jobs. */
    async function scrapeLLMBatch(options) {
        return downloadLLMSnapshots(await triggerLLMBatch(options));
    }
    return {
        maxConcurrency,
        maxPromptsPerRequest,
        scrapeLLMBatch,
        triggerLLMBatch,
        downloadLLMSnapshots,
    };
}
@@ -1 +1 @@
1
- {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAwJD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAqMD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
@@ -156,8 +156,38 @@ function formatCode(block) {
156
156
  const header = `[Code${lang ? ': ' + lang : ''}]`;
157
157
  return `${header}\n${snippet.trim()}`;
158
158
  }
159
/**
 * Renders zero-based reference indexes as 1-based bracket markers, e.g.
 * `[0, 2]` becomes `" [1][3]"` (note the leading space). An empty list
 * renders as an empty string.
 */
function formatCitationMarkers(refIndexes) {
    const markers = refIndexes.reduce((text, zeroBased) => text + `[${zeroBased + 1}]`, '');
    return markers === '' ? '' : ` ${markers}`;
}
159
165
  function parseAIResult(data, { allowNestedOverview = true } = {}) {
160
166
  const textBlocks = data.textBlocks || (allowNestedOverview ? data.aiOverview?.textBlocks : []) || [];
167
+ // Build reference index → source index mapping and track cited refs
168
+ const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
169
+ const sources = [];
170
+ const refIndexToSourceIndex = new Map();
171
+ for (const r of refs) {
172
+ const link = r.link || r.url;
173
+ const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
174
+ if (link && r.index != null) {
175
+ // Deduplicate by URL
176
+ const existingIdx = sources.findIndex(s => s.url === link);
177
+ if (existingIdx >= 0) {
178
+ refIndexToSourceIndex.set(r.index, existingIdx);
179
+ }
180
+ else {
181
+ refIndexToSourceIndex.set(r.index, sources.length);
182
+ sources.push({
183
+ title,
184
+ url: link,
185
+ domain: (0, urls_js_1.extractDomain)(link)
186
+ });
187
+ }
188
+ }
189
+ }
190
+ const citedSourceIndexes = new Set();
161
191
  const parts = [];
162
192
  const handlers = {
163
193
  paragraph: (b) => cleanText(b.snippet || ''),
@@ -171,19 +201,40 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
171
201
  continue;
172
202
  }
173
203
  const handler = handlers[btype];
204
+ let rendered = '';
174
205
  if (handler) {
175
- const rendered = handler(block);
176
- if (rendered) {
177
- parts.push(rendered);
178
- }
206
+ rendered = handler(block);
179
207
  }
180
208
  else {
181
209
  const snippet = block.snippet || '';
182
210
  if (snippet) {
183
- parts.push(cleanText(snippet));
211
+ rendered = cleanText(snippet);
212
+ }
213
+ }
214
+ if (rendered) {
215
+ // Append citation markers and track positions
216
+ const refIndexes = block.referenceIndexes || [];
217
+ if (refIndexes.length > 0) {
218
+ // Map ref indexes to 1-based source indexes for display
219
+ const sourceIndexes = refIndexes
220
+ .map(ri => refIndexToSourceIndex.get(ri))
221
+ .filter((si) => si != null);
222
+ for (const si of sourceIndexes) {
223
+ citedSourceIndexes.add(si);
224
+ sources[si].positions ??= [];
225
+ if (!sources[si].positions.includes(parts.length)) {
226
+ sources[si].positions.push(parts.length);
227
+ }
228
+ }
229
+ rendered += formatCitationMarkers(sourceIndexes.filter((v, i, a) => a.indexOf(v) === i));
184
230
  }
231
+ parts.push(rendered);
185
232
  }
186
233
  }
234
+ // Mark cited sources
235
+ for (const si of citedSourceIndexes) {
236
+ sources[si].cited = true;
237
+ }
187
238
  const deduped = [];
188
239
  for (const p of parts) {
189
240
  if (deduped.length === 0 || deduped[deduped.length - 1] !== p) {
@@ -195,19 +246,6 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
195
246
  console.warn('Warning: AI answer truncated to 16000 characters');
196
247
  answer = answer.slice(0, 16000);
197
248
  }
198
- const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
199
- const sources = [];
200
- for (const r of refs) {
201
- const link = r.link || r.url;
202
- const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
203
- if (link) {
204
- sources.push({
205
- title,
206
- url: link,
207
- domain: (0, urls_js_1.extractDomain)(link)
208
- });
209
- }
210
- }
211
249
  return { answer, sources };
212
250
  }
213
251
  function parseAIO(aio) {
@@ -1,12 +1,12 @@
1
1
  import type { z } from '../../deps/jsr.io/@zod/zod/4.3.6/src/index.js';
2
- import type { Source, SearchSource } from './sources.schema.js';
2
+ import type { Source } from './sources.schema.js';
3
3
  export type ContextSize = 'low' | 'medium' | 'high';
4
4
  export type ReasoningEffort = 'low' | 'medium' | 'high';
5
5
  export interface SearchResult {
6
6
  answer: string;
7
+ answer_text_markdown?: string;
7
8
  sources: Array<Source>;
8
9
  searchQueries?: Array<string>;
9
- searchSources?: Array<SearchSource>;
10
10
  }
11
11
  export type SearchOptions = {
12
12
  prompt: string;
@@ -1 +1 @@
1
- {"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAChE,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAGxD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC9B,aAAa,CAAC,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;CACpC;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;4FACwF;IACxF,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,YAAY,GAAG,oBAAoB,CAAA;CAChD,CAAC;AAEF,MAAM,MAAM,sBAAsB,CAAC,CAAC,IAAI,aAAa,GAAG;IACvD,cAAc,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,GAAG;IAChE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAA;CACvB,CAAA"}
1
+ {"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;4FACwF;IACxF,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,YAAY,GAAG,oBAAoB,CAAC;CACjD,CAAC;AAEF,MAAM,MAAM,sBAAsB,CAAC,CAAC,IAAI,aAAa,GAAG;IACvD,cAAc,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,GAAG;IAChE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC"}
@@ -3,6 +3,7 @@ export interface Source {
3
3
  url: string;
4
4
  domain: string;
5
5
  cited?: boolean;
6
+ snippet?: string;
6
7
  positions?: Array<number>;
7
8
  }
8
9
  /**
@@ -22,8 +23,4 @@ export interface CategorizedSource extends EnrichedSource {
22
23
  category: string | null;
23
24
  subcategory: string | null;
24
25
  }
25
- export interface SearchSource extends Source {
26
- rank: number;
27
- datePublished: string | null;
28
- }
29
26
  //# sourceMappingURL=sources.schema.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"sources.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/sources.schema.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAe,SAAQ,MAAM;IAC7C,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC/B,oBAAoB,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACxD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED,MAAM,WAAW,YAAa,SAAQ,MAAM;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B"}
1
+ {"version":3,"file":"sources.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/sources.schema.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CAC1B;AAED;;;GAGG;AACH,MAAM,WAAW,cAAe,SAAQ,MAAM;IAC7C,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC/B,oBAAoB,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACpC,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACxD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/brightdata/scrape.ts"],"names":[],"mappings":"AACA,OAAO,EAA4B,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAapF,MAAM,WAAW,uBAAuB;IACpC,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6DAA6D;IAC7D,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,8DAA8D;IAC9D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wDAAwD;IACxD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kCAAkC;IAClC,WAAW,CAAC,EAAE,WAAW,CAAC;CAC7B;AAED,MAAM,WAAW,wBAAwB;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;CACjB;AA4ED;;;GAGG;AACH,wBAAsB,gBAAgB,CAClC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,uBAA4B,GACtC,OAAO,CAAC,wBAAwB,CAAC,CAYnC;AAED;;;GAGG;AACH,wBAAsB,qBAAqB,CACvC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,EACnB,OAAO,GAAE,uBAA4B,EACrC,cAAc,GAAE,MAA+B,GAChD,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAQ1C"}
@@ -1,3 +0,0 @@
1
- import { type ProviderFunctions } from './scraper.js';
2
- export declare const brightdataProvider: ProviderFunctions;
3
- //# sourceMappingURL=brightdata.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"brightdata.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/chatgptScraper/brightdata.ts"],"names":[],"mappings":"AAeA,OAAO,EACN,KAAK,iBAAiB,EAKtB,MAAM,cAAc,CAAC;AAmNtB,eAAO,MAAM,kBAAkB,EAAE,iBAQhC,CAAC"}
@@ -1,172 +0,0 @@
1
- /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
2
- /**
3
- * Brightdata GPT Scraper Provider.
4
- *
5
- * API Flow:
6
- * 1. Trigger: POST to /datasets/v3/trigger → returns snapshot_id
7
- * 2. Monitor: GET /datasets/v3/progress/{snapshot_id} until ready
8
- * 3. Download: GET /datasets/v3/snapshot/{snapshot_id}
9
- */
10
- import * as dntShim from "../../../_dnt.shims.js";
11
- import { withRetries, sleep } from '../../helpers/async.js';
12
- import { getAbortSignal, cleanAnswer, buildSources, buildSearchSources } from './scraper.js';
13
- // ============================================================================
14
- // Constants
15
- // ============================================================================
16
// Base URL shared by the trigger / progress / snapshot endpoints.
const API_BASE = 'https://api.brightdata.com';
// Dataset identifier sent as `dataset_id` when triggering a job.
const DATASET_ID = 'gd_m7aof0k82r803d5bjm';
// Pipe-separated list of fields requested via `custom_output_fields`.
const OUTPUT_FIELDS = [
    'url',
    'prompt',
    'answer_text',
    'answer_text_markdown',
    'citations',
    'links_attached',
    'search_sources',
    'country',
    'model',
    'web_search_triggered',
    'web_search_query',
    'index',
].join('|');
// Retry settings handed to `withRetries` for the trigger request.
const TRIGGER_RETRY = {
    maxRetries: 3,
    initialDelay: 0,
    statusCodes: [429, 500, 502, 503, 504],
};
// Retry settings handed to `withRetries` for the snapshot download.
const DOWNLOAD_RETRY = {
    maxRetries: 5,
    initialDelay: 2000,
    statusCodes: [202, 500, 502, 503, 504],
};
// Retry settings handed to `withRetries` for each progress poll.
const MONITOR_RETRY = {
    maxRetries: 4,
    initialDelay: 1000,
    statusCodes: [408, 425, 429, 500, 502, 503, 504],
};
// Set form of the monitor retry codes, for membership checks while polling.
const MONITOR_RETRIABLE = new Set(MONITOR_RETRY.statusCodes ?? []);
// Upper bound on total polling time: 10 minutes.
const MAX_WAIT_MS = 10 * 60 * 1000;
// Pause between two progress polls: 5 seconds.
const POLL_INTERVAL_MS = 5 * 1000;
37
- // ============================================================================
38
- // API Key
39
- // ============================================================================
40
/**
 * Reads the Brightdata API key from the `BRIGHTDATA_API_KEY` environment
 * variable (through the dnt `Deno.env` shim).
 *
 * @returns the key.
 * @throws Error when the variable is unset or empty.
 */
function getApiKey() {
    const apiKey = dntShim.Deno.env.get('BRIGHTDATA_API_KEY');
    if (apiKey) {
        return apiKey;
    }
    throw new Error('BRIGHTDATA_API_KEY environment variable is required');
}
47
- // ============================================================================
48
- // Provider Functions
49
- // ============================================================================
50
/**
 * Starts a scraping job for a single prompt.
 *
 * POSTs to the dataset trigger endpoint (with retries per `TRIGGER_RETRY`)
 * and returns the `snapshot_id` from the JSON reply. A non-OK response, a
 * reply without a snapshot id, or any error thrown while sending/parsing is
 * logged and reported as `null`. A missing API key still throws, because the
 * key is read before the guarded section.
 *
 * @param prompt         prompt text to submit.
 * @param useSearch      forwarded as the `web_search` input field.
 * @param countryISOCode forwarded as `country`; falsy values become `''`.
 * @returns the snapshot id, or `null` on failure.
 */
async function triggerJob(prompt, useSearch, countryISOCode) {
    const apiKey = getApiKey();
    const endpoint = `${API_BASE}/datasets/v3/trigger?dataset_id=${DATASET_ID}&include_errors=true`;
    const payload = {
        custom_output_fields: OUTPUT_FIELDS,
        input: [{
                url: 'http://chatgpt.com/',
                prompt,
                web_search: useSearch,
                country: countryISOCode || '',
                index: 0
            }]
    };
    // Built per attempt so each retry serializes the body and reads the signal anew.
    const send = () => fetch(endpoint, {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${apiKey}`,
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(payload),
        signal: getAbortSignal()
    });
    try {
        const response = await withRetries(send, TRIGGER_RETRY);
        if (response.ok) {
            const data = await response.json();
            return data?.snapshot_id || null;
        }
        console.error(`[Brightdata] Trigger error: ${response.status}`);
        return null;
    }
    catch (error) {
        console.error('[Brightdata] Trigger failed:', error);
        return null;
    }
}
85
/**
 * Polls the progress endpoint until the snapshot is finished.
 *
 * Each poll (retried per `MONITOR_RETRY`) ends the wait with `true` when the
 * reported status is `ready`/`complete`, and with `false` when it is
 * `failed`/`error` or when the HTTP status is a non-OK code outside
 * `MONITOR_RETRIABLE`. Anything else — including errors thrown by the poll,
 * which are logged — leads to another poll after `POLL_INTERVAL_MS`. The wait
 * also ends with `false` once the abort signal is set or `MAX_WAIT_MS` has
 * elapsed.
 *
 * @param snapshotId id returned by the trigger call.
 * @returns whether the snapshot became ready.
 */
async function monitorJob(snapshotId) {
    const apiKey = getApiKey();
    const progressUrl = `${API_BASE}/datasets/v3/progress/${snapshotId}`;
    const startedAt = Date.now();
    const abortSignal = getAbortSignal();
    const poll = () => withRetries(() => fetch(progressUrl, {
        headers: { 'Authorization': `Bearer ${apiKey}` },
        signal: abortSignal
    }), MONITOR_RETRY);
    while (Date.now() - startedAt < MAX_WAIT_MS) {
        if (abortSignal?.aborted) {
            return false;
        }
        try {
            const response = await poll();
            if (response.ok) {
                const progress = await response.json();
                switch (progress.status) {
                    case 'ready':
                    case 'complete':
                        return true;
                    case 'failed':
                    case 'error':
                        return false;
                    default:
                        // Still running: fall through to the sleep below.
                        break;
                }
            }
            else if (!MONITOR_RETRIABLE.has(response.status)) {
                return false;
            }
        }
        catch (error) {
            console.error('[Brightdata] Monitor error:', error);
        }
        await sleep(POLL_INTERVAL_MS, abortSignal);
    }
    console.error(`[Brightdata] Monitor timeout after ${MAX_WAIT_MS / 1000}s`);
    return false;
}
118
/**
 * Downloads a finished snapshot as JSON.
 *
 * The request is retried per `DOWNLOAD_RETRY`. The parsed body is returned
 * only when it is an array; a non-OK response, a non-array body, or any
 * error thrown while fetching/parsing yields `null` (errors are logged).
 *
 * @param snapshotId id of the snapshot to fetch.
 * @returns the array of records, or `null`.
 */
async function downloadJob(snapshotId) {
    const apiKey = getApiKey();
    const snapshotUrl = `${API_BASE}/datasets/v3/snapshot/${snapshotId}?format=json`;
    const request = () => fetch(snapshotUrl, {
        headers: { 'Authorization': `Bearer ${apiKey}` },
        signal: getAbortSignal()
    });
    try {
        const response = await withRetries(request, DOWNLOAD_RETRY);
        if (response.ok) {
            const records = await response.json();
            return Array.isArray(records) ? records : null;
        }
        console.error(`[Brightdata] Download error: ${response.status}`);
        return null;
    }
    catch (error) {
        console.error('[Brightdata] Download failed:', error);
        return null;
    }
}
138
/**
 * Converts the first record of a downloaded snapshot into a result object.
 *
 * The answer is taken from `answer_text_markdown`, falling back to
 * `answer_text`, then passed through `cleanAnswer`. Positions of attached
 * links are grouped by URL and handed to `buildSources` together with the
 * citations.
 *
 * @param raw downloaded records; only element 0 is used.
 * @returns the result, or `null` when `raw` is missing or empty.
 */
function transformResponse(raw) {
    if (!raw || raw.length === 0) {
        return null;
    }
    const record = raw[0];
    const answer = cleanAnswer(record.answer_text_markdown || record.answer_text || '');
    // url -> list of positions at which that link was attached
    const linkPositions = {};
    for (const link of record.links_attached ?? []) {
        if (!link.url || link.position == null) {
            continue;
        }
        (linkPositions[link.url] ??= []).push(link.position);
    }
    return {
        prompt: record.prompt,
        answer,
        sources: buildSources(record.citations ?? [], linkPositions),
        searchQueries: record.web_search_query || [],
        searchSources: buildSearchSources(record.search_sources ?? [])
    };
}
161
- // ============================================================================
162
- // Export
163
- // ============================================================================
164
/**
 * Provider descriptor for Brightdata: a display name, the concurrency and
 * per-request prompt limits, and the four job functions defined in this
 * module.
 */
export const brightdataProvider = {
    name: 'Brightdata',
    maxConcurrency: 50,
    maxPromptsPerRequest: 1,
    triggerJob: triggerJob,
    monitorJob: monitorJob,
    downloadJob: downloadJob,
    transformResponse: transformResponse,
};
@@ -1,10 +0,0 @@
1
- import type { ModelResult } from '../../schemas/models.schema.js';
2
- import { type BatchOptions } from './scraper.js';
3
- export type { BatchOptions };
4
- export type JobId = string | null;
5
- export declare function getMaxConcurrency(): number;
6
- export declare function getMaxPromptsPerRequest(): number;
7
- export declare function scrapeGPTBatch(options: BatchOptions): Promise<Array<ModelResult>>;
8
- export declare function triggerGPTBatch(options: BatchOptions): Promise<Array<string | null>>;
9
- export declare function downloadGPTSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
10
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/chatgptScraper/index.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gCAAgC,CAAC;AAClE,OAAO,EAAE,KAAK,YAAY,EAAkC,MAAM,cAAc,CAAC;AAKjF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAqBlC,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C;AAED,wBAAgB,uBAAuB,IAAI,MAAM,CAEhD;AAED,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG"}
@@ -1,41 +0,0 @@
1
- /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
2
- /**
3
- * GPT Scraper - Public API
4
- *
5
- * Selects between Brightdata and Oxylabs based on CHATGPT_SCRAPER_PROVIDER env var.
6
- * Default: oxylabs
7
- */
8
- import * as dntShim from "../../../_dnt.shims.js";
9
- import { createScraper } from './scraper.js';
10
- import { brightdataProvider } from './brightdata.js';
11
- import { oxylabsProvider } from './oxy.js';
12
- // ============================================================================
13
- // Scraper Instance (lazy singleton)
14
- // ============================================================================
15
- let scraper = null;
16
- function getScraper() {
17
- if (!scraper) {
18
- const providerName = dntShim.Deno.env.get('CHATGPT_SCRAPER_PROVIDER')?.toLowerCase();
19
- const provider = providerName === 'brightdata' ? brightdataProvider : oxylabsProvider;
20
- scraper = createScraper(provider);
21
- }
22
- return scraper;
23
- }
24
- // ============================================================================
25
- // Public API
26
- // ============================================================================
27
- export function getMaxConcurrency() {
28
- return getScraper().maxConcurrency;
29
- }
30
- export function getMaxPromptsPerRequest() {
31
- return getScraper().maxPromptsPerRequest;
32
- }
33
- export async function scrapeGPTBatch(options) {
34
- return getScraper().scrapeGPTBatch(options);
35
- }
36
- export async function triggerGPTBatch(options) {
37
- return getScraper().triggerGPTBatch(options);
38
- }
39
- export async function downloadGPTSnapshots(jobIds) {
40
- return getScraper().downloadGPTSnapshots(jobIds);
41
- }
@@ -1,3 +0,0 @@
1
- import { type ProviderFunctions } from './scraper.js';
2
- export declare const oxylabsProvider: ProviderFunctions;
3
- //# sourceMappingURL=oxy.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"oxy.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/chatgptScraper/oxy.ts"],"names":[],"mappings":"AAeA,OAAO,EACN,KAAK,iBAAiB,EAItB,MAAM,cAAc,CAAC;AA6LtB,eAAO,MAAM,eAAe,EAAE,iBAQ7B,CAAC"}