@graphext/cuery 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import type { ModelResult } from '../../../schemas/models.schema.js';
2
2
  import { type BatchOptions } from './scrape.js';
3
3
  export type { BatchOptions };
4
4
  export type JobId = string | null;
5
- export type ScraperTarget = 'chatgpt' | 'aim';
5
+ export type ScraperTarget = 'chatgpt' | 'aim' | 'generic';
6
6
  export declare function getMaxConcurrency(target?: ScraperTarget): number;
7
7
  export declare function getMaxPromptsPerRequest(target?: ScraperTarget): number;
8
8
  export declare function scrapeGPTBatch(options: BatchOptions): Promise<Array<ModelResult>>;
@@ -11,4 +11,5 @@ export declare function downloadGPTSnapshots(jobIds: Array<string | null>): Prom
11
11
  export declare function scrapeAIMBatch(options: BatchOptions): Promise<Array<ModelResult>>;
12
12
  export declare function triggerAIMBatch(options: BatchOptions): Promise<Array<string | null>>;
13
13
  export declare function downloadAIMSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
14
+ export declare function downloadSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
14
15
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/index.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAE,KAAK,YAAY,EAAqC,MAAM,aAAa,CAAC;AAKnF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAClC,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,CAAC;AAuE9C,wBAAgB,iBAAiB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAE3E;AAED,wBAAgB,uBAAuB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAEjF;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/index.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAE,KAAK,YAAY,EAAqC,MAAM,aAAa,CAAC;AAKnF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAClC,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,GAAG,SAAS,CAAC;AAiF1D,wBAAgB,iBAAiB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAE3E;AAED,wBAAgB,uBAAuB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAEjF;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,iBAAiB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEjG"}
@@ -57,10 +57,19 @@ function getLLMScraper(target = 'chatgpt') {
57
57
  return existingScraper;
58
58
  }
59
59
  const providerName = getProviderName();
60
- const targetOptions = getTargetOptions(target);
61
- const provider = providerName === 'brightdata'
62
- ? createBrightdataProvider(targetOptions.brightdata)
63
- : createOxylabsProvider(targetOptions.oxylabs);
60
+ let provider;
61
+ if (target === 'generic') {
62
+ // Generic instance: only used for download/monitor, no target-specific config needed
63
+ provider = providerName === 'brightdata'
64
+ ? createBrightdataProvider()
65
+ : createOxylabsProvider();
66
+ }
67
+ else {
68
+ const targetOptions = getTargetOptions(target);
69
+ provider = providerName === 'brightdata'
70
+ ? createBrightdataProvider(targetOptions.brightdata)
71
+ : createOxylabsProvider(targetOptions.oxylabs);
72
+ }
64
73
  const scraper = createLLMScraper(provider);
65
74
  scrapers.set(target, scraper);
66
75
  return scraper;
@@ -83,7 +92,7 @@ export async function triggerGPTBatch(options) {
83
92
  return getLLMScraper('chatgpt').triggerLLMBatch(options);
84
93
  }
85
94
  export async function downloadGPTSnapshots(jobIds) {
86
- return getLLMScraper('chatgpt').downloadLLMSnapshots(jobIds);
95
+ return downloadSnapshots(jobIds);
87
96
  }
88
97
  // AIM scraper methods
89
98
  export async function scrapeAIMBatch(options) {
@@ -93,5 +102,9 @@ export async function triggerAIMBatch(options) {
93
102
  return getLLMScraper('aim').triggerLLMBatch(options);
94
103
  }
95
104
  export async function downloadAIMSnapshots(jobIds) {
96
- return getLLMScraper('aim').downloadLLMSnapshots(jobIds);
105
+ return downloadSnapshots(jobIds);
106
+ }
107
+ // Generic download — target-agnostic, works with any job IDs
108
+ export async function downloadSnapshots(jobIds) {
109
+ return getLLMScraper('generic').downloadLLMSnapshots(jobIds);
97
110
  }
@@ -1 +1 @@
1
- {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAwJD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAqMD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
@@ -116,8 +116,38 @@ function formatCode(block) {
116
116
  const header = `[Code${lang ? ': ' + lang : ''}]`;
117
117
  return `${header}\n${snippet.trim()}`;
118
118
  }
119
+ function formatCitationMarkers(refIndexes) {
120
+ if (refIndexes.length === 0) {
121
+ return '';
122
+ }
123
+ return ' ' + refIndexes.map(i => `[${i + 1}]`).join('');
124
+ }
119
125
  function parseAIResult(data, { allowNestedOverview = true } = {}) {
120
126
  const textBlocks = data.textBlocks || (allowNestedOverview ? data.aiOverview?.textBlocks : []) || [];
127
+ // Build reference index → source index mapping and track cited refs
128
+ const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
129
+ const sources = [];
130
+ const refIndexToSourceIndex = new Map();
131
+ for (const r of refs) {
132
+ const link = r.link || r.url;
133
+ const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
134
+ if (link && r.index != null) {
135
+ // Deduplicate by URL
136
+ const existingIdx = sources.findIndex(s => s.url === link);
137
+ if (existingIdx >= 0) {
138
+ refIndexToSourceIndex.set(r.index, existingIdx);
139
+ }
140
+ else {
141
+ refIndexToSourceIndex.set(r.index, sources.length);
142
+ sources.push({
143
+ title,
144
+ url: link,
145
+ domain: extractDomain(link)
146
+ });
147
+ }
148
+ }
149
+ }
150
+ const citedSourceIndexes = new Set();
121
151
  const parts = [];
122
152
  const handlers = {
123
153
  paragraph: (b) => cleanText(b.snippet || ''),
@@ -131,19 +161,40 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
131
161
  continue;
132
162
  }
133
163
  const handler = handlers[btype];
164
+ let rendered = '';
134
165
  if (handler) {
135
- const rendered = handler(block);
136
- if (rendered) {
137
- parts.push(rendered);
138
- }
166
+ rendered = handler(block);
139
167
  }
140
168
  else {
141
169
  const snippet = block.snippet || '';
142
170
  if (snippet) {
143
- parts.push(cleanText(snippet));
171
+ rendered = cleanText(snippet);
172
+ }
173
+ }
174
+ if (rendered) {
175
+ // Append citation markers and track positions
176
+ const refIndexes = block.referenceIndexes || [];
177
+ if (refIndexes.length > 0) {
178
+ // Map ref indexes to 1-based source indexes for display
179
+ const sourceIndexes = refIndexes
180
+ .map(ri => refIndexToSourceIndex.get(ri))
181
+ .filter((si) => si != null);
182
+ for (const si of sourceIndexes) {
183
+ citedSourceIndexes.add(si);
184
+ sources[si].positions ??= [];
185
+ if (!sources[si].positions.includes(parts.length)) {
186
+ sources[si].positions.push(parts.length);
187
+ }
188
+ }
189
+ rendered += formatCitationMarkers(sourceIndexes.filter((v, i, a) => a.indexOf(v) === i));
144
190
  }
191
+ parts.push(rendered);
145
192
  }
146
193
  }
194
+ // Mark cited sources
195
+ for (const si of citedSourceIndexes) {
196
+ sources[si].cited = true;
197
+ }
147
198
  const deduped = [];
148
199
  for (const p of parts) {
149
200
  if (deduped.length === 0 || deduped[deduped.length - 1] !== p) {
@@ -155,19 +206,6 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
155
206
  console.warn('Warning: AI answer truncated to 16000 characters');
156
207
  answer = answer.slice(0, 16000);
157
208
  }
158
- const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
159
- const sources = [];
160
- for (const r of refs) {
161
- const link = r.link || r.url;
162
- const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
163
- if (link) {
164
- sources.push({
165
- title,
166
- url: link,
167
- domain: extractDomain(link)
168
- });
169
- }
170
- }
171
209
  return { answer, sources };
172
210
  }
173
211
  export function parseAIO(aio) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@graphext/cuery",
3
- "version": "0.10.0",
3
+ "version": "0.10.2",
4
4
  "description": "Cuery tools for AI-powered keyword research and brand analysis",
5
5
  "repository": {
6
6
  "type": "git",
@@ -2,7 +2,7 @@ import type { ModelResult } from '../../../schemas/models.schema.js';
2
2
  import { type BatchOptions } from './scrape.js';
3
3
  export type { BatchOptions };
4
4
  export type JobId = string | null;
5
- export type ScraperTarget = 'chatgpt' | 'aim';
5
+ export type ScraperTarget = 'chatgpt' | 'aim' | 'generic';
6
6
  export declare function getMaxConcurrency(target?: ScraperTarget): number;
7
7
  export declare function getMaxPromptsPerRequest(target?: ScraperTarget): number;
8
8
  export declare function scrapeGPTBatch(options: BatchOptions): Promise<Array<ModelResult>>;
@@ -11,4 +11,5 @@ export declare function downloadGPTSnapshots(jobIds: Array<string | null>): Prom
11
11
  export declare function scrapeAIMBatch(options: BatchOptions): Promise<Array<ModelResult>>;
12
12
  export declare function triggerAIMBatch(options: BatchOptions): Promise<Array<string | null>>;
13
13
  export declare function downloadAIMSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
14
+ export declare function downloadSnapshots(jobIds: Array<string | null>): Promise<Array<ModelResult>>;
14
15
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/index.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAE,KAAK,YAAY,EAAqC,MAAM,aAAa,CAAC;AAKnF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAClC,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,CAAC;AAuE9C,wBAAgB,iBAAiB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAE3E;AAED,wBAAgB,uBAAuB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAEjF;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/index.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAE,KAAK,YAAY,EAAqC,MAAM,aAAa,CAAC;AAKnF,YAAY,EAAE,YAAY,EAAE,CAAC;AAC7B,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,IAAI,CAAC;AAClC,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,GAAG,SAAS,CAAC;AAiF1D,wBAAgB,iBAAiB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAE3E;AAED,wBAAgB,uBAAuB,CAAC,MAAM,GAAE,aAAyB,GAAG,MAAM,CAEjF;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,cAAc,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEvF;AAED,wBAAsB,eAAe,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAE1F;AAED,wBAAsB,oBAAoB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEpG;AAGD,wBAAsB,iBAAiB,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAEjG"}
@@ -41,6 +41,7 @@ exports.downloadGPTSnapshots = downloadGPTSnapshots;
41
41
  exports.scrapeAIMBatch = scrapeAIMBatch;
42
42
  exports.triggerAIMBatch = triggerAIMBatch;
43
43
  exports.downloadAIMSnapshots = downloadAIMSnapshots;
44
+ exports.downloadSnapshots = downloadSnapshots;
44
45
  /* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
45
46
  /**
46
47
  * LLM Scraper - Public API
@@ -100,10 +101,19 @@ function getLLMScraper(target = 'chatgpt') {
100
101
  return existingScraper;
101
102
  }
102
103
  const providerName = getProviderName();
103
- const targetOptions = getTargetOptions(target);
104
- const provider = providerName === 'brightdata'
105
- ? (0, brightdata_js_1.createBrightdataProvider)(targetOptions.brightdata)
106
- : (0, oxy_js_1.createOxylabsProvider)(targetOptions.oxylabs);
104
+ let provider;
105
+ if (target === 'generic') {
106
+ // Generic instance: only used for download/monitor, no target-specific config needed
107
+ provider = providerName === 'brightdata'
108
+ ? (0, brightdata_js_1.createBrightdataProvider)()
109
+ : (0, oxy_js_1.createOxylabsProvider)();
110
+ }
111
+ else {
112
+ const targetOptions = getTargetOptions(target);
113
+ provider = providerName === 'brightdata'
114
+ ? (0, brightdata_js_1.createBrightdataProvider)(targetOptions.brightdata)
115
+ : (0, oxy_js_1.createOxylabsProvider)(targetOptions.oxylabs);
116
+ }
107
117
  const scraper = (0, scrape_js_1.createLLMScraper)(provider);
108
118
  scrapers.set(target, scraper);
109
119
  return scraper;
@@ -126,7 +136,7 @@ async function triggerGPTBatch(options) {
126
136
  return getLLMScraper('chatgpt').triggerLLMBatch(options);
127
137
  }
128
138
  async function downloadGPTSnapshots(jobIds) {
129
- return getLLMScraper('chatgpt').downloadLLMSnapshots(jobIds);
139
+ return downloadSnapshots(jobIds);
130
140
  }
131
141
  // AIM scraper methods
132
142
  async function scrapeAIMBatch(options) {
@@ -136,5 +146,9 @@ async function triggerAIMBatch(options) {
136
146
  return getLLMScraper('aim').triggerLLMBatch(options);
137
147
  }
138
148
  async function downloadAIMSnapshots(jobIds) {
139
- return getLLMScraper('aim').downloadLLMSnapshots(jobIds);
149
+ return downloadSnapshots(jobIds);
150
+ }
151
+ // Generic download — target-agnostic, works with any job IDs
152
+ async function downloadSnapshots(jobIds) {
153
+ return getLLMScraper('generic').downloadLLMSnapshots(jobIds);
140
154
  }
@@ -1 +1 @@
1
- {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAwJD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AAqMD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
@@ -156,8 +156,38 @@ function formatCode(block) {
156
156
  const header = `[Code${lang ? ': ' + lang : ''}]`;
157
157
  return `${header}\n${snippet.trim()}`;
158
158
  }
159
+ function formatCitationMarkers(refIndexes) {
160
+ if (refIndexes.length === 0) {
161
+ return '';
162
+ }
163
+ return ' ' + refIndexes.map(i => `[${i + 1}]`).join('');
164
+ }
159
165
  function parseAIResult(data, { allowNestedOverview = true } = {}) {
160
166
  const textBlocks = data.textBlocks || (allowNestedOverview ? data.aiOverview?.textBlocks : []) || [];
167
+ // Build reference index → source index mapping and track cited refs
168
+ const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
169
+ const sources = [];
170
+ const refIndexToSourceIndex = new Map();
171
+ for (const r of refs) {
172
+ const link = r.link || r.url;
173
+ const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
174
+ if (link && r.index != null) {
175
+ // Deduplicate by URL
176
+ const existingIdx = sources.findIndex(s => s.url === link);
177
+ if (existingIdx >= 0) {
178
+ refIndexToSourceIndex.set(r.index, existingIdx);
179
+ }
180
+ else {
181
+ refIndexToSourceIndex.set(r.index, sources.length);
182
+ sources.push({
183
+ title,
184
+ url: link,
185
+ domain: (0, urls_js_1.extractDomain)(link)
186
+ });
187
+ }
188
+ }
189
+ }
190
+ const citedSourceIndexes = new Set();
161
191
  const parts = [];
162
192
  const handlers = {
163
193
  paragraph: (b) => cleanText(b.snippet || ''),
@@ -171,19 +201,40 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
171
201
  continue;
172
202
  }
173
203
  const handler = handlers[btype];
204
+ let rendered = '';
174
205
  if (handler) {
175
- const rendered = handler(block);
176
- if (rendered) {
177
- parts.push(rendered);
178
- }
206
+ rendered = handler(block);
179
207
  }
180
208
  else {
181
209
  const snippet = block.snippet || '';
182
210
  if (snippet) {
183
- parts.push(cleanText(snippet));
211
+ rendered = cleanText(snippet);
212
+ }
213
+ }
214
+ if (rendered) {
215
+ // Append citation markers and track positions
216
+ const refIndexes = block.referenceIndexes || [];
217
+ if (refIndexes.length > 0) {
218
+ // Map ref indexes to 1-based source indexes for display
219
+ const sourceIndexes = refIndexes
220
+ .map(ri => refIndexToSourceIndex.get(ri))
221
+ .filter((si) => si != null);
222
+ for (const si of sourceIndexes) {
223
+ citedSourceIndexes.add(si);
224
+ sources[si].positions ??= [];
225
+ if (!sources[si].positions.includes(parts.length)) {
226
+ sources[si].positions.push(parts.length);
227
+ }
228
+ }
229
+ rendered += formatCitationMarkers(sourceIndexes.filter((v, i, a) => a.indexOf(v) === i));
184
230
  }
231
+ parts.push(rendered);
185
232
  }
186
233
  }
234
+ // Mark cited sources
235
+ for (const si of citedSourceIndexes) {
236
+ sources[si].cited = true;
237
+ }
187
238
  const deduped = [];
188
239
  for (const p of parts) {
189
240
  if (deduped.length === 0 || deduped[deduped.length - 1] !== p) {
@@ -195,19 +246,6 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
195
246
  console.warn('Warning: AI answer truncated to 16000 characters');
196
247
  answer = answer.slice(0, 16000);
197
248
  }
198
- const refs = data.references || (allowNestedOverview ? data.aiOverview?.references : []) || [];
199
- const sources = [];
200
- for (const r of refs) {
201
- const link = r.link || r.url;
202
- const title = [r.title, r.source, r.snippet].filter(Boolean).join(' - ');
203
- if (link) {
204
- sources.push({
205
- title,
206
- url: link,
207
- domain: (0, urls_js_1.extractDomain)(link)
208
- });
209
- }
210
- }
211
249
  return { answer, sources };
212
250
  }
213
251
  function parseAIO(aio) {