@graphext/cuery 0.10.2 → 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/src/apis/brightdata/llmScraper/brightdata.js +3 -3
- package/esm/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -1
- package/esm/src/apis/brightdata/llmScraper/oxy.js +3 -3
- package/esm/src/apis/brightdata/llmScraper/scrape.d.ts +1 -2
- package/esm/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -1
- package/esm/src/apis/brightdata/llmScraper/scrape.js +8 -3
- package/esm/src/apis/hasdata/helpers.d.ts +1 -0
- package/esm/src/apis/hasdata/helpers.d.ts.map +1 -1
- package/esm/src/apis/hasdata/helpers.js +85 -18
- package/esm/src/schemas/search.schema.d.ts +1 -1
- package/esm/src/schemas/search.schema.d.ts.map +1 -1
- package/package.json +1 -1
- package/script/src/apis/brightdata/llmScraper/brightdata.js +2 -2
- package/script/src/apis/brightdata/llmScraper/oxy.d.ts.map +1 -1
- package/script/src/apis/brightdata/llmScraper/oxy.js +2 -2
- package/script/src/apis/brightdata/llmScraper/scrape.d.ts +1 -2
- package/script/src/apis/brightdata/llmScraper/scrape.d.ts.map +1 -1
- package/script/src/apis/brightdata/llmScraper/scrape.js +9 -4
- package/script/src/apis/hasdata/helpers.d.ts +1 -0
- package/script/src/apis/hasdata/helpers.d.ts.map +1 -1
- package/script/src/apis/hasdata/helpers.js +85 -18
- package/script/src/schemas/search.schema.d.ts +1 -1
- package/script/src/schemas/search.schema.d.ts.map +1 -1
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import * as dntShim from "../../../../_dnt.shims.js";
|
|
11
11
|
import { sleep, withRetries } from '../../../helpers/async.js';
|
|
12
|
-
import {
|
|
12
|
+
import { parseSources, cleanAnswer, getAbortSignal } from './scrape.js';
|
|
13
13
|
const DEFAULT_BRIGHTDATA_PROVIDER_CONFIG = {
|
|
14
14
|
apiBase: 'https://api.brightdata.com',
|
|
15
15
|
datasetId: 'gd_m7aof0k82r803d5bjm',
|
|
@@ -161,8 +161,8 @@ export function createBrightdataProvider(overrides = {}) {
|
|
|
161
161
|
return {
|
|
162
162
|
prompt: response.prompt,
|
|
163
163
|
answer: answerText,
|
|
164
|
-
|
|
165
|
-
sources:
|
|
164
|
+
answerMarkdown: answerTextMarkdown,
|
|
165
|
+
sources: parseSources(response.citations ?? [], response.links_attached ?? []),
|
|
166
166
|
searchQueries: response.web_search_query || [],
|
|
167
167
|
};
|
|
168
168
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"oxy.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/oxy.ts"],"names":[],"mappings":"AAeA,OAAO,EAA6C,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"oxy.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/oxy.ts"],"names":[],"mappings":"AAeA,OAAO,EAA6C,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AA0BhG,UAAU,qBAAqB;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC;IAC7B,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;CAC7B;AAyCD,wBAAgB,qBAAqB,CAAC,SAAS,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAAG,iBAAiB,CAyJvG;AAMD,eAAO,MAAM,eAAe,EAAE,iBAA2C,CAAC"}
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import * as dntShim from "../../../../_dnt.shims.js";
|
|
11
11
|
import { sleep, withRetries } from '../../../helpers/async.js';
|
|
12
|
-
import {
|
|
12
|
+
import { parseSources, cleanAnswer, getAbortSignal } from './scrape.js';
|
|
13
13
|
const DEFAULT_OXYLABS_PROVIDER_CONFIG = {
|
|
14
14
|
apiBase: 'https://data.oxylabs.io/v1',
|
|
15
15
|
source: 'chatgpt',
|
|
@@ -150,8 +150,8 @@ export function createOxylabsProvider(overrides = {}) {
|
|
|
150
150
|
return {
|
|
151
151
|
prompt: content.prompt || '',
|
|
152
152
|
answer: answerText,
|
|
153
|
-
|
|
154
|
-
sources:
|
|
153
|
+
answerMarkdown: answerTextMarkdown,
|
|
154
|
+
sources: parseSources(citations),
|
|
155
155
|
searchQueries: [],
|
|
156
156
|
};
|
|
157
157
|
}
|
|
@@ -23,11 +23,10 @@ export interface LLMScraper {
|
|
|
23
23
|
}
|
|
24
24
|
export declare function getAbortSignal(): AbortSignal | undefined;
|
|
25
25
|
export declare function cleanAnswer(answer: string): string;
|
|
26
|
-
export declare function
|
|
26
|
+
export declare function parseSources(citations: Array<{
|
|
27
27
|
url: string;
|
|
28
28
|
title?: string;
|
|
29
29
|
description?: string;
|
|
30
|
-
text?: string;
|
|
31
30
|
cited?: boolean;
|
|
32
31
|
}>, linksAttached?: Array<{
|
|
33
32
|
url?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/scrape.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAC;AAOjE,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AA+BD,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/scrape.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAC;AAOjE,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AA+BD,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,EACxF,aAAa,GAAE,KAAK,CAAC;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAM,GAC3E,KAAK,CAAC,MAAM,CAAC,CAyFf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,WAAW,CAU5G;AAMD,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,UAAU,CAkExE"}
|
|
@@ -49,7 +49,7 @@ function hasExtraUrlInfo(current, candidate) {
|
|
|
49
49
|
return false;
|
|
50
50
|
}
|
|
51
51
|
}
|
|
52
|
-
export function
|
|
52
|
+
export function parseSources(citations, linksAttached = []) {
|
|
53
53
|
const sources = [];
|
|
54
54
|
const sourcesByKey = new Map();
|
|
55
55
|
const upsertSource = (url, initialTitle, cited) => {
|
|
@@ -97,11 +97,15 @@ export function buildSources(citations, linksAttached = []) {
|
|
|
97
97
|
continue;
|
|
98
98
|
const key = urlMergeKey(citation.url);
|
|
99
99
|
const existing = sourcesByKey.get(key);
|
|
100
|
-
const title = citation.title
|
|
100
|
+
const title = citation.title ?? '';
|
|
101
|
+
const snippet = citation.description;
|
|
101
102
|
if (existing) {
|
|
102
103
|
if (title) {
|
|
103
104
|
existing.title = title;
|
|
104
105
|
}
|
|
106
|
+
if (snippet) {
|
|
107
|
+
existing.snippet = snippet;
|
|
108
|
+
}
|
|
105
109
|
existing.cited = existing.cited || citation.cited;
|
|
106
110
|
// Append extra fragment/params from citation
|
|
107
111
|
if (hasExtraUrlInfo(existing.url, citation.url)) {
|
|
@@ -111,6 +115,7 @@ export function buildSources(citations, linksAttached = []) {
|
|
|
111
115
|
}
|
|
112
116
|
const source = {
|
|
113
117
|
title,
|
|
118
|
+
snippet,
|
|
114
119
|
url: citation.url,
|
|
115
120
|
domain: extractDomain(citation.url),
|
|
116
121
|
cited: citation.cited,
|
|
@@ -134,7 +139,7 @@ export function emptyModelResult(providerName, errorMessage, context) {
|
|
|
134
139
|
return {
|
|
135
140
|
prompt: '',
|
|
136
141
|
answer: '',
|
|
137
|
-
|
|
142
|
+
answerMarkdown: '',
|
|
138
143
|
sources: [],
|
|
139
144
|
};
|
|
140
145
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AA0QD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
|
|
@@ -92,6 +92,19 @@ function* iterListItems(items, indent = 0) {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
+
function* iterPlainListItems(items) {
|
|
96
|
+
for (const obj of items) {
|
|
97
|
+
const title = obj.title || '';
|
|
98
|
+
const snippet = obj.snippet || '';
|
|
99
|
+
const line = [title, snippet].filter(Boolean).join(' ').trim();
|
|
100
|
+
if (line) {
|
|
101
|
+
yield cleanText(line);
|
|
102
|
+
}
|
|
103
|
+
if (obj.list && Array.isArray(obj.list)) {
|
|
104
|
+
yield* iterPlainListItems(obj.list);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
95
108
|
function formatTable(block) {
|
|
96
109
|
const rows = block.rows || [];
|
|
97
110
|
if (rows.length === 0) {
|
|
@@ -116,6 +129,18 @@ function formatCode(block) {
|
|
|
116
129
|
const header = `[Code${lang ? ': ' + lang : ''}]`;
|
|
117
130
|
return `${header}\n${snippet.trim()}`;
|
|
118
131
|
}
|
|
132
|
+
function formatPlainTable(block) {
|
|
133
|
+
const rows = block.rows || [];
|
|
134
|
+
if (rows.length === 0) {
|
|
135
|
+
return '';
|
|
136
|
+
}
|
|
137
|
+
return rows
|
|
138
|
+
.map((row) => row.map(cell => removeCSSChunks(cell)).join(' | '))
|
|
139
|
+
.join('\n');
|
|
140
|
+
}
|
|
141
|
+
function formatPlainCode(block) {
|
|
142
|
+
return cleanText(block.snippet || '');
|
|
143
|
+
}
|
|
119
144
|
function formatCitationMarkers(refIndexes) {
|
|
120
145
|
if (refIndexes.length === 0) {
|
|
121
146
|
return '';
|
|
@@ -130,17 +155,25 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
130
155
|
const refIndexToSourceIndex = new Map();
|
|
131
156
|
for (const r of refs) {
|
|
132
157
|
const link = r.link || r.url;
|
|
133
|
-
const title = [r.title, r.source
|
|
158
|
+
const title = [r.title, r.source].filter(Boolean).join(' - ');
|
|
159
|
+
const snippet = cleanText(r.snippet || '') || undefined;
|
|
134
160
|
if (link && r.index != null) {
|
|
135
161
|
// Deduplicate by URL
|
|
136
162
|
const existingIdx = sources.findIndex(s => s.url === link);
|
|
137
163
|
if (existingIdx >= 0) {
|
|
138
164
|
refIndexToSourceIndex.set(r.index, existingIdx);
|
|
165
|
+
if (!sources[existingIdx].title && title) {
|
|
166
|
+
sources[existingIdx].title = title;
|
|
167
|
+
}
|
|
168
|
+
if (!sources[existingIdx].snippet && snippet) {
|
|
169
|
+
sources[existingIdx].snippet = snippet;
|
|
170
|
+
}
|
|
139
171
|
}
|
|
140
172
|
else {
|
|
141
173
|
refIndexToSourceIndex.set(r.index, sources.length);
|
|
142
174
|
sources.push({
|
|
143
175
|
title,
|
|
176
|
+
snippet,
|
|
144
177
|
url: link,
|
|
145
178
|
domain: extractDomain(link)
|
|
146
179
|
});
|
|
@@ -148,8 +181,15 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
148
181
|
}
|
|
149
182
|
}
|
|
150
183
|
const citedSourceIndexes = new Set();
|
|
151
|
-
const
|
|
152
|
-
const
|
|
184
|
+
const answerParts = [];
|
|
185
|
+
const answerMarkdownParts = [];
|
|
186
|
+
const plainHandlers = {
|
|
187
|
+
paragraph: (b) => cleanText(b.snippet || ''),
|
|
188
|
+
list: (b) => Array.from(iterPlainListItems(b.list || [])).join('\n'),
|
|
189
|
+
table: formatPlainTable,
|
|
190
|
+
code: formatPlainCode
|
|
191
|
+
};
|
|
192
|
+
const markdownHandlers = {
|
|
153
193
|
paragraph: (b) => cleanText(b.snippet || ''),
|
|
154
194
|
list: (b) => Array.from(iterListItems(b.list || [])).join('\n'),
|
|
155
195
|
table: formatTable,
|
|
@@ -160,10 +200,12 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
160
200
|
if (!btype || btype === 'carousel') {
|
|
161
201
|
continue;
|
|
162
202
|
}
|
|
163
|
-
const
|
|
203
|
+
const plainHandler = plainHandlers[btype];
|
|
204
|
+
const markdownHandler = markdownHandlers[btype];
|
|
164
205
|
let rendered = '';
|
|
165
|
-
|
|
166
|
-
|
|
206
|
+
let renderedMarkdown = '';
|
|
207
|
+
if (plainHandler) {
|
|
208
|
+
rendered = plainHandler(block);
|
|
167
209
|
}
|
|
168
210
|
else {
|
|
169
211
|
const snippet = block.snippet || '';
|
|
@@ -171,7 +213,16 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
171
213
|
rendered = cleanText(snippet);
|
|
172
214
|
}
|
|
173
215
|
}
|
|
174
|
-
if (
|
|
216
|
+
if (markdownHandler) {
|
|
217
|
+
renderedMarkdown = markdownHandler(block);
|
|
218
|
+
}
|
|
219
|
+
else {
|
|
220
|
+
const snippet = block.snippet || '';
|
|
221
|
+
if (snippet) {
|
|
222
|
+
renderedMarkdown = cleanText(snippet);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
if (rendered || renderedMarkdown) {
|
|
175
226
|
// Append citation markers and track positions
|
|
176
227
|
const refIndexes = block.referenceIndexes || [];
|
|
177
228
|
if (refIndexes.length > 0) {
|
|
@@ -179,34 +230,50 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
179
230
|
const sourceIndexes = refIndexes
|
|
180
231
|
.map(ri => refIndexToSourceIndex.get(ri))
|
|
181
232
|
.filter((si) => si != null);
|
|
182
|
-
|
|
233
|
+
const uniqueSourceIndexes = sourceIndexes.filter((v, i, a) => a.indexOf(v) === i);
|
|
234
|
+
for (const si of uniqueSourceIndexes) {
|
|
183
235
|
citedSourceIndexes.add(si);
|
|
184
236
|
sources[si].positions ??= [];
|
|
185
|
-
|
|
186
|
-
|
|
237
|
+
const citationNumber = si + 1;
|
|
238
|
+
if (!sources[si].positions.includes(citationNumber)) {
|
|
239
|
+
sources[si].positions.push(citationNumber);
|
|
187
240
|
}
|
|
188
241
|
}
|
|
189
|
-
|
|
242
|
+
const citationMarkers = formatCitationMarkers(uniqueSourceIndexes);
|
|
243
|
+
rendered += citationMarkers;
|
|
244
|
+
renderedMarkdown += citationMarkers;
|
|
190
245
|
}
|
|
191
|
-
|
|
246
|
+
answerParts.push(rendered);
|
|
247
|
+
answerMarkdownParts.push(renderedMarkdown);
|
|
192
248
|
}
|
|
193
249
|
}
|
|
194
250
|
// Mark cited sources
|
|
195
251
|
for (const si of citedSourceIndexes) {
|
|
196
252
|
sources[si].cited = true;
|
|
197
253
|
}
|
|
198
|
-
const
|
|
199
|
-
for (const
|
|
200
|
-
if (
|
|
201
|
-
|
|
254
|
+
const dedupedAnswer = [];
|
|
255
|
+
for (const part of answerParts) {
|
|
256
|
+
if (dedupedAnswer.length === 0 || dedupedAnswer[dedupedAnswer.length - 1] !== part) {
|
|
257
|
+
dedupedAnswer.push(part);
|
|
202
258
|
}
|
|
203
259
|
}
|
|
204
|
-
|
|
260
|
+
const dedupedAnswerMarkdown = [];
|
|
261
|
+
for (const part of answerMarkdownParts) {
|
|
262
|
+
if (dedupedAnswerMarkdown.length === 0 || dedupedAnswerMarkdown[dedupedAnswerMarkdown.length - 1] !== part) {
|
|
263
|
+
dedupedAnswerMarkdown.push(part);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
let answer = cleanText(dedupedAnswer.join('\n\n'));
|
|
267
|
+
let answerMarkdown = cleanText(dedupedAnswerMarkdown.join('\n\n'));
|
|
205
268
|
if (answer.length > 16000) {
|
|
206
269
|
console.warn('Warning: AI answer truncated to 16000 characters');
|
|
207
270
|
answer = answer.slice(0, 16000);
|
|
208
271
|
}
|
|
209
|
-
|
|
272
|
+
if (answerMarkdown.length > 16000) {
|
|
273
|
+
console.warn('Warning: AI markdown answer truncated to 16000 characters');
|
|
274
|
+
answerMarkdown = answerMarkdown.slice(0, 16000);
|
|
275
|
+
}
|
|
276
|
+
return { answer, answerMarkdown, sources };
|
|
210
277
|
}
|
|
211
278
|
export function parseAIO(aio) {
|
|
212
279
|
return parseAIResult(aio, {
|
|
@@ -4,9 +4,9 @@ export type ContextSize = 'low' | 'medium' | 'high';
|
|
|
4
4
|
export type ReasoningEffort = 'low' | 'medium' | 'high';
|
|
5
5
|
export interface SearchResult {
|
|
6
6
|
answer: string;
|
|
7
|
-
answer_text_markdown?: string;
|
|
8
7
|
sources: Array<Source>;
|
|
9
8
|
searchQueries?: Array<string>;
|
|
9
|
+
answerMarkdown?: string;
|
|
10
10
|
}
|
|
11
11
|
export type SearchOptions = {
|
|
12
12
|
prompt: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,
|
|
1
|
+
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;4FACwF;IACxF,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,YAAY,GAAG,oBAAoB,CAAC;CACjD,CAAC;AAEF,MAAM,MAAM,sBAAsB,CAAC,CAAC,IAAI,aAAa,GAAG;IACvD,cAAc,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,GAAG;IAChE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC"}
|
package/package.json
CHANGED
|
@@ -198,8 +198,8 @@ function createBrightdataProvider(overrides = {}) {
|
|
|
198
198
|
return {
|
|
199
199
|
prompt: response.prompt,
|
|
200
200
|
answer: answerText,
|
|
201
|
-
|
|
202
|
-
sources: (0, scrape_js_1.
|
|
201
|
+
answerMarkdown: answerTextMarkdown,
|
|
202
|
+
sources: (0, scrape_js_1.parseSources)(response.citations ?? [], response.links_attached ?? []),
|
|
203
203
|
searchQueries: response.web_search_query || [],
|
|
204
204
|
};
|
|
205
205
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"oxy.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/oxy.ts"],"names":[],"mappings":"AAeA,OAAO,EAA6C,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"oxy.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/oxy.ts"],"names":[],"mappings":"AAeA,OAAO,EAA6C,KAAK,iBAAiB,EAAE,MAAM,aAAa,CAAC;AA0BhG,UAAU,qBAAqB;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC;IAC7B,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;CAC7B;AAyCD,wBAAgB,qBAAqB,CAAC,SAAS,GAAE,OAAO,CAAC,qBAAqB,CAAM,GAAG,iBAAiB,CAyJvG;AAMD,eAAO,MAAM,eAAe,EAAE,iBAA2C,CAAC"}
|
|
@@ -187,8 +187,8 @@ function createOxylabsProvider(overrides = {}) {
|
|
|
187
187
|
return {
|
|
188
188
|
prompt: content.prompt || '',
|
|
189
189
|
answer: answerText,
|
|
190
|
-
|
|
191
|
-
sources: (0, scrape_js_1.
|
|
190
|
+
answerMarkdown: answerTextMarkdown,
|
|
191
|
+
sources: (0, scrape_js_1.parseSources)(citations),
|
|
192
192
|
searchQueries: [],
|
|
193
193
|
};
|
|
194
194
|
}
|
|
@@ -23,11 +23,10 @@ export interface LLMScraper {
|
|
|
23
23
|
}
|
|
24
24
|
export declare function getAbortSignal(): AbortSignal | undefined;
|
|
25
25
|
export declare function cleanAnswer(answer: string): string;
|
|
26
|
-
export declare function
|
|
26
|
+
export declare function parseSources(citations: Array<{
|
|
27
27
|
url: string;
|
|
28
28
|
title?: string;
|
|
29
29
|
description?: string;
|
|
30
|
-
text?: string;
|
|
31
30
|
cited?: boolean;
|
|
32
31
|
}>, linksAttached?: Array<{
|
|
33
32
|
url?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/scrape.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAC;AAOjE,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AA+BD,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../../../../src/src/apis/brightdata/llmScraper/scrape.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mCAAmC,CAAC;AACrE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oCAAoC,CAAC;AAOjE,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC/B;AAED,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,GAAG,IAAI,KAAK,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1G,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAChD,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACjD,iBAAiB,EAAE,CAAC,GAAG,EAAE,OAAO,KAAK,WAAW,GAAG,IAAI,CAAC;CACxD;AAED,MAAM,WAAW,UAAU;IAC1B,cAAc,EAAE,MAAM,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,cAAc,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;IACvE,eAAe,EAAE,CAAC,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;IAC1E,oBAAoB,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;CACpF;AAMD,wBAAgB,cAAc,IAAI,WAAW,GAAG,SAAS,CAExD;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAMlD;AA+BD,wBAAgB,YAAY,CAC3B,SAAS,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC,EACxF,aAAa,GAAE,KAAK,CAAC;IAAE,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAM,GAC3E,KAAK,CAAC,MAAM,CAAC,CAyFf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,WAAW,CAU5G;AAMD,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,UAAU,CAkExE"}
|
|
@@ -35,7 +35,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.getAbortSignal = getAbortSignal;
|
|
37
37
|
exports.cleanAnswer = cleanAnswer;
|
|
38
|
-
exports.
|
|
38
|
+
exports.parseSources = parseSources;
|
|
39
39
|
exports.emptyModelResult = emptyModelResult;
|
|
40
40
|
exports.createLLMScraper = createLLMScraper;
|
|
41
41
|
/* eslint no-console: ["warn", { allow: ["log", "warn", "error"] }] */
|
|
@@ -89,7 +89,7 @@ function hasExtraUrlInfo(current, candidate) {
|
|
|
89
89
|
return false;
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
|
-
function
|
|
92
|
+
function parseSources(citations, linksAttached = []) {
|
|
93
93
|
const sources = [];
|
|
94
94
|
const sourcesByKey = new Map();
|
|
95
95
|
const upsertSource = (url, initialTitle, cited) => {
|
|
@@ -137,11 +137,15 @@ function buildSources(citations, linksAttached = []) {
|
|
|
137
137
|
continue;
|
|
138
138
|
const key = urlMergeKey(citation.url);
|
|
139
139
|
const existing = sourcesByKey.get(key);
|
|
140
|
-
const title = citation.title
|
|
140
|
+
const title = citation.title ?? '';
|
|
141
|
+
const snippet = citation.description;
|
|
141
142
|
if (existing) {
|
|
142
143
|
if (title) {
|
|
143
144
|
existing.title = title;
|
|
144
145
|
}
|
|
146
|
+
if (snippet) {
|
|
147
|
+
existing.snippet = snippet;
|
|
148
|
+
}
|
|
145
149
|
existing.cited = existing.cited || citation.cited;
|
|
146
150
|
// Append extra fragment/params from citation
|
|
147
151
|
if (hasExtraUrlInfo(existing.url, citation.url)) {
|
|
@@ -151,6 +155,7 @@ function buildSources(citations, linksAttached = []) {
|
|
|
151
155
|
}
|
|
152
156
|
const source = {
|
|
153
157
|
title,
|
|
158
|
+
snippet,
|
|
154
159
|
url: citation.url,
|
|
155
160
|
domain: (0, urls_js_1.extractDomain)(citation.url),
|
|
156
161
|
cited: citation.cited,
|
|
@@ -174,7 +179,7 @@ function emptyModelResult(providerName, errorMessage, context) {
|
|
|
174
179
|
return {
|
|
175
180
|
prompt: '',
|
|
176
181
|
answer: '',
|
|
177
|
-
|
|
182
|
+
answerMarkdown: '',
|
|
178
183
|
sources: [],
|
|
179
184
|
};
|
|
180
185
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/src/apis/hasdata/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iCAAiC,CAAC;AAG9D,eAAO,MAAM,mBAAmB,KAAK,CAAC;AAEtC,eAAO,MAAM,oBAAoB,EAAE,WAMlC,CAAC;AAEF,wBAAgB,gBAAgB,IAAI,MAAM,CAMzC;AAED,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,MAAM,EACX,WAAW,GAAE,WAAkC,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAgCnB;AAED,UAAU,QAAQ;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CACvB;AAED,UAAU,SAAS;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACxC,gBAAgB,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACvB,IAAI,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,SAAS;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IAC1B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,eAAe;IACxB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,MAAM;IACtB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;CACvB;AA0QD,wBAAgB,QAAQ,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAInD;AAED,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,CAI/C"}
|
|
@@ -132,6 +132,19 @@ function* iterListItems(items, indent = 0) {
|
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
|
+
function* iterPlainListItems(items) {
|
|
136
|
+
for (const obj of items) {
|
|
137
|
+
const title = obj.title || '';
|
|
138
|
+
const snippet = obj.snippet || '';
|
|
139
|
+
const line = [title, snippet].filter(Boolean).join(' ').trim();
|
|
140
|
+
if (line) {
|
|
141
|
+
yield cleanText(line);
|
|
142
|
+
}
|
|
143
|
+
if (obj.list && Array.isArray(obj.list)) {
|
|
144
|
+
yield* iterPlainListItems(obj.list);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
135
148
|
function formatTable(block) {
|
|
136
149
|
const rows = block.rows || [];
|
|
137
150
|
if (rows.length === 0) {
|
|
@@ -156,6 +169,18 @@ function formatCode(block) {
|
|
|
156
169
|
const header = `[Code${lang ? ': ' + lang : ''}]`;
|
|
157
170
|
return `${header}\n${snippet.trim()}`;
|
|
158
171
|
}
|
|
172
|
+
function formatPlainTable(block) {
|
|
173
|
+
const rows = block.rows || [];
|
|
174
|
+
if (rows.length === 0) {
|
|
175
|
+
return '';
|
|
176
|
+
}
|
|
177
|
+
return rows
|
|
178
|
+
.map((row) => row.map(cell => removeCSSChunks(cell)).join(' | '))
|
|
179
|
+
.join('\n');
|
|
180
|
+
}
|
|
181
|
+
function formatPlainCode(block) {
|
|
182
|
+
return cleanText(block.snippet || '');
|
|
183
|
+
}
|
|
159
184
|
function formatCitationMarkers(refIndexes) {
|
|
160
185
|
if (refIndexes.length === 0) {
|
|
161
186
|
return '';
|
|
@@ -170,17 +195,25 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
170
195
|
const refIndexToSourceIndex = new Map();
|
|
171
196
|
for (const r of refs) {
|
|
172
197
|
const link = r.link || r.url;
|
|
173
|
-
const title = [r.title, r.source
|
|
198
|
+
const title = [r.title, r.source].filter(Boolean).join(' - ');
|
|
199
|
+
const snippet = cleanText(r.snippet || '') || undefined;
|
|
174
200
|
if (link && r.index != null) {
|
|
175
201
|
// Deduplicate by URL
|
|
176
202
|
const existingIdx = sources.findIndex(s => s.url === link);
|
|
177
203
|
if (existingIdx >= 0) {
|
|
178
204
|
refIndexToSourceIndex.set(r.index, existingIdx);
|
|
205
|
+
if (!sources[existingIdx].title && title) {
|
|
206
|
+
sources[existingIdx].title = title;
|
|
207
|
+
}
|
|
208
|
+
if (!sources[existingIdx].snippet && snippet) {
|
|
209
|
+
sources[existingIdx].snippet = snippet;
|
|
210
|
+
}
|
|
179
211
|
}
|
|
180
212
|
else {
|
|
181
213
|
refIndexToSourceIndex.set(r.index, sources.length);
|
|
182
214
|
sources.push({
|
|
183
215
|
title,
|
|
216
|
+
snippet,
|
|
184
217
|
url: link,
|
|
185
218
|
domain: (0, urls_js_1.extractDomain)(link)
|
|
186
219
|
});
|
|
@@ -188,8 +221,15 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
188
221
|
}
|
|
189
222
|
}
|
|
190
223
|
const citedSourceIndexes = new Set();
|
|
191
|
-
const
|
|
192
|
-
const
|
|
224
|
+
const answerParts = [];
|
|
225
|
+
const answerMarkdownParts = [];
|
|
226
|
+
const plainHandlers = {
|
|
227
|
+
paragraph: (b) => cleanText(b.snippet || ''),
|
|
228
|
+
list: (b) => Array.from(iterPlainListItems(b.list || [])).join('\n'),
|
|
229
|
+
table: formatPlainTable,
|
|
230
|
+
code: formatPlainCode
|
|
231
|
+
};
|
|
232
|
+
const markdownHandlers = {
|
|
193
233
|
paragraph: (b) => cleanText(b.snippet || ''),
|
|
194
234
|
list: (b) => Array.from(iterListItems(b.list || [])).join('\n'),
|
|
195
235
|
table: formatTable,
|
|
@@ -200,10 +240,12 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
200
240
|
if (!btype || btype === 'carousel') {
|
|
201
241
|
continue;
|
|
202
242
|
}
|
|
203
|
-
const
|
|
243
|
+
const plainHandler = plainHandlers[btype];
|
|
244
|
+
const markdownHandler = markdownHandlers[btype];
|
|
204
245
|
let rendered = '';
|
|
205
|
-
|
|
206
|
-
|
|
246
|
+
let renderedMarkdown = '';
|
|
247
|
+
if (plainHandler) {
|
|
248
|
+
rendered = plainHandler(block);
|
|
207
249
|
}
|
|
208
250
|
else {
|
|
209
251
|
const snippet = block.snippet || '';
|
|
@@ -211,7 +253,16 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
211
253
|
rendered = cleanText(snippet);
|
|
212
254
|
}
|
|
213
255
|
}
|
|
214
|
-
if (
|
|
256
|
+
if (markdownHandler) {
|
|
257
|
+
renderedMarkdown = markdownHandler(block);
|
|
258
|
+
}
|
|
259
|
+
else {
|
|
260
|
+
const snippet = block.snippet || '';
|
|
261
|
+
if (snippet) {
|
|
262
|
+
renderedMarkdown = cleanText(snippet);
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
if (rendered || renderedMarkdown) {
|
|
215
266
|
// Append citation markers and track positions
|
|
216
267
|
const refIndexes = block.referenceIndexes || [];
|
|
217
268
|
if (refIndexes.length > 0) {
|
|
@@ -219,34 +270,50 @@ function parseAIResult(data, { allowNestedOverview = true } = {}) {
|
|
|
219
270
|
const sourceIndexes = refIndexes
|
|
220
271
|
.map(ri => refIndexToSourceIndex.get(ri))
|
|
221
272
|
.filter((si) => si != null);
|
|
222
|
-
|
|
273
|
+
const uniqueSourceIndexes = sourceIndexes.filter((v, i, a) => a.indexOf(v) === i);
|
|
274
|
+
for (const si of uniqueSourceIndexes) {
|
|
223
275
|
citedSourceIndexes.add(si);
|
|
224
276
|
sources[si].positions ??= [];
|
|
225
|
-
|
|
226
|
-
|
|
277
|
+
const citationNumber = si + 1;
|
|
278
|
+
if (!sources[si].positions.includes(citationNumber)) {
|
|
279
|
+
sources[si].positions.push(citationNumber);
|
|
227
280
|
}
|
|
228
281
|
}
|
|
229
|
-
|
|
282
|
+
const citationMarkers = formatCitationMarkers(uniqueSourceIndexes);
|
|
283
|
+
rendered += citationMarkers;
|
|
284
|
+
renderedMarkdown += citationMarkers;
|
|
230
285
|
}
|
|
231
|
-
|
|
286
|
+
answerParts.push(rendered);
|
|
287
|
+
answerMarkdownParts.push(renderedMarkdown);
|
|
232
288
|
}
|
|
233
289
|
}
|
|
234
290
|
// Mark cited sources
|
|
235
291
|
for (const si of citedSourceIndexes) {
|
|
236
292
|
sources[si].cited = true;
|
|
237
293
|
}
|
|
238
|
-
const
|
|
239
|
-
for (const
|
|
240
|
-
if (
|
|
241
|
-
|
|
294
|
+
const dedupedAnswer = [];
|
|
295
|
+
for (const part of answerParts) {
|
|
296
|
+
if (dedupedAnswer.length === 0 || dedupedAnswer[dedupedAnswer.length - 1] !== part) {
|
|
297
|
+
dedupedAnswer.push(part);
|
|
242
298
|
}
|
|
243
299
|
}
|
|
244
|
-
|
|
300
|
+
const dedupedAnswerMarkdown = [];
|
|
301
|
+
for (const part of answerMarkdownParts) {
|
|
302
|
+
if (dedupedAnswerMarkdown.length === 0 || dedupedAnswerMarkdown[dedupedAnswerMarkdown.length - 1] !== part) {
|
|
303
|
+
dedupedAnswerMarkdown.push(part);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
let answer = cleanText(dedupedAnswer.join('\n\n'));
|
|
307
|
+
let answerMarkdown = cleanText(dedupedAnswerMarkdown.join('\n\n'));
|
|
245
308
|
if (answer.length > 16000) {
|
|
246
309
|
console.warn('Warning: AI answer truncated to 16000 characters');
|
|
247
310
|
answer = answer.slice(0, 16000);
|
|
248
311
|
}
|
|
249
|
-
|
|
312
|
+
if (answerMarkdown.length > 16000) {
|
|
313
|
+
console.warn('Warning: AI markdown answer truncated to 16000 characters');
|
|
314
|
+
answerMarkdown = answerMarkdown.slice(0, 16000);
|
|
315
|
+
}
|
|
316
|
+
return { answer, answerMarkdown, sources };
|
|
250
317
|
}
|
|
251
318
|
function parseAIO(aio) {
|
|
252
319
|
return parseAIResult(aio, {
|
|
@@ -4,9 +4,9 @@ export type ContextSize = 'low' | 'medium' | 'high';
|
|
|
4
4
|
export type ReasoningEffort = 'low' | 'medium' | 'high';
|
|
5
5
|
export interface SearchResult {
|
|
6
6
|
answer: string;
|
|
7
|
-
answer_text_markdown?: string;
|
|
8
7
|
sources: Array<Source>;
|
|
9
8
|
searchQueries?: Array<string>;
|
|
9
|
+
answerMarkdown?: string;
|
|
10
10
|
}
|
|
11
11
|
export type SearchOptions = {
|
|
12
12
|
prompt: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,
|
|
1
|
+
{"version":3,"file":"search.schema.d.ts","sourceRoot":"","sources":["../../../src/src/schemas/search.schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,+CAA+C,CAAC;AAEvE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAClD,MAAM,MAAM,WAAW,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AACpD,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAExD,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,aAAa,GAAG;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;4FACwF;IACxF,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,UAAU,CAAC,EAAE,YAAY,GAAG,oBAAoB,CAAC;CACjD,CAAC;AAEF,MAAM,MAAM,sBAAsB,CAAC,CAAC,IAAI,aAAa,GAAG;IACvD,cAAc,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;CAC7B,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,GAAG;IAChE,OAAO,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB,CAAC"}
|