@fettstorch/clai 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +145 -78
- package/dist/index.js +130 -65
- package/package.json +36 -36
- package/src/cli.ts +152 -148
- package/src/index.ts +45 -22
- package/src/openai.ts +0 -6
- package/src/scraper.ts +195 -93
- package/src/summarizer.ts +101 -40
package/dist/cli.js
CHANGED
@@ -28997,22 +28997,22 @@ var require_diagnostics = __commonJS((exports, module) => {
|
|
28997
28997
|
const debuglog = fetchDebuglog.enabled ? fetchDebuglog : undiciDebugLog;
|
28998
28998
|
diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
|
28999
28999
|
const {
|
29000
|
-
connectParams: { version, protocol, port, host }
|
29000
|
+
connectParams: { version: version2, protocol, port, host }
|
29001
29001
|
} = evt;
|
29002
|
-
debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol,
|
29002
|
+
debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
|
29003
29003
|
});
|
29004
29004
|
diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
|
29005
29005
|
const {
|
29006
|
-
connectParams: { version, protocol, port, host }
|
29006
|
+
connectParams: { version: version2, protocol, port, host }
|
29007
29007
|
} = evt;
|
29008
|
-
debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol,
|
29008
|
+
debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
|
29009
29009
|
});
|
29010
29010
|
diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
|
29011
29011
|
const {
|
29012
|
-
connectParams: { version, protocol, port, host },
|
29012
|
+
connectParams: { version: version2, protocol, port, host },
|
29013
29013
|
error
|
29014
29014
|
} = evt;
|
29015
|
-
debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol,
|
29015
|
+
debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol, version2, error.message);
|
29016
29016
|
});
|
29017
29017
|
diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
|
29018
29018
|
const {
|
@@ -29047,22 +29047,22 @@ var require_diagnostics = __commonJS((exports, module) => {
|
|
29047
29047
|
const debuglog = undiciDebugLog.enabled ? undiciDebugLog : websocketDebuglog;
|
29048
29048
|
diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
|
29049
29049
|
const {
|
29050
|
-
connectParams: { version, protocol, port, host }
|
29050
|
+
connectParams: { version: version2, protocol, port, host }
|
29051
29051
|
} = evt;
|
29052
|
-
debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol,
|
29052
|
+
debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
|
29053
29053
|
});
|
29054
29054
|
diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
|
29055
29055
|
const {
|
29056
|
-
connectParams: { version, protocol, port, host }
|
29056
|
+
connectParams: { version: version2, protocol, port, host }
|
29057
29057
|
} = evt;
|
29058
|
-
debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol,
|
29058
|
+
debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
|
29059
29059
|
});
|
29060
29060
|
diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
|
29061
29061
|
const {
|
29062
|
-
connectParams: { version, protocol, port, host },
|
29062
|
+
connectParams: { version: version2, protocol, port, host },
|
29063
29063
|
error
|
29064
29064
|
} = evt;
|
29065
|
-
debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol,
|
29065
|
+
debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol, version2, error.message);
|
29066
29066
|
});
|
29067
29067
|
diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
|
29068
29068
|
const {
|
@@ -54255,9 +54255,10 @@ function ora(options) {
|
|
54255
54255
|
return new Ora(options);
|
54256
54256
|
}
|
54257
54257
|
// package.json
|
54258
|
+
var version = "0.1.8";
|
54258
54259
|
var package_default = {
|
54259
54260
|
name: "@fettstorch/clai",
|
54260
|
-
version
|
54261
|
+
version,
|
54261
54262
|
main: "dist/index.js",
|
54262
54263
|
bin: {
|
54263
54264
|
clai: "dist/cli.js"
|
@@ -68031,8 +68032,7 @@ async function scrape(input) {
|
|
68031
68032
|
}));
|
68032
68033
|
return results.filter((result) => result !== null);
|
68033
68034
|
} catch (error) {
|
68034
|
-
|
68035
|
-
throw error;
|
68035
|
+
return [];
|
68036
68036
|
}
|
68037
68037
|
}
|
68038
68038
|
function isValidUrl(input) {
|
@@ -68048,55 +68048,31 @@ function normalizeUrl(url) {
|
|
68048
68048
|
return url;
|
68049
68049
|
}
|
68050
68050
|
async function getSearchResults(query) {
|
68051
|
-
|
68052
|
-
|
68053
|
-
|
68054
|
-
|
68051
|
+
const searchEngines = [
|
68052
|
+
{ name: "SearX", fn: getSearXResults },
|
68053
|
+
{ name: "Google", fn: getGoogleResults },
|
68054
|
+
{ name: "DuckDuckGo", fn: getDuckDuckGoResults },
|
68055
|
+
{ name: "Wikipedia", fn: getWikipediaResults }
|
68056
|
+
];
|
68057
|
+
for (const engine of searchEngines) {
|
68055
68058
|
try {
|
68056
|
-
|
68057
|
-
|
68058
|
-
|
68059
|
-
|
68060
|
-
|
68061
|
-
} catch (_4) {
|
68062
|
-
console.log("Using emergency fallback...");
|
68063
|
-
return getEmergencyResults(query);
|
68064
|
-
}
|
68065
|
-
}
|
68066
|
-
}
|
68067
|
-
}
|
68068
|
-
function getEmergencyResults(query) {
|
68069
|
-
const results = [];
|
68070
|
-
const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
|
68071
|
-
const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
|
68072
|
-
if (words.length > 0) {
|
68073
|
-
const mainWord = words[0];
|
68074
|
-
results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
|
68075
|
-
if (mainWord.length > 3) {
|
68076
|
-
results.push(`https://${mainWord}.com`);
|
68077
|
-
results.push(`https://www.${mainWord}.org`);
|
68059
|
+
const result = await engine.fn(query);
|
68060
|
+
console.log(`[${engine.name}]::✅`);
|
68061
|
+
return result;
|
68062
|
+
} catch (_2) {
|
68063
|
+
console.log(`[${engine.name}]::❌`);
|
68078
68064
|
}
|
68079
|
-
results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
|
68080
68065
|
}
|
68081
|
-
console.log("
|
68082
|
-
|
68083
|
-
`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
|
68084
|
-
];
|
68066
|
+
console.log("All search engines failed - no URLs to scrape");
|
68067
|
+
throw new Error("No search results available");
|
68085
68068
|
}
|
68086
68069
|
async function getSearXResults(query) {
|
68087
|
-
const searxInstances = [
|
68088
|
-
"https://searx.be",
|
68089
|
-
"https://search.sapti.me",
|
68090
|
-
"https://searx.tiekoetter.com",
|
68091
|
-
"https://searx.prvcy.eu"
|
68092
|
-
];
|
68070
|
+
const searxInstances = ["https://searx.be", "https://search.sapti.me"];
|
68093
68071
|
for (const instance of searxInstances) {
|
68094
68072
|
try {
|
68095
68073
|
const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
|
68096
|
-
|
68097
|
-
const response = await fetch(searchUrl, {
|
68074
|
+
const response = await enhancedFetch(searchUrl, {
|
68098
68075
|
headers: {
|
68099
|
-
"User-Agent": getRandomUserAgent(),
|
68100
68076
|
Accept: "application/json"
|
68101
68077
|
}
|
68102
68078
|
});
|
@@ -68113,7 +68089,6 @@ async function getSearXResults(query) {
|
|
68113
68089
|
}
|
68114
68090
|
}
|
68115
68091
|
if (urls.length > 0) {
|
68116
|
-
console.log(`✓ SearX found ${urls.length} results`);
|
68117
68092
|
return urls.slice(0, 3);
|
68118
68093
|
}
|
68119
68094
|
} catch (error) {
|
@@ -68121,9 +68096,31 @@ async function getSearXResults(query) {
|
|
68121
68096
|
}
|
68122
68097
|
throw new Error("All SearX instances failed");
|
68123
68098
|
}
|
68099
|
+
async function getWikipediaResults(query) {
|
68100
|
+
const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
|
68101
|
+
const response = await enhancedFetch(searchUrl, {
|
68102
|
+
headers: {
|
68103
|
+
Accept: "application/json"
|
68104
|
+
}
|
68105
|
+
});
|
68106
|
+
if (!response.ok) {
|
68107
|
+
throw new Error(`Wikipedia API error: ${response.status}`);
|
68108
|
+
}
|
68109
|
+
const data2 = await response.json();
|
68110
|
+
if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
|
68111
|
+
const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
|
68112
|
+
if (urls?.length > 0) {
|
68113
|
+
return urls;
|
68114
|
+
}
|
68115
|
+
}
|
68116
|
+
throw new Error("No Wikipedia results found");
|
68117
|
+
}
|
68124
68118
|
async function getGoogleResults(query) {
|
68125
68119
|
const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
|
68126
68120
|
const html3 = await fetchHtml(searchUrl);
|
68121
|
+
if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
|
68122
|
+
throw new Error("Google blocked request - detected as bot");
|
68123
|
+
}
|
68127
68124
|
const cheerioDoc = load2(html3);
|
68128
68125
|
const urls = [];
|
68129
68126
|
cheerioDoc('a[href^="/url?q="]').each((_2, element) => {
|
@@ -68151,12 +68148,14 @@ async function getGoogleResults(query) {
|
|
68151
68148
|
if (uniqueUrls.length === 0) {
|
68152
68149
|
throw new Error("No search results found in Google response");
|
68153
68150
|
}
|
68154
|
-
console.log(`✓ Google found ${uniqueUrls.length} results`);
|
68155
68151
|
return uniqueUrls;
|
68156
68152
|
}
|
68157
68153
|
async function getDuckDuckGoResults(query) {
|
68158
68154
|
const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
|
68159
|
-
const response = await
|
68155
|
+
const response = await enhancedFetch(searchUrl);
|
68156
|
+
if (!response.ok) {
|
68157
|
+
throw new Error(`DuckDuckGo API error: ${response.status}`);
|
68158
|
+
}
|
68160
68159
|
const data2 = await response.json();
|
68161
68160
|
const urls = [];
|
68162
68161
|
if (data2.AbstractURL) {
|
@@ -68169,29 +68168,39 @@ async function getDuckDuckGoResults(query) {
|
|
68169
68168
|
}
|
68170
68169
|
}
|
68171
68170
|
}
|
68171
|
+
if (urls.length === 0 && data2.DefinitionURL) {
|
68172
|
+
urls.push(data2.DefinitionURL);
|
68173
|
+
}
|
68172
68174
|
if (urls.length === 0) {
|
68173
68175
|
throw new Error("No search results found in DuckDuckGo response");
|
68174
68176
|
}
|
68175
|
-
console.log(`✓ DuckDuckGo found ${urls.length} results`);
|
68176
68177
|
return urls;
|
68177
68178
|
}
|
68178
|
-
async function
|
68179
|
-
const
|
68180
|
-
|
68181
|
-
|
68182
|
-
|
68183
|
-
|
68184
|
-
|
68185
|
-
|
68186
|
-
|
68187
|
-
|
68188
|
-
|
68189
|
-
|
68190
|
-
|
68191
|
-
|
68192
|
-
|
68193
|
-
|
68179
|
+
async function enhancedFetch(url, options = {}) {
|
68180
|
+
const headers = {
|
68181
|
+
"User-Agent": getRandomUserAgent(),
|
68182
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
68183
|
+
"Accept-Language": "en-US,en;q=0.9",
|
68184
|
+
"Accept-Encoding": "gzip, deflate, br",
|
68185
|
+
"sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
68186
|
+
"sec-ch-ua-mobile": "?0",
|
68187
|
+
"sec-ch-ua-platform": '"macOS"',
|
68188
|
+
"Sec-Fetch-Dest": "document",
|
68189
|
+
"Sec-Fetch-Mode": "navigate",
|
68190
|
+
"Sec-Fetch-Site": "cross-site",
|
68191
|
+
"Sec-Fetch-User": "?1",
|
68192
|
+
"Upgrade-Insecure-Requests": "1",
|
68193
|
+
"Cache-Control": "no-cache",
|
68194
|
+
Pragma: "no-cache",
|
68195
|
+
...options.headers
|
68196
|
+
};
|
68197
|
+
return fetch(url, {
|
68198
|
+
...options,
|
68199
|
+
headers
|
68194
68200
|
});
|
68201
|
+
}
|
68202
|
+
async function fetchHtml(url) {
|
68203
|
+
const response = await enhancedFetch(url);
|
68195
68204
|
return response.text();
|
68196
68205
|
}
|
68197
68206
|
function getRandomUserAgent() {
|
@@ -73255,25 +73264,83 @@ async function summarizeWebPage(content, openAIApiKey) {
|
|
73255
73264
|
});
|
73256
73265
|
return result;
|
73257
73266
|
}
|
73267
|
+
async function summarizeQuery(query, openAIApiKey) {
|
73268
|
+
const openai = openaiClient(openAIApiKey);
|
73269
|
+
const prompt2 = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
|
73270
|
+
|
73271
|
+
"${query}"
|
73272
|
+
|
73273
|
+
Guidelines:
|
73274
|
+
1. Provide a comprehensive but concise answer
|
73275
|
+
2. Use bullet points, lists, and tables when appropriate
|
73276
|
+
3. Include relevant examples or step-by-step instructions if applicable
|
73277
|
+
4. Format your response in valid markdown
|
73278
|
+
5. Be factual and cite general knowledge sources when relevant
|
73279
|
+
6. If you suggest external resources, format them as links in the response
|
73280
|
+
7. Mark proper nouns as bold e.g. **OpenAI**
|
73281
|
+
8. Use appropriate headings (##, ###) to structure your response
|
73282
|
+
9. If the query is about current events beyond your knowledge cutoff, mention that limitation
|
73283
|
+
|
73284
|
+
Provide a thorough, educational response that directly addresses the user's query.`;
|
73285
|
+
const schema = {
|
73286
|
+
textual: {
|
73287
|
+
type: "string",
|
73288
|
+
description: "Comprehensive answer to the user query"
|
73289
|
+
},
|
73290
|
+
links: {
|
73291
|
+
type: "array",
|
73292
|
+
items: {
|
73293
|
+
type: "object",
|
73294
|
+
properties: {
|
73295
|
+
name: {
|
73296
|
+
type: "string",
|
73297
|
+
description: "Descriptive name of the recommended resource"
|
73298
|
+
},
|
73299
|
+
url: {
|
73300
|
+
type: "string",
|
73301
|
+
description: "URL to the recommended resource"
|
73302
|
+
}
|
73303
|
+
},
|
73304
|
+
required: ["name", "url"]
|
73305
|
+
}
|
73306
|
+
}
|
73307
|
+
};
|
73308
|
+
const result = await openai.completeStructured(prompt2, {
|
73309
|
+
temperature: 0.7,
|
73310
|
+
responseSchema: schema
|
73311
|
+
});
|
73312
|
+
return result;
|
73313
|
+
}
|
73258
73314
|
|
73259
73315
|
// src/index.ts
|
73260
73316
|
async function clai(input, openAIKey) {
|
73261
73317
|
const scrapedData = await scrape(input);
|
73262
|
-
const
|
73318
|
+
const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
|
73319
|
+
if (usefulData.length > 0) {
|
73320
|
+
const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
|
73263
73321
|
${data2.content}`).join(`
|
73264
73322
|
|
73265
73323
|
`);
|
73266
|
-
|
73324
|
+
const result2 = await summarizeWebPage(combinedContent, openAIKey);
|
73325
|
+
return {
|
73326
|
+
summary: result2.textual.trim(),
|
73327
|
+
links: result2.links,
|
73328
|
+
sources: usefulData.map((data2) => data2.url)
|
73329
|
+
};
|
73330
|
+
}
|
73331
|
+
console.log("No scraped data available - using OpenAI directly for query...");
|
73332
|
+
const result = await summarizeQuery(input, openAIKey);
|
73267
73333
|
return {
|
73268
73334
|
summary: result.textual.trim(),
|
73269
73335
|
links: result.links,
|
73270
|
-
sources:
|
73336
|
+
sources: ["OpenAI Knowledge Base"]
|
73271
73337
|
};
|
73272
73338
|
}
|
73273
73339
|
|
73274
73340
|
// src/cli.ts
|
73275
73341
|
var program2 = new Command;
|
73276
73342
|
async function main2() {
|
73343
|
+
console.log(`[clAi]::${source_default.cyan(version)}`);
|
73277
73344
|
try {
|
73278
73345
|
program2.name("clai").description("AI-powered web scraping tool").version(package_default.version).argument("[input...]", "URL or search terms to analyze").action(async (inputs) => {
|
73279
73346
|
const openAIKey = process.env.OPENAI_API_KEY;
|
package/dist/index.js
CHANGED
@@ -41059,8 +41059,7 @@ async function scrape(input) {
|
|
41059
41059
|
}));
|
41060
41060
|
return results.filter((result) => result !== null);
|
41061
41061
|
} catch (error) {
|
41062
|
-
|
41063
|
-
throw error;
|
41062
|
+
return [];
|
41064
41063
|
}
|
41065
41064
|
}
|
41066
41065
|
function isValidUrl(input) {
|
@@ -41076,55 +41075,31 @@ function normalizeUrl(url) {
|
|
41076
41075
|
return url;
|
41077
41076
|
}
|
41078
41077
|
async function getSearchResults(query) {
|
41079
|
-
|
41080
|
-
|
41081
|
-
|
41082
|
-
|
41078
|
+
const searchEngines = [
|
41079
|
+
{ name: "SearX", fn: getSearXResults },
|
41080
|
+
{ name: "Google", fn: getGoogleResults },
|
41081
|
+
{ name: "DuckDuckGo", fn: getDuckDuckGoResults },
|
41082
|
+
{ name: "Wikipedia", fn: getWikipediaResults }
|
41083
|
+
];
|
41084
|
+
for (const engine of searchEngines) {
|
41083
41085
|
try {
|
41084
|
-
|
41085
|
-
|
41086
|
-
|
41087
|
-
|
41088
|
-
|
41089
|
-
} catch (_3) {
|
41090
|
-
console.log("Using emergency fallback...");
|
41091
|
-
return getEmergencyResults(query);
|
41092
|
-
}
|
41093
|
-
}
|
41094
|
-
}
|
41095
|
-
}
|
41096
|
-
function getEmergencyResults(query) {
|
41097
|
-
const results = [];
|
41098
|
-
const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
|
41099
|
-
const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
|
41100
|
-
if (words.length > 0) {
|
41101
|
-
const mainWord = words[0];
|
41102
|
-
results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
|
41103
|
-
if (mainWord.length > 3) {
|
41104
|
-
results.push(`https://${mainWord}.com`);
|
41105
|
-
results.push(`https://www.${mainWord}.org`);
|
41086
|
+
const result = await engine.fn(query);
|
41087
|
+
console.log(`[${engine.name}]::✅`);
|
41088
|
+
return result;
|
41089
|
+
} catch (_) {
|
41090
|
+
console.log(`[${engine.name}]::❌`);
|
41106
41091
|
}
|
41107
|
-
results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
|
41108
41092
|
}
|
41109
|
-
console.log("
|
41110
|
-
|
41111
|
-
`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
|
41112
|
-
];
|
41093
|
+
console.log("All search engines failed - no URLs to scrape");
|
41094
|
+
throw new Error("No search results available");
|
41113
41095
|
}
|
41114
41096
|
async function getSearXResults(query) {
|
41115
|
-
const searxInstances = [
|
41116
|
-
"https://searx.be",
|
41117
|
-
"https://search.sapti.me",
|
41118
|
-
"https://searx.tiekoetter.com",
|
41119
|
-
"https://searx.prvcy.eu"
|
41120
|
-
];
|
41097
|
+
const searxInstances = ["https://searx.be", "https://search.sapti.me"];
|
41121
41098
|
for (const instance of searxInstances) {
|
41122
41099
|
try {
|
41123
41100
|
const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
|
41124
|
-
|
41125
|
-
const response = await fetch(searchUrl, {
|
41101
|
+
const response = await enhancedFetch(searchUrl, {
|
41126
41102
|
headers: {
|
41127
|
-
"User-Agent": getRandomUserAgent(),
|
41128
41103
|
Accept: "application/json"
|
41129
41104
|
}
|
41130
41105
|
});
|
@@ -41141,7 +41116,6 @@ async function getSearXResults(query) {
|
|
41141
41116
|
}
|
41142
41117
|
}
|
41143
41118
|
if (urls.length > 0) {
|
41144
|
-
console.log(`✓ SearX found ${urls.length} results`);
|
41145
41119
|
return urls.slice(0, 3);
|
41146
41120
|
}
|
41147
41121
|
} catch (error) {
|
@@ -41149,9 +41123,31 @@ async function getSearXResults(query) {
|
|
41149
41123
|
}
|
41150
41124
|
throw new Error("All SearX instances failed");
|
41151
41125
|
}
|
41126
|
+
async function getWikipediaResults(query) {
|
41127
|
+
const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
|
41128
|
+
const response = await enhancedFetch(searchUrl, {
|
41129
|
+
headers: {
|
41130
|
+
Accept: "application/json"
|
41131
|
+
}
|
41132
|
+
});
|
41133
|
+
if (!response.ok) {
|
41134
|
+
throw new Error(`Wikipedia API error: ${response.status}`);
|
41135
|
+
}
|
41136
|
+
const data2 = await response.json();
|
41137
|
+
if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
|
41138
|
+
const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
|
41139
|
+
if (urls?.length > 0) {
|
41140
|
+
return urls;
|
41141
|
+
}
|
41142
|
+
}
|
41143
|
+
throw new Error("No Wikipedia results found");
|
41144
|
+
}
|
41152
41145
|
async function getGoogleResults(query) {
|
41153
41146
|
const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
|
41154
41147
|
const html3 = await fetchHtml(searchUrl);
|
41148
|
+
if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
|
41149
|
+
throw new Error("Google blocked request - detected as bot");
|
41150
|
+
}
|
41155
41151
|
const cheerioDoc = load(html3);
|
41156
41152
|
const urls = [];
|
41157
41153
|
cheerioDoc('a[href^="/url?q="]').each((_, element) => {
|
@@ -41179,12 +41175,14 @@ async function getGoogleResults(query) {
|
|
41179
41175
|
if (uniqueUrls.length === 0) {
|
41180
41176
|
throw new Error("No search results found in Google response");
|
41181
41177
|
}
|
41182
|
-
console.log(`✓ Google found ${uniqueUrls.length} results`);
|
41183
41178
|
return uniqueUrls;
|
41184
41179
|
}
|
41185
41180
|
async function getDuckDuckGoResults(query) {
|
41186
41181
|
const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
|
41187
|
-
const response = await
|
41182
|
+
const response = await enhancedFetch(searchUrl);
|
41183
|
+
if (!response.ok) {
|
41184
|
+
throw new Error(`DuckDuckGo API error: ${response.status}`);
|
41185
|
+
}
|
41188
41186
|
const data2 = await response.json();
|
41189
41187
|
const urls = [];
|
41190
41188
|
if (data2.AbstractURL) {
|
@@ -41197,29 +41195,39 @@ async function getDuckDuckGoResults(query) {
|
|
41197
41195
|
}
|
41198
41196
|
}
|
41199
41197
|
}
|
41198
|
+
if (urls.length === 0 && data2.DefinitionURL) {
|
41199
|
+
urls.push(data2.DefinitionURL);
|
41200
|
+
}
|
41200
41201
|
if (urls.length === 0) {
|
41201
41202
|
throw new Error("No search results found in DuckDuckGo response");
|
41202
41203
|
}
|
41203
|
-
console.log(`✓ DuckDuckGo found ${urls.length} results`);
|
41204
41204
|
return urls;
|
41205
41205
|
}
|
41206
|
-
async function
|
41207
|
-
const
|
41208
|
-
|
41209
|
-
|
41210
|
-
|
41211
|
-
|
41212
|
-
|
41213
|
-
|
41214
|
-
|
41215
|
-
|
41216
|
-
|
41217
|
-
|
41218
|
-
|
41219
|
-
|
41220
|
-
|
41221
|
-
|
41206
|
+
async function enhancedFetch(url, options = {}) {
|
41207
|
+
const headers = {
|
41208
|
+
"User-Agent": getRandomUserAgent(),
|
41209
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
41210
|
+
"Accept-Language": "en-US,en;q=0.9",
|
41211
|
+
"Accept-Encoding": "gzip, deflate, br",
|
41212
|
+
"sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
|
41213
|
+
"sec-ch-ua-mobile": "?0",
|
41214
|
+
"sec-ch-ua-platform": '"macOS"',
|
41215
|
+
"Sec-Fetch-Dest": "document",
|
41216
|
+
"Sec-Fetch-Mode": "navigate",
|
41217
|
+
"Sec-Fetch-Site": "cross-site",
|
41218
|
+
"Sec-Fetch-User": "?1",
|
41219
|
+
"Upgrade-Insecure-Requests": "1",
|
41220
|
+
"Cache-Control": "no-cache",
|
41221
|
+
Pragma: "no-cache",
|
41222
|
+
...options.headers
|
41223
|
+
};
|
41224
|
+
return fetch(url, {
|
41225
|
+
...options,
|
41226
|
+
headers
|
41222
41227
|
});
|
41228
|
+
}
|
41229
|
+
async function fetchHtml(url) {
|
41230
|
+
const response = await enhancedFetch(url);
|
41223
41231
|
return response.text();
|
41224
41232
|
}
|
41225
41233
|
function getRandomUserAgent() {
|
@@ -46353,19 +46361,76 @@ async function summarizeWebPage(content, openAIApiKey) {
|
|
46353
46361
|
});
|
46354
46362
|
return result;
|
46355
46363
|
}
|
46364
|
+
async function summarizeQuery(query, openAIApiKey) {
|
46365
|
+
const openai = openaiClient(openAIApiKey);
|
46366
|
+
const prompt = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
|
46367
|
+
|
46368
|
+
"${query}"
|
46369
|
+
|
46370
|
+
Guidelines:
|
46371
|
+
1. Provide a comprehensive but concise answer
|
46372
|
+
2. Use bullet points, lists, and tables when appropriate
|
46373
|
+
3. Include relevant examples or step-by-step instructions if applicable
|
46374
|
+
4. Format your response in valid markdown
|
46375
|
+
5. Be factual and cite general knowledge sources when relevant
|
46376
|
+
6. If you suggest external resources, format them as links in the response
|
46377
|
+
7. Mark proper nouns as bold e.g. **OpenAI**
|
46378
|
+
8. Use appropriate headings (##, ###) to structure your response
|
46379
|
+
9. If the query is about current events beyond your knowledge cutoff, mention that limitation
|
46380
|
+
|
46381
|
+
Provide a thorough, educational response that directly addresses the user's query.`;
|
46382
|
+
const schema = {
|
46383
|
+
textual: {
|
46384
|
+
type: "string",
|
46385
|
+
description: "Comprehensive answer to the user query"
|
46386
|
+
},
|
46387
|
+
links: {
|
46388
|
+
type: "array",
|
46389
|
+
items: {
|
46390
|
+
type: "object",
|
46391
|
+
properties: {
|
46392
|
+
name: {
|
46393
|
+
type: "string",
|
46394
|
+
description: "Descriptive name of the recommended resource"
|
46395
|
+
},
|
46396
|
+
url: {
|
46397
|
+
type: "string",
|
46398
|
+
description: "URL to the recommended resource"
|
46399
|
+
}
|
46400
|
+
},
|
46401
|
+
required: ["name", "url"]
|
46402
|
+
}
|
46403
|
+
}
|
46404
|
+
};
|
46405
|
+
const result = await openai.completeStructured(prompt, {
|
46406
|
+
temperature: 0.7,
|
46407
|
+
responseSchema: schema
|
46408
|
+
});
|
46409
|
+
return result;
|
46410
|
+
}
|
46356
46411
|
|
46357
46412
|
// src/index.ts
|
46358
46413
|
async function clai(input, openAIKey) {
|
46359
46414
|
const scrapedData = await scrape(input);
|
46360
|
-
const
|
46415
|
+
const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
|
46416
|
+
if (usefulData.length > 0) {
|
46417
|
+
const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
|
46361
46418
|
${data2.content}`).join(`
|
46362
46419
|
|
46363
46420
|
`);
|
46364
|
-
|
46421
|
+
const result2 = await summarizeWebPage(combinedContent, openAIKey);
|
46422
|
+
return {
|
46423
|
+
summary: result2.textual.trim(),
|
46424
|
+
links: result2.links,
|
46425
|
+
sources: usefulData.map((data2) => data2.url)
|
46426
|
+
};
|
46427
|
+
}
|
46428
|
+
console.log("No scraped data available - using OpenAI directly for query...");
|
46429
|
+
const result = await summarizeQuery(input, openAIKey);
|
46365
46430
|
return {
|
46366
46431
|
summary: result.textual.trim(),
|
46367
46432
|
links: result.links,
|
46368
|
-
sources:
|
46433
|
+
sources: ["OpenAI Knowledge Base"]
|
46369
46434
|
};
|
46370
46435
|
}
|
46371
46436
|
var src_default = clai;
|