@fettstorch/clai 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -28997,22 +28997,22 @@ var require_diagnostics = __commonJS((exports, module) => {
  const debuglog = fetchDebuglog.enabled ? fetchDebuglog : undiciDebugLog;
  diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host }
+ connectParams: { version: version2, protocol, port, host }
  } = evt;
- debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version);
+ debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
  });
  diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host }
+ connectParams: { version: version2, protocol, port, host }
  } = evt;
- debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version);
+ debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
  });
  diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host },
+ connectParams: { version: version2, protocol, port, host },
  error
  } = evt;
- debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol, version, error.message);
+ debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol, version2, error.message);
  });
  diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
  const {
@@ -29047,22 +29047,22 @@ var require_diagnostics = __commonJS((exports, module) => {
  const debuglog = undiciDebugLog.enabled ? undiciDebugLog : websocketDebuglog;
  diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host }
+ connectParams: { version: version2, protocol, port, host }
  } = evt;
- debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version);
+ debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
  });
  diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host }
+ connectParams: { version: version2, protocol, port, host }
  } = evt;
- debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version);
+ debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
  });
  diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
  const {
- connectParams: { version, protocol, port, host },
+ connectParams: { version: version2, protocol, port, host },
  error
  } = evt;
- debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol, version, error.message);
+ debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol, version2, error.message);
  });
  diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
  const {
@@ -54255,9 +54255,10 @@ function ora(options) {
  return new Ora(options);
  }
  // package.json
+ var version = "0.1.8";
  var package_default = {
  name: "@fettstorch/clai",
- version: "0.1.7",
+ version,
  main: "dist/index.js",
  bin: {
  clai: "dist/cli.js"
@@ -68031,8 +68032,7 @@ async function scrape(input) {
  }));
  return results.filter((result) => result !== null);
  } catch (error) {
- console.error("Error during scraping:", error);
- throw error;
+ return [];
  }
  }
  function isValidUrl(input) {
@@ -68048,55 +68048,31 @@ function normalizeUrl(url) {
  return url;
  }
  async function getSearchResults(query) {
- try {
- return await getSearXResults(query);
- } catch (_2) {
- console.log("Trying Google search...");
+ const searchEngines = [
+ { name: "SearX", fn: getSearXResults },
+ { name: "Google", fn: getGoogleResults },
+ { name: "DuckDuckGo", fn: getDuckDuckGoResults },
+ { name: "Wikipedia", fn: getWikipediaResults }
+ ];
+ for (const engine of searchEngines) {
  try {
- return await getGoogleResults(query);
- } catch (_3) {
- console.log("Trying DuckDuckGo search...");
- try {
- return await getDuckDuckGoResults(query);
- } catch (_4) {
- console.log("Using emergency fallback...");
- return getEmergencyResults(query);
- }
- }
- }
- }
- function getEmergencyResults(query) {
- const results = [];
- const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
- const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
- if (words.length > 0) {
- const mainWord = words[0];
- results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
- if (mainWord.length > 3) {
- results.push(`https://${mainWord}.com`);
- results.push(`https://www.${mainWord}.org`);
+ const result = await engine.fn(query);
+ console.log(`[${engine.name}]::✅`);
+ return result;
+ } catch (_2) {
+ console.log(`[${engine.name}]::❌`);
  }
- results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
  }
- console.log("Emergency fallback returning:", results.join(", "));
- return results.length > 0 ? results.slice(0, 3) : [
- `https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
- ];
+ console.log("All search engines failed - no URLs to scrape");
+ throw new Error("No search results available");
  }
  async function getSearXResults(query) {
- const searxInstances = [
- "https://searx.be",
- "https://search.sapti.me",
- "https://searx.tiekoetter.com",
- "https://searx.prvcy.eu"
- ];
+ const searxInstances = ["https://searx.be", "https://search.sapti.me"];
  for (const instance of searxInstances) {
  try {
  const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
- console.log("Trying SearX search...");
- const response = await fetch(searchUrl, {
+ const response = await enhancedFetch(searchUrl, {
  headers: {
- "User-Agent": getRandomUserAgent(),
  Accept: "application/json"
  }
  });
@@ -68113,7 +68089,6 @@ async function getSearXResults(query) {
  }
  }
  if (urls.length > 0) {
- console.log(`✓ SearX found ${urls.length} results`);
  return urls.slice(0, 3);
  }
  } catch (error) {
@@ -68121,9 +68096,31 @@ async function getSearXResults(query) {
  }
  throw new Error("All SearX instances failed");
  }
+ async function getWikipediaResults(query) {
+ const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
+ const response = await enhancedFetch(searchUrl, {
+ headers: {
+ Accept: "application/json"
+ }
+ });
+ if (!response.ok) {
+ throw new Error(`Wikipedia API error: ${response.status}`);
+ }
+ const data2 = await response.json();
+ if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
+ const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
+ if (urls?.length > 0) {
+ return urls;
+ }
+ }
+ throw new Error("No Wikipedia results found");
+ }
  async function getGoogleResults(query) {
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
  const html3 = await fetchHtml(searchUrl);
+ if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
+ throw new Error("Google blocked request - detected as bot");
+ }
  const cheerioDoc = load2(html3);
  const urls = [];
  cheerioDoc('a[href^="/url?q="]').each((_2, element) => {
@@ -68151,12 +68148,14 @@ async function getGoogleResults(query) {
  if (uniqueUrls.length === 0) {
  throw new Error("No search results found in Google response");
  }
- console.log(`✓ Google found ${uniqueUrls.length} results`);
  return uniqueUrls;
  }
  async function getDuckDuckGoResults(query) {
  const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
- const response = await fetch(searchUrl);
+ const response = await enhancedFetch(searchUrl);
+ if (!response.ok) {
+ throw new Error(`DuckDuckGo API error: ${response.status}`);
+ }
  const data2 = await response.json();
  const urls = [];
  if (data2.AbstractURL) {
@@ -68169,29 +68168,39 @@ async function getDuckDuckGoResults(query) {
  }
  }
  }
+ if (urls.length === 0 && data2.DefinitionURL) {
+ urls.push(data2.DefinitionURL);
+ }
  if (urls.length === 0) {
  throw new Error("No search results found in DuckDuckGo response");
  }
- console.log(`✓ DuckDuckGo found ${urls.length} results`);
  return urls;
  }
- async function fetchHtml(url) {
- const response = await fetch(url, {
- headers: {
- "User-Agent": getRandomUserAgent(),
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Accept-Encoding": "gzip, deflate, br",
- DNT: "1",
- Connection: "keep-alive",
- "Upgrade-Insecure-Requests": "1",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- "Cache-Control": "max-age=0"
- }
+ async function enhancedFetch(url, options = {}) {
+ const headers = {
+ "User-Agent": getRandomUserAgent(),
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.9",
+ "Accept-Encoding": "gzip, deflate, br",
+ "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+ "sec-ch-ua-mobile": "?0",
+ "sec-ch-ua-platform": '"macOS"',
+ "Sec-Fetch-Dest": "document",
+ "Sec-Fetch-Mode": "navigate",
+ "Sec-Fetch-Site": "cross-site",
+ "Sec-Fetch-User": "?1",
+ "Upgrade-Insecure-Requests": "1",
+ "Cache-Control": "no-cache",
+ Pragma: "no-cache",
+ ...options.headers
+ };
+ return fetch(url, {
+ ...options,
+ headers
  });
+ }
+ async function fetchHtml(url) {
+ const response = await enhancedFetch(url);
  return response.text();
  }
  function getRandomUserAgent() {
@@ -73255,25 +73264,83 @@ async function summarizeWebPage(content, openAIApiKey) {
  });
  return result;
  }
+ async function summarizeQuery(query, openAIApiKey) {
+ const openai = openaiClient(openAIApiKey);
+ const prompt2 = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
+
+ "${query}"
+
+ Guidelines:
+ 1. Provide a comprehensive but concise answer
+ 2. Use bullet points, lists, and tables when appropriate
+ 3. Include relevant examples or step-by-step instructions if applicable
+ 4. Format your response in valid markdown
+ 5. Be factual and cite general knowledge sources when relevant
+ 6. If you suggest external resources, format them as links in the response
+ 7. Mark proper nouns as bold e.g. **OpenAI**
+ 8. Use appropriate headings (##, ###) to structure your response
+ 9. If the query is about current events beyond your knowledge cutoff, mention that limitation
+
+ Provide a thorough, educational response that directly addresses the user's query.`;
+ const schema = {
+ textual: {
+ type: "string",
+ description: "Comprehensive answer to the user query"
+ },
+ links: {
+ type: "array",
+ items: {
+ type: "object",
+ properties: {
+ name: {
+ type: "string",
+ description: "Descriptive name of the recommended resource"
+ },
+ url: {
+ type: "string",
+ description: "URL to the recommended resource"
+ }
+ },
+ required: ["name", "url"]
+ }
+ }
+ };
+ const result = await openai.completeStructured(prompt2, {
+ temperature: 0.7,
+ responseSchema: schema
+ });
+ return result;
+ }

  // src/index.ts
  async function clai(input, openAIKey) {
  const scrapedData = await scrape(input);
- const combinedContent = scrapedData.map((data2) => `Content from ${data2.url}:
+ const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
+ if (usefulData.length > 0) {
+ const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
  ${data2.content}`).join(`

  `);
- const result = await summarizeWebPage(combinedContent, openAIKey);
+ const result2 = await summarizeWebPage(combinedContent, openAIKey);
+ return {
+ summary: result2.textual.trim(),
+ links: result2.links,
+ sources: usefulData.map((data2) => data2.url)
+ };
+ }
+ console.log("No scraped data available - using OpenAI directly for query...");
+ const result = await summarizeQuery(input, openAIKey);
  return {
  summary: result.textual.trim(),
  links: result.links,
- sources: scrapedData.map((data2) => data2.url)
+ sources: ["OpenAI Knowledge Base"]
  };
  }

  // src/cli.ts
  var program2 = new Command;
  async function main2() {
+ console.log(`[clAi]::${source_default.cyan(version)}`);
  try {
  program2.name("clai").description("AI-powered web scraping tool").version(package_default.version).argument("[input...]", "URL or search terms to analyze").action(async (inputs) => {
  const openAIKey = process.env.OPENAI_API_KEY;
package/dist/index.js CHANGED
@@ -41059,8 +41059,7 @@ async function scrape(input) {
  }));
  return results.filter((result) => result !== null);
  } catch (error) {
- console.error("Error during scraping:", error);
- throw error;
+ return [];
  }
  }
  function isValidUrl(input) {
@@ -41076,55 +41075,31 @@ function normalizeUrl(url) {
  return url;
  }
  async function getSearchResults(query) {
- try {
- return await getSearXResults(query);
- } catch (_) {
- console.log("Trying Google search...");
+ const searchEngines = [
+ { name: "SearX", fn: getSearXResults },
+ { name: "Google", fn: getGoogleResults },
+ { name: "DuckDuckGo", fn: getDuckDuckGoResults },
+ { name: "Wikipedia", fn: getWikipediaResults }
+ ];
+ for (const engine of searchEngines) {
  try {
- return await getGoogleResults(query);
- } catch (_2) {
- console.log("Trying DuckDuckGo search...");
- try {
- return await getDuckDuckGoResults(query);
- } catch (_3) {
- console.log("Using emergency fallback...");
- return getEmergencyResults(query);
- }
- }
- }
- }
- function getEmergencyResults(query) {
- const results = [];
- const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
- const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
- if (words.length > 0) {
- const mainWord = words[0];
- results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
- if (mainWord.length > 3) {
- results.push(`https://${mainWord}.com`);
- results.push(`https://www.${mainWord}.org`);
+ const result = await engine.fn(query);
+ console.log(`[${engine.name}]::✅`);
+ return result;
+ } catch (_) {
+ console.log(`[${engine.name}]::❌`);
  }
- results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
  }
- console.log("Emergency fallback returning:", results.join(", "));
- return results.length > 0 ? results.slice(0, 3) : [
- `https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
- ];
+ console.log("All search engines failed - no URLs to scrape");
+ throw new Error("No search results available");
  }
  async function getSearXResults(query) {
- const searxInstances = [
- "https://searx.be",
- "https://search.sapti.me",
- "https://searx.tiekoetter.com",
- "https://searx.prvcy.eu"
- ];
+ const searxInstances = ["https://searx.be", "https://search.sapti.me"];
  for (const instance of searxInstances) {
  try {
  const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
- console.log("Trying SearX search...");
- const response = await fetch(searchUrl, {
+ const response = await enhancedFetch(searchUrl, {
  headers: {
- "User-Agent": getRandomUserAgent(),
  Accept: "application/json"
  }
  });
@@ -41141,7 +41116,6 @@ async function getSearXResults(query) {
  }
  }
  if (urls.length > 0) {
- console.log(`✓ SearX found ${urls.length} results`);
  return urls.slice(0, 3);
  }
  } catch (error) {
@@ -41149,9 +41123,31 @@ async function getSearXResults(query) {
  }
  throw new Error("All SearX instances failed");
  }
+ async function getWikipediaResults(query) {
+ const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
+ const response = await enhancedFetch(searchUrl, {
+ headers: {
+ Accept: "application/json"
+ }
+ });
+ if (!response.ok) {
+ throw new Error(`Wikipedia API error: ${response.status}`);
+ }
+ const data2 = await response.json();
+ if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
+ const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
+ if (urls?.length > 0) {
+ return urls;
+ }
+ }
+ throw new Error("No Wikipedia results found");
+ }
  async function getGoogleResults(query) {
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
  const html3 = await fetchHtml(searchUrl);
+ if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
+ throw new Error("Google blocked request - detected as bot");
+ }
  const cheerioDoc = load(html3);
  const urls = [];
  cheerioDoc('a[href^="/url?q="]').each((_, element) => {
@@ -41179,12 +41175,14 @@ async function getGoogleResults(query) {
  if (uniqueUrls.length === 0) {
  throw new Error("No search results found in Google response");
  }
- console.log(`✓ Google found ${uniqueUrls.length} results`);
  return uniqueUrls;
  }
  async function getDuckDuckGoResults(query) {
  const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
- const response = await fetch(searchUrl);
+ const response = await enhancedFetch(searchUrl);
+ if (!response.ok) {
+ throw new Error(`DuckDuckGo API error: ${response.status}`);
+ }
  const data2 = await response.json();
  const urls = [];
  if (data2.AbstractURL) {
@@ -41197,29 +41195,39 @@ async function getDuckDuckGoResults(query) {
  }
  }
  }
+ if (urls.length === 0 && data2.DefinitionURL) {
+ urls.push(data2.DefinitionURL);
+ }
  if (urls.length === 0) {
  throw new Error("No search results found in DuckDuckGo response");
  }
- console.log(`✓ DuckDuckGo found ${urls.length} results`);
  return urls;
  }
- async function fetchHtml(url) {
- const response = await fetch(url, {
- headers: {
- "User-Agent": getRandomUserAgent(),
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
- "Accept-Language": "en-US,en;q=0.9",
- "Accept-Encoding": "gzip, deflate, br",
- DNT: "1",
- Connection: "keep-alive",
- "Upgrade-Insecure-Requests": "1",
- "Sec-Fetch-Dest": "document",
- "Sec-Fetch-Mode": "navigate",
- "Sec-Fetch-Site": "none",
- "Sec-Fetch-User": "?1",
- "Cache-Control": "max-age=0"
- }
+ async function enhancedFetch(url, options = {}) {
+ const headers = {
+ "User-Agent": getRandomUserAgent(),
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.9",
+ "Accept-Encoding": "gzip, deflate, br",
+ "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+ "sec-ch-ua-mobile": "?0",
+ "sec-ch-ua-platform": '"macOS"',
+ "Sec-Fetch-Dest": "document",
+ "Sec-Fetch-Mode": "navigate",
+ "Sec-Fetch-Site": "cross-site",
+ "Sec-Fetch-User": "?1",
+ "Upgrade-Insecure-Requests": "1",
+ "Cache-Control": "no-cache",
+ Pragma: "no-cache",
+ ...options.headers
+ };
+ return fetch(url, {
+ ...options,
+ headers
  });
+ }
+ async function fetchHtml(url) {
+ const response = await enhancedFetch(url);
  return response.text();
  }
  function getRandomUserAgent() {
@@ -46353,19 +46361,76 @@ async function summarizeWebPage(content, openAIApiKey) {
  });
  return result;
  }
+ async function summarizeQuery(query, openAIApiKey) {
+ const openai = openaiClient(openAIApiKey);
+ const prompt = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
+
+ "${query}"
+
+ Guidelines:
+ 1. Provide a comprehensive but concise answer
+ 2. Use bullet points, lists, and tables when appropriate
+ 3. Include relevant examples or step-by-step instructions if applicable
+ 4. Format your response in valid markdown
+ 5. Be factual and cite general knowledge sources when relevant
+ 6. If you suggest external resources, format them as links in the response
+ 7. Mark proper nouns as bold e.g. **OpenAI**
+ 8. Use appropriate headings (##, ###) to structure your response
+ 9. If the query is about current events beyond your knowledge cutoff, mention that limitation
+
+ Provide a thorough, educational response that directly addresses the user's query.`;
+ const schema = {
+ textual: {
+ type: "string",
+ description: "Comprehensive answer to the user query"
+ },
+ links: {
+ type: "array",
+ items: {
+ type: "object",
+ properties: {
+ name: {
+ type: "string",
+ description: "Descriptive name of the recommended resource"
+ },
+ url: {
+ type: "string",
+ description: "URL to the recommended resource"
+ }
+ },
+ required: ["name", "url"]
+ }
+ }
+ };
+ const result = await openai.completeStructured(prompt, {
+ temperature: 0.7,
+ responseSchema: schema
+ });
+ return result;
+ }

  // src/index.ts
  async function clai(input, openAIKey) {
  const scrapedData = await scrape(input);
- const combinedContent = scrapedData.map((data2) => `Content from ${data2.url}:
+ const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
+ if (usefulData.length > 0) {
+ const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
  ${data2.content}`).join(`

  `);
- const result = await summarizeWebPage(combinedContent, openAIKey);
+ const result2 = await summarizeWebPage(combinedContent, openAIKey);
+ return {
+ summary: result2.textual.trim(),
+ links: result2.links,
+ sources: usefulData.map((data2) => data2.url)
+ };
+ }
+ console.log("No scraped data available - using OpenAI directly for query...");
+ const result = await summarizeQuery(input, openAIKey);
  return {
  summary: result.textual.trim(),
  links: result.links,
- sources: scrapedData.map((data2) => data2.url)
+ sources: ["OpenAI Knowledge Base"]
  };
  }
  var src_default = clai;