@fettstorch/clai 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -28997,22 +28997,22 @@ var require_diagnostics = __commonJS((exports, module) => {
28997
28997
  const debuglog = fetchDebuglog.enabled ? fetchDebuglog : undiciDebugLog;
28998
28998
  diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
28999
28999
  const {
29000
- connectParams: { version, protocol, port, host }
29000
+ connectParams: { version: version2, protocol, port, host }
29001
29001
  } = evt;
29002
- debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version);
29002
+ debuglog("connecting to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
29003
29003
  });
29004
29004
  diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
29005
29005
  const {
29006
- connectParams: { version, protocol, port, host }
29006
+ connectParams: { version: version2, protocol, port, host }
29007
29007
  } = evt;
29008
- debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version);
29008
+ debuglog("connected to %s using %s%s", `${host}${port ? `:${port}` : ""}`, protocol, version2);
29009
29009
  });
29010
29010
  diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
29011
29011
  const {
29012
- connectParams: { version, protocol, port, host },
29012
+ connectParams: { version: version2, protocol, port, host },
29013
29013
  error
29014
29014
  } = evt;
29015
- debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol, version, error.message);
29015
+ debuglog("connection to %s using %s%s errored - %s", `${host}${port ? `:${port}` : ""}`, protocol, version2, error.message);
29016
29016
  });
29017
29017
  diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
29018
29018
  const {
@@ -29047,22 +29047,22 @@ var require_diagnostics = __commonJS((exports, module) => {
29047
29047
  const debuglog = undiciDebugLog.enabled ? undiciDebugLog : websocketDebuglog;
29048
29048
  diagnosticsChannel.channel("undici:client:beforeConnect").subscribe((evt) => {
29049
29049
  const {
29050
- connectParams: { version, protocol, port, host }
29050
+ connectParams: { version: version2, protocol, port, host }
29051
29051
  } = evt;
29052
- debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version);
29052
+ debuglog("connecting to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
29053
29053
  });
29054
29054
  diagnosticsChannel.channel("undici:client:connected").subscribe((evt) => {
29055
29055
  const {
29056
- connectParams: { version, protocol, port, host }
29056
+ connectParams: { version: version2, protocol, port, host }
29057
29057
  } = evt;
29058
- debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version);
29058
+ debuglog("connected to %s%s using %s%s", host, port ? `:${port}` : "", protocol, version2);
29059
29059
  });
29060
29060
  diagnosticsChannel.channel("undici:client:connectError").subscribe((evt) => {
29061
29061
  const {
29062
- connectParams: { version, protocol, port, host },
29062
+ connectParams: { version: version2, protocol, port, host },
29063
29063
  error
29064
29064
  } = evt;
29065
- debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol, version, error.message);
29065
+ debuglog("connection to %s%s using %s%s errored - %s", host, port ? `:${port}` : "", protocol, version2, error.message);
29066
29066
  });
29067
29067
  diagnosticsChannel.channel("undici:client:sendHeaders").subscribe((evt) => {
29068
29068
  const {
@@ -54255,9 +54255,10 @@ function ora(options) {
54255
54255
  return new Ora(options);
54256
54256
  }
54257
54257
  // package.json
54258
+ var version = "0.1.9";
54258
54259
  var package_default = {
54259
54260
  name: "@fettstorch/clai",
54260
- version: "0.1.7",
54261
+ version,
54261
54262
  main: "dist/index.js",
54262
54263
  bin: {
54263
54264
  clai: "dist/cli.js"
@@ -68031,8 +68032,7 @@ async function scrape(input) {
68031
68032
  }));
68032
68033
  return results.filter((result) => result !== null);
68033
68034
  } catch (error) {
68034
- console.error("Error during scraping:", error);
68035
- throw error;
68035
+ return [];
68036
68036
  }
68037
68037
  }
68038
68038
  function isValidUrl(input) {
@@ -68048,55 +68048,31 @@ function normalizeUrl(url) {
68048
68048
  return url;
68049
68049
  }
68050
68050
  async function getSearchResults(query) {
68051
- try {
68052
- return await getSearXResults(query);
68053
- } catch (_2) {
68054
- console.log("Trying Google search...");
68051
+ const searchEngines = [
68052
+ { name: "SearX", fn: getSearXResults },
68053
+ { name: "Google", fn: getGoogleResults },
68054
+ { name: "DuckDuckGo", fn: getDuckDuckGoResults },
68055
+ { name: "Wikipedia", fn: getWikipediaResults }
68056
+ ];
68057
+ for (const engine of searchEngines) {
68055
68058
  try {
68056
- return await getGoogleResults(query);
68057
- } catch (_3) {
68058
- console.log("Trying DuckDuckGo search...");
68059
- try {
68060
- return await getDuckDuckGoResults(query);
68061
- } catch (_4) {
68062
- console.log("Using emergency fallback...");
68063
- return getEmergencyResults(query);
68064
- }
68065
- }
68066
- }
68067
- }
68068
- function getEmergencyResults(query) {
68069
- const results = [];
68070
- const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
68071
- const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
68072
- if (words.length > 0) {
68073
- const mainWord = words[0];
68074
- results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
68075
- if (mainWord.length > 3) {
68076
- results.push(`https://${mainWord}.com`);
68077
- results.push(`https://www.${mainWord}.org`);
68059
+ const result = await engine.fn(query);
68060
+ console.log(`[${engine.name}]::✅`);
68061
+ return result;
68062
+ } catch (_2) {
68063
+ console.log(`[${engine.name}]::❌`);
68078
68064
  }
68079
- results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
68080
68065
  }
68081
- console.log("Emergency fallback returning:", results.join(", "));
68082
- return results.length > 0 ? results.slice(0, 3) : [
68083
- `https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
68084
- ];
68066
+ console.log("All search engines failed - no URLs to scrape");
68067
+ throw new Error("No search results available");
68085
68068
  }
68086
68069
  async function getSearXResults(query) {
68087
- const searxInstances = [
68088
- "https://searx.be",
68089
- "https://search.sapti.me",
68090
- "https://searx.tiekoetter.com",
68091
- "https://searx.prvcy.eu"
68092
- ];
68070
+ const searxInstances = ["https://searx.be", "https://search.sapti.me"];
68093
68071
  for (const instance of searxInstances) {
68094
68072
  try {
68095
68073
  const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
68096
- console.log("Trying SearX search...");
68097
- const response = await fetch(searchUrl, {
68074
+ const response = await enhancedFetch(searchUrl, {
68098
68075
  headers: {
68099
- "User-Agent": getRandomUserAgent(),
68100
68076
  Accept: "application/json"
68101
68077
  }
68102
68078
  });
@@ -68113,7 +68089,6 @@ async function getSearXResults(query) {
68113
68089
  }
68114
68090
  }
68115
68091
  if (urls.length > 0) {
68116
- console.log(`✓ SearX found ${urls.length} results`);
68117
68092
  return urls.slice(0, 3);
68118
68093
  }
68119
68094
  } catch (error) {
@@ -68121,9 +68096,31 @@ async function getSearXResults(query) {
68121
68096
  }
68122
68097
  throw new Error("All SearX instances failed");
68123
68098
  }
68099
+ async function getWikipediaResults(query) {
68100
+ const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
68101
+ const response = await enhancedFetch(searchUrl, {
68102
+ headers: {
68103
+ Accept: "application/json"
68104
+ }
68105
+ });
68106
+ if (!response.ok) {
68107
+ throw new Error(`Wikipedia API error: ${response.status}`);
68108
+ }
68109
+ const data2 = await response.json();
68110
+ if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
68111
+ const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
68112
+ if (urls?.length > 0) {
68113
+ return urls;
68114
+ }
68115
+ }
68116
+ throw new Error("No Wikipedia results found");
68117
+ }
68124
68118
  async function getGoogleResults(query) {
68125
68119
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
68126
68120
  const html3 = await fetchHtml(searchUrl);
68121
+ if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
68122
+ throw new Error("Google blocked request - detected as bot");
68123
+ }
68127
68124
  const cheerioDoc = load2(html3);
68128
68125
  const urls = [];
68129
68126
  cheerioDoc('a[href^="/url?q="]').each((_2, element) => {
@@ -68151,13 +68148,18 @@ async function getGoogleResults(query) {
68151
68148
  if (uniqueUrls.length === 0) {
68152
68149
  throw new Error("No search results found in Google response");
68153
68150
  }
68154
- console.log(`✓ Google found ${uniqueUrls.length} results`);
68155
68151
  return uniqueUrls;
68156
68152
  }
68157
68153
  async function getDuckDuckGoResults(query) {
68158
68154
  const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
68159
- const response = await fetch(searchUrl);
68155
+ const response = await enhancedFetch(searchUrl);
68156
+ if (!response.ok) {
68157
+ throw new Error(`DuckDuckGo API error: ${response.status}`);
68158
+ }
68160
68159
  const data2 = await response.json();
68160
+ if (data2.Abstract?.includes("redirects users to a non-JavaScript site") || data2.Abstract?.includes("DuckDuckGo redirects users") || data2.AbstractText?.includes("redirects users to a non-JavaScript site") || data2.AbstractText?.includes("DuckDuckGo redirects users")) {
68161
+ throw new Error("DuckDuckGo blocked request - JavaScript disabled redirect");
68162
+ }
68161
68163
  const urls = [];
68162
68164
  if (data2.AbstractURL) {
68163
68165
  urls.push(data2.AbstractURL);
@@ -68169,29 +68171,39 @@ async function getDuckDuckGoResults(query) {
68169
68171
  }
68170
68172
  }
68171
68173
  }
68174
+ if (urls.length === 0 && data2.DefinitionURL) {
68175
+ urls.push(data2.DefinitionURL);
68176
+ }
68172
68177
  if (urls.length === 0) {
68173
68178
  throw new Error("No search results found in DuckDuckGo response");
68174
68179
  }
68175
- console.log(`✓ DuckDuckGo found ${urls.length} results`);
68176
68180
  return urls;
68177
68181
  }
68178
- async function fetchHtml(url) {
68179
- const response = await fetch(url, {
68180
- headers: {
68181
- "User-Agent": getRandomUserAgent(),
68182
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
68183
- "Accept-Language": "en-US,en;q=0.9",
68184
- "Accept-Encoding": "gzip, deflate, br",
68185
- DNT: "1",
68186
- Connection: "keep-alive",
68187
- "Upgrade-Insecure-Requests": "1",
68188
- "Sec-Fetch-Dest": "document",
68189
- "Sec-Fetch-Mode": "navigate",
68190
- "Sec-Fetch-Site": "none",
68191
- "Sec-Fetch-User": "?1",
68192
- "Cache-Control": "max-age=0"
68193
- }
68182
+ async function enhancedFetch(url, options = {}) {
68183
+ const headers = {
68184
+ "User-Agent": getRandomUserAgent(),
68185
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
68186
+ "Accept-Language": "en-US,en;q=0.9",
68187
+ "Accept-Encoding": "gzip, deflate, br",
68188
+ "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
68189
+ "sec-ch-ua-mobile": "?0",
68190
+ "sec-ch-ua-platform": '"macOS"',
68191
+ "Sec-Fetch-Dest": "document",
68192
+ "Sec-Fetch-Mode": "navigate",
68193
+ "Sec-Fetch-Site": "cross-site",
68194
+ "Sec-Fetch-User": "?1",
68195
+ "Upgrade-Insecure-Requests": "1",
68196
+ "Cache-Control": "no-cache",
68197
+ Pragma: "no-cache",
68198
+ ...options.headers
68199
+ };
68200
+ return fetch(url, {
68201
+ ...options,
68202
+ headers
68194
68203
  });
68204
+ }
68205
+ async function fetchHtml(url) {
68206
+ const response = await enhancedFetch(url);
68195
68207
  return response.text();
68196
68208
  }
68197
68209
  function getRandomUserAgent() {
@@ -73255,25 +73267,83 @@ async function summarizeWebPage(content, openAIApiKey) {
73255
73267
  });
73256
73268
  return result;
73257
73269
  }
73270
+ async function summarizeQuery(query, openAIApiKey) {
73271
+ const openai = openaiClient(openAIApiKey);
73272
+ const prompt2 = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
73273
+
73274
+ "${query}"
73275
+
73276
+ Guidelines:
73277
+ 1. Provide a comprehensive but concise answer
73278
+ 2. Use bullet points, lists, and tables when appropriate
73279
+ 3. Include relevant examples or step-by-step instructions if applicable
73280
+ 4. Format your response in valid markdown
73281
+ 5. Be factual and cite general knowledge sources when relevant
73282
+ 6. If you suggest external resources, format them as links in the response
73283
+ 7. Mark proper nouns as bold e.g. **OpenAI**
73284
+ 8. Use appropriate headings (##, ###) to structure your response
73285
+ 9. If the query is about current events beyond your knowledge cutoff, mention that limitation
73286
+
73287
+ Provide a thorough, educational response that directly addresses the user's query.`;
73288
+ const schema = {
73289
+ textual: {
73290
+ type: "string",
73291
+ description: "Comprehensive answer to the user query"
73292
+ },
73293
+ links: {
73294
+ type: "array",
73295
+ items: {
73296
+ type: "object",
73297
+ properties: {
73298
+ name: {
73299
+ type: "string",
73300
+ description: "Descriptive name of the recommended resource"
73301
+ },
73302
+ url: {
73303
+ type: "string",
73304
+ description: "URL to the recommended resource"
73305
+ }
73306
+ },
73307
+ required: ["name", "url"]
73308
+ }
73309
+ }
73310
+ };
73311
+ const result = await openai.completeStructured(prompt2, {
73312
+ temperature: 0.7,
73313
+ responseSchema: schema
73314
+ });
73315
+ return result;
73316
+ }
73258
73317
 
73259
73318
  // src/index.ts
73260
73319
  async function clai(input, openAIKey) {
73261
73320
  const scrapedData = await scrape(input);
73262
- const combinedContent = scrapedData.map((data2) => `Content from ${data2.url}:
73321
+ const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
73322
+ if (usefulData.length > 0) {
73323
+ const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
73263
73324
  ${data2.content}`).join(`
73264
73325
 
73265
73326
  `);
73266
- const result = await summarizeWebPage(combinedContent, openAIKey);
73327
+ const result2 = await summarizeWebPage(combinedContent, openAIKey);
73328
+ return {
73329
+ summary: result2.textual.trim(),
73330
+ links: result2.links,
73331
+ sources: usefulData.map((data2) => data2.url)
73332
+ };
73333
+ }
73334
+ console.log("No scraped data available - using OpenAI directly for query...");
73335
+ const result = await summarizeQuery(input, openAIKey);
73267
73336
  return {
73268
73337
  summary: result.textual.trim(),
73269
73338
  links: result.links,
73270
- sources: scrapedData.map((data2) => data2.url)
73339
+ sources: ["OpenAI Knowledge Base"]
73271
73340
  };
73272
73341
  }
73273
73342
 
73274
73343
  // src/cli.ts
73275
73344
  var program2 = new Command;
73276
73345
  async function main2() {
73346
+ console.log(`[clAi]::${source_default.cyan(version)}`);
73277
73347
  try {
73278
73348
  program2.name("clai").description("AI-powered web scraping tool").version(package_default.version).argument("[input...]", "URL or search terms to analyze").action(async (inputs) => {
73279
73349
  const openAIKey = process.env.OPENAI_API_KEY;
package/dist/index.js CHANGED
@@ -41059,8 +41059,7 @@ async function scrape(input) {
41059
41059
  }));
41060
41060
  return results.filter((result) => result !== null);
41061
41061
  } catch (error) {
41062
- console.error("Error during scraping:", error);
41063
- throw error;
41062
+ return [];
41064
41063
  }
41065
41064
  }
41066
41065
  function isValidUrl(input) {
@@ -41076,55 +41075,31 @@ function normalizeUrl(url) {
41076
41075
  return url;
41077
41076
  }
41078
41077
  async function getSearchResults(query) {
41079
- try {
41080
- return await getSearXResults(query);
41081
- } catch (_) {
41082
- console.log("Trying Google search...");
41078
+ const searchEngines = [
41079
+ { name: "SearX", fn: getSearXResults },
41080
+ { name: "Google", fn: getGoogleResults },
41081
+ { name: "DuckDuckGo", fn: getDuckDuckGoResults },
41082
+ { name: "Wikipedia", fn: getWikipediaResults }
41083
+ ];
41084
+ for (const engine of searchEngines) {
41083
41085
  try {
41084
- return await getGoogleResults(query);
41085
- } catch (_2) {
41086
- console.log("Trying DuckDuckGo search...");
41087
- try {
41088
- return await getDuckDuckGoResults(query);
41089
- } catch (_3) {
41090
- console.log("Using emergency fallback...");
41091
- return getEmergencyResults(query);
41092
- }
41093
- }
41094
- }
41095
- }
41096
- function getEmergencyResults(query) {
41097
- const results = [];
41098
- const cleanQuery = query.toLowerCase().replace(/[^a-z0-9\s]/g, "").trim();
41099
- const words = cleanQuery.split(/\s+/).filter((word) => word.length > 2);
41100
- if (words.length > 0) {
41101
- const mainWord = words[0];
41102
- results.push(`https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`);
41103
- if (mainWord.length > 3) {
41104
- results.push(`https://${mainWord}.com`);
41105
- results.push(`https://www.${mainWord}.org`);
41086
+ const result = await engine.fn(query);
41087
+ console.log(`[${engine.name}]::✅`);
41088
+ return result;
41089
+ } catch (_) {
41090
+ console.log(`[${engine.name}]::❌`);
41106
41091
  }
41107
- results.push(`https://www.reddit.com/search/?q=${encodeURIComponent(query)}`);
41108
41092
  }
41109
- console.log("Emergency fallback returning:", results.join(", "));
41110
- return results.length > 0 ? results.slice(0, 3) : [
41111
- `https://en.wikipedia.org/wiki/${encodeURIComponent(query.replace(/\s+/g, "_"))}`
41112
- ];
41093
+ console.log("All search engines failed - no URLs to scrape");
41094
+ throw new Error("No search results available");
41113
41095
  }
41114
41096
  async function getSearXResults(query) {
41115
- const searxInstances = [
41116
- "https://searx.be",
41117
- "https://search.sapti.me",
41118
- "https://searx.tiekoetter.com",
41119
- "https://searx.prvcy.eu"
41120
- ];
41097
+ const searxInstances = ["https://searx.be", "https://search.sapti.me"];
41121
41098
  for (const instance of searxInstances) {
41122
41099
  try {
41123
41100
  const searchUrl = `${instance}/search?q=${encodeURIComponent(query)}&format=json&categories=general`;
41124
- console.log("Trying SearX search...");
41125
- const response = await fetch(searchUrl, {
41101
+ const response = await enhancedFetch(searchUrl, {
41126
41102
  headers: {
41127
- "User-Agent": getRandomUserAgent(),
41128
41103
  Accept: "application/json"
41129
41104
  }
41130
41105
  });
@@ -41141,7 +41116,6 @@ async function getSearXResults(query) {
41141
41116
  }
41142
41117
  }
41143
41118
  if (urls.length > 0) {
41144
- console.log(`✓ SearX found ${urls.length} results`);
41145
41119
  return urls.slice(0, 3);
41146
41120
  }
41147
41121
  } catch (error) {
@@ -41149,9 +41123,31 @@ async function getSearXResults(query) {
41149
41123
  }
41150
41124
  throw new Error("All SearX instances failed");
41151
41125
  }
41126
+ async function getWikipediaResults(query) {
41127
+ const searchUrl = `https://en.wikipedia.org/w/api.php?action=opensearch&search=${encodeURIComponent(query)}&limit=3&format=json&origin=*`;
41128
+ const response = await enhancedFetch(searchUrl, {
41129
+ headers: {
41130
+ Accept: "application/json"
41131
+ }
41132
+ });
41133
+ if (!response.ok) {
41134
+ throw new Error(`Wikipedia API error: ${response.status}`);
41135
+ }
41136
+ const data2 = await response.json();
41137
+ if (Array.isArray(data2) && data2.length >= 4 && Array.isArray(data2[3])) {
41138
+ const urls = data2[3]?.filter((url) => url?.startsWith("https://"));
41139
+ if (urls?.length > 0) {
41140
+ return urls;
41141
+ }
41142
+ }
41143
+ throw new Error("No Wikipedia results found");
41144
+ }
41152
41145
  async function getGoogleResults(query) {
41153
41146
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&num=10`;
41154
41147
  const html3 = await fetchHtml(searchUrl);
41148
+ if (html3.includes("If you're having trouble accessing Google Search") || html3.includes("unusual traffic from your computer network")) {
41149
+ throw new Error("Google blocked request - detected as bot");
41150
+ }
41155
41151
  const cheerioDoc = load(html3);
41156
41152
  const urls = [];
41157
41153
  cheerioDoc('a[href^="/url?q="]').each((_, element) => {
@@ -41179,13 +41175,18 @@ async function getGoogleResults(query) {
41179
41175
  if (uniqueUrls.length === 0) {
41180
41176
  throw new Error("No search results found in Google response");
41181
41177
  }
41182
- console.log(`✓ Google found ${uniqueUrls.length} results`);
41183
41178
  return uniqueUrls;
41184
41179
  }
41185
41180
  async function getDuckDuckGoResults(query) {
41186
41181
  const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
41187
- const response = await fetch(searchUrl);
41182
+ const response = await enhancedFetch(searchUrl);
41183
+ if (!response.ok) {
41184
+ throw new Error(`DuckDuckGo API error: ${response.status}`);
41185
+ }
41188
41186
  const data2 = await response.json();
41187
+ if (data2.Abstract?.includes("redirects users to a non-JavaScript site") || data2.Abstract?.includes("DuckDuckGo redirects users") || data2.AbstractText?.includes("redirects users to a non-JavaScript site") || data2.AbstractText?.includes("DuckDuckGo redirects users")) {
41188
+ throw new Error("DuckDuckGo blocked request - JavaScript disabled redirect");
41189
+ }
41189
41190
  const urls = [];
41190
41191
  if (data2.AbstractURL) {
41191
41192
  urls.push(data2.AbstractURL);
@@ -41197,29 +41198,39 @@ async function getDuckDuckGoResults(query) {
41197
41198
  }
41198
41199
  }
41199
41200
  }
41201
+ if (urls.length === 0 && data2.DefinitionURL) {
41202
+ urls.push(data2.DefinitionURL);
41203
+ }
41200
41204
  if (urls.length === 0) {
41201
41205
  throw new Error("No search results found in DuckDuckGo response");
41202
41206
  }
41203
- console.log(`✓ DuckDuckGo found ${urls.length} results`);
41204
41207
  return urls;
41205
41208
  }
41206
- async function fetchHtml(url) {
41207
- const response = await fetch(url, {
41208
- headers: {
41209
- "User-Agent": getRandomUserAgent(),
41210
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
41211
- "Accept-Language": "en-US,en;q=0.9",
41212
- "Accept-Encoding": "gzip, deflate, br",
41213
- DNT: "1",
41214
- Connection: "keep-alive",
41215
- "Upgrade-Insecure-Requests": "1",
41216
- "Sec-Fetch-Dest": "document",
41217
- "Sec-Fetch-Mode": "navigate",
41218
- "Sec-Fetch-Site": "none",
41219
- "Sec-Fetch-User": "?1",
41220
- "Cache-Control": "max-age=0"
41221
- }
41209
+ async function enhancedFetch(url, options = {}) {
41210
+ const headers = {
41211
+ "User-Agent": getRandomUserAgent(),
41212
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
41213
+ "Accept-Language": "en-US,en;q=0.9",
41214
+ "Accept-Encoding": "gzip, deflate, br",
41215
+ "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
41216
+ "sec-ch-ua-mobile": "?0",
41217
+ "sec-ch-ua-platform": '"macOS"',
41218
+ "Sec-Fetch-Dest": "document",
41219
+ "Sec-Fetch-Mode": "navigate",
41220
+ "Sec-Fetch-Site": "cross-site",
41221
+ "Sec-Fetch-User": "?1",
41222
+ "Upgrade-Insecure-Requests": "1",
41223
+ "Cache-Control": "no-cache",
41224
+ Pragma: "no-cache",
41225
+ ...options.headers
41226
+ };
41227
+ return fetch(url, {
41228
+ ...options,
41229
+ headers
41222
41230
  });
41231
+ }
41232
+ async function fetchHtml(url) {
41233
+ const response = await enhancedFetch(url);
41223
41234
  return response.text();
41224
41235
  }
41225
41236
  function getRandomUserAgent() {
@@ -46353,19 +46364,76 @@ async function summarizeWebPage(content, openAIApiKey) {
46353
46364
  });
46354
46365
  return result;
46355
46366
  }
46367
+ async function summarizeQuery(query, openAIApiKey) {
46368
+ const openai = openaiClient(openAIApiKey);
46369
+ const prompt = `You are an expert educator and researcher. Answer the following query with accurate, helpful information:
46370
+
46371
+ "${query}"
46372
+
46373
+ Guidelines:
46374
+ 1. Provide a comprehensive but concise answer
46375
+ 2. Use bullet points, lists, and tables when appropriate
46376
+ 3. Include relevant examples or step-by-step instructions if applicable
46377
+ 4. Format your response in valid markdown
46378
+ 5. Be factual and cite general knowledge sources when relevant
46379
+ 6. If you suggest external resources, format them as links in the response
46380
+ 7. Mark proper nouns as bold e.g. **OpenAI**
46381
+ 8. Use appropriate headings (##, ###) to structure your response
46382
+ 9. If the query is about current events beyond your knowledge cutoff, mention that limitation
46383
+
46384
+ Provide a thorough, educational response that directly addresses the user's query.`;
46385
+ const schema = {
46386
+ textual: {
46387
+ type: "string",
46388
+ description: "Comprehensive answer to the user query"
46389
+ },
46390
+ links: {
46391
+ type: "array",
46392
+ items: {
46393
+ type: "object",
46394
+ properties: {
46395
+ name: {
46396
+ type: "string",
46397
+ description: "Descriptive name of the recommended resource"
46398
+ },
46399
+ url: {
46400
+ type: "string",
46401
+ description: "URL to the recommended resource"
46402
+ }
46403
+ },
46404
+ required: ["name", "url"]
46405
+ }
46406
+ }
46407
+ };
46408
+ const result = await openai.completeStructured(prompt, {
46409
+ temperature: 0.7,
46410
+ responseSchema: schema
46411
+ });
46412
+ return result;
46413
+ }
46356
46414
 
46357
46415
  // src/index.ts
46358
46416
  async function clai(input, openAIKey) {
46359
46417
  const scrapedData = await scrape(input);
46360
- const combinedContent = scrapedData.map((data2) => `Content from ${data2.url}:
46418
+ const usefulData = scrapedData.filter((data2) => data2.content.length > 200 && !data2.content.includes("Wikipedia does not have an article") && !data2.content.includes("page not found") && !data2.content.includes("404") && !data2.content.includes("error"));
46419
+ if (usefulData.length > 0) {
46420
+ const combinedContent = usefulData.map((data2) => `Content from ${data2.url}:
46361
46421
  ${data2.content}`).join(`
46362
46422
 
46363
46423
  `);
46364
- const result = await summarizeWebPage(combinedContent, openAIKey);
46424
+ const result2 = await summarizeWebPage(combinedContent, openAIKey);
46425
+ return {
46426
+ summary: result2.textual.trim(),
46427
+ links: result2.links,
46428
+ sources: usefulData.map((data2) => data2.url)
46429
+ };
46430
+ }
46431
+ console.log("No scraped data available - using OpenAI directly for query...");
46432
+ const result = await summarizeQuery(input, openAIKey);
46365
46433
  return {
46366
46434
  summary: result.textual.trim(),
46367
46435
  links: result.links,
46368
- sources: scrapedData.map((data2) => data2.url)
46436
+ sources: ["OpenAI Knowledge Base"]
46369
46437
  };
46370
46438
  }
46371
46439
  var src_default = clai;