@intlayer/backend 5.7.6 → 5.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,8 +37,7 @@ const getFileContent = (filePath) => (0, import_fs.readFileSync)((0, import_path
37
37
  const CHAT_GPT_PROMPT = getFileContent("./PROMPT.md");
38
38
  const aiDefaultOptions = {
39
39
  provider: import_aiSdk.AIProvider.OPENAI,
40
- model: "gpt-4o-mini",
41
- temperature: 0.2
40
+ model: "gpt-5-mini"
42
41
  };
43
42
  const formatLocaleWithName = (locale) => `${locale}: ${(0, import_core.getLocaleName)(locale, import_intlayer.Locales.ENGLISH)}`;
44
43
  const formatTagInstructions = (tags) => {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../../src/utils/AI/translateJSON/index.ts"],"sourcesContent":["import type { Tag } from '@/types/tag.types';\nimport { getLocaleName } from '@intlayer/core';\nimport { logger } from '@logger';\nimport { extractJson } from '@utils/extractJSON';\nimport { generateText } from 'ai';\nimport { readFileSync } from 'fs';\nimport { Locales } from 'intlayer';\nimport { dirname, join } from 'path';\nimport { fileURLToPath } from 'url';\nimport { AIConfig, AIOptions, AIProvider } from '../aiSdk';\n\nconst __dirname = dirname(fileURLToPath(import.meta.url));\n\n// Get the content of a file at the specified path\nconst getFileContent = (filePath: string) =>\n readFileSync(join(__dirname, filePath), { encoding: 'utf-8' });\n\nexport type TranslateJSONOptions = {\n entryFileContent: JSON;\n presetOutputContent: JSON;\n dictionaryDescription: string;\n entryLocale: Locales;\n outputLocale: Locales;\n tags: Tag[];\n aiConfig: AIConfig;\n mode: 'complete' | 'review';\n applicationContext?: string;\n};\n\nexport type TranslateJSONResultData = {\n fileContent: string;\n tokenUsed: number;\n};\n\n// The prompt template to send to the AI model\nconst CHAT_GPT_PROMPT = getFileContent('./PROMPT.md');\n\nexport const aiDefaultOptions: AIOptions = {\n provider: AIProvider.OPENAI,\n model: 'gpt-4o-mini',\n temperature: 0.2,\n};\n\n/**\n * Format a locale with its name.\n *\n * @param locale - The locale to format.\n * @returns A string in the format \"locale: name\", e.g. \"en: English\".\n */\nconst formatLocaleWithName = (locale: Locales): string =>\n `${locale}: ${getLocaleName(locale, Locales.ENGLISH)}`;\n\n/**\n * Formats tag instructions for the AI prompt.\n * Creates a string with all available tags and their descriptions.\n *\n * @param tags - The list of tags to format.\n * @returns A formatted string with tag instructions.\n */\nconst formatTagInstructions = (tags: Tag[]): string => {\n if (!tags || tags.length === 0) {\n return '';\n }\n\n // Prepare the tag instructions.\n return `Based on the dictionary content, identify specific tags from the list below that would be relevant:\n \n${tags.map(({ key, description }) => `- ${key}: ${description}`).join('\\n\\n')}`;\n};\n\nconst getModeInstructions = (mode: 'complete' | 'review'): string => {\n if (mode === 'complete') {\n return 'Mode: \"Complete\" - Enrich the preset content with the missing keys and values in the output locale. Do not update existing keys. Everything should be returned in the output.';\n }\n\n return 'Mode: \"Review\" - Fill missing content and review existing keys from the preset content. If a key from the entry is missing in the output, it must be translated to the target language and added. If you detect misspelled content, or content that should be reformulated, correct it. If a translation is not coherent with the desired language, translate it.';\n};\n\n/**\n * TranslateJSONs a content declaration file by constructing a prompt for AI models.\n * The prompt includes details about the project's locales, file paths of content declarations,\n * and requests for identifying issues or inconsistencies.\n */\nexport const translateJSON = async ({\n entryFileContent,\n presetOutputContent,\n dictionaryDescription,\n aiConfig,\n entryLocale,\n outputLocale,\n tags,\n mode,\n applicationContext,\n}: TranslateJSONOptions): Promise<TranslateJSONResultData | undefined> => {\n // Prepare the prompt for AI by replacing placeholders with actual values.\n const prompt = CHAT_GPT_PROMPT.replace(\n '{{entryLocale}}',\n formatLocaleWithName(entryLocale)\n )\n .replace('{{outputLocale}}', formatLocaleWithName(outputLocale))\n .replace('{{entryFileContent}}', JSON.stringify(entryFileContent))\n .replace('{{presetOutputContent}}', JSON.stringify(presetOutputContent))\n .replace('{{dictionaryDescription}}', dictionaryDescription)\n .replace('{{applicationContext}}', applicationContext ?? '')\n .replace('{{tagsInstructions}}', formatTagInstructions(tags))\n .replace('{{modeInstructions}}', getModeInstructions(mode));\n\n // Use the AI SDK to generate the completion\n const { text: newContent, usage } = await generateText({\n ...aiConfig,\n messages: [{ role: 'system', content: prompt }],\n });\n\n logger.info(`${usage?.totalTokens ?? 0} tokens used in the request`);\n\n return {\n fileContent: extractJson(newContent),\n tokenUsed: usage?.totalTokens ?? 0,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,kBAA8B;AAC9B,oBAAuB;AACvB,yBAA4B;AAC5B,gBAA6B;AAC7B,gBAA6B;AAC7B,sBAAwB;AACxB,kBAA8B;AAC9B,iBAA8B;AAC9B,mBAAgD;AAThD;AAWA,MAAM,gBAAY,yBAAQ,0BAAc,YAAY,GAAG,CAAC;AAGxD,MAAM,iBAAiB,CAAC,iBACtB,4BAAa,kBAAK,WAAW,QAAQ,GAAG,EAAE,UAAU,QAAQ,CAAC;AAoB/D,MAAM,kBAAkB,eAAe,aAAa;AAE7C,MAAM,mBAA8B;AAAA,EACzC,UAAU,wBAAW;AAAA,EACrB,OAAO;AAAA,EACP,aAAa;AACf;AAQA,MAAM,uBAAuB,CAAC,WAC5B,GAAG,MAAM,SAAK,2BAAc,QAAQ,wBAAQ,OAAO,CAAC;AAStD,MAAM,wBAAwB,CAAC,SAAwB;AACrD,MAAI,CAAC,QAAQ,KAAK,WAAW,GAAG;AAC9B,WAAO;AAAA,EACT;AAGA,SAAO;AAAA;AAAA,EAEP,KAAK,IAAI,CAAC,EAAE,KAAK,YAAY,MAAM,KAAK,GAAG,KAAK,WAAW,EAAE,EAAE,KAAK,MAAM,CAAC;AAC7E;AAEA,MAAM,sBAAsB,CAAC,SAAwC;AACnE,MAAI,SAAS,YAAY;AACvB,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOO,MAAM,gBAAgB,OAAO;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,MAA0E;AAExE,QAAM,SAAS,gBAAgB;AAAA,IAC7B;AAAA,IACA,qBAAqB,WAAW;AAAA,EAClC,EACG,QAAQ,oBAAoB,qBAAqB,YAAY,CAAC,EAC9D,QAAQ,wBAAwB,KAAK,UAAU,gBAAgB,CAAC,EAChE,QAAQ,2BAA2B,KAAK,UAAU,mBAAmB,CAAC,EACtE,QAAQ,6BAA6B,qBAAqB,EAC1D,QAAQ,0BAA0B,sBAAsB,EAAE,EAC1D,QAAQ,wBAAwB,sBAAsB,IAAI,CAAC,EAC3D,QAAQ,wBAAwB,oBAAoB,IAAI,CAAC;AAG5D,QAAM,EAAE,MAAM,YAAY,MAAM,IAAI,UAAM,wBAAa;AAAA,IACrD,GAAG;AAAA,IACH,UAAU,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,CAAC;AAAA,EAChD,CAAC;AAED,uBAAO,KAAK,GAAG,OAAO,eAAe,CAAC,6BAA6B;AAEnE,SAAO;AAAA,IACL,iBAAa,gCAAY,UAAU;AAAA,IACnC,WAAW,OAAO,eAAe;AAAA,EACnC;AACF;","names":[]}
1
+ {"version":3,"sources":["../../../../../src/utils/AI/translateJSON/index.ts"],"sourcesContent":["import type { Tag } from '@/types/tag.types';\nimport { getLocaleName } from '@intlayer/core';\nimport { logger } from '@logger';\nimport { extractJson } from '@utils/extractJSON';\nimport { generateText } from 'ai';\nimport { readFileSync } from 'fs';\nimport { Locales } from 'intlayer';\nimport { dirname, join } from 'path';\nimport { fileURLToPath } from 'url';\nimport { AIConfig, AIOptions, AIProvider } from '../aiSdk';\n\nconst __dirname = dirname(fileURLToPath(import.meta.url));\n\n// Get the content of a file at the specified path\nconst getFileContent = (filePath: string) =>\n readFileSync(join(__dirname, filePath), { encoding: 'utf-8' });\n\nexport type TranslateJSONOptions = {\n entryFileContent: JSON;\n presetOutputContent: JSON;\n dictionaryDescription: string;\n entryLocale: Locales;\n outputLocale: Locales;\n tags: Tag[];\n aiConfig: AIConfig;\n mode: 'complete' | 'review';\n applicationContext?: string;\n};\n\nexport type TranslateJSONResultData = {\n fileContent: string;\n tokenUsed: number;\n};\n\n// The prompt template to send to the AI model\nconst CHAT_GPT_PROMPT = getFileContent('./PROMPT.md');\n\nexport const aiDefaultOptions: AIOptions = {\n provider: AIProvider.OPENAI,\n model: 'gpt-5-mini',\n};\n\n/**\n * Format a locale with its name.\n *\n * @param locale - The locale to format.\n * @returns A string in the format \"locale: name\", e.g. \"en: English\".\n */\nconst formatLocaleWithName = (locale: Locales): string =>\n `${locale}: ${getLocaleName(locale, Locales.ENGLISH)}`;\n\n/**\n * Formats tag instructions for the AI prompt.\n * Creates a string with all available tags and their descriptions.\n *\n * @param tags - The list of tags to format.\n * @returns A formatted string with tag instructions.\n */\nconst formatTagInstructions = (tags: Tag[]): string => {\n if (!tags || tags.length === 0) {\n return '';\n }\n\n // Prepare the tag instructions.\n return `Based on the dictionary content, identify specific tags from the list below that would be relevant:\n \n${tags.map(({ key, description }) => `- ${key}: ${description}`).join('\\n\\n')}`;\n};\n\nconst getModeInstructions = (mode: 'complete' | 'review'): string => {\n if (mode === 'complete') {\n return 'Mode: \"Complete\" - Enrich the preset content with the missing keys and values in the output locale. Do not update existing keys. Everything should be returned in the output.';\n }\n\n return 'Mode: \"Review\" - Fill missing content and review existing keys from the preset content. If a key from the entry is missing in the output, it must be translated to the target language and added. If you detect misspelled content, or content that should be reformulated, correct it. If a translation is not coherent with the desired language, translate it.';\n};\n\n/**\n * TranslateJSONs a content declaration file by constructing a prompt for AI models.\n * The prompt includes details about the project's locales, file paths of content declarations,\n * and requests for identifying issues or inconsistencies.\n */\nexport const translateJSON = async ({\n entryFileContent,\n presetOutputContent,\n dictionaryDescription,\n aiConfig,\n entryLocale,\n outputLocale,\n tags,\n mode,\n applicationContext,\n}: TranslateJSONOptions): Promise<TranslateJSONResultData | undefined> => {\n // Prepare the prompt for AI by replacing placeholders with actual values.\n const prompt = CHAT_GPT_PROMPT.replace(\n '{{entryLocale}}',\n formatLocaleWithName(entryLocale)\n )\n .replace('{{outputLocale}}', formatLocaleWithName(outputLocale))\n .replace('{{entryFileContent}}', JSON.stringify(entryFileContent))\n .replace('{{presetOutputContent}}', JSON.stringify(presetOutputContent))\n .replace('{{dictionaryDescription}}', dictionaryDescription)\n .replace('{{applicationContext}}', applicationContext ?? '')\n .replace('{{tagsInstructions}}', formatTagInstructions(tags))\n .replace('{{modeInstructions}}', getModeInstructions(mode));\n\n // Use the AI SDK to generate the completion\n const { text: newContent, usage } = await generateText({\n ...aiConfig,\n messages: [{ role: 'system', content: prompt }],\n });\n\n logger.info(`${usage?.totalTokens ?? 0} tokens used in the request`);\n\n return {\n fileContent: extractJson(newContent),\n tokenUsed: usage?.totalTokens ?? 0,\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,kBAA8B;AAC9B,oBAAuB;AACvB,yBAA4B;AAC5B,gBAA6B;AAC7B,gBAA6B;AAC7B,sBAAwB;AACxB,kBAA8B;AAC9B,iBAA8B;AAC9B,mBAAgD;AAThD;AAWA,MAAM,gBAAY,yBAAQ,0BAAc,YAAY,GAAG,CAAC;AAGxD,MAAM,iBAAiB,CAAC,iBACtB,4BAAa,kBAAK,WAAW,QAAQ,GAAG,EAAE,UAAU,QAAQ,CAAC;AAoB/D,MAAM,kBAAkB,eAAe,aAAa;AAE7C,MAAM,mBAA8B;AAAA,EACzC,UAAU,wBAAW;AAAA,EACrB,OAAO;AACT;AAQA,MAAM,uBAAuB,CAAC,WAC5B,GAAG,MAAM,SAAK,2BAAc,QAAQ,wBAAQ,OAAO,CAAC;AAStD,MAAM,wBAAwB,CAAC,SAAwB;AACrD,MAAI,CAAC,QAAQ,KAAK,WAAW,GAAG;AAC9B,WAAO;AAAA,EACT;AAGA,SAAO;AAAA;AAAA,EAEP,KAAK,IAAI,CAAC,EAAE,KAAK,YAAY,MAAM,KAAK,GAAG,KAAK,WAAW,EAAE,EAAE,KAAK,MAAM,CAAC;AAC7E;AAEA,MAAM,sBAAsB,CAAC,SAAwC;AACnE,MAAI,SAAS,YAAY;AACvB,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOO,MAAM,gBAAgB,OAAO;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,MAA0E;AAExE,QAAM,SAAS,gBAAgB;AAAA,IAC7B;AAAA,IACA,qBAAqB,WAAW;AAAA,EAClC,EACG,QAAQ,oBAAoB,qBAAqB,YAAY,CAAC,EAC9D,QAAQ,wBAAwB,KAAK,UAAU,gBAAgB,CAAC,EAChE,QAAQ,2BAA2B,KAAK,UAAU,mBAAmB,CAAC,EACtE,QAAQ,6BAA6B,qBAAqB,EAC1D,QAAQ,0BAA0B,sBAAsB,EAAE,EAC1D,QAAQ,wBAAwB,sBAAsB,IAAI,CAAC,EAC3D,QAAQ,wBAAwB,oBAAoB,IAAI,CAAC;AAG5D,QAAM,EAAE,MAAM,YAAY,MAAM,IAAI,UAAM,wBAAa;AAAA,IACrD,GAAG;AAAA,IACH,UAAU,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,CAAC;AAAA,EAChD,CAAC;AAED,uBAAO,KAAK,GAAG,OAAO,eAAe,CAAC,6BAA6B;AAEnE,SAAO;AAAA,IACL,iBAAa,gCAAY,UAAU;AAAA,IACnC,WAAW,OAAO,eAAe;AAAA,EACnC;AACF;","names":[]}
@@ -28,10 +28,10 @@ const getAPIKey = (res, accessType, aiOptions) => {
28
28
  return void 0;
29
29
  };
30
30
  const getModel = (provider, userApiKey, userModel, defaultModel) => {
31
- let fallBackModel = defaultModel ?? "chatgpt-4o-latest";
31
+ let fallBackModel = defaultModel ?? "gpt-5";
32
32
  switch (provider) {
33
33
  case "openai" /* OPENAI */:
34
- defaultModel = "chatgpt-4o-latest";
34
+ defaultModel = "gpt-5";
35
35
  break;
36
36
  case "anthropic" /* ANTHROPIC */:
37
37
  defaultModel = "claude-3-haiku-20240307";
@@ -57,7 +57,7 @@ const getModel = (provider, userApiKey, userModel, defaultModel) => {
57
57
  return fallBackModel;
58
58
  };
59
59
  const DEFAULT_PROVIDER = "openai" /* OPENAI */;
60
- const DEFAULT_TEMPERATURE = 0.1;
60
+ const DEFAULT_TEMPERATURE = 1;
61
61
  const getAIConfig = async (res, options) => {
62
62
  const {
63
63
  userOptions,
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/utils/AI/aiSdk.ts"],"sourcesContent":["import { anthropic, createAnthropic } from '@ai-sdk/anthropic';\nimport { createDeepSeek, deepseek } from '@ai-sdk/deepseek';\nimport { createGoogleGenerativeAI, google } from '@ai-sdk/google';\nimport { createMistral, mistral } from '@ai-sdk/mistral';\nimport { createOpenAI, openai } from '@ai-sdk/openai';\nimport { CoreMessage, generateText } from 'ai';\nimport { Response } from 'express';\n\ntype AnthropicModel = Parameters<typeof anthropic>[0];\ntype DeepSeekModel = Parameters<typeof deepseek>[0];\ntype MistralModel = Parameters<typeof mistral>[0];\ntype OpenAIModel = Parameters<typeof openai>[0];\ntype GoogleModel = Parameters<typeof google>[0];\n\nexport type Messages = CoreMessage[];\n\n/**\n * Supported AI models\n */\nexport type Model =\n | AnthropicModel\n | DeepSeekModel\n | MistralModel\n | OpenAIModel\n | GoogleModel\n | (string & {});\n\n/**\n * Supported AI SDK providers\n */\nexport enum AIProvider {\n OPENAI = 'openai',\n ANTHROPIC = 'anthropic',\n MISTRAL = 'mistral',\n DEEPSEEK = 'deepseek',\n GEMINI = 'gemini',\n}\n\n/**\n * Common options for all AI providers\n */\nexport type AIOptions = {\n provider?: AIProvider;\n model?: Model;\n temperature?: number;\n apiKey?: string;\n applicationContext?: string;\n maxTokens?: number;\n};\n\n// Define the structure of messages used in chat completions\nexport type ChatCompletionRequestMessage = {\n role: 'system' | 'user' | 'assistant'; // The role of the message sender\n content: string; // The text content of the message\n timestamp?: Date; // The timestamp of the message\n};\n\ntype AccessType = 'apiKey' | 'registered_user' | 'premium_user' | 'public';\n\nconst getAPIKey = (\n res: Response,\n accessType: AccessType[],\n aiOptions?: AIOptions\n) => {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n\n if (accessType.includes('public')) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n if (accessType.includes('apiKey') && aiOptions?.apiKey) {\n return aiOptions?.apiKey;\n }\n\n if (accessType.includes('registered_user') && res.locals.user) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n // TODO: Implement premium user access\n if (accessType.includes('premium_user') && res.locals.user) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n return undefined;\n};\n\nconst getModel = (\n provider: AIProvider,\n userApiKey: string,\n userModel?: Model,\n defaultModel?: Model\n): Model => {\n // Set default models based on provider\n let fallBackModel: Model = defaultModel ?? 'chatgpt-4o-latest';\n\n switch (provider) {\n case AIProvider.OPENAI:\n defaultModel = 'chatgpt-4o-latest';\n break;\n case AIProvider.ANTHROPIC:\n defaultModel = 'claude-3-haiku-20240307';\n break;\n case AIProvider.MISTRAL:\n defaultModel = 'mistral-large-latest';\n break;\n case AIProvider.DEEPSEEK:\n defaultModel = 'deepseek-coder';\n break;\n case AIProvider.GEMINI:\n defaultModel = 'gemini-1.5-pro';\n break;\n }\n\n // If the user use his own API, let him use the model he wants\n if (Boolean(userApiKey) && Boolean(userModel)) {\n return userModel!;\n }\n\n if (Boolean(userModel)) {\n throw new Error(\n 'The user should use his own API key to use a custom model'\n );\n }\n\n return fallBackModel;\n};\n\nexport type AIConfig = Parameters<typeof generateText>[0];\n\nconst DEFAULT_PROVIDER: AIProvider = AIProvider.OPENAI as AIProvider;\nconst DEFAULT_TEMPERATURE: number = 0.1;\n\nexport type AIConfigOptions = {\n userOptions?: AIOptions;\n defaultOptions?: AIOptions;\n accessType?: AccessType[];\n};\n\n/**\n * Get AI model configuration based on the selected provider and options\n * This function handles the configuration for different AI providers\n *\n * @param options Configuration options including provider, API keys, models and temperature\n * @returns Configured AI model ready to use with generateText\n */\nexport const getAIConfig = async (\n res: Response,\n options: AIConfigOptions\n): Promise<AIConfig> => {\n const {\n userOptions,\n defaultOptions,\n accessType = ['registered_user'],\n } = options;\n\n const aiOptions = {\n provider: DEFAULT_PROVIDER,\n temperature: DEFAULT_TEMPERATURE,\n ...defaultOptions,\n ...userOptions,\n } satisfies AIOptions;\n\n const apiKey = getAPIKey(res, accessType, aiOptions);\n\n // Check if API key is provided\n if (!apiKey) {\n throw new Error(`API key for ${aiOptions.provider} is missing`);\n }\n\n const selectedModel = getModel(\n aiOptions.provider,\n apiKey,\n aiOptions.model,\n defaultOptions?.model\n );\n\n const protectedOptions = {\n ...aiOptions,\n apiKey,\n model: selectedModel,\n } satisfies AIOptions;\n\n let languageModel: AIConfig['model'];\n\n switch (protectedOptions.provider) {\n case AIProvider.OPENAI: {\n languageModel = createOpenAI({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.ANTHROPIC: {\n languageModel = createAnthropic({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.MISTRAL: {\n languageModel = createMistral({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.DEEPSEEK: {\n languageModel = createDeepSeek({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.GEMINI: {\n languageModel = createGoogleGenerativeAI({\n apiKey,\n })(selectedModel);\n break;\n }\n\n default: {\n throw new Error(`Provider ${protectedOptions.provider} not supported`);\n }\n }\n\n return {\n model: languageModel,\n maxTokens: protectedOptions.maxTokens,\n temperature: protectedOptions.temperature,\n };\n};\n"],"mappings":"AAAA,SAAoB,uBAAuB;AAC3C,SAAS,sBAAgC;AACzC,SAAS,gCAAwC;AACjD,SAAS,qBAA8B;AACvC,SAAS,oBAA4B;AA0B9B,IAAK,aAAL,kBAAKA,gBAAL;AACL,EAAAA,YAAA,YAAS;AACT,EAAAA,YAAA,eAAY;AACZ,EAAAA,YAAA,aAAU;AACV,EAAAA,YAAA,cAAW;AACX,EAAAA,YAAA,YAAS;AALC,SAAAA;AAAA,GAAA;AA6BZ,MAAM,YAAY,CAChB,KACA,YACA,cACG;AACH,QAAM,gBAAgB,QAAQ,IAAI;AAElC,MAAI,WAAW,SAAS,QAAQ,GAAG;AACjC,WAAO,WAAW,UAAU;AAAA,EAC9B;AAEA,MAAI,WAAW,SAAS,QAAQ,KAAK,WAAW,QAAQ;AACtD,WAAO,WAAW;AAAA,EACpB;AAEA,MAAI,WAAW,SAAS,iBAAiB,KAAK,IAAI,OAAO,MAAM;AAC7D,WAAO,WAAW,UAAU;AAAA,EAC9B;AAGA,MAAI,WAAW,SAAS,cAAc,KAAK,IAAI,OAAO,MAAM;AAC1D,WAAO,WAAW,UAAU;AAAA,EAC9B;AAEA,SAAO;AACT;AAEA,MAAM,WAAW,CACf,UACA,YACA,WACA,iBACU;AAEV,MAAI,gBAAuB,gBAAgB;AAE3C,UAAQ,UAAU;AAAA,IAChB,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,EACJ;AAGA,MAAI,QAAQ,UAAU,KAAK,QAAQ,SAAS,GAAG;AAC7C,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,GAAG;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAIA,MAAM,mBAA+B;AACrC,MAAM,sBAA8B;AAe7B,MAAM,cAAc,OACzB,KACA,YACsB;AACtB,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,aAAa,CAAC,iBAAiB;AAAA,EACjC,IAAI;AAEJ,QAAM,YAAY;AAAA,IAChB,UAAU;AAAA,IACV,aAAa;AAAA,IACb,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,QAAM,SAAS,UAAU,KAAK,YAAY,SAAS;AAGnD,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,eAAe,UAAU,QAAQ,aAAa;AAAA,EAChE;AAEA,QAAM,gBAAgB;AAAA,IACpB,UAAU;AAAA,IACV;AAAA,IACA,UAAU;AAAA,IACV,gBAAgB;AAAA,EAClB;AAEA,QAAM,mBAAmB;AAAA,IACvB,GAAG;AAAA,IACH;AAAA,IACA,OAAO;AAAA,EACT;AAEA,MAAI;AAEJ,UAAQ,iBAAiB,UAAU;AAAA,IACjC,KAAK,uBAAmB;AACtB,sBAAgB,aAAa;AAAA,QAC3B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,6BAAsB;AACzB,sBAAgB,gBAAgB;AAAA,QAC9B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,yBAAoB;AACvB,sBAAgB,cAAc;AAAA,QAC5B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,2BAAqB;AACxB,sBAAgB,eAAe;AAAA,QAC7B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,uBAAmB;AACtB,sBAAgB,yBAAyB;AAAA,QACvC;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,SAAS;AACP,YAAM,IAAI,MAAM,YAAY,iBAAiB,QAAQ,gBAAgB;AAAA,IACvE;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP,WAAW,iBAAiB;AAAA,IAC5B,aAAa,iBAAiB;AAAA,EAChC;AACF;","names":["AIProvider"]}
1
+ {"version":3,"sources":["../../../../src/utils/AI/aiSdk.ts"],"sourcesContent":["import { anthropic, createAnthropic } from '@ai-sdk/anthropic';\nimport { createDeepSeek, deepseek } from '@ai-sdk/deepseek';\nimport { createGoogleGenerativeAI, google } from '@ai-sdk/google';\nimport { createMistral, mistral } from '@ai-sdk/mistral';\nimport { createOpenAI, openai } from '@ai-sdk/openai';\nimport { CoreMessage, generateText } from 'ai';\nimport { Response } from 'express';\n\ntype AnthropicModel = Parameters<typeof anthropic>[0];\ntype DeepSeekModel = Parameters<typeof deepseek>[0];\ntype MistralModel = Parameters<typeof mistral>[0];\ntype OpenAIModel = Parameters<typeof openai>[0];\ntype GoogleModel = Parameters<typeof google>[0];\n\nexport type Messages = CoreMessage[];\n\n/**\n * Supported AI models\n */\nexport type Model =\n | AnthropicModel\n | DeepSeekModel\n | MistralModel\n | OpenAIModel\n | GoogleModel\n | (string & {});\n\n/**\n * Supported AI SDK providers\n */\nexport enum AIProvider {\n OPENAI = 'openai',\n ANTHROPIC = 'anthropic',\n MISTRAL = 'mistral',\n DEEPSEEK = 'deepseek',\n GEMINI = 'gemini',\n}\n\n/**\n * Common options for all AI providers\n */\nexport type AIOptions = {\n provider?: AIProvider;\n model?: Model;\n temperature?: number;\n apiKey?: string;\n applicationContext?: string;\n maxTokens?: number;\n};\n\n// Define the structure of messages used in chat completions\nexport type ChatCompletionRequestMessage = {\n role: 'system' | 'user' | 'assistant'; // The role of the message sender\n content: string; // The text content of the message\n timestamp?: Date; // The timestamp of the message\n};\n\ntype AccessType = 'apiKey' | 'registered_user' | 'premium_user' | 'public';\n\nconst getAPIKey = (\n res: Response,\n accessType: AccessType[],\n aiOptions?: AIOptions\n) => {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n\n if (accessType.includes('public')) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n if (accessType.includes('apiKey') && aiOptions?.apiKey) {\n return aiOptions?.apiKey;\n }\n\n if (accessType.includes('registered_user') && res.locals.user) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n // TODO: Implement premium user access\n if (accessType.includes('premium_user') && res.locals.user) {\n return aiOptions?.apiKey ?? defaultApiKey;\n }\n\n return undefined;\n};\n\nconst getModel = (\n provider: AIProvider,\n userApiKey: string,\n userModel?: Model,\n defaultModel?: Model\n): Model => {\n // Set default models based on provider\n let fallBackModel: Model = defaultModel ?? 'gpt-5';\n\n switch (provider) {\n case AIProvider.OPENAI:\n defaultModel = 'gpt-5';\n break;\n case AIProvider.ANTHROPIC:\n defaultModel = 'claude-3-haiku-20240307';\n break;\n case AIProvider.MISTRAL:\n defaultModel = 'mistral-large-latest';\n break;\n case AIProvider.DEEPSEEK:\n defaultModel = 'deepseek-coder';\n break;\n case AIProvider.GEMINI:\n defaultModel = 'gemini-1.5-pro';\n break;\n }\n\n // If the user use his own API, let him use the model he wants\n if (Boolean(userApiKey) && Boolean(userModel)) {\n return userModel!;\n }\n\n if (Boolean(userModel)) {\n throw new Error(\n 'The user should use his own API key to use a custom model'\n );\n }\n\n return fallBackModel;\n};\n\nexport type AIConfig = Parameters<typeof generateText>[0];\n\nconst DEFAULT_PROVIDER: AIProvider = AIProvider.OPENAI as AIProvider;\nconst DEFAULT_TEMPERATURE: number = 1; // ChatGPT 5 accept only temperature 1\n\nexport type AIConfigOptions = {\n userOptions?: AIOptions;\n defaultOptions?: AIOptions;\n accessType?: AccessType[];\n};\n\n/**\n * Get AI model configuration based on the selected provider and options\n * This function handles the configuration for different AI providers\n *\n * @param options Configuration options including provider, API keys, models and temperature\n * @returns Configured AI model ready to use with generateText\n */\nexport const getAIConfig = async (\n res: Response,\n options: AIConfigOptions\n): Promise<AIConfig> => {\n const {\n userOptions,\n defaultOptions,\n accessType = ['registered_user'],\n } = options;\n\n const aiOptions = {\n provider: DEFAULT_PROVIDER,\n temperature: DEFAULT_TEMPERATURE,\n ...defaultOptions,\n ...userOptions,\n } satisfies AIOptions;\n\n const apiKey = getAPIKey(res, accessType, aiOptions);\n\n // Check if API key is provided\n if (!apiKey) {\n throw new Error(`API key for ${aiOptions.provider} is missing`);\n }\n\n const selectedModel = getModel(\n aiOptions.provider,\n apiKey,\n aiOptions.model,\n defaultOptions?.model\n );\n\n const protectedOptions = {\n ...aiOptions,\n apiKey,\n model: selectedModel,\n } satisfies AIOptions;\n\n let languageModel: AIConfig['model'];\n\n switch (protectedOptions.provider) {\n case AIProvider.OPENAI: {\n languageModel = createOpenAI({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.ANTHROPIC: {\n languageModel = createAnthropic({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.MISTRAL: {\n languageModel = createMistral({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.DEEPSEEK: {\n languageModel = createDeepSeek({\n apiKey,\n })(selectedModel);\n break;\n }\n\n case AIProvider.GEMINI: {\n languageModel = createGoogleGenerativeAI({\n apiKey,\n })(selectedModel);\n break;\n }\n\n default: {\n throw new Error(`Provider ${protectedOptions.provider} not supported`);\n }\n }\n\n return {\n model: languageModel,\n maxTokens: protectedOptions.maxTokens,\n temperature: protectedOptions.temperature,\n };\n};\n"],"mappings":"AAAA,SAAoB,uBAAuB;AAC3C,SAAS,sBAAgC;AACzC,SAAS,gCAAwC;AACjD,SAAS,qBAA8B;AACvC,SAAS,oBAA4B;AA0B9B,IAAK,aAAL,kBAAKA,gBAAL;AACL,EAAAA,YAAA,YAAS;AACT,EAAAA,YAAA,eAAY;AACZ,EAAAA,YAAA,aAAU;AACV,EAAAA,YAAA,cAAW;AACX,EAAAA,YAAA,YAAS;AALC,SAAAA;AAAA,GAAA;AA6BZ,MAAM,YAAY,CAChB,KACA,YACA,cACG;AACH,QAAM,gBAAgB,QAAQ,IAAI;AAElC,MAAI,WAAW,SAAS,QAAQ,GAAG;AACjC,WAAO,WAAW,UAAU;AAAA,EAC9B;AAEA,MAAI,WAAW,SAAS,QAAQ,KAAK,WAAW,QAAQ;AACtD,WAAO,WAAW;AAAA,EACpB;AAEA,MAAI,WAAW,SAAS,iBAAiB,KAAK,IAAI,OAAO,MAAM;AAC7D,WAAO,WAAW,UAAU;AAAA,EAC9B;AAGA,MAAI,WAAW,SAAS,cAAc,KAAK,IAAI,OAAO,MAAM;AAC1D,WAAO,WAAW,UAAU;AAAA,EAC9B;AAEA,SAAO;AACT;AAEA,MAAM,WAAW,CACf,UACA,YACA,WACA,iBACU;AAEV,MAAI,gBAAuB,gBAAgB;AAE3C,UAAQ,UAAU;AAAA,IAChB,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,IACF,KAAK;AACH,qBAAe;AACf;AAAA,EACJ;AAGA,MAAI,QAAQ,UAAU,KAAK,QAAQ,SAAS,GAAG;AAC7C,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,GAAG;AACtB,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAIA,MAAM,mBAA+B;AACrC,MAAM,sBAA8B;AAe7B,MAAM,cAAc,OACzB,KACA,YACsB;AACtB,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,aAAa,CAAC,iBAAiB;AAAA,EACjC,IAAI;AAEJ,QAAM,YAAY;AAAA,IAChB,UAAU;AAAA,IACV,aAAa;AAAA,IACb,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,QAAM,SAAS,UAAU,KAAK,YAAY,SAAS;AAGnD,MAAI,CAAC,QAAQ;AACX,UAAM,IAAI,MAAM,eAAe,UAAU,QAAQ,aAAa;AAAA,EAChE;AAEA,QAAM,gBAAgB;AAAA,IACpB,UAAU;AAAA,IACV;AAAA,IACA,UAAU;AAAA,IACV,gBAAgB;AAAA,EAClB;AAEA,QAAM,mBAAmB;AAAA,IACvB,GAAG;AAAA,IACH;AAAA,IACA,OAAO;AAAA,EACT;AAEA,MAAI;AAEJ,UAAQ,iBAAiB,UAAU;AAAA,IACjC,KAAK,uBAAmB;AACtB,sBAAgB,aAAa;AAAA,QAC3B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,6BAAsB;AACzB,sBAAgB,gBAAgB;AAAA,QAC9B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,yBAAoB;AACvB,sBAAgB,cAAc;AAAA,QAC5B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,2BAAqB;AACxB,sBAAgB,eAAe;AAAA,QAC7B;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,KAAK,uBAAmB;AACtB,sBAAgB,yBAAyB;AAAA,QACvC;AAAA,MACF,CAAC,EAAE,aAAa;AAChB;AAAA,IACF;AAAA,IAEA,SAAS;AACP,YAAM,IAAI,MAAM,YAAY,iBAAiB,QAAQ,gBAAgB;AAAA,IACvE;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,IACP,WAAW,iBAAiB;AAAA,IAC5B,aAAa,iBAAiB;AAAA,EAChC;AACF;","names":["AIProvider"]}
@@ -11,7 +11,7 @@ import {
11
11
  } from "../aiSdk.mjs";
12
12
  import embeddingsList from "./embeddings.json" with { type: "json" };
13
13
  const vectorStore = [];
14
- const MODEL = "chatgpt-4o-latest";
14
+ const MODEL = "gpt-5";
15
15
  const MODEL_TEMPERATURE = 0.1;
16
16
  const MAX_RELEVANT_CHUNKS_NB = 20;
17
17
  const MIN_RELEVANT_CHUNKS_SIMILARITY = 0.42;
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../../src/utils/AI/askDocQuestion/askDocQuestion.ts"],"sourcesContent":["import { getBlogs, getDocs, getFrequentQuestions } from '@intlayer/docs';\nimport { streamText } from 'ai';\nimport dotenv from 'dotenv';\nimport { readFileSync, writeFileSync } from 'fs';\nimport { getMarkdownMetadata } from 'intlayer';\nimport { OpenAI } from 'openai';\nimport { dirname, join } from 'path';\nimport { fileURLToPath } from 'url';\nimport {\n AIConfig,\n AIOptions,\n AIProvider,\n ChatCompletionRequestMessage,\n} from '../aiSdk';\nimport embeddingsList from './embeddings.json' with { type: 'json' };\n\ntype VectorStoreEl = {\n fileKey: string;\n chunkNumber: number;\n content: string;\n embedding: number[];\n docUrl: string;\n docName: string;\n};\n\n/**\n * Simple in-memory vector store to hold document embeddings and their content.\n * Each entry contains:\n * - fileKey: A unique key identifying the file\n * - chunkNumber: The number of the chunk within the document\n * - content: The chunk content\n * - embedding: The numerical embedding vector for the chunk\n */\nconst vectorStore: VectorStoreEl[] = [];\n\n/*\n * Ask question AI configuration\n */\nconst MODEL: AIOptions['model'] = 'chatgpt-4o-latest'; // Model to use for chat completions\nconst MODEL_TEMPERATURE: AIOptions['temperature'] = 0.1; // Temperature to use for chat completions\nconst MAX_RELEVANT_CHUNKS_NB: number = 20; // Maximum number of relevant chunks to attach to chatGPT context\nconst MIN_RELEVANT_CHUNKS_SIMILARITY: number = 0.42; // Minimum similarity required for a chunk to be considered relevant\n\nexport const aiDefaultOptions: AIOptions = {\n provider: AIProvider.OPENAI,\n model: MODEL,\n temperature: MODEL_TEMPERATURE,\n};\n\n/*\n * Embedding model configuration\n */\nconst EMBEDDING_MODEL: OpenAI.EmbeddingModel = 'text-embedding-3-large'; // Model to use for embedding generation\nconst OVERLAP_TOKENS: number = 200; // Number of tokens to overlap between chunks\nconst MAX_CHUNK_TOKENS: number = 800; // Maximum number of tokens per chunk\nconst CHAR_BY_TOKEN: number = 4.15; // Approximate pessimistically the number of characters per token // Can use `tiktoken` or other tokenizers to calculate it more precisely\nconst MAX_CHARS: number = MAX_CHUNK_TOKENS * CHAR_BY_TOKEN;\nconst OVERLAP_CHARS: number = OVERLAP_TOKENS * CHAR_BY_TOKEN;\n\n/**\n * Splits a given text into chunks ensuring each chunk does not exceed MAX_CHARS.\n * @param text - The input text to split.\n * @returns - Array of text chunks.\n */\nconst chunkText = (text: string): string[] => {\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + MAX_CHARS, text.length);\n\n // Ensure we don't cut words in the middle (find nearest space)\n if (end < text.length) {\n const lastSpace = text.lastIndexOf(' ', end);\n if (lastSpace > start) {\n end = lastSpace;\n }\n }\n\n chunks.push(text.substring(start, end));\n\n // Move start forward correctly\n const nextStart = end - OVERLAP_CHARS;\n if (nextStart <= start) {\n // Prevent infinite loop if overlap is too large\n start = end;\n } else {\n start = nextStart;\n }\n }\n\n return chunks;\n};\n\n/**\n * Generates an embedding for a given text using OpenAI's embedding API.\n * Trims the text if it exceeds the maximum allowed characters.\n *\n * @param text - The input text to generate an embedding for\n * @returns The embedding vector as a number array\n */\nconst generateEmbedding = async (text: string): Promise<number[]> => {\n try {\n const openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n const response = await openaiClient.embeddings.create({\n model: EMBEDDING_MODEL,\n input: text,\n });\n\n return response.data[0].embedding;\n } catch (error) {\n console.error('Error generating embedding:', error);\n return [];\n }\n};\n\n/**\n * Calculates the cosine similarity between two vectors.\n * Cosine similarity measures the cosine of the angle between two vectors in an inner product space.\n * Used to determine the similarity between chunks of text.\n *\n * @param vecA - The first vector\n * @param vecB - The second vector\n * @returns The cosine similarity score\n */\nconst cosineSimilarity = (vecA: number[], vecB: number[]): number => {\n // Calculate the dot product of the two vectors\n const dotProduct = vecA.reduce((sum, a, idx) => sum + a * vecB[idx], 0);\n\n // Calculate the magnitude (Euclidean norm) of each vector\n const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));\n const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));\n\n // Compute and return the cosine similarity\n return dotProduct / (magnitudeA * magnitudeB);\n};\n\n/**\n * Indexes all Markdown documents by generating embeddings for each chunk and storing them in memory.\n * Also updates the embeddings.json file if new embeddings are generated.\n * Handles cases where files have been updated and chunk counts have changed.\n */\nexport const indexMarkdownFiles = async (): Promise<void> => {\n const env = process.env.NODE_ENV;\n dotenv.config({\n path: [`.env.${env}.local`, `.env.${env}`, '.env.local', '.env'],\n });\n\n // if (process.env.SKIP_DOC_EMBEDDINGS_INDEX === 'true') return;\n\n // Retrieve documentation and blog posts in English locale\n const frequentQuestions = await getFrequentQuestions();\n const docs = await getDocs();\n const blogs = await getBlogs();\n\n let result: Record<string, number[]> = {}; // Object to hold updated embeddings\n const currentChunkKeys = new Set<string>(); // Track which chunks should exist\n\n const files = { ...docs, ...blogs, ...frequentQuestions }; // Combine docs and blogs into a single object\n\n // Iterate over each file key (identifier) in the combined files\n for await (const fileKey of Object.keys(files)) {\n // Get the metadata of the file\n const fileMetadata = getMarkdownMetadata(\n files[fileKey as keyof typeof files] as string\n );\n\n // Split the document into chunks based on headings\n const fileChunks = chunkText(\n files[fileKey as keyof typeof files] as string\n );\n\n // Check if the number of chunks has changed for this file\n const existingChunksForFile = Object.keys(embeddingsList).filter((key) =>\n key.startsWith(`${fileKey}/chunk_`)\n );\n const currentChunkCount = fileChunks.length;\n const previousChunkCount = existingChunksForFile.length;\n\n let shouldRegenerateFileEmbeddings = false;\n\n // If chunk count differs, we need to regenerate embeddings for this file\n if (currentChunkCount !== previousChunkCount) {\n console.info(\n `File \"${fileKey}\" chunk count changed: ${previousChunkCount} -> ${currentChunkCount}. Regenerating embeddings.`\n );\n shouldRegenerateFileEmbeddings = true;\n }\n\n // Iterate over each chunk within the current file\n for await (const chunkIndex of Object.keys(fileChunks)) {\n const chunkNumber = Number(chunkIndex) + 1; // Chunk number starts at 1\n const chunksNumber = fileChunks.length;\n\n const fileChunk = fileChunks[\n chunkIndex as keyof typeof fileChunks\n ] as string;\n\n const embeddingKeyName = `${fileKey}/chunk_${chunkNumber}`; // Unique key for the chunk\n currentChunkKeys.add(embeddingKeyName); // Track this chunk as current\n\n // Retrieve precomputed embedding if available and file hasn't changed\n const docEmbedding = !shouldRegenerateFileEmbeddings\n ? (embeddingsList[embeddingKeyName as keyof typeof embeddingsList] as\n | number[]\n | undefined)\n : undefined;\n\n let embedding = docEmbedding; // Use existing embedding if available and valid\n\n if (!embedding) {\n embedding = await generateEmbedding(fileChunk); // Generate embedding if not present or file changed\n console.info(`- Generated new embedding: ${embeddingKeyName}`);\n }\n\n // Update the result object with the embedding\n result = { ...result, [embeddingKeyName]: embedding };\n\n // Store the embedding and content in the in-memory vector store\n vectorStore.push({\n fileKey,\n chunkNumber,\n embedding,\n content: fileChunk,\n docUrl: fileMetadata.url,\n docName: fileMetadata.title,\n });\n\n console.info(`- Indexed: ${embeddingKeyName}/${chunksNumber}`);\n }\n }\n\n // Remove outdated embeddings that no longer exist in current files\n const filteredEmbeddings: Record<string, number[]> = {};\n for (const [key, embedding] of Object.entries(embeddingsList)) {\n if (currentChunkKeys.has(key)) {\n // Only keep embeddings for chunks that still exist\n if (!result[key]) {\n filteredEmbeddings[key] = embedding as number[];\n }\n }\n }\n\n // Merge filtered existing embeddings with new ones\n result = { ...filteredEmbeddings, ...result };\n\n try {\n // Compare the newly generated embeddings with existing ones\n if (JSON.stringify(result) !== JSON.stringify(embeddingsList)) {\n // If there are new embeddings or changes, save them to embeddings.json\n writeFileSync(\n 'src/utils/AI/askDocQuestion/embeddings.json',\n JSON.stringify(result, null, 2)\n );\n }\n } catch (error) {\n console.error(error); // Log any errors during the file write process\n }\n};\n\n// Automatically index Markdown files\nindexMarkdownFiles();\n\n/**\n * Searches the indexed documents for the most relevant chunks based on a query.\n * Utilizes cosine similarity to find the closest matching embeddings.\n *\n * @param query - The search query provided by the user\n * @returns An array of the top matching document chunks' content\n */\nexport const searchChunkReference = async (\n query: string,\n maxResults: number = MAX_RELEVANT_CHUNKS_NB,\n minSimilarity: number = MIN_RELEVANT_CHUNKS_SIMILARITY\n): Promise<VectorStoreEl[]> => {\n // Generate an embedding for the user's query\n const queryEmbedding = await generateEmbedding(query);\n\n // Calculate similarity scores between the query embedding and each document's embedding\n const selection = vectorStore\n .map((chunk) => ({\n ...chunk,\n similarity: cosineSimilarity(queryEmbedding, chunk.embedding), // Add similarity score to each doc\n }))\n .filter((chunk) => chunk.similarity > minSimilarity) // Filter out documents with low similarity scores\n .sort((a, b) => b.similarity - a.similarity) // Sort documents by highest similarity first\n .slice(0, maxResults); // Select the top 6 most similar documents\n\n const orderedDocKeys = new Set(selection.map((chunk) => chunk.fileKey));\n\n const orderedVectorStore = vectorStore.sort((a, b) =>\n orderedDocKeys.has(a.fileKey) ? -1 : 1\n );\n\n const results = orderedVectorStore.filter((chunk) =>\n selection.some(\n (v) => v.fileKey === chunk.fileKey && v.chunkNumber === chunk.chunkNumber\n )\n );\n\n // Return the content of the top matching documents\n return results;\n};\n\n/**\n * Reads the content of a file synchronously.\n *\n * @function\n * @param relativeFilePath - The relative or absolute path to the target file.\n * @returns The entire contents of the specified file as a UTF-8 encoded string.\n */\nconst getFileContent = (relativeFilePath: string): string => {\n const __dirname = dirname(fileURLToPath(import.meta.url));\n const absolutePath = join(__dirname, relativeFilePath);\n const fileContent = readFileSync(absolutePath, 'utf-8');\n return fileContent;\n};\n\nconst CHAT_GPT_PROMPT = getFileContent('./PROMPT.md');\n\n// Initial prompt configuration for the chatbot\nexport const initPrompt: ChatCompletionRequestMessage = {\n role: 'system',\n content: CHAT_GPT_PROMPT,\n};\n\nexport type AskDocQuestionResult = {\n response: string;\n relatedFiles: string[];\n};\n\nexport type AskDocQuestionOptions = {\n onMessage?: (chunk: string) => void;\n};\n\n/**\n * Handles the \"Ask a question\" endpoint in an Express.js route.\n * Processes user messages, retrieves relevant documents, and interacts with AI models to generate responses.\n *\n * @param messages - An array of chat messages from the user and assistant\n * @returns The assistant's response as a string\n */\nexport const askDocQuestion = async (\n messages: ChatCompletionRequestMessage[],\n aiConfig: AIConfig,\n options?: AskDocQuestionOptions\n): Promise<AskDocQuestionResult> => {\n // Format the user's question to keep only the relevant keywords\n const query = messages\n .filter((message) => message.role === 'user')\n .map((message) => `- ${message.content}`)\n .join('\\n');\n\n // 1) Find relevant documents based on the user's question\n const relevantFilesReferences = await searchChunkReference(query);\n\n // 2) Integrate the relevant documents into the initial system prompt\n const systemPrompt = initPrompt.content.replace(\n '{{relevantFilesReferences}}',\n relevantFilesReferences.length === 0\n ? 'Not relevant file found related to the question.'\n : relevantFilesReferences\n .map((doc, idx) =>\n [\n '-----',\n '---',\n `chunkId: ${idx}`,\n `docChunk: \"${doc.chunkNumber}/${doc.fileKey.length}\"`,\n `docName: \"${doc.docName}\"`,\n `docUrl: \"${doc.docUrl}\"`,\n `---`,\n doc.content,\n `-----`,\n ].join('\\n')\n )\n .join('\\n\\n') // Insert relevant docs into the prompt\n );\n\n // Format messages for AI SDK\n const aiMessages = [\n {\n role: 'system' as const,\n content: systemPrompt,\n },\n ...messages.slice(-8),\n ];\n\n if (!aiConfig) {\n throw new Error('Failed to initialize AI configuration');\n }\n\n // 3) Use the AI SDK to stream the response\n let fullResponse = '';\n const stream = streamText({\n ...aiConfig,\n messages: aiMessages,\n });\n\n // Process the stream\n for await (const chunk of stream.textStream) {\n fullResponse += chunk;\n options?.onMessage?.(chunk);\n }\n\n // 4) Extract unique related files\n const relatedFiles = [\n ...new Set(relevantFilesReferences.map((doc) => doc.fileKey)),\n ];\n\n // 5) Return the assistant's response to the user\n return {\n response: fullResponse ?? 'Error: No result found',\n relatedFiles,\n };\n};\n"],"mappings":"AAAA,SAAS,UAAU,SAAS,4BAA4B;AACxD,SAAS,kBAAkB;AAC3B,OAAO,YAAY;AACnB,SAAS,cAAc,qBAAqB;AAC5C,SAAS,2BAA2B;AACpC,SAAS,cAAc;AACvB,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B;AAAA,EAGE;AAAA,OAEK;AACP,OAAO,oBAAoB,oBAAoB,KAAK,EAAE,MAAM,OAAO;AAmBnE,MAAM,cAA+B,CAAC;AAKtC,MAAM,QAA4B;AAClC,MAAM,oBAA8C;AACpD,MAAM,yBAAiC;AACvC,MAAM,iCAAyC;AAExC,MAAM,mBAA8B;AAAA,EACzC,UAAU,WAAW;AAAA,EACrB,OAAO;AAAA,EACP,aAAa;AACf;AAKA,MAAM,kBAAyC;AAC/C,MAAM,iBAAyB;AAC/B,MAAM,mBAA2B;AACjC,MAAM,gBAAwB;AAC9B,MAAM,YAAoB,mBAAmB;AAC7C,MAAM,gBAAwB,iBAAiB;AAO/C,MAAM,YAAY,CAAC,SAA2B;AAC5C,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,WAAW,KAAK,MAAM;AAGjD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,UAAI,YAAY,OAAO;AACrB,cAAM;AAAA,MACR;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,UAAU,OAAO,GAAG,CAAC;AAGtC,UAAM,YAAY,MAAM;AACxB,QAAI,aAAa,OAAO;AAEtB,cAAQ;AAAA,IACV,OAAO;AACL,cAAQ;AAAA,IACV;AAAA,EACF;AAEA,SAAO;AACT;AASA,MAAM,oBAAoB,OAAO,SAAoC;AACnE,MAAI;AACF,UAAM,eAAe,IAAI,OAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAEtE,UAAM,WAAW,MAAM,aAAa,WAAW,OAAO;AAAA,MACpD,OAAO;AAAA,MACP,OAAO;AAAA,IACT,CAAC;AAED,WAAO,SAAS,KAAK,CAAC,EAAE;AAAA,EAC1B,SAAS,OAAO;AACd,YAAQ,MAAM,+BAA+B,KAAK;AAClD,WAAO,CAAC;AAAA,EACV;AACF;AAWA,MAAM,mBAAmB,CAAC,MAAgB,SAA2B;AAEnE,QAAM,aAAa,KAAK,OAAO,CAAC,KAAK,GAAG,QAAQ,MAAM,IAAI,KAAK,GAAG,GAAG,CAAC;AAGtE,QAAM,aAAa,KAAK,KAAK,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,IAAI,GAAG,CAAC,CAAC;AACpE,QAAM,aAAa,KAAK,KAAK,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,IAAI,GAAG,CAAC,CAAC;AAGpE,SAAO,cAAc,aAAa;AACpC;AAOO,MAAM,qBAAqB,YAA2B;AAC3D,QAAM,MAAM,QAAQ,IAAI;AACxB,SAAO,OAAO;AAAA,IACZ,MAAM,CAAC,QAAQ,GAAG,UAAU,QAAQ,GAAG,IAAI,cAAc,MAAM;AAAA,EACjE,CAAC;AAKD,QAAM,oBAAoB,MAAM,qBAAqB;AACrD,QAAM,OAAO,MAAM,QAAQ;AAC3B,QAAM,QAAQ,MAAM,SAAS;AAE7B,MAAI,SAAmC,CAAC;AACxC,QAAM,mBAAmB,oBAAI,IAAY;AAEzC,QAAM,QAAQ,EAAE,GAAG,MAAM,GAAG,OAAO,GAAG,kBAAkB;AAGxD,mBAAiB,WAAW,OAAO,KAAK,KAAK,GAAG;AAE9C,UAAM,eAAe;AAAA,MACnB,MAAM,OAA6B;AAAA,IACrC;AAGA,UAAM,aAAa;AAAA,MACjB,MAAM,OAA6B;AAAA,IACrC;AAGA,UAAM,wBAAwB,OAAO,KAAK,cAAc,EAAE;AAAA,MAAO,CAAC,QAChE,IAAI,WAAW,GAAG,OAAO,SAAS;AAAA,IACpC;AACA,UAAM,oBAAoB,WAAW;AACrC,UAAM,qBAAqB,sBAAsB;AAEjD,QAAI,iCAAiC;AAGrC,QAAI,sBAAsB,oBAAoB;AAC5C,cAAQ;AAAA,QACN,SAAS,OAAO,0BAA0B,kBAAkB,OAAO,iBAAiB;AAAA,MACtF;AACA,uCAAiC;AAAA,IACnC;AAGA,qBAAiB,cAAc,OAAO,KAAK,UAAU,GAAG;AACtD,YAAM,cAAc,OAAO,UAAU,IAAI;AACzC,YAAM,eAAe,WAAW;AAEhC,YAAM,YAAY,WAChB,UACF;AAEA,YAAM,mBAAmB,GAAG,OAAO,UAAU,WAAW;AACxD,uBAAiB,IAAI,gBAAgB;AAGrC,YAAM,eAAe,CAAC,iCACjB,eAAe,gBAA+C,IAG/D;AAEJ,UAAI,YAAY;AAEhB,UAAI,CAAC,WAAW;AACd,oBAAY,MAAM,kBAAkB,SAAS;AAC7C,gBAAQ,KAAK,8BAA8B,gBAAgB,EAAE;AAAA,MAC/D;AAGA,eAAS,EAAE,GAAG,QAAQ,CAAC,gBAAgB,GAAG,UAAU;AAGpD,kBAAY,KAAK;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,QACT,QAAQ,aAAa;AAAA,QACrB,SAAS,aAAa;AAAA,MACxB,CAAC;AAED,cAAQ,KAAK,cAAc,gBAAgB,IAAI,YAAY,EAAE;AAAA,IAC/D;AAAA,EACF;AAGA,QAAM,qBAA+C,CAAC;AACtD,aAAW,CAAC,KAAK,SAAS,KAAK,OAAO,QAAQ,cAAc,GAAG;AAC7D,QAAI,iBAAiB,IAAI,GAAG,GAAG;AAE7B,UAAI,CAAC,OAAO,GAAG,GAAG;AAChB,2BAAmB,GAAG,IAAI;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AAGA,WAAS,EAAE,GAAG,oBAAoB,GAAG,OAAO;AAE5C,MAAI;AAEF,QAAI,KAAK,UAAU,MAAM,MAAM,KAAK,UAAU,cAAc,GAAG;AAE7D;AAAA,QACE;AAAA,QACA,KAAK,UAAU,QAAQ,MAAM,CAAC;AAAA,MAChC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,YAAQ,MAAM,KAAK;AAAA,EACrB;AACF;AAGA,mBAAmB;AASZ,MAAM,uBAAuB,OAClC,OACA,aAAqB,wBACrB,gBAAwB,mCACK;AAE7B,QAAM,iBAAiB,MAAM,kBAAkB,KAAK;AAGpD,QAAM,YAAY,YACf,IAAI,CAAC,WAAW;AAAA,IACf,GAAG;AAAA,IACH,YAAY,iBAAiB,gBAAgB,MAAM,SAAS;AAAA;AAAA,EAC9D,EAAE,EACD,OAAO,CAAC,UAAU,MAAM,aAAa,aAAa,EAClD,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,EAAE,UAAU,EAC1C,MAAM,GAAG,UAAU;AAEtB,QAAM,iBAAiB,IAAI,IAAI,UAAU,IAAI,CAAC,UAAU,MAAM,OAAO,CAAC;AAEtE,QAAM,qBAAqB,YAAY;AAAA,IAAK,CAAC,GAAG,MAC9C,eAAe,IAAI,EAAE,OAAO,IAAI,KAAK;AAAA,EACvC;AAEA,QAAM,UAAU,mBAAmB;AAAA,IAAO,CAAC,UACzC,UAAU;AAAA,MACR,CAAC,MAAM,EAAE,YAAY,MAAM,WAAW,EAAE,gBAAgB,MAAM;AAAA,IAChE;AAAA,EACF;AAGA,SAAO;AACT;AASA,MAAM,iBAAiB,CAAC,qBAAqC;AAC3D,QAAM,YAAY,QAAQ,cAAc,YAAY,GAAG,CAAC;AACxD,QAAM,eAAe,KAAK,WAAW,gBAAgB;AACrD,QAAM,cAAc,aAAa,cAAc,OAAO;AACtD,SAAO;AACT;AAEA,MAAM,kBAAkB,eAAe,aAAa;AAG7C,MAAM,aAA2C;AAAA,EACtD,MAAM;AAAA,EACN,SAAS;AACX;AAkBO,MAAM,iBAAiB,OAC5B,UACA,UACA,YACkC;AAElC,QAAM,QAAQ,SACX,OAAO,CAAC,YAAY,QAAQ,SAAS,MAAM,EAC3C,IAAI,CAAC,YAAY,KAAK,QAAQ,OAAO,EAAE,EACvC,KAAK,IAAI;AAGZ,QAAM,0BAA0B,MAAM,qBAAqB,KAAK;AAGhE,QAAM,eAAe,WAAW,QAAQ;AAAA,IACtC;AAAA,IACA,wBAAwB,WAAW,IAC/B,qDACA,wBACG;AAAA,MAAI,CAAC,KAAK,QACT;AAAA,QACE;AAAA,QACA;AAAA,QACA,YAAY,GAAG;AAAA,QACf,cAAc,IAAI,WAAW,IAAI,IAAI,QAAQ,MAAM;AAAA,QACnD,aAAa,IAAI,OAAO;AAAA,QACxB,YAAY,IAAI,MAAM;AAAA,QACtB;AAAA,QACA,IAAI;AAAA,QACJ;AAAA,MACF,EAAE,KAAK,IAAI;AAAA,IACb,EACC,KAAK,MAAM;AAAA;AAAA,EACpB;AAGA,QAAM,aAAa;AAAA,IACjB;AAAA,MACE,MAAM;AAAA,MACN,SAAS;AAAA,IACX;AAAA,IACA,GAAG,SAAS,MAAM,EAAE;AAAA,EACtB;AAEA,MAAI,CAAC,UAAU;AACb,UAAM,IAAI,MAAM,uCAAuC;AAAA,EACzD;AAGA,MAAI,eAAe;AACnB,QAAM,SAAS,WAAW;AAAA,IACxB,GAAG;AAAA,IACH,UAAU;AAAA,EACZ,CAAC;AAGD,mBAAiB,SAAS,OAAO,YAAY;AAC3C,oBAAgB;AAChB,aAAS,YAAY,KAAK;AAAA,EAC5B;AAGA,QAAM,eAAe;AAAA,IACnB,GAAG,IAAI,IAAI,wBAAwB,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC;AAAA,EAC9D;AAGA,SAAO;AAAA,IACL,UAAU,gBAAgB;AAAA,IAC1B;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../../../../../src/utils/AI/askDocQuestion/askDocQuestion.ts"],"sourcesContent":["import { getBlogs, getDocs, getFrequentQuestions } from '@intlayer/docs';\nimport { streamText } from 'ai';\nimport dotenv from 'dotenv';\nimport { readFileSync, writeFileSync } from 'fs';\nimport { getMarkdownMetadata } from 'intlayer';\nimport { OpenAI } from 'openai';\nimport { dirname, join } from 'path';\nimport { fileURLToPath } from 'url';\nimport {\n AIConfig,\n AIOptions,\n AIProvider,\n ChatCompletionRequestMessage,\n} from '../aiSdk';\nimport embeddingsList from './embeddings.json' with { type: 'json' };\n\ntype VectorStoreEl = {\n fileKey: string;\n chunkNumber: number;\n content: string;\n embedding: number[];\n docUrl: string;\n docName: string;\n};\n\n/**\n * Simple in-memory vector store to hold document embeddings and their content.\n * Each entry contains:\n * - fileKey: A unique key identifying the file\n * - chunkNumber: The number of the chunk within the document\n * - content: The chunk content\n * - embedding: The numerical embedding vector for the chunk\n */\nconst vectorStore: VectorStoreEl[] = [];\n\n/*\n * Ask question AI configuration\n */\nconst MODEL: AIOptions['model'] = 'gpt-5'; // Model to use for chat completions\nconst MODEL_TEMPERATURE: AIOptions['temperature'] = 0.1; // Temperature to use for chat completions\nconst MAX_RELEVANT_CHUNKS_NB: number = 20; // Maximum number of relevant chunks to attach to chatGPT context\nconst MIN_RELEVANT_CHUNKS_SIMILARITY: number = 0.42; // Minimum similarity required for a chunk to be considered relevant\n\nexport const aiDefaultOptions: AIOptions = {\n provider: AIProvider.OPENAI,\n model: MODEL,\n temperature: MODEL_TEMPERATURE,\n};\n\n/*\n * Embedding model configuration\n */\nconst EMBEDDING_MODEL: OpenAI.EmbeddingModel = 'text-embedding-3-large'; // Model to use for embedding generation\nconst OVERLAP_TOKENS: number = 200; // Number of tokens to overlap between chunks\nconst MAX_CHUNK_TOKENS: number = 800; // Maximum number of tokens per chunk\nconst CHAR_BY_TOKEN: number = 4.15; // Approximate pessimistically the number of characters per token // Can use `tiktoken` or other tokenizers to calculate it more precisely\nconst MAX_CHARS: number = MAX_CHUNK_TOKENS * CHAR_BY_TOKEN;\nconst OVERLAP_CHARS: number = OVERLAP_TOKENS * CHAR_BY_TOKEN;\n\n/**\n * Splits a given text into chunks ensuring each chunk does not exceed MAX_CHARS.\n * @param text - The input text to split.\n * @returns - Array of text chunks.\n */\nconst chunkText = (text: string): string[] => {\n const chunks: string[] = [];\n let start = 0;\n\n while (start < text.length) {\n let end = Math.min(start + MAX_CHARS, text.length);\n\n // Ensure we don't cut words in the middle (find nearest space)\n if (end < text.length) {\n const lastSpace = text.lastIndexOf(' ', end);\n if (lastSpace > start) {\n end = lastSpace;\n }\n }\n\n chunks.push(text.substring(start, end));\n\n // Move start forward correctly\n const nextStart = end - OVERLAP_CHARS;\n if (nextStart <= start) {\n // Prevent infinite loop if overlap is too large\n start = end;\n } else {\n start = nextStart;\n }\n }\n\n return chunks;\n};\n\n/**\n * Generates an embedding for a given text using OpenAI's embedding API.\n * Trims the text if it exceeds the maximum allowed characters.\n *\n * @param text - The input text to generate an embedding for\n * @returns The embedding vector as a number array\n */\nconst generateEmbedding = async (text: string): Promise<number[]> => {\n try {\n const openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });\n\n const response = await openaiClient.embeddings.create({\n model: EMBEDDING_MODEL,\n input: text,\n });\n\n return response.data[0].embedding;\n } catch (error) {\n console.error('Error generating embedding:', error);\n return [];\n }\n};\n\n/**\n * Calculates the cosine similarity between two vectors.\n * Cosine similarity measures the cosine of the angle between two vectors in an inner product space.\n * Used to determine the similarity between chunks of text.\n *\n * @param vecA - The first vector\n * @param vecB - The second vector\n * @returns The cosine similarity score\n */\nconst cosineSimilarity = (vecA: number[], vecB: number[]): number => {\n // Calculate the dot product of the two vectors\n const dotProduct = vecA.reduce((sum, a, idx) => sum + a * vecB[idx], 0);\n\n // Calculate the magnitude (Euclidean norm) of each vector\n const magnitudeA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));\n const magnitudeB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));\n\n // Compute and return the cosine similarity\n return dotProduct / (magnitudeA * magnitudeB);\n};\n\n/**\n * Indexes all Markdown documents by generating embeddings for each chunk and storing them in memory.\n * Also updates the embeddings.json file if new embeddings are generated.\n * Handles cases where files have been updated and chunk counts have changed.\n */\nexport const indexMarkdownFiles = async (): Promise<void> => {\n const env = process.env.NODE_ENV;\n dotenv.config({\n path: [`.env.${env}.local`, `.env.${env}`, '.env.local', '.env'],\n });\n\n // if (process.env.SKIP_DOC_EMBEDDINGS_INDEX === 'true') return;\n\n // Retrieve documentation and blog posts in English locale\n const frequentQuestions = await getFrequentQuestions();\n const docs = await getDocs();\n const blogs = await getBlogs();\n\n let result: Record<string, number[]> = {}; // Object to hold updated embeddings\n const currentChunkKeys = new Set<string>(); // Track which chunks should exist\n\n const files = { ...docs, ...blogs, ...frequentQuestions }; // Combine docs and blogs into a single object\n\n // Iterate over each file key (identifier) in the combined files\n for await (const fileKey of Object.keys(files)) {\n // Get the metadata of the file\n const fileMetadata = getMarkdownMetadata(\n files[fileKey as keyof typeof files] as string\n );\n\n // Split the document into chunks based on headings\n const fileChunks = chunkText(\n files[fileKey as keyof typeof files] as string\n );\n\n // Check if the number of chunks has changed for this file\n const existingChunksForFile = Object.keys(embeddingsList).filter((key) =>\n key.startsWith(`${fileKey}/chunk_`)\n );\n const currentChunkCount = fileChunks.length;\n const previousChunkCount = existingChunksForFile.length;\n\n let shouldRegenerateFileEmbeddings = false;\n\n // If chunk count differs, we need to regenerate embeddings for this file\n if (currentChunkCount !== previousChunkCount) {\n console.info(\n `File \"${fileKey}\" chunk count changed: ${previousChunkCount} -> ${currentChunkCount}. Regenerating embeddings.`\n );\n shouldRegenerateFileEmbeddings = true;\n }\n\n // Iterate over each chunk within the current file\n for await (const chunkIndex of Object.keys(fileChunks)) {\n const chunkNumber = Number(chunkIndex) + 1; // Chunk number starts at 1\n const chunksNumber = fileChunks.length;\n\n const fileChunk = fileChunks[\n chunkIndex as keyof typeof fileChunks\n ] as string;\n\n const embeddingKeyName = `${fileKey}/chunk_${chunkNumber}`; // Unique key for the chunk\n currentChunkKeys.add(embeddingKeyName); // Track this chunk as current\n\n // Retrieve precomputed embedding if available and file hasn't changed\n const docEmbedding = !shouldRegenerateFileEmbeddings\n ? (embeddingsList[embeddingKeyName as keyof typeof embeddingsList] as\n | number[]\n | undefined)\n : undefined;\n\n let embedding = docEmbedding; // Use existing embedding if available and valid\n\n if (!embedding) {\n embedding = await generateEmbedding(fileChunk); // Generate embedding if not present or file changed\n console.info(`- Generated new embedding: ${embeddingKeyName}`);\n }\n\n // Update the result object with the embedding\n result = { ...result, [embeddingKeyName]: embedding };\n\n // Store the embedding and content in the in-memory vector store\n vectorStore.push({\n fileKey,\n chunkNumber,\n embedding,\n content: fileChunk,\n docUrl: fileMetadata.url,\n docName: fileMetadata.title,\n });\n\n console.info(`- Indexed: ${embeddingKeyName}/${chunksNumber}`);\n }\n }\n\n // Remove outdated embeddings that no longer exist in current files\n const filteredEmbeddings: Record<string, number[]> = {};\n for (const [key, embedding] of Object.entries(embeddingsList)) {\n if (currentChunkKeys.has(key)) {\n // Only keep embeddings for chunks that still exist\n if (!result[key]) {\n filteredEmbeddings[key] = embedding as number[];\n }\n }\n }\n\n // Merge filtered existing embeddings with new ones\n result = { ...filteredEmbeddings, ...result };\n\n try {\n // Compare the newly generated embeddings with existing ones\n if (JSON.stringify(result) !== JSON.stringify(embeddingsList)) {\n // If there are new embeddings or changes, save them to embeddings.json\n writeFileSync(\n 'src/utils/AI/askDocQuestion/embeddings.json',\n JSON.stringify(result, null, 2)\n );\n }\n } catch (error) {\n console.error(error); // Log any errors during the file write process\n }\n};\n\n// Automatically index Markdown files\nindexMarkdownFiles();\n\n/**\n * Searches the indexed documents for the most relevant chunks based on a query.\n * Utilizes cosine similarity to find the closest matching embeddings.\n *\n * @param query - The search query provided by the user\n * @returns An array of the top matching document chunks' content\n */\nexport const searchChunkReference = async (\n query: string,\n maxResults: number = MAX_RELEVANT_CHUNKS_NB,\n minSimilarity: number = MIN_RELEVANT_CHUNKS_SIMILARITY\n): Promise<VectorStoreEl[]> => {\n // Generate an embedding for the user's query\n const queryEmbedding = await generateEmbedding(query);\n\n // Calculate similarity scores between the query embedding and each document's embedding\n const selection = vectorStore\n .map((chunk) => ({\n ...chunk,\n similarity: cosineSimilarity(queryEmbedding, chunk.embedding), // Add similarity score to each doc\n }))\n .filter((chunk) => chunk.similarity > minSimilarity) // Filter out documents with low similarity scores\n .sort((a, b) => b.similarity - a.similarity) // Sort documents by highest similarity first\n .slice(0, maxResults); // Select the top 6 most similar documents\n\n const orderedDocKeys = new Set(selection.map((chunk) => chunk.fileKey));\n\n const orderedVectorStore = vectorStore.sort((a, b) =>\n orderedDocKeys.has(a.fileKey) ? -1 : 1\n );\n\n const results = orderedVectorStore.filter((chunk) =>\n selection.some(\n (v) => v.fileKey === chunk.fileKey && v.chunkNumber === chunk.chunkNumber\n )\n );\n\n // Return the content of the top matching documents\n return results;\n};\n\n/**\n * Reads the content of a file synchronously.\n *\n * @function\n * @param relativeFilePath - The relative or absolute path to the target file.\n * @returns The entire contents of the specified file as a UTF-8 encoded string.\n */\nconst getFileContent = (relativeFilePath: string): string => {\n const __dirname = dirname(fileURLToPath(import.meta.url));\n const absolutePath = join(__dirname, relativeFilePath);\n const fileContent = readFileSync(absolutePath, 'utf-8');\n return fileContent;\n};\n\nconst CHAT_GPT_PROMPT = getFileContent('./PROMPT.md');\n\n// Initial prompt configuration for the chatbot\nexport const initPrompt: ChatCompletionRequestMessage = {\n role: 'system',\n content: CHAT_GPT_PROMPT,\n};\n\nexport type AskDocQuestionResult = {\n response: string;\n relatedFiles: string[];\n};\n\nexport type AskDocQuestionOptions = {\n onMessage?: (chunk: string) => void;\n};\n\n/**\n * Handles the \"Ask a question\" endpoint in an Express.js route.\n * Processes user messages, retrieves relevant documents, and interacts with AI models to generate responses.\n *\n * @param messages - An array of chat messages from the user and assistant\n * @returns The assistant's response as a string\n */\nexport const askDocQuestion = async (\n messages: ChatCompletionRequestMessage[],\n aiConfig: AIConfig,\n options?: AskDocQuestionOptions\n): Promise<AskDocQuestionResult> => {\n // Format the user's question to keep only the relevant keywords\n const query = messages\n .filter((message) => message.role === 'user')\n .map((message) => `- ${message.content}`)\n .join('\\n');\n\n // 1) Find relevant documents based on the user's question\n const relevantFilesReferences = await searchChunkReference(query);\n\n // 2) Integrate the relevant documents into the initial system prompt\n const systemPrompt = initPrompt.content.replace(\n '{{relevantFilesReferences}}',\n relevantFilesReferences.length === 0\n ? 'Not relevant file found related to the question.'\n : relevantFilesReferences\n .map((doc, idx) =>\n [\n '-----',\n '---',\n `chunkId: ${idx}`,\n `docChunk: \"${doc.chunkNumber}/${doc.fileKey.length}\"`,\n `docName: \"${doc.docName}\"`,\n `docUrl: \"${doc.docUrl}\"`,\n `---`,\n doc.content,\n `-----`,\n ].join('\\n')\n )\n .join('\\n\\n') // Insert relevant docs into the prompt\n );\n\n // Format messages for AI SDK\n const aiMessages = [\n {\n role: 'system' as const,\n content: systemPrompt,\n },\n ...messages.slice(-8),\n ];\n\n if (!aiConfig) {\n throw new Error('Failed to initialize AI configuration');\n }\n\n // 3) Use the AI SDK to stream the response\n let fullResponse = '';\n const stream = streamText({\n ...aiConfig,\n messages: aiMessages,\n });\n\n // Process the stream\n for await (const chunk of stream.textStream) {\n fullResponse += chunk;\n options?.onMessage?.(chunk);\n }\n\n // 4) Extract unique related files\n const relatedFiles = [\n ...new Set(relevantFilesReferences.map((doc) => doc.fileKey)),\n ];\n\n // 5) Return the assistant's response to the user\n return {\n response: fullResponse ?? 'Error: No result found',\n relatedFiles,\n };\n};\n"],"mappings":"AAAA,SAAS,UAAU,SAAS,4BAA4B;AACxD,SAAS,kBAAkB;AAC3B,OAAO,YAAY;AACnB,SAAS,cAAc,qBAAqB;AAC5C,SAAS,2BAA2B;AACpC,SAAS,cAAc;AACvB,SAAS,SAAS,YAAY;AAC9B,SAAS,qBAAqB;AAC9B;AAAA,EAGE;AAAA,OAEK;AACP,OAAO,oBAAoB,oBAAoB,KAAK,EAAE,MAAM,OAAO;AAmBnE,MAAM,cAA+B,CAAC;AAKtC,MAAM,QAA4B;AAClC,MAAM,oBAA8C;AACpD,MAAM,yBAAiC;AACvC,MAAM,iCAAyC;AAExC,MAAM,mBAA8B;AAAA,EACzC,UAAU,WAAW;AAAA,EACrB,OAAO;AAAA,EACP,aAAa;AACf;AAKA,MAAM,kBAAyC;AAC/C,MAAM,iBAAyB;AAC/B,MAAM,mBAA2B;AACjC,MAAM,gBAAwB;AAC9B,MAAM,YAAoB,mBAAmB;AAC7C,MAAM,gBAAwB,iBAAiB;AAO/C,MAAM,YAAY,CAAC,SAA2B;AAC5C,QAAM,SAAmB,CAAC;AAC1B,MAAI,QAAQ;AAEZ,SAAO,QAAQ,KAAK,QAAQ;AAC1B,QAAI,MAAM,KAAK,IAAI,QAAQ,WAAW,KAAK,MAAM;AAGjD,QAAI,MAAM,KAAK,QAAQ;AACrB,YAAM,YAAY,KAAK,YAAY,KAAK,GAAG;AAC3C,UAAI,YAAY,OAAO;AACrB,cAAM;AAAA,MACR;AAAA,IACF;AAEA,WAAO,KAAK,KAAK,UAAU,OAAO,GAAG,CAAC;AAGtC,UAAM,YAAY,MAAM;AACxB,QAAI,aAAa,OAAO;AAEtB,cAAQ;AAAA,IACV,OAAO;AACL,cAAQ;AAAA,IACV;AAAA,EACF;AAEA,SAAO;AACT;AASA,MAAM,oBAAoB,OAAO,SAAoC;AACnE,MAAI;AACF,UAAM,eAAe,IAAI,OAAO,EAAE,QAAQ,QAAQ,IAAI,eAAe,CAAC;AAEtE,UAAM,WAAW,MAAM,aAAa,WAAW,OAAO;AAAA,MACpD,OAAO;AAAA,MACP,OAAO;AAAA,IACT,CAAC;AAED,WAAO,SAAS,KAAK,CAAC,EAAE;AAAA,EAC1B,SAAS,OAAO;AACd,YAAQ,MAAM,+BAA+B,KAAK;AAClD,WAAO,CAAC;AAAA,EACV;AACF;AAWA,MAAM,mBAAmB,CAAC,MAAgB,SAA2B;AAEnE,QAAM,aAAa,KAAK,OAAO,CAAC,KAAK,GAAG,QAAQ,MAAM,IAAI,KAAK,GAAG,GAAG,CAAC;AAGtE,QAAM,aAAa,KAAK,KAAK,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,IAAI,GAAG,CAAC,CAAC;AACpE,QAAM,aAAa,KAAK,KAAK,KAAK,OAAO,CAAC,KAAK,MAAM,MAAM,IAAI,GAAG,CAAC,CAAC;AAGpE,SAAO,cAAc,aAAa;AACpC;AAOO,MAAM,qBAAqB,YAA2B;AAC3D,QAAM,MAAM,QAAQ,IAAI;AACxB,SAAO,OAAO;AAAA,IACZ,MAAM,CAAC,QAAQ,GAAG,UAAU,QAAQ,GAAG,IAAI,cAAc,MAAM;AAAA,EACjE,CAAC;AAKD,QAAM,oBAAoB,MAAM,qBAAqB;AACrD,QAAM,OAAO,MAAM,QAAQ;AAC3B,QAAM,QAAQ,MAAM,SAAS;AAE7B,MAAI,SAAmC,CAAC;AACxC,QAAM,mBAAmB,oBAAI,IAAY;AAEzC,QAAM,QAAQ,EAAE,GAAG,MAAM,GAAG,OAAO,GAAG,kBAAkB;AAGxD,mBAAiB,WAAW,OAAO,KAAK,KAAK,GAAG;AAE9C,UAAM,eAAe;AAAA,MACnB,MAAM,OAA6B;AAAA,IACrC;AAGA,UAAM,aAAa;AAAA,MACjB,MAAM,OAA6B;AAAA,IACrC;AAGA,UAAM,wBAAwB,OAAO,KAAK,cAAc,EAAE;AAAA,MAAO,CAAC,QAChE,IAAI,WAAW,GAAG,OAAO,SAAS;AAAA,IACpC;AACA,UAAM,oBAAoB,WAAW;AACrC,UAAM,qBAAqB,sBAAsB;AAEjD,QAAI,iCAAiC;AAGrC,QAAI,sBAAsB,oBAAoB;AAC5C,cAAQ;AAAA,QACN,SAAS,OAAO,0BAA0B,kBAAkB,OAAO,iBAAiB;AAAA,MACtF;AACA,uCAAiC;AAAA,IACnC;AAGA,qBAAiB,cAAc,OAAO,KAAK,UAAU,GAAG;AACtD,YAAM,cAAc,OAAO,UAAU,IAAI;AACzC,YAAM,eAAe,WAAW;AAEhC,YAAM,YAAY,WAChB,UACF;AAEA,YAAM,mBAAmB,GAAG,OAAO,UAAU,WAAW;AACxD,uBAAiB,IAAI,gBAAgB;AAGrC,YAAM,eAAe,CAAC,iCACjB,eAAe,gBAA+C,IAG/D;AAEJ,UAAI,YAAY;AAEhB,UAAI,CAAC,WAAW;AACd,oBAAY,MAAM,kBAAkB,SAAS;AAC7C,gBAAQ,KAAK,8BAA8B,gBAAgB,EAAE;AAAA,MAC/D;AAGA,eAAS,EAAE,GAAG,QAAQ,CAAC,gBAAgB,GAAG,UAAU;AAGpD,kBAAY,KAAK;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA,SAAS;AAAA,QACT,QAAQ,aAAa;AAAA,QACrB,SAAS,aAAa;AAAA,MACxB,CAAC;AAED,cAAQ,KAAK,cAAc,gBAAgB,IAAI,YAAY,EAAE;AAAA,IAC/D;AAAA,EACF;AAGA,QAAM,qBAA+C,CAAC;AACtD,aAAW,CAAC,KAAK,SAAS,KAAK,OAAO,QAAQ,cAAc,GAAG;AAC7D,QAAI,iBAAiB,IAAI,GAAG,GAAG;AAE7B,UAAI,CAAC,OAAO,GAAG,GAAG;AAChB,2BAAmB,GAAG,IAAI;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AAGA,WAAS,EAAE,GAAG,oBAAoB,GAAG,OAAO;AAE5C,MAAI;AAEF,QAAI,KAAK,UAAU,MAAM,MAAM,KAAK,UAAU,cAAc,GAAG;AAE7D;AAAA,QACE;AAAA,QACA,KAAK,UAAU,QAAQ,MAAM,CAAC;AAAA,MAChC;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,YAAQ,MAAM,KAAK;AAAA,EACrB;AACF;AAGA,mBAAmB;AASZ,MAAM,uBAAuB,OAClC,OACA,aAAqB,wBACrB,gBAAwB,mCACK;AAE7B,QAAM,iBAAiB,MAAM,kBAAkB,KAAK;AAGpD,QAAM,YAAY,YACf,IAAI,CAAC,WAAW;AAAA,IACf,GAAG;AAAA,IACH,YAAY,iBAAiB,gBAAgB,MAAM,SAAS;AAAA;AAAA,EAC9D,EAAE,EACD,OAAO,CAAC,UAAU,MAAM,aAAa,aAAa,EAClD,KAAK,CAAC,GAAG,MAAM,EAAE,aAAa,EAAE,UAAU,EAC1C,MAAM,GAAG,UAAU;AAEtB,QAAM,iBAAiB,IAAI,IAAI,UAAU,IAAI,CAAC,UAAU,MAAM,OAAO,CAAC;AAEtE,QAAM,qBAAqB,YAAY;AAAA,IAAK,CAAC,GAAG,MAC9C,eAAe,IAAI,EAAE,OAAO,IAAI,KAAK;AAAA,EACvC;AAEA,QAAM,UAAU,mBAAmB;AAAA,IAAO,CAAC,UACzC,UAAU;AAAA,MACR,CAAC,MAAM,EAAE,YAAY,MAAM,WAAW,EAAE,gBAAgB,MAAM;AAAA,IAChE;AAAA,EACF;AAGA,SAAO;AACT;AASA,MAAM,iBAAiB,CAAC,qBAAqC;AAC3D,QAAM,YAAY,QAAQ,cAAc,YAAY,GAAG,CAAC;AACxD,QAAM,eAAe,KAAK,WAAW,gBAAgB;AACrD,QAAM,cAAc,aAAa,cAAc,OAAO;AACtD,SAAO;AACT;AAEA,MAAM,kBAAkB,eAAe,aAAa;AAG7C,MAAM,aAA2C;AAAA,EACtD,MAAM;AAAA,EACN,SAAS;AACX;AAkBO,MAAM,iBAAiB,OAC5B,UACA,UACA,YACkC;AAElC,QAAM,QAAQ,SACX,OAAO,CAAC,YAAY,QAAQ,SAAS,MAAM,EAC3C,IAAI,CAAC,YAAY,KAAK,QAAQ,OAAO,EAAE,EACvC,KAAK,IAAI;AAGZ,QAAM,0BAA0B,MAAM,qBAAqB,KAAK;AAGhE,QAAM,eAAe,WAAW,QAAQ;AAAA,IACtC;AAAA,IACA,wBAAwB,WAAW,IAC/B,qDACA,wBACG;AAAA,MAAI,CAAC,KAAK,QACT;AAAA,QACE;AAAA,QACA;AAAA,QACA,YAAY,GAAG;AAAA,QACf,cAAc,IAAI,WAAW,IAAI,IAAI,QAAQ,MAAM;AAAA,QACnD,aAAa,IAAI,OAAO;AAAA,QACxB,YAAY,IAAI,MAAM;AAAA,QACtB;AAAA,QACA,IAAI;AAAA,QACJ;AAAA,MACF,EAAE,KAAK,IAAI;AAAA,IACb,EACC,KAAK,MAAM;AAAA;AAAA,EACpB;AAGA,QAAM,aAAa;AAAA,IACjB;AAAA,MACE,MAAM;AAAA,MACN,SAAS;AAAA,IACX;AAAA,IACA,GAAG,SAAS,MAAM,EAAE;AAAA,EACtB;AAEA,MAAI,CAAC,UAAU;AACb,UAAM,IAAI,MAAM,uCAAuC;AAAA,EACzD;AAGA,MAAI,eAAe;AACnB,QAAM,SAAS,WAAW;AAAA,IACxB,GAAG;AAAA,IACH,UAAU;AAAA,EACZ,CAAC;AAGD,mBAAiB,SAAS,OAAO,YAAY;AAC3C,oBAAgB;AAChB,aAAS,YAAY,KAAK;AAAA,EAC5B;AAGA,QAAM,eAAe;AAAA,IACnB,GAAG,IAAI,IAAI,wBAAwB,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC;AAAA,EAC9D;AAGA,SAAO;AAAA,IACL,UAAU,gBAAgB;AAAA,IAC1B;AAAA,EACF;AACF;","names":[]}