@saltcorn/large-language-model 0.9.9 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/constants.js CHANGED
@@ -1,22 +1,26 @@
  const OPENAI_MODELS = [
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4o-mini",
- "gpt-4",
- "gpt-4-32k",
- "gpt-4-turbo-preview",
- "gpt-4-turbo",
  "gpt-4o",
+ "gpt-4o-mini",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4.1-nano",
- "o1",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5.1",
+ "gpt-5.1-mini",
+ "gpt-5.1-nano",
+ "gpt-5.2",
+ "gpt-5.2-pro",
  "o3",
  "o3-mini",
+ "o3-pro",
  "o4-mini",
- "gpt-5",
- "gpt-5-nano",
- "gpt-5-mini",
+ "codex-mini-latest",
+ "gpt-5-codex",
+ "gpt-5.1-codex",
+ "gpt-5.1-codex-mini",
+ "gpt-5.1-codex-max",
  ];

  // https://github.com/ollama/ollama/blob/main/docs/faq.md#where-are-models-stored
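
The model list drops the GPT-3.5 and GPT-4-era chat names and "o1" in favour of the GPT-5 family, "o3-pro", and the codex models. A minimal sketch of consuming the updated list, assuming a deep CommonJS require into the installed package resolves (index.js itself imports it as require("./constants.js"), so the export is confirmed; the external path is an assumption):

const { OPENAI_MODELS } = require("@saltcorn/large-language-model/constants.js");

// e.g. build options for a model picker
const options = OPENAI_MODELS.map((m) => ({ label: m, value: m }));
console.log(options.some((o) => o.value === "gpt-5.1")); // true as of 0.9.11
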
package/generate.js CHANGED
@@ -9,7 +9,9 @@ const {
  } = require("@google-cloud/aiplatform");
  const { google } = require("googleapis");
  const Plugin = require("@saltcorn/data/models/plugin");
+ const File = require("@saltcorn/data/models/file");
  const path = require("path");
+ const fs = require("fs");
  const { features, getState } = require("@saltcorn/data/db/state");
  const {
    generateText,
@@ -18,8 +20,10 @@ const {
    jsonSchema,
    embed,
    embedMany,
+   experimental_transcribe,
  } = require("ai");
  const { createOpenAI } = require("@ai-sdk/openai");
+ const OpenAI = require("openai");
  let ollamaMod;
  if (features.esm_plugins) ollamaMod = require("ollama");

@@ -113,6 +117,67 @@ const getImageGeneration = async (config, opts) => {
    }
  };

+ const getAudioTranscription = async (
+   { backend, apiKey, api_key, provider, ai_sdk_provider },
+   opts
+ ) => {
+   switch (backend) {
+     case "OpenAI":
+       const client = new OpenAI({
+         apiKey: opts?.api_key || api_key || apiKey,
+       });
+       const fp = opts.file.location
+         ? opts.file.location
+         : typeof opts.file === "string"
+         ? await (
+             await File.findOne(opts.file)
+           ).location
+         : null;
+       const model = opts?.model || "whisper-1";
+       const diarize = model === "gpt-4o-transcribe-diarize";
+       const transcript1 = await client.audio.transcriptions.create({
+         file: Buffer.isBuffer(opts.file) ? opts.file : fs.createReadStream(fp),
+
+         model,
+         ...(diarize
+           ? {
+               response_format: "diarized_json",
+               chunking_strategy: "auto",
+             }
+           : {}),
+       });
+       return transcript1;
+
+     case "AI SDK":
+       const api_Key = opts?.api_key || api_key || apiKey;
+       const prov_obj = createOpenAI({ apiKey: api_Key });
+       const audio =
+         opts.url ||
+         (Buffer.isBuffer(opts.file)
+           ? opts.file
+           : typeof opts.file === "string"
+           ? await (await File.findOne(opts.file)).get_contents()
+           : await opts.file.get_contents());
+       const extra = {};
+       if (opts.prompt)
+         extra.providerOptions = {
+           openai: {
+             prompt: opts.prompt,
+             //response_format: "text",
+           },
+         };
+       const transcript = await experimental_transcribe({
+         model: prov_obj.transcription(opts?.model || "whisper-1"),
+         audio,
+         ...extra,
+       });
+
+       return transcript;
+     default:
+       throw new Error("Audio transcription not implemented for this backend");
+   }
+ };
+
  const getCompletion = async (config, opts) => {
    switch (config.backend) {
      case "AI SDK":
@@ -190,6 +255,16 @@ const getCompletion = async (config, opts) => {
    }
  };

+ const getAiSdkModel = ({ provider, api_key, model_name }) => {
+   switch (provider) {
+     case "OpenAI":
+       const openai = createOpenAI({ apiKey: api_key });
+       return openai(model_name);
+     default:
+       throw new Error("Provider not found: " + provider);
+   }
+ };
+
  const getCompletionAISDK = async (
    { apiKey, model, provider, temperature },
    {
@@ -204,13 +279,11 @@ const getCompletionAISDK = async (
    }
  ) => {
    const use_model_name = rest.model || model;
-   let model_obj;
-   switch (provider) {
-     case "OpenAI":
-       const openai = createOpenAI({ apiKey: api_key || apiKey });
-       model_obj = openai(use_model_name);
-       break;
-   }
+   let model_obj = getAiSdkModel({
+     model_name: use_model_name,
+     api_key: api_key || apiKey,
+     provider,
+   });
    const modifyChat = (chat) => {
      const f = (c) => {
        if (c.type === "image_url")
@@ -864,4 +937,9 @@ const getEmbeddingGoogleVertex = async (config, opts, oauth2Client) => {
    return embeddings;
  };

- module.exports = { getCompletion, getEmbedding, getImageGeneration };
+ module.exports = {
+   getCompletion,
+   getEmbedding,
+   getImageGeneration,
+   getAudioTranscription,
+ };
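
The new export, getAudioTranscription, accepts the audio in opts.file as a Buffer, as a file name resolved through File.findOne, or as a File row with a location, and defaults the model to whisper-1 on both backends. A hedged usage sketch for the OpenAI backend, assuming a deep require of generate.js and an OPENAI_API_KEY environment variable (neither is shown in the diff itself):

const { getAudioTranscription } = require("@saltcorn/large-language-model/generate");

(async () => {
  const transcript = await getAudioTranscription(
    { backend: "OpenAI", api_key: process.env.OPENAI_API_KEY },
    { file: "recording.mp3", model: "whisper-1" } // file name looked up via File.findOne
  );
  console.log(transcript.text); // transcription string from the OpenAI client
})();
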
package/index.js CHANGED
@@ -10,6 +10,7 @@ const {
    getCompletion,
    getEmbedding,
    getImageGeneration,
+   getAudioTranscription,
  } = require("./generate");
  const { OPENAI_MODELS } = require("./constants.js");
  const { eval_expression } = require("@saltcorn/data/models/expression");
@@ -400,6 +401,15 @@ const functions = (config) => {
        description: "Get vector embedding",
        arguments: [{ name: "prompt", type: "String" }],
      },
+     llm_transcribe: {
+       run: async (opts) => {
+         const result = await getAudioTranscription(config, opts);
+         return result;
+       },
+       isAsync: true,
+       description: "Get vector embedding",
+       arguments: [{ name: "prompt", type: "String" }],
+     },
    };
  };
 
@@ -640,6 +650,126 @@ module.exports = {
        else await table.updateRow(upd, row[table.pk_name]);
      },
    },
+   llm_transcribe_audio: {
+     description: "Generate text from audio file",
+     requireRow: true,
+     configFields: ({ table, mode }) => {
+       const override_fields =
+         config.backend === "OpenAI-compatible API" &&
+         (config.altconfigs || []).filter((c) => c.name).length
+           ? [
+               {
+                 name: "override_config",
+                 label: "Alternative LLM configuration",
+                 type: "String",
+                 attributes: { options: config.altconfigs.map((c) => c.name) },
+               },
+             ]
+           : [];
+
+       if (mode === "workflow") {
+         return [
+           {
+             name: "audio_file_field",
+             label: "Audio file variable",
+             sublabel: "Set the generated answer to this context variable",
+             type: "String",
+             required: true,
+           },
+           {
+             name: "answer_field",
+             label: "Response variable",
+             sublabel: "Set the generated response object to this context variable. The subfield <code>text</code> holds the string transcription",
+             type: "String",
+             required: true,
+           },
+           {
+             name: "model",
+             label: "The model name, for example <code>whisper-1</code>",
+             type: "String",
+           },
+           {
+             name: "prompt_template",
+             label: "Prompt",
+             sublabel:
+               "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the context",
+             type: "String",
+             fieldview: "textarea",
+           },
+
+           //...override_fields,
+         ];
+       } else if (table) {
+         const textFields = table.fields
+           .filter((f) => f.type?.sql_name === "text")
+           .map((f) => f.name);
+         const fileFields = table.fields
+           .filter((f) => f.type === "File")
+           .map((f) => f.name);
+
+         return [
+           {
+             name: "audio_file_field",
+             label: "Audio file variable",
+             sublabel: "Set the generated answer to this context variable",
+             type: "String",
+             required: true,
+             attributes: { options: fileFields },
+           },
+           {
+             name: "answer_field",
+             label: "Answer field",
+             sublabel: "Output field will be set to the generated answer",
+             type: "String",
+             required: true,
+             attributes: { options: textFields },
+           },
+           {
+             name: "model",
+             label: "The model name, for example <code>whisper-1</code>",
+             type: "String",
+           },
+           {
+             name: "prompt_template",
+             label: "Prompt",
+             sublabel:
+               "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the row",
+             type: "String",
+             fieldview: "textarea",
+           },
+           //...override_fields,
+         ];
+       }
+     },
+     run: async ({
+       row,
+       table,
+       user,
+       mode,
+       configuration: {
+         audio_file_field,
+         prompt_template,
+         answer_field,
+         //override_config,
+         model,
+       },
+     }) => {
+       const opts = { file: row[audio_file_field] };
+       if (prompt_template)
+         opts.prompt = interpolate(prompt_template, row, user);
+
+       if (model) opts.model = model;
+
+       const ans = await getAudioTranscription(config, opts);
+
+       if (mode === "workflow") return { [answer_field]: ans };
+       else
+         await table.updateRow(
+           { [answer_field]: ans.text },
+           row[table.pk_name]
+         );
+     },
+   },
    llm_generate_image: {
      description: "Generate image with AI based on a text prompt",
      requireRow: true,
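
Note the asymmetry in llm_transcribe_audio's run handler: workflow mode stores the whole response object under the response variable (so the transcription sits at <variable>.text, as the field sublabel says), while table mode writes only ans.text into the answer field. The companion llm_transcribe function registered above takes the same opts shape. A hedged sketch of invoking it from server-side code, assuming Saltcorn exposes plugin functions on application state via getState().functions (an assumption about Saltcorn internals, not something this diff shows):

const { getState } = require("@saltcorn/data/db/state");

(async () => {
  // assumption: plugin functions are registered on state with a .run method;
  // llm_transcribe forwards opts straight to getAudioTranscription
  const result = await getState().functions.llm_transcribe.run({
    file: "meeting.mp3", // file name, File row, or Buffer
    model: "whisper-1",
  });
  console.log(result.text);
})();
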
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@saltcorn/large-language-model",
-   "version": "0.9.9",
+   "version": "0.9.11",
    "description": "Large language models and functionality for Saltcorn",
    "main": "index.js",
    "dependencies": {
@@ -12,7 +12,8 @@
      "@google-cloud/aiplatform": "^3.34.0",
      "googleapis": "^144.0.0",
      "ai": "5.0.44",
-     "@ai-sdk/openai": "2.0.30"
+     "@ai-sdk/openai": "2.0.30",
+     "openai": "6.16.0"
    },
    "author": "Tom Nielsen",
    "license": "MIT",