@saltcorn/large-language-model 0.9.8 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/constants.js CHANGED
@@ -1,22 +1,26 @@
  const OPENAI_MODELS = [
-   "gpt-3.5-turbo",
-   "gpt-3.5-turbo-16k",
-   "gpt-4o-mini",
-   "gpt-4",
-   "gpt-4-32k",
-   "gpt-4-turbo-preview",
-   "gpt-4-turbo",
    "gpt-4o",
+   "gpt-4o-mini",
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
-   "o1",
+   "gpt-5",
+   "gpt-5-mini",
+   "gpt-5-nano",
+   "gpt-5.1",
+   "gpt-5.1-mini",
+   "gpt-5.1-nano",
+   "gpt-5.2",
+   "gpt-5.2-pro",
    "o3",
    "o3-mini",
+   "o3-pro",
    "o4-mini",
-   "gpt-5",
-   "gpt-5-nano",
-   "gpt-5-mini",
+   "codex-mini-latest",
+   "gpt-5-codex",
+   "gpt-5.1-codex",
+   "gpt-5.1-codex-mini",
+   "gpt-5.1-codex-max",
  ];

  // https://github.com/ollama/ollama/blob/main/docs/faq.md#where-are-models-stored
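The new list drops the GPT-3.5 and older GPT-4 identifiers (and o1) and adds the GPT-5.x, o3-pro and Codex model families. A minimal sketch of checking a model name against the exported constant; this is hypothetical consumer code, assuming constants.js is requirable by path (OPENAI_MODELS is the named export that index.js itself requires below):

// Hypothetical consumer check, not part of the package.
const { OPENAI_MODELS } = require("@saltcorn/large-language-model/constants.js");

const model = "gpt-5.1-codex-mini";
if (!OPENAI_MODELS.includes(model))
  console.warn(`${model} is not in the plugin's OpenAI model list`);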
package/generate.js CHANGED
@@ -9,6 +9,7 @@ const {
  } = require("@google-cloud/aiplatform");
  const { google } = require("googleapis");
  const Plugin = require("@saltcorn/data/models/plugin");
+ const File = require("@saltcorn/data/models/file");
  const path = require("path");
  const { features, getState } = require("@saltcorn/data/db/state");
  const {
@@ -18,8 +19,9 @@ const {
    jsonSchema,
    embed,
    embedMany,
+   experimental_transcribe,
  } = require("ai");
- const { createOpenAI } = require("@ai-sdk/openai");
+ const { openai, createOpenAI } = require("@ai-sdk/openai");
  let ollamaMod;
  if (features.esm_plugins) ollamaMod = require("ollama");

@@ -113,6 +115,41 @@ const getImageGeneration = async (config, opts) => {
    }
  };

+ const getAudioTranscription = async (
+   { backend, apiKey, api_key, provider, ai_sdk_provider },
+   opts
+ ) => {
+   switch (backend) {
+     case "AI SDK":
+       const api_Key = opts?.api_key || api_key || apiKey;
+       const prov_obj = createOpenAI({ apiKey: api_Key });
+       const audio =
+         opts.url ||
+         (Buffer.isBuffer(opts.file)
+           ? opts.file
+           : typeof opts.file === "string"
+           ? await (await File.findOne(opts.file)).get_contents()
+           : await opts.file.get_contents());
+       const extra = {};
+       if (opts.prompt)
+         extra.providerOptions = {
+           openai: {
+             prompt: opts.prompt,
+             //response_format: "text",
+           },
+         };
+       const transcript = await experimental_transcribe({
+         model: prov_obj.transcription(opts?.model || "whisper-1"),
+         audio,
+         ...extra,
+       });
+
+       return transcript;
+     default:
+       throw new Error("Audio transcription not implemented for this backend");
+   }
+ };
+
  const getCompletion = async (config, opts) => {
    switch (config.backend) {
      case "AI SDK":
@@ -190,6 +227,16 @@ const getCompletion = async (config, opts) => {
    }
  };

+ const getAiSdkModel = ({ provider, api_key, model_name }) => {
+   switch (provider) {
+     case "OpenAI":
+       const openai = createOpenAI({ apiKey: api_key });
+       return openai(model_name);
+     default:
+       throw new Error("Provider not found: " + provider);
+   }
+ };
+
  const getCompletionAISDK = async (
    { apiKey, model, provider, temperature },
    {
@@ -204,13 +251,26 @@ const getCompletionAISDK = async (
    }
  ) => {
    const use_model_name = rest.model || model;
-   let model_obj;
-   switch (provider) {
-     case "OpenAI":
-       const openai = createOpenAI({ apiKey: api_key || apiKey });
-       model_obj = openai(use_model_name);
-       break;
-   }
+   let model_obj = getAiSdkModel({
+     model_name: use_model_name,
+     api_key: api_key || apiKey,
+     provider,
+   });
+   const modifyChat = (chat) => {
+     const f = (c) => {
+       if (c.type === "image_url")
+         return {
+           type: "image",
+           image: c.image_url?.url || c.image?.url || c.image_url || c.image,
+         };
+       else return c;
+     };
+     return {
+       ...chat,
+       ...(Array.isArray(chat.content) ? { content: chat.content.map(f) } : {}),
+     };
+   };
+   const newChat = chat.map(modifyChat);

    const body = {
      ...rest,
@@ -220,7 +280,7 @@ const getCompletionAISDK = async (
          role: "system",
          content: systemPrompt || "You are a helpful assistant.",
        },
-       ...chat,
+       ...newChat,
        ...(prompt ? [{ role: "user", content: prompt }] : []),
      ],
    };
@@ -849,4 +909,9 @@ const getEmbeddingGoogleVertex = async (config, opts, oauth2Client) => {
    return embeddings;
  };

- module.exports = { getCompletion, getEmbedding, getImageGeneration };
+ module.exports = {
+   getCompletion,
+   getEmbedding,
+   getImageGeneration,
+   getAudioTranscription,
+ };
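Based only on the signature added above, a hedged sketch of calling the new getAudioTranscription export directly. The deep require path, environment variable and local file name are assumptions for illustration; opts.file may be a Buffer, a Saltcorn file path string or a File object, and opts.model defaults to "whisper-1":

// Sketch only, not the package's own usage.
const fs = require("fs");
const {
  getAudioTranscription,
} = require("@saltcorn/large-language-model/generate"); // assumed requirable by path

const run = async () => {
  const transcript = await getAudioTranscription(
    { backend: "AI SDK", apiKey: process.env.OPENAI_API_KEY }, // only the "AI SDK" backend is implemented
    {
      file: fs.readFileSync("meeting.ogg"), // hypothetical local recording
      model: "whisper-1",
      prompt: "Spelling hints: Saltcorn", // forwarded as providerOptions.openai.prompt
    }
  );
  console.log(transcript.text); // index.js stores ans.text, so the result exposes a text field
};

run();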
package/index.js CHANGED
@@ -10,6 +10,7 @@ const {
    getCompletion,
    getEmbedding,
    getImageGeneration,
+   getAudioTranscription,
  } = require("./generate");
  const { OPENAI_MODELS } = require("./constants.js");
  const { eval_expression } = require("@saltcorn/data/models/expression");
@@ -400,6 +401,15 @@ const functions = (config) => {
        description: "Get vector embedding",
        arguments: [{ name: "prompt", type: "String" }],
      },
+     llm_transcribe: {
+       run: async (opts) => {
+         const result = await getAudioTranscription(config, opts);
+         return result;
+       },
+       isAsync: true,
+       description: "Get vector embedding",
+       arguments: [{ name: "prompt", type: "String" }],
+     },
    };
  };

@@ -640,6 +650,128 @@ module.exports = {
          else await table.updateRow(upd, row[table.pk_name]);
        },
      },
+     llm_transcribe_audio: {
+       description: "Generate text from audio file",
+       requireRow: true,
+       configFields: ({ table, mode }) => {
+         const override_fields =
+           config.backend === "OpenAI-compatible API" &&
+           (config.altconfigs || []).filter((c) => c.name).length
+             ? [
+                 {
+                   name: "override_config",
+                   label: "Alternative LLM configuration",
+                   type: "String",
+                   attributes: { options: config.altconfigs.map((c) => c.name) },
+                 },
+               ]
+             : [];
+
+         if (mode === "workflow") {
+           return [
+             {
+               name: "audio_file_field",
+               label: "Audio file variable",
+               sublabel: "Set the generated answer to this context variable",
+               type: "String",
+               required: true,
+             },
+             {
+               name: "answer_field",
+               label: "Response variable",
+               sublabel: "Set the generated response object to this context variable. The subfield <code>text</code> holds the string transcription",
+               type: "String",
+               required: true,
+             },
+             {
+               name: "model",
+               label: "The model name, for example <code>whisper-1</code>",
+               type: "String",
+               required: true,
+             },
+             {
+               name: "prompt_template",
+               label: "Prompt",
+               sublabel:
+                 "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the context",
+               type: "String",
+               fieldview: "textarea",
+             },
+
+             //...override_fields,
+           ];
+         } else if (table) {
+           const textFields = table.fields
+             .filter((f) => f.type?.sql_name === "text")
+             .map((f) => f.name);
+           const fileFields = table.fields
+             .filter((f) => f.type === "File")
+             .map((f) => f.name);
+
+           return [
+             {
+               name: "audio_file_field",
+               label: "Audio file variable",
+               sublabel: "Set the generated answer to this context variable",
+               type: "String",
+               required: true,
+               attributes: { options: fileFields },
+             },
+             {
+               name: "answer_field",
+               label: "Answer field",
+               sublabel: "Output field will be set to the generated answer",
+               type: "String",
+               required: true,
+               attributes: { options: textFields },
+             },
+             {
+               name: "model",
+               label: "The model name, for example <code>whisper-1</code>",
+               type: "String",
+               required: true,
+             },
+             {
+               name: "prompt_template",
+               label: "Prompt",
+               sublabel:
+                 "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the row",
+               type: "String",
+               fieldview: "textarea",
+             },
+             //...override_fields,
+           ];
+         }
+       },
+       run: async ({
+         row,
+         table,
+         user,
+         mode,
+         configuration: {
+           audio_file_field,
+           prompt_template,
+           answer_field,
+           //override_config,
+           model,
+         },
+       }) => {
+         const opts = { file: row[audio_file_field] };
+         if (prompt_template)
+           opts.prompt = interpolate(prompt_template, row, user);
+
+         if (model) opts.model = model;
+
+         const ans = await getAudioTranscription(config, opts);
+
+         if (mode === "workflow") return { [answer_field]: ans };
+         else
+           await table.updateRow(
+             { [answer_field]: ans.text },
+             row[table.pk_name]
+           );
+       },
+     },
      llm_generate_image: {
        description: "Generate image with AI based on a text prompt",
        requireRow: true,
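A sketch of the data flow the new llm_transcribe_audio action implements in table mode, using only calls that appear in this diff; the field names ("recording", "transcript") and the standalone wrapper are hypothetical. In workflow mode the action instead stores the whole result object, with the transcript string under its text subfield:

// Not the plugin's code: a condensed restatement of the action's run() step.
const { getAudioTranscription } = require("@saltcorn/large-language-model/generate");

const transcribeRow = async (config, table, row) => {
  const opts = { file: row.recording, model: "whisper-1" }; // audio_file_field value + configured model
  // prompt_template, when configured, is interpolated and passed as opts.prompt
  const ans = await getAudioTranscription(config, opts);
  await table.updateRow({ transcript: ans.text }, row[table.pk_name]); // answer_field <- ans.text
  return ans.text;
};

module.exports = { transcribeRow };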
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@saltcorn/large-language-model",
-   "version": "0.9.8",
+   "version": "0.9.10",
    "description": "Large language models and functionality for Saltcorn",
    "main": "index.js",
    "dependencies": {