@saltcorn/large-language-model 0.9.9 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/constants.js CHANGED
@@ -1,22 +1,26 @@
  const OPENAI_MODELS = [
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4o-mini",
- "gpt-4",
- "gpt-4-32k",
- "gpt-4-turbo-preview",
- "gpt-4-turbo",
  "gpt-4o",
+ "gpt-4o-mini",
  "gpt-4.1",
  "gpt-4.1-mini",
  "gpt-4.1-nano",
- "o1",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5.1",
+ "gpt-5.1-mini",
+ "gpt-5.1-nano",
+ "gpt-5.2",
+ "gpt-5.2-pro",
  "o3",
  "o3-mini",
+ "o3-pro",
  "o4-mini",
- "gpt-5",
- "gpt-5-nano",
- "gpt-5-mini",
+ "codex-mini-latest",
+ "gpt-5-codex",
+ "gpt-5.1-codex",
+ "gpt-5.1-codex-mini",
+ "gpt-5.1-codex-max",
  ];

  // https://github.com/ollama/ollama/blob/main/docs/faq.md#where-are-models-stored
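
The model list drops the GPT-3.5 and GPT-4-era chat names and "o1" in favour of the GPT-5 family, "o3-pro", and the codex models. A minimal sketch of consuming the updated list, assuming a deep CommonJS require into the installed package resolves (index.js itself imports it as require("./constants.js"), so the export is confirmed; the external path is an assumption):

const { OPENAI_MODELS } = require("@saltcorn/large-language-model/constants.js");

// e.g. build options for a model picker
const options = OPENAI_MODELS.map((m) => ({ label: m, value: m }));
console.log(options.some((o) => o.value === "gpt-5.1")); // true as of 0.9.11
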
package/generate.js CHANGED
@@ -9,7 +9,9 @@ const {
  } = require("@google-cloud/aiplatform");
  const { google } = require("googleapis");
  const Plugin = require("@saltcorn/data/models/plugin");
+ const File = require("@saltcorn/data/models/file");
  const path = require("path");
+ const fs = require("fs");
  const { features, getState } = require("@saltcorn/data/db/state");
  const {
    generateText,
@@ -18,8 +20,10 @@ const {
    jsonSchema,
    embed,
    embedMany,
+   experimental_transcribe,
  } = require("ai");
  const { createOpenAI } = require("@ai-sdk/openai");
+ const OpenAI = require("openai");
  let ollamaMod;
  if (features.esm_plugins) ollamaMod = require("ollama");

@@ -113,6 +117,67 @@ const getImageGeneration = async (config, opts) => {
    }
  };

+ const getAudioTranscription = async (
+   { backend, apiKey, api_key, provider, ai_sdk_provider },
+   opts
+ ) => {
+   switch (backend) {
+     case "OpenAI":
+       const client = new OpenAI({
+         apiKey: opts?.api_key || api_key || apiKey,
+       });
+       const fp = opts.file.location
+         ? opts.file.location
+         : typeof opts.file === "string"
+         ? await (
+             await File.findOne(opts.file)
+           ).location
+         : null;
+       const model = opts?.model || "whisper-1";
+       const diarize = model === "gpt-4o-transcribe-diarize";
+       const transcript1 = await client.audio.transcriptions.create({
+         file: Buffer.isBuffer(opts.file) ? opts.file : fs.createReadStream(fp),
+
+         model,
+         ...(diarize
+           ? {
+               response_format: "diarized_json",
+               chunking_strategy: "auto",
+             }
+           : {}),
+       });
+       return transcript1;
+
+     case "AI SDK":
+       const api_Key = opts?.api_key || api_key || apiKey;
+       const prov_obj = createOpenAI({ apiKey: api_Key });
+       const audio =
+         opts.url ||
+         (Buffer.isBuffer(opts.file)
+           ? opts.file
+           : typeof opts.file === "string"
+           ? await (await File.findOne(opts.file)).get_contents()
+           : await opts.file.get_contents());
+       const extra = {};
+       if (opts.prompt)
+         extra.providerOptions = {
+           openai: {
+             prompt: opts.prompt,
+             //response_format: "text",
+           },
+         };
+       const transcript = await experimental_transcribe({
+         model: prov_obj.transcription(opts?.model || "whisper-1"),
+         audio,
+         ...extra,
+       });
+
+       return transcript;
+     default:
+       throw new Error("Audio transcription not implemented for this backend");
+   }
+ };
+
  const getCompletion = async (config, opts) => {
    switch (config.backend) {
      case "AI SDK":
@@ -190,6 +255,16 @@ const getCompletion = async (config, opts) => {
    }
  };

+ const getAiSdkModel = ({ provider, api_key, model_name }) => {
+   switch (provider) {
+     case "OpenAI":
+       const openai = createOpenAI({ apiKey: api_key });
+       return openai(model_name);
+     default:
+       throw new Error("Provider not found: " + provider);
+   }
+ };
+
  const getCompletionAISDK = async (
    { apiKey, model, provider, temperature },
    {
@@ -204,13 +279,11 @@ const getCompletionAISDK = async (
    }
  ) => {
    const use_model_name = rest.model || model;
-   let model_obj;
-   switch (provider) {
-     case "OpenAI":
-       const openai = createOpenAI({ apiKey: api_key || apiKey });
-       model_obj = openai(use_model_name);
-       break;
-   }
+   let model_obj = getAiSdkModel({
+     model_name: use_model_name,
+     api_key: api_key || apiKey,
+     provider,
+   });
    const modifyChat = (chat) => {
      const f = (c) => {
        if (c.type === "image_url")
@@ -864,4 +937,9 @@ const getEmbeddingGoogleVertex = async (config, opts, oauth2Client) => {
    return embeddings;
  };

- module.exports = { getCompletion, getEmbedding, getImageGeneration };
+ module.exports = {
+   getCompletion,
+   getEmbedding,
+   getImageGeneration,
+   getAudioTranscription,
+ };
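
The new export, getAudioTranscription, accepts the audio in opts.file as a Buffer, as a file name resolved through File.findOne, or as a File row with a location, and defaults the model to whisper-1 on both backends. A hedged usage sketch for the OpenAI backend, assuming a deep require of generate.js and an OPENAI_API_KEY environment variable (neither is shown in the diff itself):

const { getAudioTranscription } = require("@saltcorn/large-language-model/generate");

(async () => {
  const transcript = await getAudioTranscription(
    { backend: "OpenAI", api_key: process.env.OPENAI_API_KEY },
    { file: "recording.mp3", model: "whisper-1" } // file name looked up via File.findOne
  );
  console.log(transcript.text); // transcription string from the OpenAI client
})();
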
package/index.js CHANGED
@@ -10,6 +10,7 @@ const {
    getCompletion,
    getEmbedding,
    getImageGeneration,
+   getAudioTranscription,
  } = require("./generate");
  const { OPENAI_MODELS } = require("./constants.js");
  const { eval_expression } = require("@saltcorn/data/models/expression");
@@ -400,6 +401,15 @@ const functions = (config) => {
        description: "Get vector embedding",
        arguments: [{ name: "prompt", type: "String" }],
      },
+     llm_transcribe: {
+       run: async (opts) => {
+         const result = await getAudioTranscription(config, opts);
+         return result;
+       },
+       isAsync: true,
+       description: "Get vector embedding",
+       arguments: [{ name: "prompt", type: "String" }],
+     },
    };
  };
 
@@ -640,6 +650,126 @@ module.exports = {
        else await table.updateRow(upd, row[table.pk_name]);
      },
    },
+   llm_transcribe_audio: {
+     description: "Generate text from audio file",
+     requireRow: true,
+     configFields: ({ table, mode }) => {
+       const override_fields =
+         config.backend === "OpenAI-compatible API" &&
+         (config.altconfigs || []).filter((c) => c.name).length
+           ? [
+               {
+                 name: "override_config",
+                 label: "Alternative LLM configuration",
+                 type: "String",
+                 attributes: { options: config.altconfigs.map((c) => c.name) },
+               },
+             ]
+           : [];
+
+       if (mode === "workflow") {
+         return [
+           {
+             name: "audio_file_field",
+             label: "Audio file variable",
+             sublabel: "Set the generated answer to this context variable",
+             type: "String",
+             required: true,
+           },
+           {
+             name: "answer_field",
+             label: "Response variable",
+             sublabel: "Set the generated response object to this context variable. The subfield <code>text</code> holds the string transcription",
+             type: "String",
+             required: true,
+           },
+           {
+             name: "model",
+             label: "The model name, for example <code>whisper-1</code>",
+             type: "String",
+           },
+           {
+             name: "prompt_template",
+             label: "Prompt",
+             sublabel:
+               "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the context",
+             type: "String",
+             fieldview: "textarea",
+           },
+
+           //...override_fields,
+         ];
+       } else if (table) {
+         const textFields = table.fields
+           .filter((f) => f.type?.sql_name === "text")
+           .map((f) => f.name);
+         const fileFields = table.fields
+           .filter((f) => f.type === "File")
+           .map((f) => f.name);
+
+         return [
+           {
+             name: "audio_file_field",
+             label: "Audio file variable",
+             sublabel: "Set the generated answer to this context variable",
+             type: "String",
+             required: true,
+             attributes: { options: fileFields },
+           },
+           {
+             name: "answer_field",
+             label: "Answer field",
+             sublabel: "Output field will be set to the generated answer",
+             type: "String",
+             required: true,
+             attributes: { options: textFields },
+           },
+           {
+             name: "model",
+             label: "The model name, for example <code>whisper-1</code>",
+             type: "String",
+           },
+           {
+             name: "prompt_template",
+             label: "Prompt",
+             sublabel:
+               "Additional prompt text (only some models). Use interpolations {{ }} to access variables in the row",
+             type: "String",
+             fieldview: "textarea",
+           },
+           //...override_fields,
+         ];
+       }
+     },
+     run: async ({
+       row,
+       table,
+       user,
+       mode,
+       configuration: {
+         audio_file_field,
+         prompt_template,
+         answer_field,
+         //override_config,
+         model,
+       },
+     }) => {
+       const opts = { file: row[audio_file_field] };
+       if (prompt_template)
+         opts.prompt = interpolate(prompt_template, row, user);
+
+       if (model) opts.model = model;
+
+       const ans = await getAudioTranscription(config, opts);
+
+       if (mode === "workflow") return { [answer_field]: ans };
+       else
+         await table.updateRow(
+           { [answer_field]: ans.text },
+           row[table.pk_name]
+         );
+     },
+   },
    llm_generate_image: {
      description: "Generate image with AI based on a text prompt",
      requireRow: true,
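
Note the asymmetry in llm_transcribe_audio's run handler: workflow mode stores the whole response object under the response variable (so the transcription sits at <variable>.text, as the field sublabel says), while table mode writes only ans.text into the answer field. The companion llm_transcribe function registered above takes the same opts shape. A hedged sketch of invoking it from server-side code, assuming Saltcorn exposes plugin functions on application state via getState().functions (an assumption about Saltcorn internals, not something this diff shows):

const { getState } = require("@saltcorn/data/db/state");

(async () => {
  // assumption: plugin functions are registered on state with a .run method;
  // llm_transcribe forwards opts straight to getAudioTranscription
  const result = await getState().functions.llm_transcribe.run({
    file: "meeting.mp3", // file name, File row, or Buffer
    model: "whisper-1",
  });
  console.log(result.text);
})();
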
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@saltcorn/large-language-model",
-   "version": "0.9.9",
+   "version": "0.9.11",
    "description": "Large language models and functionality for Saltcorn",
    "main": "index.js",
    "dependencies": {
@@ -12,7 +12,8 @@
      "@google-cloud/aiplatform": "^3.34.0",
      "googleapis": "^144.0.0",
      "ai": "5.0.44",
-     "@ai-sdk/openai": "2.0.30"
+     "@ai-sdk/openai": "2.0.30",
+     "openai": "6.16.0"
    },
    "author": "Tom Nielsen",
    "license": "MIT",