utilitas 2000.3.24 → 2000.3.26

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -1063,7 +1063,7 @@ const distillFile = async (attachments, o) => {
1063
1063
  '- You will receive various multimedia files, including images, audio, and videos.',
1064
1064
  '- Please analyze these documents, extract the information, and organize it into an easy-to-read format.',
1065
1065
  '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. Use markdown to format table and other rich text where possible. Use LaTeX for all formulas, subscripts, representations of formulas, and special symbols in mathematics and chemistry, enclosed by "$" symbols. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images. Ensure the returned document is clean, well-organized, and highly readable.',
1066
- '- For audio files, please provide a transcript of the spoken voices. If there are background noises or music, attempt to briefly describe the environmental sounds and music sections.',
1066
+ '- For audio files, please transcribe the spoken voices into clean text. If there are background sounds, attempt to briefly describe the environmental sounds and music sections. Only care about the main speech content, meaningful music and environment sounds. Do not be disturbed by useless background noise.',
1067
1067
  '- For images or video files that are not primarily text-based, describe the tragic scene you observe, highlight key details, convey the emotional tone of the setting, and share your impressions.',
1068
1068
  '- For video files, please describe the content, including the theme, subjects, characters, scenes, objects, storyline, and emotional tone.',
1069
1069
  '- Please RETURN ONLY your analysis results without including your thought process or other unrelated information.',
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  const manifest = {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.24",
4
+ "version": "2000.3.26",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
package/lib/speech.mjs CHANGED
@@ -31,6 +31,8 @@ const [
31
31
  defaultGeminiSttModel,
32
32
  ] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
33
33
 
34
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
35
+
34
36
  const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
35
37
 
36
38
  const WHISPER_MODELS = [
@@ -166,13 +168,14 @@ const ttsGoogle = async (contents, options) => {
166
168
  assert(contents, 'Text is required.', 400);
167
169
  assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
168
170
  const resp = await clients.tts({
169
- model: options?.model || defaultGeminiTtsModel, contents,
171
+ model: options?.model || defaultGeminiTtsModel,
172
+ contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
170
173
  config: mergeAtoB(options?.config, {
171
174
  responseModalities: ['AUDIO'],
172
175
  speechConfig: {
173
176
  voiceConfig: {
174
177
  prebuiltVoiceConfig: {
175
- voiceName: options?.voice || 'Leda',
178
+ voiceName: options?.voice || 'Zephyr',
176
179
  },
177
180
  },
178
181
  },
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.24",
4
+ "version": "2000.3.26",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",