morpheus-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,6 +63,17 @@ If you installed successfully but can't run the `morpheus` command:
63
63
  - On Linux/Mac, verify `echo $PATH`.
64
64
  2. **Restart Terminal**: New installations might not be visible until you restart your shell.
65
65
 
66
+ ## Using npx
67
+ You can run Morpheus without installing it globally using `npx`:
68
+
69
+ ```bash
70
+
71
+ npx morpheus-cli init
72
+
73
+ npx morpheus-cli start
74
+
75
+ ```
76
+
66
77
  ## Technical Overview
67
78
 
68
79
  Morpheus is built with **Node.js** and **TypeScript**, using **LangChain** as the orchestration engine. It runs as a background daemon process, managing connections to LLM providers (OpenAI, Anthropic, Ollama) and external channels (Telegram, Discord).
@@ -74,6 +85,16 @@ Morpheus is built with **Node.js** and **TypeScript**, using **LangChain** as th
74
85
  - **Configuration (`src/config/`)**: Singleton-based configuration manager using `zod` for validation and `js-yaml` for persistence (`~/.morpheus/config.yaml`).
75
86
  - **Channels (`src/channels/`)**: Adapters for external communication. Currently supports Telegram (`telegraf`) with strict user whitelisting.
76
87
 
88
+ ## Features
89
+
90
+ ### 🎙️ Audio Transcription (Telegram)
91
+ Send voice messages directly to the Telegram bot. Morpheus will:
92
+ 1. Transcribe the audio using **Google Gemini**.
93
+ 2. Process the text as a standard prompt.
94
+ 3. Reply with the answer.
95
+
96
+ *Requires a Google Gemini API Key.*
97
+
77
98
  ## Development Setup
78
99
 
79
100
  This guide is for developers contributing to the Morpheus codebase.
@@ -133,6 +154,12 @@ channels:
133
154
  enabled: true
134
155
  token: "YOUR_TELEGRAM_BOT_TOKEN"
135
156
  allowedUsers: ["123456789"] # Your Telegram User ID
157
+
158
+ # Audio Transcription Support
159
+ audio:
160
+ enabled: true
161
+ apiKey: "YOUR_GEMINI_API_KEY" # Optional if llm.provider is 'gemini'
162
+ maxDurationSeconds: 300
136
163
  ```
137
164
 
138
165
  ## Testing
@@ -1,11 +1,19 @@
1
1
  import { Telegraf } from 'telegraf';
2
+ import { message } from 'telegraf/filters';
2
3
  import chalk from 'chalk';
4
+ import fs from 'fs-extra';
5
+ import path from 'path';
6
+ import os from 'os';
7
+ import { ConfigManager } from '../config/manager.js';
3
8
  import { DisplayManager } from '../runtime/display.js';
9
+ import { AudioAgent } from '../runtime/audio-agent.js';
4
10
  export class TelegramAdapter {
5
11
  bot = null;
6
12
  isConnected = false;
7
13
  display = DisplayManager.getInstance();
14
+ config = ConfigManager.getInstance();
8
15
  agent;
16
+ audioAgent = new AudioAgent();
9
17
  constructor(agent) {
10
18
  this.agent = agent;
11
19
  }
@@ -52,6 +60,67 @@ export class TelegramAdapter {
52
60
  }
53
61
  }
54
62
  });
63
+ // Handle Voice Messages
64
+ this.bot.on(message('voice'), async (ctx) => {
65
+ const user = ctx.from.username || ctx.from.first_name;
66
+ const userId = ctx.from.id.toString();
67
+ const config = this.config.get();
68
+ // AUTH GUARD
69
+ if (!this.isAuthorized(userId, allowedUsers)) {
70
+ this.display.log(`Unauthorized audio attempt by @${user} (ID: ${userId})`, { source: 'Telegram', level: 'warning' });
71
+ return;
72
+ }
73
+ if (!config.audio.enabled) {
74
+ await ctx.reply("Audio transcription is currently disabled.");
75
+ return;
76
+ }
77
+ const apiKey = config.audio.apiKey || (config.llm.provider === 'gemini' ? config.llm.api_key : undefined);
78
+ if (!apiKey) {
79
+ this.display.log(`Audio transcription failed: No Gemini API key available`, { source: 'AgentAudio', level: 'error' });
80
+ await ctx.reply("Audio transcription requires a Gemini API key. Please configure `audio.apiKey` or set LLM provider to Gemini.");
81
+ return;
82
+ }
83
+ const duration = ctx.message.voice.duration;
84
+ if (duration > config.audio.maxDurationSeconds) {
85
+ await ctx.reply(`Voice message too long. Max duration is ${config.audio.maxDurationSeconds}s.`);
86
+ return;
87
+ }
88
+ this.display.log(`Receiving voice message from @${user} (${duration}s)...`, { source: 'AgentAudio' });
89
+ let filePath = null;
90
+ try {
91
+ await ctx.sendChatAction('typing');
92
+ // Download
93
+ this.display.log(`Downloading audio for @${user}...`, { source: 'AgentAudio' });
94
+ const fileLink = await ctx.telegram.getFileLink(ctx.message.voice.file_id);
95
+ filePath = await this.downloadToTemp(fileLink);
96
+ // Transcribe
97
+ this.display.log(`Transcribing audio for @${user}...`, { source: 'AgentAudio' });
98
+ const text = await this.audioAgent.transcribe(filePath, 'audio/ogg', apiKey);
99
+ this.display.log(`Transcription success for @${user}: "${text}"`, { source: 'AgentAudio', level: 'success' });
100
+ // Echo the transcription back to the user first, so they can see
101
+ // what was understood, then feed the transcribed text to the agent
102
+ // exactly as if it had arrived as a normal text message and reply
103
+ // with the agent's answer.
104
+ await ctx.reply(`🎤 *Transcription*: _"${text}"_`, { parse_mode: 'Markdown' });
105
+ await ctx.sendChatAction('typing');
106
+ // Process with Agent
107
+ const response = await this.agent.chat(text);
108
+ if (response) {
109
+ await ctx.reply(response);
110
+ this.display.log(`Responded to @${user} (via audio)`, { source: 'Telegram' });
111
+ }
112
+ }
113
+ catch (error) {
114
+ this.display.log(`Audio processing error for @${user}: ${error.message}`, { source: 'AgentAudio', level: 'error' });
115
+ await ctx.reply("Sorry, I failed to process your audio message.");
116
+ }
117
+ finally {
118
+ // Cleanup
119
+ if (filePath && await fs.pathExists(filePath)) {
120
+ await fs.unlink(filePath).catch(() => { });
121
+ }
122
+ }
123
+ });
55
124
  this.bot.launch().catch((err) => {
56
125
  if (this.isConnected) {
57
126
  this.display.log(`Telegram bot error: ${err}`, { source: 'Telegram', level: 'error' });
@@ -71,6 +140,17 @@ export class TelegramAdapter {
71
140
  isAuthorized(userId, allowedUsers) {
72
141
  return allowedUsers.includes(userId);
73
142
  }
143
+ async downloadToTemp(url, extension = '.ogg') {
144
+ const response = await fetch(url);
145
+ if (!response.ok)
146
+ throw new Error(`Failed to download audio: ${response.statusText}`);
147
+ const tmpDir = os.tmpdir();
148
+ const fileName = `morpheus-audio-${Date.now()}${extension}`;
149
+ const filePath = path.join(tmpDir, fileName);
150
+ const buffer = Buffer.from(await response.arrayBuffer());
151
+ await fs.writeFile(filePath, buffer);
152
+ return filePath;
153
+ }
74
154
  async disconnect() {
75
155
  if (!this.isConnected || !this.bot) {
76
156
  return;
@@ -10,17 +10,19 @@ export const initCommand = new Command('init')
10
10
  .action(async () => {
11
11
  const display = DisplayManager.getInstance();
12
12
  renderBanner();
13
+ const configManager = ConfigManager.getInstance();
14
+ const currentConfig = await configManager.load();
13
15
  // Ensure directory exists
14
16
  await scaffold();
15
17
  display.log(chalk.blue('Let\'s set up your Morpheus agent!'));
16
18
  try {
17
19
  const name = await input({
18
20
  message: 'Name your agent:',
19
- default: 'morpheus',
21
+ default: currentConfig.agent.name,
20
22
  });
21
23
  const personality = await input({
22
24
  message: 'Describe its personality:',
23
- default: 'helpful and concise',
25
+ default: currentConfig.agent.personality,
24
26
  });
25
27
  const provider = await select({
26
28
  message: 'Select LLM Provider:',
@@ -30,6 +32,7 @@ export const initCommand = new Command('init')
30
32
  { name: 'Ollama', value: 'ollama' },
31
33
  { name: 'Google Gemini', value: 'gemini' },
32
34
  ],
35
+ default: currentConfig.llm.provider,
33
36
  });
34
37
  let defaultModel = 'gpt-3.5-turbo';
35
38
  switch (provider) {
@@ -46,17 +49,23 @@ export const initCommand = new Command('init')
46
49
  defaultModel = 'gemini-pro';
47
50
  break;
48
51
  }
52
+ if (provider === currentConfig.llm.provider) {
53
+ defaultModel = currentConfig.llm.model;
54
+ }
49
55
  const model = await input({
50
56
  message: 'Enter Model Name:',
51
57
  default: defaultModel,
52
58
  });
53
59
  let apiKey;
60
+ const hasExistingKey = !!currentConfig.llm.api_key;
61
+ const apiKeyMessage = hasExistingKey
62
+ ? 'Enter API Key (leave empty to preserve existing, or if using env vars):'
63
+ : 'Enter API Key (leave empty if using env vars):';
54
64
  if (provider !== 'ollama') {
55
65
  apiKey = await password({
56
- message: 'Enter API Key (leave empty if using env vars):',
66
+ message: apiKeyMessage,
57
67
  });
58
68
  }
59
- const configManager = ConfigManager.getInstance();
60
69
  // Update config
61
70
  await configManager.set('agent.name', name);
62
71
  await configManager.set('agent.personality', personality);
@@ -65,33 +74,81 @@ export const initCommand = new Command('init')
65
74
  if (apiKey) {
66
75
  await configManager.set('llm.api_key', apiKey);
67
76
  }
77
+ // Audio Configuration
78
+ const audioEnabled = await confirm({
79
+ message: 'Enable Audio Transcription? (Requires Gemini)',
80
+ default: currentConfig.audio?.enabled || false,
81
+ });
82
+ let audioKey;
83
+ let finalAudioEnabled = audioEnabled;
84
+ if (audioEnabled) {
85
+ if (provider === 'gemini') {
86
+ display.log(chalk.gray('Using main Gemini API key for audio.'));
87
+ }
88
+ else {
89
+ const hasExistingAudioKey = !!currentConfig.audio?.apiKey;
90
+ const audioKeyMessage = hasExistingAudioKey
91
+ ? 'Enter Gemini API Key for Audio (leave empty to preserve existing):'
92
+ : 'Enter Gemini API Key for Audio:';
93
+ audioKey = await password({
94
+ message: audioKeyMessage,
95
+ });
96
+ // Check if we have a valid key (new or existing)
97
+ const effectiveKey = audioKey || currentConfig.audio?.apiKey;
98
+ if (!effectiveKey) {
99
+ display.log(chalk.yellow('Audio disabled: Missing Gemini API Key required when using non-Gemini LLM provider.'));
100
+ finalAudioEnabled = false;
101
+ }
102
+ }
103
+ }
104
+ await configManager.set('audio.enabled', finalAudioEnabled);
105
+ if (audioKey) {
106
+ await configManager.set('audio.apiKey', audioKey);
107
+ }
68
108
  // External Channels Configuration
69
109
  const configureChannels = await confirm({
70
110
  message: 'Do you want to configure external channels?',
71
- default: false,
111
+ default: currentConfig.channels.telegram?.enabled || false,
72
112
  });
73
113
  if (configureChannels) {
74
114
  const channels = await checkbox({
75
115
  message: 'Select channels to enable:',
76
116
  choices: [
77
- { name: 'Telegram', value: 'telegram' },
117
+ {
118
+ name: 'Telegram',
119
+ value: 'telegram',
120
+ checked: currentConfig.channels.telegram?.enabled || false
121
+ },
78
122
  ],
79
123
  });
80
124
  if (channels.includes('telegram')) {
81
125
  display.log(chalk.yellow('\n--- Telegram Configuration ---'));
82
126
  display.log(chalk.gray('1. Create a bot via @BotFather to get your token.'));
83
127
  display.log(chalk.gray('2. Get your User ID via @userinfobot.\n'));
128
+ const hasExistingToken = !!currentConfig.channels.telegram?.token;
84
129
  const token = await password({
85
- message: 'Enter Telegram Bot Token:',
86
- validate: (value) => value.length > 0 || 'Token is required.'
130
+ message: hasExistingToken
131
+ ? 'Enter Telegram Bot Token (leave empty to preserve existing):'
132
+ : 'Enter Telegram Bot Token:',
133
+ validate: (value) => {
134
+ if (value.length > 0)
135
+ return true;
136
+ if (hasExistingToken)
137
+ return true;
138
+ return 'Token is required.';
139
+ }
87
140
  });
141
+ const defaultUsers = currentConfig.channels.telegram?.allowedUsers?.join(', ') || '';
88
142
  const allowedUsersInput = await input({
89
143
  message: 'Enter Allowed User IDs (comma separated):',
144
+ default: defaultUsers,
90
145
  validate: (value) => value.length > 0 || 'At least one user ID is required for security.'
91
146
  });
92
147
  const allowedUsers = allowedUsersInput.split(',').map(id => id.trim()).filter(id => id.length > 0);
93
148
  await configManager.set('channels.telegram.enabled', true);
94
- await configManager.set('channels.telegram.token', token);
149
+ if (token) {
150
+ await configManager.set('channels.telegram.token', token);
151
+ }
95
152
  await configManager.set('channels.telegram.allowedUsers', allowedUsers);
96
153
  }
97
154
  }
@@ -130,7 +130,7 @@ export const startCommand = new Command('start')
130
130
  });
131
131
  }
132
132
  // Keep process alive (Mock Agent Loop)
133
- display.startSpinner('Agent active and listening... (Press ESC to stop)');
133
+ display.startSpinner('Agent active and listening... (Press ctrl+c to stop)');
134
134
  // Prevent node from exiting
135
135
  setInterval(() => {
136
136
  // Heartbeat or background tasks would go here
@@ -1,5 +1,11 @@
1
1
  import { z } from 'zod';
2
2
  import { DEFAULT_CONFIG } from '../types/config.js';
3
+ export const AudioConfigSchema = z.object({
4
+ enabled: z.boolean().default(DEFAULT_CONFIG.audio.enabled),
5
+ apiKey: z.string().optional(),
6
+ maxDurationSeconds: z.number().default(DEFAULT_CONFIG.audio.maxDurationSeconds),
7
+ supportedMimeTypes: z.array(z.string()).default(DEFAULT_CONFIG.audio.supportedMimeTypes),
8
+ });
3
9
  // Zod Schema matching MorpheusConfig interface
4
10
  export const ConfigSchema = z.object({
5
11
  agent: z.object({
@@ -12,6 +18,7 @@ export const ConfigSchema = z.object({
12
18
  temperature: z.number().min(0).max(1).default(DEFAULT_CONFIG.llm.temperature),
13
19
  api_key: z.string().optional(),
14
20
  }).default(DEFAULT_CONFIG.llm),
21
+ audio: AudioConfigSchema.default(DEFAULT_CONFIG.audio),
15
22
  channels: z.object({
16
23
  telegram: z.object({
17
24
  enabled: z.boolean().default(false),
@@ -10,7 +10,12 @@ const mockConfig = {
10
10
  discord: { enabled: false }
11
11
  },
12
12
  ui: { enabled: false, port: 3333 },
13
- logging: { enabled: false, level: 'info', retention: '1d' }
13
+ logging: { enabled: false, level: 'info', retention: '1d' },
14
+ audio: {
15
+ enabled: false,
16
+ maxDurationSeconds: 60,
17
+ supportedMimeTypes: ['audio/ogg']
18
+ }
14
19
  };
15
20
  const run = async () => {
16
21
  try {
@@ -0,0 +1,45 @@
1
+ import { GoogleGenAI } from '@google/genai';
2
+ export class AudioAgent {
3
+ async transcribe(filePath, mimeType, apiKey) {
4
+ try {
5
+ const ai = new GoogleGenAI({ apiKey });
6
+ // Upload the file
7
+ const uploadResult = await ai.files.upload({
8
+ file: filePath,
9
+ config: { mimeType }
10
+ });
11
+ // Generate content (transcription)
12
+ // using gemini-2.5-flash-lite as it is fast and supports audio
13
+ const response = await ai.models.generateContent({
14
+ model: 'gemini-2.5-flash-lite',
15
+ contents: [
16
+ {
17
+ role: 'user',
18
+ parts: [
19
+ {
20
+ fileData: {
21
+ fileUri: uploadResult.uri,
22
+ mimeType: uploadResult.mimeType
23
+ }
24
+ },
25
+ { text: "Transcribe this audio message accurately. Return only the transcribed text without any additional commentary." }
26
+ ]
27
+ }
28
+ ]
29
+ });
30
+ // The new SDK returns text directly on the response object
31
+ const text = response.text;
32
+ if (!text) {
33
+ throw new Error('No transcription generated');
34
+ }
35
+ return text;
36
+ }
37
+ catch (error) {
38
+ // Wrap error for clarity
39
+ if (error instanceof Error) {
40
+ throw new Error(`Audio transcription failed: ${error.message}`);
41
+ }
42
+ throw error;
43
+ }
44
+ }
45
+ }
@@ -80,6 +80,9 @@ export class DisplayManager {
80
80
  else if (options.source === 'Agent') {
81
81
  color = chalk.hex('#FFA500');
82
82
  }
83
+ else if (options.source === 'AgentAudio') {
84
+ color = chalk.hex('#b902b9');
85
+ }
83
86
  prefix = color(`[${options.source}] `);
84
87
  }
85
88
  let formattedMessage = message;
@@ -8,6 +8,11 @@ export const DEFAULT_CONFIG = {
8
8
  level: 'info',
9
9
  retention: '14d',
10
10
  },
11
+ audio: {
12
+ enabled: true,
13
+ maxDurationSeconds: 300,
14
+ supportedMimeTypes: ['audio/ogg', 'audio/mp3', 'audio/mpeg', 'audio/wav'],
15
+ },
11
16
  llm: {
12
17
  provider: 'openai',
13
18
  model: 'gpt-4',