morpheus-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,6 +63,17 @@ If you installed successfully but can't run the `morpheus` command:
63
63
  - On Linux/Mac, verify `echo $PATH`.
64
64
  2. **Restart Terminal**: New installations might not be visible until you restart your shell.
65
65
 
66
+ ## Using npx
67
+ You can run Morpheus without installing it globally using `npx`:
68
+
69
+ ```bash
70
+
71
+ npx morpheus-cli init
72
+
73
+ npx morpheus-cli start
74
+
75
+ ```
76
+
66
77
  ## Technical Overview
67
78
 
68
79
  Morpheus is built with **Node.js** and **TypeScript**, using **LangChain** as the orchestration engine. It runs as a background daemon process, managing connections to LLM providers (OpenAI, Anthropic, Ollama) and external channels (Telegram, Discord).
@@ -74,6 +85,16 @@ Morpheus is built with **Node.js** and **TypeScript**, using **LangChain** as th
74
85
  - **Configuration (`src/config/`)**: Singleton-based configuration manager using `zod` for validation and `js-yaml` for persistence (`~/.morpheus/config.yaml`).
75
86
  - **Channels (`src/channels/`)**: Adapters for external communication. Currently supports Telegram (`telegraf`) with strict user whitelisting.
76
87
 
88
+ ## Features
89
+
90
+ ### 🎙️ Audio Transcription (Telegram)
91
+ Send voice messages directly to the Telegram bot. Morpheus will:
92
+ 1. Transcribe the audio using **Google Gemini**.
93
+ 2. Process the text as a standard prompt.
94
+ 3. Reply with the answer.
95
+
96
+ *Requires a Google Gemini API Key.*
97
+
77
98
  ## Development Setup
78
99
 
79
100
  This guide is for developers contributing to the Morpheus codebase.
@@ -133,6 +154,12 @@ channels:
133
154
  enabled: true
134
155
  token: "YOUR_TELEGRAM_BOT_TOKEN"
135
156
  allowedUsers: ["123456789"] # Your Telegram User ID
157
+
158
+ # Audio Transcription Support
159
+ audio:
160
+ enabled: true
161
+ apiKey: "YOUR_GEMINI_API_KEY" # Optional if llm.provider is 'gemini'
162
+ maxDurationSeconds: 300
136
163
  ```
137
164
 
138
165
  ## Testing
@@ -1,11 +1,19 @@
1
1
  import { Telegraf } from 'telegraf';
2
+ import { message } from 'telegraf/filters';
2
3
  import chalk from 'chalk';
4
+ import fs from 'fs-extra';
5
+ import path from 'path';
6
+ import os from 'os';
7
+ import { ConfigManager } from '../config/manager.js';
3
8
  import { DisplayManager } from '../runtime/display.js';
9
+ import { AudioAgent } from '../runtime/audio-agent.js';
4
10
  export class TelegramAdapter {
5
11
  bot = null;
6
12
  isConnected = false;
7
13
  display = DisplayManager.getInstance();
14
+ config = ConfigManager.getInstance();
8
15
  agent;
16
+ audioAgent = new AudioAgent();
9
17
  constructor(agent) {
10
18
  this.agent = agent;
11
19
  }
@@ -52,6 +60,67 @@ export class TelegramAdapter {
52
60
  }
53
61
  }
54
62
  });
63
+ // Handle Voice Messages
64
+ this.bot.on(message('voice'), async (ctx) => {
65
+ const user = ctx.from.username || ctx.from.first_name;
66
+ const userId = ctx.from.id.toString();
67
+ const config = this.config.get();
68
+ // AUTH GUARD
69
+ if (!this.isAuthorized(userId, allowedUsers)) {
70
+ this.display.log(`Unauthorized audio attempt by @${user} (ID: ${userId})`, { source: 'Telegram', level: 'warning' });
71
+ return;
72
+ }
73
+ if (!config.audio.enabled) {
74
+ await ctx.reply("Audio transcription is currently disabled.");
75
+ return;
76
+ }
77
+ const apiKey = config.audio.apiKey || (config.llm.provider === 'gemini' ? config.llm.api_key : undefined);
78
+ if (!apiKey) {
79
+ this.display.log(`Audio transcription failed: No Gemini API key available`, { source: 'AgentAudio', level: 'error' });
80
+ await ctx.reply("Audio transcription requires a Gemini API key. Please configure `audio.apiKey` or set LLM provider to Gemini.");
81
+ return;
82
+ }
83
+ const duration = ctx.message.voice.duration;
84
+ if (duration > config.audio.maxDurationSeconds) {
85
+ await ctx.reply(`Voice message too long. Max duration is ${config.audio.maxDurationSeconds}s.`);
86
+ return;
87
+ }
88
+ this.display.log(`Receiving voice message from @${user} (${duration}s)...`, { source: 'AgentAudio' });
89
+ let filePath = null;
90
+ try {
91
+ await ctx.sendChatAction('typing');
92
+ // Download
93
+ this.display.log(`Downloading audio for @${user}...`, { source: 'AgentAudio' });
94
+ const fileLink = await ctx.telegram.getFileLink(ctx.message.voice.file_id);
95
+ filePath = await this.downloadToTemp(fileLink);
96
+ // Transcribe
97
+ this.display.log(`Transcribing audio for @${user}...`, { source: 'AgentAudio' });
98
+ const text = await this.audioAgent.transcribe(filePath, 'audio/ogg', apiKey);
99
+ this.display.log(`Transcription success for @${user}: "${text}"`, { source: 'AgentAudio', level: 'success' });
100
+ // Echo the transcription back to the user first, so they can see
101
+ // what was understood, then feed the transcribed text to the agent
102
+ // exactly as if it had arrived as a normal text message and reply
103
+ // with the agent's answer.
104
+ await ctx.reply(`🎤 *Transcription*: _"${text}"_`, { parse_mode: 'Markdown' });
105
+ await ctx.sendChatAction('typing');
106
+ // Process with Agent
107
+ const response = await this.agent.chat(text);
108
+ if (response) {
109
+ await ctx.reply(response);
110
+ this.display.log(`Responded to @${user} (via audio)`, { source: 'Telegram' });
111
+ }
112
+ }
113
+ catch (error) {
114
+ this.display.log(`Audio processing error for @${user}: ${error.message}`, { source: 'AgentAudio', level: 'error' });
115
+ await ctx.reply("Sorry, I failed to process your audio message.");
116
+ }
117
+ finally {
118
+ // Cleanup
119
+ if (filePath && await fs.pathExists(filePath)) {
120
+ await fs.unlink(filePath).catch(() => { });
121
+ }
122
+ }
123
+ });
55
124
  this.bot.launch().catch((err) => {
56
125
  if (this.isConnected) {
57
126
  this.display.log(`Telegram bot error: ${err}`, { source: 'Telegram', level: 'error' });
@@ -71,6 +140,17 @@ export class TelegramAdapter {
71
140
  isAuthorized(userId, allowedUsers) {
72
141
  return allowedUsers.includes(userId);
73
142
  }
143
+ async downloadToTemp(url, extension = '.ogg') {
144
+ const response = await fetch(url);
145
+ if (!response.ok)
146
+ throw new Error(`Failed to download audio: ${response.statusText}`);
147
+ const tmpDir = os.tmpdir();
148
+ const fileName = `morpheus-audio-${Date.now()}${extension}`;
149
+ const filePath = path.join(tmpDir, fileName);
150
+ const buffer = Buffer.from(await response.arrayBuffer());
151
+ await fs.writeFile(filePath, buffer);
152
+ return filePath;
153
+ }
74
154
  async disconnect() {
75
155
  if (!this.isConnected || !this.bot) {
76
156
  return;
@@ -10,17 +10,19 @@ export const initCommand = new Command('init')
10
10
  .action(async () => {
11
11
  const display = DisplayManager.getInstance();
12
12
  renderBanner();
13
+ const configManager = ConfigManager.getInstance();
14
+ const currentConfig = await configManager.load();
13
15
  // Ensure directory exists
14
16
  await scaffold();
15
17
  display.log(chalk.blue('Let\'s set up your Morpheus agent!'));
16
18
  try {
17
19
  const name = await input({
18
20
  message: 'Name your agent:',
19
- default: 'morpheus',
21
+ default: currentConfig.agent.name,
20
22
  });
21
23
  const personality = await input({
22
24
  message: 'Describe its personality:',
23
- default: 'helpful and concise',
25
+ default: currentConfig.agent.personality,
24
26
  });
25
27
  const provider = await select({
26
28
  message: 'Select LLM Provider:',
@@ -30,6 +32,7 @@ export const initCommand = new Command('init')
30
32
  { name: 'Ollama', value: 'ollama' },
31
33
  { name: 'Google Gemini', value: 'gemini' },
32
34
  ],
35
+ default: currentConfig.llm.provider,
33
36
  });
34
37
  let defaultModel = 'gpt-3.5-turbo';
35
38
  switch (provider) {
@@ -46,17 +49,23 @@ export const initCommand = new Command('init')
46
49
  defaultModel = 'gemini-pro';
47
50
  break;
48
51
  }
52
+ if (provider === currentConfig.llm.provider) {
53
+ defaultModel = currentConfig.llm.model;
54
+ }
49
55
  const model = await input({
50
56
  message: 'Enter Model Name:',
51
57
  default: defaultModel,
52
58
  });
53
59
  let apiKey;
60
+ const hasExistingKey = !!currentConfig.llm.api_key;
61
+ const apiKeyMessage = hasExistingKey
62
+ ? 'Enter API Key (leave empty to preserve existing, or if using env vars):'
63
+ : 'Enter API Key (leave empty if using env vars):';
54
64
  if (provider !== 'ollama') {
55
65
  apiKey = await password({
56
- message: 'Enter API Key (leave empty if using env vars):',
66
+ message: apiKeyMessage,
57
67
  });
58
68
  }
59
- const configManager = ConfigManager.getInstance();
60
69
  // Update config
61
70
  await configManager.set('agent.name', name);
62
71
  await configManager.set('agent.personality', personality);
@@ -65,33 +74,81 @@ export const initCommand = new Command('init')
65
74
  if (apiKey) {
66
75
  await configManager.set('llm.api_key', apiKey);
67
76
  }
77
+ // Audio Configuration
78
+ const audioEnabled = await confirm({
79
+ message: 'Enable Audio Transcription? (Requires Gemini)',
80
+ default: currentConfig.audio?.enabled || false,
81
+ });
82
+ let audioKey;
83
+ let finalAudioEnabled = audioEnabled;
84
+ if (audioEnabled) {
85
+ if (provider === 'gemini') {
86
+ display.log(chalk.gray('Using main Gemini API key for audio.'));
87
+ }
88
+ else {
89
+ const hasExistingAudioKey = !!currentConfig.audio?.apiKey;
90
+ const audioKeyMessage = hasExistingAudioKey
91
+ ? 'Enter Gemini API Key for Audio (leave empty to preserve existing):'
92
+ : 'Enter Gemini API Key for Audio:';
93
+ audioKey = await password({
94
+ message: audioKeyMessage,
95
+ });
96
+ // Check if we have a valid key (new or existing)
97
+ const effectiveKey = audioKey || currentConfig.audio?.apiKey;
98
+ if (!effectiveKey) {
99
+ display.log(chalk.yellow('Audio disabled: Missing Gemini API Key required when using non-Gemini LLM provider.'));
100
+ finalAudioEnabled = false;
101
+ }
102
+ }
103
+ }
104
+ await configManager.set('audio.enabled', finalAudioEnabled);
105
+ if (audioKey) {
106
+ await configManager.set('audio.apiKey', audioKey);
107
+ }
68
108
  // External Channels Configuration
69
109
  const configureChannels = await confirm({
70
110
  message: 'Do you want to configure external channels?',
71
- default: false,
111
+ default: currentConfig.channels.telegram?.enabled || false,
72
112
  });
73
113
  if (configureChannels) {
74
114
  const channels = await checkbox({
75
115
  message: 'Select channels to enable:',
76
116
  choices: [
77
- { name: 'Telegram', value: 'telegram' },
117
+ {
118
+ name: 'Telegram',
119
+ value: 'telegram',
120
+ checked: currentConfig.channels.telegram?.enabled || false
121
+ },
78
122
  ],
79
123
  });
80
124
  if (channels.includes('telegram')) {
81
125
  display.log(chalk.yellow('\n--- Telegram Configuration ---'));
82
126
  display.log(chalk.gray('1. Create a bot via @BotFather to get your token.'));
83
127
  display.log(chalk.gray('2. Get your User ID via @userinfobot.\n'));
128
+ const hasExistingToken = !!currentConfig.channels.telegram?.token;
84
129
  const token = await password({
85
- message: 'Enter Telegram Bot Token:',
86
- validate: (value) => value.length > 0 || 'Token is required.'
130
+ message: hasExistingToken
131
+ ? 'Enter Telegram Bot Token (leave empty to preserve existing):'
132
+ : 'Enter Telegram Bot Token:',
133
+ validate: (value) => {
134
+ if (value.length > 0)
135
+ return true;
136
+ if (hasExistingToken)
137
+ return true;
138
+ return 'Token is required.';
139
+ }
87
140
  });
141
+ const defaultUsers = currentConfig.channels.telegram?.allowedUsers?.join(', ') || '';
88
142
  const allowedUsersInput = await input({
89
143
  message: 'Enter Allowed User IDs (comma separated):',
144
+ default: defaultUsers,
90
145
  validate: (value) => value.length > 0 || 'At least one user ID is required for security.'
91
146
  });
92
147
  const allowedUsers = allowedUsersInput.split(',').map(id => id.trim()).filter(id => id.length > 0);
93
148
  await configManager.set('channels.telegram.enabled', true);
94
- await configManager.set('channels.telegram.token', token);
149
+ if (token) {
150
+ await configManager.set('channels.telegram.token', token);
151
+ }
95
152
  await configManager.set('channels.telegram.allowedUsers', allowedUsers);
96
153
  }
97
154
  }
@@ -130,7 +130,7 @@ export const startCommand = new Command('start')
130
130
  });
131
131
  }
132
132
  // Keep process alive (Mock Agent Loop)
133
- display.startSpinner('Agent active and listening... (Press ESC to stop)');
133
+ display.startSpinner('Agent active and listening... (Press ctrl+c to stop)');
134
134
  // Prevent node from exiting
135
135
  setInterval(() => {
136
136
  // Heartbeat or background tasks would go here
@@ -1,5 +1,11 @@
1
1
  import { z } from 'zod';
2
2
  import { DEFAULT_CONFIG } from '../types/config.js';
3
+ export const AudioConfigSchema = z.object({
4
+ enabled: z.boolean().default(DEFAULT_CONFIG.audio.enabled),
5
+ apiKey: z.string().optional(),
6
+ maxDurationSeconds: z.number().default(DEFAULT_CONFIG.audio.maxDurationSeconds),
7
+ supportedMimeTypes: z.array(z.string()).default(DEFAULT_CONFIG.audio.supportedMimeTypes),
8
+ });
3
9
  // Zod Schema matching MorpheusConfig interface
4
10
  export const ConfigSchema = z.object({
5
11
  agent: z.object({
@@ -12,6 +18,7 @@ export const ConfigSchema = z.object({
12
18
  temperature: z.number().min(0).max(1).default(DEFAULT_CONFIG.llm.temperature),
13
19
  api_key: z.string().optional(),
14
20
  }).default(DEFAULT_CONFIG.llm),
21
+ audio: AudioConfigSchema.default(DEFAULT_CONFIG.audio),
15
22
  channels: z.object({
16
23
  telegram: z.object({
17
24
  enabled: z.boolean().default(false),
@@ -10,7 +10,12 @@ const mockConfig = {
10
10
  discord: { enabled: false }
11
11
  },
12
12
  ui: { enabled: false, port: 3333 },
13
- logging: { enabled: false, level: 'info', retention: '1d' }
13
+ logging: { enabled: false, level: 'info', retention: '1d' },
14
+ audio: {
15
+ enabled: false,
16
+ maxDurationSeconds: 60,
17
+ supportedMimeTypes: ['audio/ogg']
18
+ }
14
19
  };
15
20
  const run = async () => {
16
21
  try {
@@ -0,0 +1,45 @@
1
+ import { GoogleGenAI } from '@google/genai';
2
+ export class AudioAgent {
3
+ async transcribe(filePath, mimeType, apiKey) {
4
+ try {
5
+ const ai = new GoogleGenAI({ apiKey });
6
+ // Upload the file
7
+ const uploadResult = await ai.files.upload({
8
+ file: filePath,
9
+ config: { mimeType }
10
+ });
11
+ // Generate content (transcription)
12
+ // using gemini-2.5-flash-lite as it is fast and supports audio
13
+ const response = await ai.models.generateContent({
14
+ model: 'gemini-2.5-flash-lite',
15
+ contents: [
16
+ {
17
+ role: 'user',
18
+ parts: [
19
+ {
20
+ fileData: {
21
+ fileUri: uploadResult.uri,
22
+ mimeType: uploadResult.mimeType
23
+ }
24
+ },
25
+ { text: "Transcribe this audio message accurately. Return only the transcribed text without any additional commentary." }
26
+ ]
27
+ }
28
+ ]
29
+ });
30
+ // The new SDK returns text directly on the response object
31
+ const text = response.text;
32
+ if (!text) {
33
+ throw new Error('No transcription generated');
34
+ }
35
+ return text;
36
+ }
37
+ catch (error) {
38
+ // Wrap error for clarity
39
+ if (error instanceof Error) {
40
+ throw new Error(`Audio transcription failed: ${error.message}`);
41
+ }
42
+ throw error;
43
+ }
44
+ }
45
+ }
@@ -80,6 +80,9 @@ export class DisplayManager {
80
80
  else if (options.source === 'Agent') {
81
81
  color = chalk.hex('#FFA500');
82
82
  }
83
+ else if (options.source === 'AgentAudio') {
84
+ color = chalk.hex('#b902b9');
85
+ }
83
86
  prefix = color(`[${options.source}] `);
84
87
  }
85
88
  let formattedMessage = message;
@@ -8,6 +8,11 @@ export const DEFAULT_CONFIG = {
8
8
  level: 'info',
9
9
  retention: '14d',
10
10
  },
11
+ audio: {
12
+ enabled: true,
13
+ maxDurationSeconds: 300,
14
+ supportedMimeTypes: ['audio/ogg', 'audio/mp3', 'audio/mpeg', 'audio/wav'],
15
+ },
11
16
  llm: {
12
17
  provider: 'openai',
13
18
  model: 'gpt-4',