npm - obol-ai - Versions diffs - 0.3.15 → 0.3.17 - Mend

obol-ai 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md +6 -0
package/package.json +1 -1
package/src/media/whisper_transcribe.py +42 -0
package/src/status.js +5 -21
package/src/telegram/handlers/media.js +5 -7
package/src/telegram/handlers/special.js +3 -5
package/src/telegram/handlers/text.js +9 -5

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,9 @@
+## 0.3.16
+- Merge pull request #7 from jestersimpps/fix/stt-whisper-transcribe
+- fix: STT pipeline - add missing whisper_transcribe.py and fix media handler
+- Merge pull request #6 from jestersimpps/fix/status-instant-tool-labels
+- fix: replace async haiku status labels with instant sync formatToolCall
 ## 0.3.15
 - replace impulse with news system, exact datetime for analysis follow-ups
 - rewrite impulse prompt to sound like a friend, not an assistant

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "obol-ai",
-  "version": "0.3.15",
+  "version": "0.3.17",
   "description": "Self-evolving AI assistant that learns, remembers, and acts on its own. Persistent vector memory, self-rewriting personality, proactive heartbeats.",
   "main": "src/index.js",
   "bin": {

package/src/media/whisper_transcribe.py ADDED Viewed

@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+whisper_transcribe.py — Transcribe audio files using faster-whisper.
+Called by stt.js as: python3 whisper_transcribe.py <audio_file_path>
+Uses the 'tiny' model for fast CPU inference (~140MB).
+Outputs the transcribed text to stdout.
+Exits with code 1 on failure.
+"""
+import sys
+def transcribe(file_path):
+    """Transcribe an audio file and return the text."""
+    try:
+        from faster_whisper import WhisperModel
+    except ImportError:
+        print("ERROR: faster-whisper not installed. Run: pip3 install faster-whisper", file=sys.stderr)
+        sys.exit(1)
+    try:
+        model = WhisperModel("tiny", device="cpu", compute_type="int8")
+        segments, info = model.transcribe(file_path)
+        text = " ".join(segment.text.strip() for segment in segments).strip()
+        return text
+    except Exception as e:
+        print(f"ERROR: Transcription failed: {e}", file=sys.stderr)
+        sys.exit(1)
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python3 whisper_transcribe.py <audio_file_path>", file=sys.stderr)
+        sys.exit(1)
+    file_path = sys.argv[1]
+    result = transcribe(file_path)
+    if result:
+        print(result)
+    else:
+        sys.exit(1)

package/src/status.js CHANGED Viewed

@@ -1,5 +1,4 @@
 const TERM_WIDTH = 25;
-const _toolDescriptionCache = new Map();
 function buildStatusHtml({ route, elapsed, toolStatus, title = 'OBOL' }) {
   const pad = Math.max(0, TERM_WIDTH - title.length - 3);
@@ -28,25 +27,10 @@ function buildStatusHtml({ route, elapsed, toolStatus, title = 'OBOL' }) {
   return `<pre>${lines.join('\n')}</pre>`;
 }
-function describeToolCall(client, toolName, inputSummary) {
-  const key = `${toolName}:${inputSummary}`;
-  const cached = _toolDescriptionCache.get(key);
-  if (cached) return Promise.resolve(cached);
-  return client.messages.create({
-    model: 'claude-haiku-4-5',
-    max_tokens: 30,
-    system: 'Describe this tool call in 3-8 words from the user\'s perspective. Present participle. No quotes, period, or emoji.',
-    messages: [{ role: 'user', content: `${toolName}: ${inputSummary}` }],
-  }).then(r => {
-    const desc = r.content[0]?.text?.trim() || null;
-    if (desc) _toolDescriptionCache.set(key, desc);
-    if (_toolDescriptionCache.size > 200) {
-      const first = _toolDescriptionCache.keys().next().value;
-      _toolDescriptionCache.delete(first);
-    }
-    return desc;
-  }).catch(() => null);
+function formatToolCall(toolName, inputSummary) {
+  if (!inputSummary) return toolName;
+  const truncated = inputSummary.length > 40 ? inputSummary.slice(0, 37) + '...' : inputSummary;
+  return `${toolName} "${truncated}"`;
 }
-module.exports = { buildStatusHtml, describeToolCall, TERM_WIDTH };
+module.exports = { buildStatusHtml, formatToolCall, TERM_WIDTH };

package/src/telegram/handlers/media.js CHANGED Viewed

@@ -1,6 +1,6 @@
 const path = require('path');
 const { getTenant } = require('../../tenant');
-const { buildStatusHtml, describeToolCall } = require('../../status');
+const { buildStatusHtml, formatToolCall } = require('../../status');
 const media = require('../../media');
 const { sendHtml, startTyping, splitMessage } = require('../utils');
 const { MAX_MEDIA_SIZE, MEDIA_GROUP_DELAY_MS } = require('../constants');
@@ -43,9 +43,9 @@ async function processMediaItems(ctx, items, { config, allowedUsers, bot, create
         imageBlocks.push(media.bufferToImageBlock(item.buffer, item.fileInfo.mimeType));
       } else if (
         (item.fileInfo.mediaType === 'voice' || item.fileInfo.mediaType === 'audio') &&
-        tenant.toolPrefs?.get?.('speech_to_text')?.enabled === true
+        tenant.toolPrefs?.get?.('speech_to_text')?.enabled !== false
       ) {
-        const { transcribe } = require('../../stt');
+        const { transcribe } = require('../../media/stt');
         const transcription = await transcribe(savedPath);
         nonImageParts.push(transcription
           ? `[Voice message transcription: ${transcription}]`
@@ -80,11 +80,9 @@ async function processMediaItems(ctx, items, { config, allowedUsers, bot, create
       if (update.model) ri.model = update.model;
     };
     mediaChatCtx._onToolStart = (toolName, inputSummary) => {
-      status.setStatusText('Processing');
-      describeToolCall(tenant.claude.client, toolName, inputSummary).then(desc => {
-        if (desc) status.setStatusText(desc);
-      });
+      status.setStatusText(formatToolCall(toolName, inputSummary));
       status.start();
+      status.pushUpdate();
     };
     mediaChatCtx._onLockTimeout = () => {
       status.clear();

package/src/telegram/handlers/special.js CHANGED Viewed

@@ -1,5 +1,5 @@
 const { getTenant } = require('../../tenant');
-const { describeToolCall } = require('../../status');
+const { formatToolCall } = require('../../status');
 const { sendHtml, startTyping, splitMessage } = require('../utils');
 const { createChatContext, createStatusTracker } = require('./text');
@@ -92,11 +92,9 @@ async function processSpecial(ctx, prompt, deps) {
       if (update.model) ri.model = update.model;
     };
     chatCtx._onToolStart = (toolName, inputSummary) => {
-      status.setStatusText('Processing');
-      describeToolCall(tenant.claude.client, toolName, inputSummary).then(desc => {
-        if (desc) status.setStatusText(desc);
-      });
+      status.setStatusText(formatToolCall(toolName, inputSummary));
       status.start();
+      status.pushUpdate();
     };
     const { text: response, usage, model } = await tenant.claude.chat(prompt, chatCtx);

package/src/telegram/handlers/text.js CHANGED Viewed

@@ -1,6 +1,6 @@
 const { InlineKeyboard } = require('grammy');
 const { getTenant } = require('../../tenant');
-const { buildStatusHtml, describeToolCall } = require('../../status');
+const { buildStatusHtml, formatToolCall } = require('../../status');
 const { sendHtml, startTyping, splitMessage } = require('../utils');
 const { TEXT_BUFFER_GAP_MS, TEXT_BUFFER_MAX_PARTS, TEXT_BUFFER_MAX_CHARS, TEXT_BUFFER_THRESHOLD } = require('../constants');
@@ -133,6 +133,12 @@ function createStatusTracker(ctx, botName) {
       const html = buildStatusHtml({ route: routeInfo, elapsed, toolStatus: 'Formatting output', title });
       ctx.api.editMessageText(ctx.chat.id, statusMsgId, html, { parse_mode: 'HTML' }).catch(() => {});
     },
+    pushUpdate() {
+      if (!statusMsgId) return;
+      const elapsed = statusStart ? Math.round((Date.now() - statusStart) / 1000) : 0;
+      const html = buildStatusHtml({ route: routeInfo, elapsed, toolStatus: statusText, title });
+      ctx.api.editMessageText(ctx.chat.id, statusMsgId, html, { parse_mode: 'HTML', reply_markup: stopBtn }).catch(() => {});
+    },
     deleteMsg() {
       if (statusMsgId) ctx.api.deleteMessage(ctx.chat.id, statusMsgId).catch(() => {});
     },
@@ -185,11 +191,9 @@ async function processTextMessage(ctx, fullMessage, { config, allowedUsers, bot,
       if (update.model) ri.model = update.model;
     };
     chatContext._onToolStart = (toolName, inputSummary) => {
-      status.setStatusText('Processing');
-      describeToolCall(tenant.claude.client, toolName, inputSummary).then(desc => {
-        if (desc) status.setStatusText(desc);
-      });
+      status.setStatusText(formatToolCall(toolName, inputSummary));
       status.start();
+      status.pushUpdate();
     };
     chatContext._onLockTimeout = () => {
       status.clear();