npm - @blockrun/franklin - Versions diffs - 3.15.4 → 3.15.5 - Mend

@blockrun/franklin 3.15.4 → 3.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/agent/context.js CHANGED Viewed

@@ -83,7 +83,13 @@ A user approving an action once does NOT mean they approve it in all contexts. M
 }
 function getOutputEfficiencySection() {
     return `# Output Efficiency
-Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said. Do not narrate your actions ("Let me read the file...", "I'll now search for..."). Just call the tools.
+Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
+**No pre-tool narration.** Do NOT write things like "让我先 X...", "Let me read the file...", "I'll now search for...", "好的，让我研究一下...", "现在我来 X", "OK now I have everything I need", "完美！", "好，现在我完全明白了". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool.
+The exception: a single short sentence between tool calls is fine when it tells the user something they would otherwise miss — a finding ("Build passes — moving on to tests."), a course correction ("That approach won't work — switching to X."), or a one-line status before a long-running operation. One sentence per update is enough.
+**No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language (English while the user writes Chinese, Korean while the user writes Chinese, etc.), do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "좋아", "OK now", or "Alright" in the middle of a Chinese reply.
 Focus text output on:
 - Decisions that need the user's input
@@ -97,7 +103,7 @@ function getToneAndStyleSection() {
 - Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
 - Your responses should be short and concise.
 - When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
-- Do not use a colon before tool calls. Your tool calls may not be shown directly in the output, so text like "Let me read the file:" followed by a read tool call should just be "Let me read the file." with a period.`;
+- See "Output Efficiency" above for the rules on pre-tool narration and language consistency. Those override any habit you may have of writing "Let me X..." before a tool call.`;
 }
 function getGitProtocolSection() {
     return `# Git Protocol

package/dist/agent/loop.js CHANGED Viewed

@@ -651,13 +651,27 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             // Circuit breaker: stop retrying after 3 consecutive failures
             if (compactFailures < 3) {
                 try {
+                    // Capture pre-compaction size so we can surface "saved X%" to the
+                    // user. Without this, the per-turn input-token count would silently
+                    // drop from e.g. 215K → 9K and look like a metric bug.
+                    const beforeTokens = estimateHistoryTokens(history);
                     const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
                     if (didCompact) {
                         replaceHistory(history, compacted);
                         resetTokenAnchor();
                         compactFailures = 0;
+                        const afterTokens = estimateHistoryTokens(history);
+                        const pct = beforeTokens > 0
+                            ? Math.round((1 - afterTokens / beforeTokens) * 100)
+                            : 0;
+                        // Visible to the user — explains the upcoming token-count drop
+                        // in the next turn footer and frames it as a feature, not a bug.
+                        onEvent({
+                            kind: 'text_delta',
+                            text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
+                        });
                         if (config.debug) {
-                            console.error(`[franklin] History compacted: ~${estimateHistoryTokens(history)} tokens`);
+                            console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
                         }
                     }
                 }

package/dist/router/index.js CHANGED Viewed

@@ -148,6 +148,11 @@ const AGENTIC_URL_PATTERNS = [
     /github\.com/i, /gitlab\.com/i, /bitbucket\.org/i,
     /npmjs\.com/i, /pypi\.org/i, /crates\.io/i,
     /stackoverflow\.com/i, /docs\.\w+/i,
+    // Media URLs need the model to actually fetch+understand content,
+    // not just regurgitate from weights. Bumping these prevents the
+    // "user pastes 3 YouTube links → SIMPLE-tier model gives up" path.
+    /youtube\.com/i, /youtu\.be/i,
+    /twitter\.com/i, /x\.com/i,
 ];
 function countMatches(text, keywords) {
     const lower = text.toLowerCase();

package/dist/tools/webfetch.js CHANGED Viewed

@@ -59,6 +59,35 @@ async function execute(input, ctx) {
         return { output: `Error: only http/https URLs are supported`, isError: true };
     }
     const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
+    // ── YouTube special case ──
+    // Plain HTML fetch on a youtube.com URL returns the SPA bundle (a wall of
+    // minified JS), which is useless to the model and was the failure mode
+    // behind "I can't access YouTube" responses. Auto-redirect to the caption
+    // track so the model gets the actual spoken content. Transparent to
+    // callers — same WebFetch tool, the right thing happens for video URLs.
+    const videoId = extractYouTubeVideoId(parsed);
+    if (videoId) {
+        const ytKey = cacheKey(`youtube-transcript:${videoId}`, maxLen);
+        const ytCached = getCached(ytKey);
+        if (ytCached)
+            return { output: ytCached + '\n\n(cached)' };
+        const transcript = await fetchYouTubeTranscript(videoId, ctx.abortSignal);
+        if (transcript.ok) {
+            const truncated = transcript.text.length > maxLen
+                ? transcript.text.slice(0, maxLen) + '\n\n... (transcript truncated)'
+                : transcript.text;
+            const output = `URL: ${url}\nSource: YouTube auto-captions (videoId=${videoId}, lang=${transcript.lang})\n\n${truncated}`;
+            setCached(ytKey, output);
+            return { output };
+        }
+        // Transcript path failed: do NOT fall through to the raw HTML fetch.
+        // Surface why so the model can decide what to do (e.g., suggest a manual
+        // step) instead of silently scraping JS.
+        return {
+            output: `YouTube transcript unavailable for ${url} — ${transcript.reason}. The video may have captions disabled or be region-locked.`,
+            isError: true,
+        };
+    }
     const key = cacheKey(url, maxLen);
     // Check cache first
     const cached = getCached(key);
@@ -147,6 +176,143 @@ async function execute(input, ctx) {
         ctx.abortSignal.removeEventListener('abort', onAbort);
     }
 }
+// ─── YouTube transcript fetcher ─────────────────────────────────────────────
+// Fetches auto-generated or uploaded captions for a YouTube video by parsing
+// the watch-page's `ytInitialPlayerResponse` JSON. Pure HTTP, no deps. Saves
+// us from the alternative (shelling out to yt-dlp, which the user may not
+// have installed) and from leaving the model to guess at JS bundles.
+// Extracts a YouTube video id from an already-parsed URL.
+// `parsed` is read through `hostname`, `pathname` and `searchParams`, so it is
+// presumably a WHATWG `URL` instance — confirm against the caller.
+// Recognized shapes: youtu.be/{id}, /watch?v={id}, and /shorts|live|embed/{id}
+// on youtube.com, m.youtube.com and music.youtube.com (a leading "www." is
+// ignored). Any other host, or an unrecognized path, yields null.
+function extractYouTubeVideoId(parsed) {
+    // Strip a leading "www." so the host comparisons below need one spelling.
+    const host = parsed.hostname.replace(/^www\./, '');
+    if (host === 'youtu.be') {
+        // Short links: the id is the first path segment; an empty segment
+        // becomes null. The segment is returned as-is, without format checks.
+        return parsed.pathname.slice(1).split('/')[0] || null;
+    }
+    if (host === 'youtube.com' || host === 'm.youtube.com' || host === 'music.youtube.com') {
+        if (parsed.pathname === '/watch') {
+            // Returns whatever `v` holds, unvalidated (presumably null when the
+            // parameter is absent, per URLSearchParams semantics).
+            return parsed.searchParams.get('v');
+        }
+        // /shorts/{id}, /live/{id}, /embed/{id}
+        // Only this branch validates the id: 6+ characters from [A-Za-z0-9_-].
+        const shortsMatch = parsed.pathname.match(/^\/(?:shorts|live|embed)\/([A-Za-z0-9_-]{6,})/);
+        if (shortsMatch)
+            return shortsMatch[1];
+    }
+    return null;
+}
+// Downloads the caption text for one YouTube video.
+// Steps: fetch the watch page, pull the `ytInitialPlayerResponse` JSON out of
+// the HTML, pick a caption track, fetch that track, and flatten it to text.
+// Parameters:
+//   videoId     - id placed (URL-encoded) into the watch URL.
+//   abortSignal - caller's signal; its abort is forwarded to both fetches.
+// Returns `{ ok: true, text, lang }` on success, otherwise
+// `{ ok: false, reason }`. Failures inside the try block are reported through
+// `reason` rather than thrown.
+async function fetchYouTubeTranscript(videoId, abortSignal) {
+    const watchUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}&hl=en`;
+    // One internal controller drives both fetches, so the 20 s timer below is
+    // a budget for the whole operation, not per request.
+    const ctrl = new AbortController();
+    const timer = setTimeout(() => ctrl.abort(), 20_000);
+    const onAbort = () => ctrl.abort();
+    // NOTE(review): a signal that is already aborted when this runs would
+    // presumably never fire 'abort' again — confirm callers cannot pass one.
+    abortSignal.addEventListener('abort', onAbort, { once: true });
+    try {
+        const res = await fetch(watchUrl, {
+            signal: ctrl.signal,
+            headers: {
+                // Pretend to be a desktop browser so YouTube serves the watch page
+                // with the player config inlined. The default Node fetch UA gets a
+                // consent-redirect HTML stub that has no caption metadata.
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
+                'Accept-Language': 'en-US,en;q=0.9',
+            },
+            redirect: 'follow',
+        });
+        if (!res.ok) {
+            return { ok: false, reason: `watch page HTTP ${res.status}` };
+        }
+        const html = await res.text();
+        // ytInitialPlayerResponse can be assigned in two shapes; both occur in
+        // practice across mobile vs desktop responses.
+        // NOTE(review): the second pattern is non-greedy and stops at the first
+        // "};" it meets, so it can capture a truncated object; that case then
+        // surfaces below as a JSON parse failure.
+        const match = html.match(/var\s+ytInitialPlayerResponse\s*=\s*(\{.+?\})\s*;\s*var\s+meta/s) ||
+            html.match(/ytInitialPlayerResponse\s*=\s*(\{.+?\});/s);
+        if (!match) {
+            return { ok: false, reason: 'could not locate ytInitialPlayerResponse in watch page' };
+        }
+        let player;
+        try {
+            player = JSON.parse(match[1]);
+        }
+        catch {
+            return { ok: false, reason: 'ytInitialPlayerResponse JSON parse failed' };
+        }
+        // A missing captions section is treated the same as an empty track list.
+        const tracks = player.captions?.playerCaptionsTracklistRenderer?.captionTracks ?? [];
+        if (tracks.length === 0) {
+            return { ok: false, reason: 'no caption tracks (video has captions disabled)' };
+        }
+        // Prefer English; fall back to first available; auto-captions are fine.
+        const track = tracks.find(t => (t.languageCode || '').startsWith('en')) ||
+            tracks[0];
+        if (!track?.baseUrl) {
+            return { ok: false, reason: 'caption track has no baseUrl' };
+        }
+        // Request the JSON3 format — easier to parse than the default XML and
+        // YouTube serves it on the same endpoint with a query flag.
+        // NOTE(review): appending "&fmt=json3" assumes baseUrl already carries a
+        // query string — confirm.
+        const captionUrl = track.baseUrl + (track.baseUrl.includes('fmt=') ? '' : '&fmt=json3');
+        const capRes = await fetch(captionUrl, {
+            signal: ctrl.signal,
+            headers: { 'User-Agent': 'Mozilla/5.0' },
+        });
+        if (!capRes.ok) {
+            return { ok: false, reason: `caption fetch HTTP ${capRes.status}` };
+        }
+        const capRaw = await capRes.text();
+        // Try the JSON3 parser first; an empty result falls back to the XML parser.
+        const text = parseJson3Captions(capRaw) || parseXmlCaptions(capRaw);
+        if (!text) {
+            return { ok: false, reason: 'caption response had no readable text segments' };
+        }
+        return { ok: true, text, lang: track.languageCode || 'unknown' };
+    }
+    catch (err) {
+        // Only a caller-initiated abort is labelled "request aborted"; any other
+        // failure (including the internal timer firing) is reported as a fetch
+        // error with the underlying message.
+        if (abortSignal.aborted) {
+            return { ok: false, reason: 'request aborted' };
+        }
+        return {
+            ok: false,
+            reason: `fetch error: ${err instanceof Error ? err.message : String(err)}`,
+        };
+    }
+    finally {
+        // Always release the timer and the listener added on the caller's signal.
+        clearTimeout(timer);
+        abortSignal.removeEventListener('abort', onAbort);
+    }
+}
+// Flattens a JSON3 caption payload into plain text.
+// `raw` is the response body as a string. The `utf8` field of every entry in
+// each event's `segs` array is concatenated in document order.
+// Returns '' when the body is not valid JSON, has no `events`, or anything
+// else inside the try block throws.
+function parseJson3Captions(raw) {
+    try {
+        const obj = JSON.parse(raw);
+        if (!obj.events)
+            return '';
+        const out = [];
+        for (const ev of obj.events) {
+            // Events without segments contribute no text.
+            if (!ev.segs)
+                continue;
+            for (const seg of ev.segs) {
+                if (seg.utf8)
+                    out.push(seg.utf8);
+            }
+        }
+        // Join the fragments with no separator, turn newline runs into a space,
+        // collapse repeated whitespace, and trim the ends.
+        return out.join('').replace(/\n+/g, ' ').replace(/\s{2,}/g, ' ').trim();
+    }
+    catch {
+        return '';
+    }
+}
+// Flattens an XML caption payload into plain text.
+// Collects the inner text of every <text ...>...</text> element, decodes five
+// entities, normalizes whitespace, drops empty segments, and joins the rest
+// with single spaces. Returns '' when no <text> element is found.
+function parseXmlCaptions(raw) {
+    // Fallback for older XML format. Regex-only parse — caption text is
+    // simple enough that pulling in xml2js for this would be overkill.
+    const matches = [...raw.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/g)];
+    if (matches.length === 0)
+        return '';
+    // `&amp;` is decoded first, so a doubly-escaped sequence such as
+    // `&amp;#39;` ends up fully decoded by the later replacements. Entities
+    // outside these five are left untouched.
+    return matches
+        .map(m => m[1]
+        .replace(/&amp;/g, '&')
+        .replace(/&lt;/g, '<')
+        .replace(/&gt;/g, '>')
+        .replace(/&quot;/g, '"')
+        .replace(/&#39;/g, "'")
+        .replace(/\s+/g, ' ')
+        .trim())
+        .filter(Boolean)
+        .join(' ');
+}
 function stripHtml(html) {
     return html
         // Remove non-content elements

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.15.4",
+  "version": "3.15.5",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {