@blockrun/franklin 3.15.4 → 3.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -83,7 +83,13 @@ A user approving an action once does NOT mean they approve it in all contexts. M
83
83
  }
84
84
  function getOutputEfficiencySection() {
85
85
  return `# Output Efficiency
86
- Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said. Do not narrate your actions ("Let me read the file...", "I'll now search for..."). Just call the tools.
86
+ Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
87
+
88
+ **No pre-tool narration.** Do NOT write things like "让我先 X...", "Let me read the file...", "I'll now search for...", "好的,让我研究一下...", "现在我来 X", "OK now I have everything I need", "完美!", "好,现在我完全明白了". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool.
89
+
90
+ The exception: a single short sentence between tool calls is fine when it tells the user something they would otherwise miss — a finding ("Build passes — moving on to tests."), a course correction ("That approach won't work — switching to X."), or a one-line status before a long-running operation. One sentence per update is enough.
91
+
92
+ **No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language (English while the user writes Chinese, Korean while the user writes Chinese, etc.), do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "좋아", "OK now", or "Alright" in the middle of a Chinese reply.
87
93
 
88
94
  Focus text output on:
89
95
  - Decisions that need the user's input
@@ -97,7 +103,7 @@ function getToneAndStyleSection() {
97
103
  - Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
98
104
  - Your responses should be short and concise.
99
105
  - When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
100
- - Do not use a colon before tool calls. Your tool calls may not be shown directly in the output, so text like "Let me read the file:" followed by a read tool call should just be "Let me read the file." with a period.`;
106
+ - See "Output Efficiency" above for the rules on pre-tool narration and language consistency. Those override any habit you may have of writing "Let me X..." before a tool call.`;
101
107
  }
102
108
  function getGitProtocolSection() {
103
109
  return `# Git Protocol
@@ -651,13 +651,27 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
651
651
  // Circuit breaker: stop retrying after 3 consecutive failures
652
652
  if (compactFailures < 3) {
653
653
  try {
654
+ // Capture pre-compaction size so we can surface "saved X%" to the
655
+ // user. Without this, the per-turn input-token count would silently
656
+ // drop from e.g. 215K → 9K and look like a metric bug.
657
+ const beforeTokens = estimateHistoryTokens(history);
654
658
  const { history: compacted, compacted: didCompact } = await autoCompactIfNeeded(history, config.model, client, config.debug);
655
659
  if (didCompact) {
656
660
  replaceHistory(history, compacted);
657
661
  resetTokenAnchor();
658
662
  compactFailures = 0;
663
+ const afterTokens = estimateHistoryTokens(history);
664
+ const pct = beforeTokens > 0
665
+ ? Math.round((1 - afterTokens / beforeTokens) * 100)
666
+ : 0;
667
+ // Visible to the user — explains the upcoming token-count drop
668
+ // in the next turn footer and frames it as a feature, not a bug.
669
+ onEvent({
670
+ kind: 'text_delta',
671
+ text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
672
+ });
659
673
  if (config.debug) {
660
- console.error(`[franklin] History compacted: ~${estimateHistoryTokens(history)} tokens`);
674
+ console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
661
675
  }
662
676
  }
663
677
  }
@@ -148,6 +148,11 @@ const AGENTIC_URL_PATTERNS = [
148
148
  /github\.com/i, /gitlab\.com/i, /bitbucket\.org/i,
149
149
  /npmjs\.com/i, /pypi\.org/i, /crates\.io/i,
150
150
  /stackoverflow\.com/i, /docs\.\w+/i,
151
+ // Media URLs need the model to actually fetch+understand content,
152
+ // not just regurgitate from weights. Bumping these prevents the
153
+ // "user pastes 3 YouTube links → SIMPLE-tier model gives up" path.
154
+ /youtube\.com/i, /youtu\.be/i,
155
+ /twitter\.com/i, /x\.com/i,
151
156
  ];
152
157
  function countMatches(text, keywords) {
153
158
  const lower = text.toLowerCase();
@@ -59,6 +59,35 @@ async function execute(input, ctx) {
59
59
  return { output: `Error: only http/https URLs are supported`, isError: true };
60
60
  }
61
61
  const maxLen = Math.min(max_length ?? DEFAULT_MAX_LENGTH, MAX_BODY_BYTES);
62
+ // ── YouTube special case ──
63
+ // Plain HTML fetch on a youtube.com URL returns the SPA bundle (a wall of
64
+ // minified JS), which is useless to the model and was the failure mode
65
+ // behind "I can't access YouTube" responses. Auto-redirect to the caption
66
+ // track so the model gets the actual spoken content. Transparent to
67
+ // callers — same WebFetch tool, the right thing happens for video URLs.
68
+ const videoId = extractYouTubeVideoId(parsed);
69
+ if (videoId) {
70
+ const ytKey = cacheKey(`youtube-transcript:${videoId}`, maxLen);
71
+ const ytCached = getCached(ytKey);
72
+ if (ytCached)
73
+ return { output: ytCached + '\n\n(cached)' };
74
+ const transcript = await fetchYouTubeTranscript(videoId, ctx.abortSignal);
75
+ if (transcript.ok) {
76
+ const truncated = transcript.text.length > maxLen
77
+ ? transcript.text.slice(0, maxLen) + '\n\n... (transcript truncated)'
78
+ : transcript.text;
79
+ const output = `URL: ${url}\nSource: YouTube auto-captions (videoId=${videoId}, lang=${transcript.lang})\n\n${truncated}`;
80
+ setCached(ytKey, output);
81
+ return { output };
82
+ }
83
+ // The transcript path failed. Do NOT fall through to the raw HTML fetch —
84
+ // that would only scrape the SPA's JS bundle. Return the failure reason so
85
+ // the model can decide what to do (e.g., suggest a manual step).
86
+ return {
87
+ output: `YouTube transcript unavailable for ${url} — ${transcript.reason}. The video may have captions disabled or be region-locked.`,
88
+ isError: true,
89
+ };
90
+ }
62
91
  const key = cacheKey(url, maxLen);
63
92
  // Check cache first
64
93
  const cached = getCached(key);
@@ -147,6 +176,143 @@ async function execute(input, ctx) {
147
176
  ctx.abortSignal.removeEventListener('abort', onAbort);
148
177
  }
149
178
  }
179
+ // ─── YouTube transcript fetcher ─────────────────────────────────────────────
180
+ // Fetches auto-generated or uploaded captions for a YouTube video by parsing
181
+ // the watch-page's `ytInitialPlayerResponse` JSON. Pure HTTP, no deps. Saves
182
+ // us from the alternative (shelling out to yt-dlp, which the user may not
183
+ // have installed) and from leaving the model to guess at JS bundles.
184
/**
 * Extracts the video ID from a YouTube URL, or returns null when the URL does
 * not point at a single video.
 *
 * Recognised shapes:
 *   - youtu.be/{id}
 *   - (www.|m.|music.)youtube.com/watch?v={id}
 *   - (www.|m.|music.)youtube.com/{shorts|live|embed|v}/{id}
 *   - (www.)youtube-nocookie.com/embed/{id}
 *
 * @param {URL} parsed - An already-parsed URL object.
 * @returns {string|null} The video ID, or null if none could be extracted.
 */
function extractYouTubeVideoId(parsed) {
    // Every branch validates against the same shape so callers never receive
    // an empty string or an arbitrary path fragment as an "ID".
    const ID_SHAPE = /^[A-Za-z0-9_-]{6,}$/;
    const asVideoId = (candidate) => (typeof candidate === 'string' && ID_SHAPE.test(candidate) ? candidate : null);
    const host = parsed.hostname.replace(/^www\./, '');
    if (host === 'youtu.be') {
        return asVideoId(parsed.pathname.slice(1).split('/')[0]);
    }
    const YOUTUBE_HOSTS = ['youtube.com', 'm.youtube.com', 'music.youtube.com', 'youtube-nocookie.com'];
    if (!YOUTUBE_HOSTS.includes(host)) {
        return null;
    }
    // Tolerate a trailing slash ("/watch/?v=...").
    const path = parsed.pathname.replace(/\/+$/, '');
    if (path === '/watch') {
        return asVideoId(parsed.searchParams.get('v'));
    }
    // /shorts/{id}, /live/{id}, /embed/{id}, legacy /v/{id}
    const pathMatch = path.match(/^\/(?:shorts|live|embed|v)\/([A-Za-z0-9_-]{6,})/);
    return pathMatch ? pathMatch[1] : null;
}
200
/**
 * Returns the brace-balanced object literal that starts at `start` (which must
 * index a "{"), or null if the braces never balance. Braces inside JSON string
 * values are ignored, so text such as "};" within a string cannot end the
 * scan early.
 */
function sliceBalancedObject(source, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < source.length; i++) {
        const ch = source[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return source.slice(start, i + 1);
            }
        }
    }
    return null;
}
/**
 * Fetches the caption text for a YouTube video by reading the caption track
 * list out of the watch page's inlined `ytInitialPlayerResponse` object and
 * then downloading the preferred track.
 *
 * Never throws: every failure is reported as `{ ok: false, reason }`.
 *
 * @param {string} videoId - The YouTube video ID.
 * @param {AbortSignal} abortSignal - Caller's cancellation signal.
 * @returns {Promise<{ok: true, text: string, lang: string} | {ok: false, reason: string}>}
 */
async function fetchYouTubeTranscript(videoId, abortSignal) {
    // An 'abort' listener never fires for a signal that is already aborted,
    // so check up front instead of starting a request nobody is waiting for.
    if (abortSignal.aborted) {
        return { ok: false, reason: 'request aborted' };
    }
    const TIMEOUT_MS = 20_000;
    const watchUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}&hl=en`;
    const ctrl = new AbortController();
    let timedOut = false;
    const timer = setTimeout(() => {
        timedOut = true;
        ctrl.abort();
    }, TIMEOUT_MS);
    const onAbort = () => ctrl.abort();
    abortSignal.addEventListener('abort', onAbort, { once: true });
    try {
        const res = await fetch(watchUrl, {
            signal: ctrl.signal,
            headers: {
                // Pretend to be a desktop browser so YouTube serves the watch page
                // with the player config inlined. The default Node fetch UA gets a
                // consent-redirect HTML stub that has no caption metadata.
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
                'Accept-Language': 'en-US,en;q=0.9',
            },
            redirect: 'follow',
        });
        if (!res.ok) {
            return { ok: false, reason: `watch page HTTP ${res.status}` };
        }
        const html = await res.text();
        // Locate the assignment (with or without a leading `var`) and take the
        // brace-balanced object that follows. A lazy regex up to the first "};"
        // would truncate the JSON whenever a string value contains "};".
        const assignment = /ytInitialPlayerResponse\s*=\s*\{/.exec(html);
        if (!assignment) {
            return { ok: false, reason: 'could not locate ytInitialPlayerResponse in watch page' };
        }
        const objectStart = assignment.index + assignment[0].length - 1;
        const playerJson = sliceBalancedObject(html, objectStart);
        let player;
        try {
            player = JSON.parse(playerJson ?? '');
        }
        catch {
            return { ok: false, reason: 'ytInitialPlayerResponse JSON parse failed' };
        }
        const rawTracks = player.captions?.playerCaptionsTracklistRenderer?.captionTracks;
        const tracks = Array.isArray(rawTracks) ? rawTracks : [];
        if (tracks.length === 0) {
            return { ok: false, reason: 'no caption tracks (video has captions disabled)' };
        }
        // Prefer English; fall back to first available; auto-captions are fine.
        const track = tracks.find(t => (t?.languageCode || '').startsWith('en')) ??
            tracks[0];
        if (!track?.baseUrl) {
            return { ok: false, reason: 'caption track has no baseUrl' };
        }
        // Request the JSON3 format — easier to parse than the default XML. The
        // flag is appended textually so the signed query string in baseUrl is
        // left byte-for-byte intact; only the separator is chosen dynamically.
        let captionUrl = track.baseUrl;
        if (!/[?&]fmt=/.test(captionUrl)) {
            captionUrl += `${captionUrl.includes('?') ? '&' : '?'}fmt=json3`;
        }
        const capRes = await fetch(captionUrl, {
            signal: ctrl.signal,
            headers: { 'User-Agent': 'Mozilla/5.0' },
        });
        if (!capRes.ok) {
            return { ok: false, reason: `caption fetch HTTP ${capRes.status}` };
        }
        const capRaw = await capRes.text();
        // JSON3 first; if the body is not JSON3 (or has no text), try the XML shape.
        const text = parseJson3Captions(capRaw) || parseXmlCaptions(capRaw);
        if (!text) {
            return { ok: false, reason: 'caption response had no readable text segments' };
        }
        return { ok: true, text, lang: track.languageCode || 'unknown' };
    }
    catch (err) {
        if (abortSignal.aborted) {
            return { ok: false, reason: 'request aborted' };
        }
        if (timedOut) {
            return { ok: false, reason: `timed out after ${TIMEOUT_MS / 1000}s` };
        }
        return {
            ok: false,
            reason: `fetch error: ${err instanceof Error ? err.message : String(err)}`,
        };
    }
    finally {
        clearTimeout(timer);
        abortSignal.removeEventListener('abort', onAbort);
    }
}
277
/**
 * Extracts plain text from a YouTube "json3" caption payload.
 *
 * Segments within one event are concatenated as-is, since YouTube emits
 * per-word fragments that carry their own leading spaces. Separate events are
 * joined with a space so that caption lines which carry no surrounding
 * whitespace do not get glued together ("...worldhow are...").
 *
 * @param {string} raw - Response body of the caption request.
 * @returns {string} The flattened caption text, or '' when `raw` is not a
 *   json3 payload or contains no text. The caller uses '' as the signal to
 *   try another format, so parse failures are intentionally not thrown.
 */
function parseJson3Captions(raw) {
    let payload;
    try {
        payload = JSON.parse(raw);
    }
    catch {
        return '';
    }
    if (!Array.isArray(payload?.events)) {
        return '';
    }
    const lines = [];
    for (const event of payload.events) {
        if (!Array.isArray(event?.segs)) {
            continue;
        }
        const line = event.segs
            .map(seg => (typeof seg?.utf8 === 'string' ? seg.utf8 : ''))
            .join('');
        if (line) {
            lines.push(line);
        }
    }
    // Collapse newlines and the extra spaces introduced by the join.
    return lines.join(' ').replace(/\n+/g, ' ').replace(/\s{2,}/g, ' ').trim();
}
298
/**
 * Extracts plain text from the XML caption format (`<text ...>...</text>`
 * elements). Regex-only parse — caption text is simple enough that pulling in
 * an XML library for this would be overkill.
 *
 * @param {string} raw - Response body of the caption request.
 * @returns {string} Caption lines joined by single spaces, or '' when no
 *   `<text>` elements are present.
 */
function parseXmlCaptions(raw) {
    const NAMED_ENTITIES = { lt: '<', gt: '>', quot: '"', apos: "'", nbsp: '\u00A0' };
    const decodeEntities = (fragment) => fragment
        // `&amp;` is decoded first on purpose: text that was escaped twice
        // (e.g. `&amp;#39;`) is then fully decoded by the next pass.
        .replace(/&amp;/g, '&')
        // Named entities plus ANY decimal/hex character reference, not just &#39;.
        .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(lt|gt|quot|apos|nbsp));/g, (whole, dec, hex, name) => {
            if (name) {
                return NAMED_ENTITIES[name];
            }
            const codePoint = dec ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
            // Leave out-of-range references untouched rather than throwing.
            return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
        });
    const lines = [];
    for (const [, inner] of raw.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/g)) {
        const line = decodeEntities(inner).replace(/\s+/g, ' ').trim();
        if (line) {
            lines.push(line);
        }
    }
    return lines.join(' ');
}
150
316
  function stripHtml(html) {
151
317
  return html
152
318
  // Remove non-content elements
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.15.4",
3
+ "version": "3.15.5",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {