channel-worker 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/stats-syncer.js +78 -184
  2. package/package.json +1 -1
@@ -1,8 +1,9 @@
1
1
  const WebSocket = require('ws');
2
2
 
3
3
  /**
4
- * Sync YouTube Studio stats via Nstbrowser CDP.
5
- * Flow: Launch profile → connect CDP → navigate to Studio scrape stats close
4
+ * Sync YouTube stats via Nstbrowser CDP.
5
+ * Flow: Launch profile → connect CDP → get channel ID from Studio
6
+ * navigate to channel About page → read #additional-info-container table
6
7
  */
7
8
  class StatsSyncer {
8
9
  constructor(nstManager, apiClient) {
@@ -92,221 +93,114 @@ class StatsSyncer {
92
93
  await s('Page.enable');
93
94
  await s('Runtime.enable');
94
95
 
95
- let stats = { subscribers: 0, total_views: 0, video_count: 0 };
96
-
97
- // Step 1: Go to YouTube Studio → auto-redirects to correct channel
96
+ // Step 1: Go to YouTube Studio to get channel ID
98
97
  console.log('[stats] Navigating to YouTube Studio...');
99
98
  await s('Page.navigate', { url: 'https://studio.youtube.com' });
100
99
  await this.waitForLoad(s);
101
- await this.sleep(5000);
100
+ await this.sleep(4000);
102
101
 
103
- // Get channel ID from URL
104
102
  const urlResult = await s('Runtime.evaluate', { expression: 'window.location.href', returnByValue: true });
105
103
  const studioUrl = urlResult?.result?.value || '';
106
104
  console.log(`[stats] Studio URL: ${studioUrl}`);
107
105
 
108
- const channelIdMatch = studioUrl.match(/channel\/(UC[\w-]+)/);
109
- const channelId = channelIdMatch ? channelIdMatch[1] : null;
110
-
111
- if (!channelId) {
112
- // Try ytcfg
106
+ // Extract channel ID
107
+ let cid = null;
108
+ const cidMatch = studioUrl.match(/channel\/(UC[\w-]+)/);
109
+ if (cidMatch) {
110
+ cid = cidMatch[1];
111
+ } else {
113
112
  const cfgResult = await s('Runtime.evaluate', {
114
113
  expression: '(window.ytcfg && window.ytcfg.get) ? window.ytcfg.get("CHANNEL_ID") : ""',
115
114
  returnByValue: true,
116
115
  });
117
- console.log(`[stats] ytcfg CHANNEL_ID: ${cfgResult?.result?.value}`);
116
+ cid = cfgResult?.result?.value || null;
118
117
  }
119
118
 
120
- const cid = channelId || (await s('Runtime.evaluate', {
121
- expression: '(window.ytcfg && window.ytcfg.get) ? window.ytcfg.get("CHANNEL_ID") : ""',
122
- returnByValue: true,
123
- }))?.result?.value;
124
-
125
- // Step 2: Navigate to Studio Analytics page (English URL works regardless of UI language)
126
- if (cid) {
127
- const analyticsUrl = `https://studio.youtube.com/channel/${cid}/analytics/tab-overview/period-default`;
128
- console.log(`[stats] Navigating to analytics: ${analyticsUrl}`);
129
- await s('Page.navigate', { url: analyticsUrl });
130
- await this.waitForLoad(s);
131
- await this.sleep(6000);
132
-
133
- // Scrape analytics page — look for numbers regardless of language
134
- const analyticsResult = await s('Runtime.evaluate', {
135
- expression: `
136
- (function() {
137
- var result = { subscribers: 0, total_views: 0, video_count: 0, debug_text: '' };
138
- var text = document.body ? document.body.innerText : '';
139
- result.debug_text = text.substring(0, 800);
140
-
141
- // YouTube Studio Analytics shows metric cards with labels and numbers
142
- // The layout is: Label (in any language) followed by a number
143
- // We look for large numbers near subscriber/view keywords in ANY language
144
-
145
- // Strategy: find all metric-like elements by looking for number patterns
146
- // Studio analytics typically shows: Views, Watch time, Subscribers
147
- // in cards with the number prominently displayed
148
-
149
- // Try to find subscriber-related numbers
150
- // Multi-language patterns: subscribers, người đăng ký, abonnés, Abonnenten, etc.
151
- var subKeywords = /subscri|đăng ký|abonn|подписч|구독/i;
152
- var viewKeywords = /views?|lượt xem|vues?|aufrufe|просмотр|조회/i;
153
-
154
- var lines = text.split('\\n').map(function(l) { return l.trim(); }).filter(function(l) { return l.length > 0; });
155
-
156
- for (var i = 0; i < lines.length; i++) {
157
- var line = lines[i];
158
- var nextLine = lines[i + 1] || '';
159
- var prevLine = lines[i - 1] || '';
160
-
161
- // Check if this line or adjacent lines contain keywords
162
- if (subKeywords.test(line) || subKeywords.test(prevLine)) {
163
- // Look for a number in this line or nearby
164
- var numInLine = extractNumber(line);
165
- var numInNext = extractNumber(nextLine);
166
- if (numInLine > 0 && result.subscribers === 0) result.subscribers = numInLine;
167
- else if (numInNext > 0 && result.subscribers === 0) result.subscribers = numInNext;
168
- }
169
-
170
- if (viewKeywords.test(line) || viewKeywords.test(prevLine)) {
171
- var numInLine2 = extractNumber(line);
172
- var numInNext2 = extractNumber(nextLine);
173
- if (numInLine2 > 0 && result.total_views === 0) result.total_views = numInLine2;
174
- else if (numInNext2 > 0 && result.total_views === 0) result.total_views = numInNext2;
175
- }
176
- }
177
-
178
- // Also try: Current subscribers count shown in Studio sidebar
179
- // Pattern: "Current subscribers\\n123" or "Người đăng ký hiện tại\\n123"
180
- var currentSubMatch = text.match(/(?:current subscribers|người đăng ký hiện tại|subscriber|đăng ký)[\\s\\n]*([\\d,\\.]+[KMB]?)/i);
181
- if (currentSubMatch && result.subscribers === 0) {
182
- result.subscribers = parseHumanNum(currentSubMatch[1]);
183
- }
184
-
185
- function extractNumber(str) {
186
- if (!str) return 0;
187
- // Match standalone numbers like "1,234" or "5.2K" or "123"
188
- var m = str.match(/^([\\d,\\.]+[KMB]?)$/i) || str.match(/\\b([\\d,\\.]+[KMB]?)\\b/);
189
- if (m) return parseHumanNum(m[1]);
190
- return 0;
191
- }
192
-
193
- function parseHumanNum(str) {
194
- if (!str) return 0;
195
- str = str.replace(/,/g, '').trim();
196
- var m = str.match(/([\\d\\.]+)\\s*([KMB])/i);
197
- if (m) {
198
- var n = parseFloat(m[1]);
199
- var s = m[2].toUpperCase();
200
- if (s === 'K') return Math.round(n * 1000);
201
- if (s === 'M') return Math.round(n * 1000000);
202
- if (s === 'B') return Math.round(n * 1000000000);
203
- }
204
- return parseInt(str, 10) || 0;
205
- }
206
-
207
- return JSON.stringify(result);
208
- })()
209
- `,
210
- returnByValue: true,
211
- });
212
-
213
- try {
214
- var parsed = JSON.parse(analyticsResult?.result?.value || '{}');
215
- console.log('[stats] Analytics debug text (first 300):', (parsed.debug_text || '').substring(0, 300));
216
- if (parsed.subscribers > 0) stats.subscribers = parsed.subscribers;
217
- if (parsed.total_views > 0) stats.total_views = parsed.total_views;
218
- } catch {}
119
+ if (!cid) {
120
+ throw new Error('Could not find channel ID from YouTube Studio');
219
121
  }
122
+ console.log(`[stats] Channel ID: ${cid}`);
220
123
 
221
- // Step 3: Get video count + subscriber from channel page via ytInitialData
222
- var channelPageUrl = cid
223
- ? 'https://www.youtube.com/channel/' + cid
224
- : 'https://www.youtube.com/@me';
225
-
226
- console.log('[stats] Navigating to channel page:', channelPageUrl);
227
- await s('Page.navigate', { url: channelPageUrl });
124
+ // Step 2: Navigate to channel About page
125
+ const aboutUrl = `https://www.youtube.com/channel/${cid}/about`;
126
+ console.log(`[stats] Navigating to: ${aboutUrl}`);
127
+ await s('Page.navigate', { url: aboutUrl });
228
128
  await this.waitForLoad(s);
229
129
  await this.sleep(4000);
230
130
 
231
- var ytResult = await s('Runtime.evaluate', {
131
+ // Step 3: Read #additional-info-container table
132
+ const result = await s('Runtime.evaluate', {
232
133
  expression: `
233
134
  (function() {
234
- var result = { subscribers: 0, total_views: 0, video_count: 0 };
235
- try {
236
- var data = window.ytInitialData;
237
- if (!data) return JSON.stringify({ error: 'no ytInitialData' });
238
-
239
- // Try pageHeaderRenderer (new layout)
240
- var pageHeader = data.header && data.header.pageHeaderRenderer;
241
- if (pageHeader) {
242
- var content = pageHeader.content && pageHeader.content.pageHeaderViewModel;
243
- if (content && content.metadata && content.metadata.contentMetadataViewModel) {
244
- var rows = content.metadata.contentMetadataViewModel.metadataRows || [];
245
- for (var r = 0; r < rows.length; r++) {
246
- var parts = rows[r].metadataParts || [];
247
- for (var p = 0; p < parts.length; p++) {
248
- var t = (parts[p].text && parts[p].text.content) || '';
249
- // subscriber text like "123 subscribers" or "0 người đăng ký"
250
- if (/subscri|đăng ký/i.test(t)) {
251
- var m = t.match(/([\\d,\\.]+[KMB]?)/);
252
- if (m) result.subscribers = parseN(m[1]);
253
- }
254
- // video count like "1 video" or "15 video"
255
- if (/video/i.test(t)) {
256
- var m2 = t.match(/([\\d,]+)/);
257
- if (m2) result.video_count = parseInt(m2[1].replace(/,/g, ''), 10);
258
- }
259
- }
260
- }
261
- }
262
- }
135
+ var stats = { subscribers: -1, total_views: -1, video_count: -1, debug: '' };
263
136
 
264
- // Try c4TabbedHeaderRenderer (old layout)
265
- var c4Header = data.header && data.header.c4TabbedHeaderRenderer;
266
- if (c4Header) {
267
- var subText = c4Header.subscriberCountText && c4Header.subscriberCountText.simpleText || '';
268
- if (subText) {
269
- var m3 = subText.match(/([\\d,\\.]+[KMB]?)/);
270
- if (m3 && result.subscribers === 0) result.subscribers = parseN(m3[1]);
271
- }
272
- if (c4Header.videosCountText) {
273
- var vText = c4Header.videosCountText.runs ? c4Header.videosCountText.runs.map(function(r){return r.text}).join('') : (c4Header.videosCountText.simpleText || '');
274
- var m4 = vText.match(/([\\d,]+)/);
275
- if (m4) result.video_count = parseInt(m4[1].replace(/,/g, ''), 10);
276
- }
277
- }
137
+ // Find the table inside #additional-info-container
138
+ var container = document.querySelector('#additional-info-container');
139
+ if (!container) {
140
+ stats.debug = 'no #additional-info-container found';
141
+ // Fallback: try to find stats from page text
142
+ var text = document.body ? document.body.innerText : '';
143
+ stats.debug = text.substring(0, 500);
144
+ return JSON.stringify(stats);
145
+ }
278
146
 
279
- } catch (e) {
280
- return JSON.stringify({ error: e.message });
147
+ var table = container.querySelector('table');
148
+ if (!table) {
149
+ stats.debug = 'no table in container, text: ' + container.innerText.substring(0, 300);
150
+ return JSON.stringify(stats);
281
151
  }
282
152
 
283
- function parseN(str) {
284
- if (!str) return 0;
285
- str = str.replace(/,/g, '').trim();
286
- var m = str.match(/([\\d\\.]+)\\s*([KMB])/i);
287
- if (m) {
288
- var n = parseFloat(m[1]);
289
- var s = m[2].toUpperCase();
290
- if (s === 'K') return Math.round(n * 1000);
291
- if (s === 'M') return Math.round(n * 1000000);
292
- if (s === 'B') return Math.round(n * 1000000000);
153
+ // Each row has: icon cell + text cell (e.g. "1 subscriber", "3 videos", "743 views")
154
+ var rows = table.querySelectorAll('tr');
155
+ var texts = [];
156
+ rows.forEach(function(row) {
157
+ var text = row.innerText.trim();
158
+ texts.push(text);
159
+
160
+ // Match patterns: "X subscriber(s)", "X video(s)", "X view(s)"
161
+ // Also Vietnamese: "X người đăng ký", "X video", "X lượt xem"
162
+ var num = 0;
163
+ var numMatch = text.match(/([\\d,\\.]+)/);
164
+ if (numMatch) {
165
+ num = parseInt(numMatch[1].replace(/,/g, ''), 10);
166
+ }
167
+
168
+ // Check for K/M/B suffix
169
+ var suffixMatch = text.match(/([\\d,\\.]+)\\s*([KMB])/i);
170
+ if (suffixMatch) {
171
+ var n = parseFloat(suffixMatch[1].replace(/,/g, ''));
172
+ var suffix = suffixMatch[2].toUpperCase();
173
+ if (suffix === 'K') num = Math.round(n * 1000);
174
+ else if (suffix === 'M') num = Math.round(n * 1000000);
175
+ else if (suffix === 'B') num = Math.round(n * 1000000000);
293
176
  }
294
- return parseInt(str, 10) || 0;
295
- }
296
177
 
297
- return JSON.stringify(result);
178
+ if (/subscriber|đăng ký/i.test(text)) {
179
+ stats.subscribers = num;
180
+ } else if (/\\bvideos?\\b/i.test(text)) {
181
+ stats.video_count = num;
182
+ } else if (/\\bviews?\\b|lượt xem/i.test(text)) {
183
+ stats.total_views = num;
184
+ }
185
+ });
186
+
187
+ stats.debug = texts.join(' | ');
188
+ return JSON.stringify(stats);
298
189
  })()
299
190
  `,
300
191
  returnByValue: true,
301
192
  });
302
193
 
194
+ var stats = { subscribers: 0, total_views: 0, video_count: 0 };
303
195
  try {
304
- var ytData = JSON.parse(ytResult?.result?.value || '{}');
305
- console.log('[stats] ytInitialData result:', JSON.stringify(ytData));
306
- if (ytData.subscribers > 0 && stats.subscribers === 0) stats.subscribers = ytData.subscribers;
307
- if (ytData.video_count > 0) stats.video_count = ytData.video_count;
308
- if (ytData.total_views > 0 && stats.total_views === 0) stats.total_views = ytData.total_views;
309
- } catch {}
196
+ var parsed = JSON.parse(result?.result?.value || '{}');
197
+ console.log(`[stats] About page data: ${parsed.debug}`);
198
+ if (parsed.subscribers >= 0) stats.subscribers = parsed.subscribers;
199
+ if (parsed.total_views >= 0) stats.total_views = parsed.total_views;
200
+ if (parsed.video_count >= 0) stats.video_count = parsed.video_count;
201
+ } catch (e) {
202
+ console.error(`[stats] Parse error: ${e.message}`);
203
+ }
310
204
 
311
205
  clearTimeout(timeout);
312
206
  ws.close();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "channel-worker",
3
- "version": "1.6.0",
3
+ "version": "1.6.1",
4
4
  "description": "Channel Manager worker daemon — runs on remote machines to execute video pipeline jobs",
5
5
  "main": "lib/daemon.js",
6
6
  "bin": {