deaf-intelligence 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2072 @@
1
+ /**
2
+ * Spotify GraphQL scraper — pure Node.js, minimal dependencies.
3
+ *
4
+ * Uses playwright-core (4MB) + system Chrome. No Python. No browser download.
5
+ *
6
+ * VERIFIED 2026-03-22:
7
+ * - KAYO: 38 songs, 982K streams, 16,429 ML, 535 followers, top 5 cities
8
+ * - ABDE: 65 songs, 49.8M streams, 99K ML, solo/ft/feat roles
9
+ *
10
+ * GOTCHAS:
11
+ * - get_access_token returns 403 from pure HTTP (TLS fingerprint check)
12
+ * → SOLUTION: playwright-core launches system Chrome, captures token from network
13
+ * - Track pages don't show play counts without login
14
+ * → SOLUTION: queryAlbumTracks GraphQL endpoint returns them
15
+ * - Rate limiting: ~50 fast calls triggers 429
16
+ * → SOLUTION: 300ms delay between album queries, retry with 5s backoff
17
+ * - GraphQL hashes may change when Spotify updates their client
18
+ * → SOLUTION: hashes from Spicetify community, monitor for failures
19
+ */
20
// Third-party
import { chromium } from "playwright-core";
import ExcelJS from "exceljs";
// Node standard library
import { createHash } from "node:crypto";
import * as fs from "fs";
import * as os from "node:os";
import * as path from "path";
// Local modules
import Database from "./db.js";
import { constrainedDisaggregate, roundWithAnchor } from "./constrained-disagg.js";
import { validateS4AData, printValidationReport } from "./validation.js";
27
+ // GraphQL operation hashes (from Spotify web client / Spicetify)
28
+ const HASHES = {
29
+ queryArtistOverview: "35648a112beb1794e39ab931365f6ae4a8d45e65396d641eeda94e4003d41497",
30
+ queryArtistDiscographyAll: "9380995a9d4663cbcb5113fef3c6aabf70ae6d407ba61793fd01e2a1dd6929b0",
31
+ queryAlbumTracks: "3ea563e1d68f486d8df30f69de9dcedae74c77e684b889ba7408c589d30f7f2e",
32
+ };
33
+ const API_BASE = "https://api-partner.spotify.com/pathfinder/v1/query";
34
+ // Find system Chrome/Edge
35
+ function findChrome() {
36
+ const paths = [
37
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
38
+ "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
39
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
40
+ "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
41
+ "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
42
+ "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
43
+ "/usr/bin/google-chrome",
44
+ "/usr/bin/google-chrome-stable",
45
+ "/usr/bin/chromium-browser",
46
+ "/usr/bin/chromium",
47
+ ];
48
+ for (const p of paths) {
49
+ if (fs.existsSync(p))
50
+ return p;
51
+ }
52
+ throw new Error("Chrome/Edge not found. Install Chrome or set CHROME_PATH env var.");
53
+ }
54
+ // ─── Token ───
55
+ export async function getAnonymousToken(artistId) {
56
+ const chromePath = process.env.CHROME_PATH || findChrome();
57
+ const browser = await chromium.launch({ executablePath: chromePath, headless: true });
58
+ const ctx = await browser.newContext({
59
+ userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
60
+ });
61
+ const page = await ctx.newPage();
62
+ let token = null;
63
+ page.on("response", async (resp) => {
64
+ if (token)
65
+ return;
66
+ try {
67
+ const body = await resp.text();
68
+ const m = body.match(/"accessToken":"([^"]+)"/);
69
+ if (m)
70
+ token = m[1];
71
+ }
72
+ catch { }
73
+ });
74
+ await page.goto(`https://open.spotify.com/artist/${artistId}`, { waitUntil: "networkidle", timeout: 30000 });
75
+ await new Promise((r) => setTimeout(r, 3000));
76
+ await browser.close();
77
+ if (!token)
78
+ throw new Error("Failed to capture Spotify token");
79
+ return token;
80
+ }
81
+ // ─── GraphQL ───
82
+ async function gql(token, operation, variables) {
83
+ const params = new URLSearchParams({
84
+ operationName: operation,
85
+ variables: JSON.stringify(variables),
86
+ extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash: HASHES[operation] } }),
87
+ });
88
+ const resp = await fetch(`${API_BASE}?${params}`, {
89
+ headers: {
90
+ Authorization: `Bearer ${token}`,
91
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
92
+ },
93
+ });
94
+ if (resp.status === 429) {
95
+ // Rate limited — wait and retry once
96
+ await new Promise((r) => setTimeout(r, 5000));
97
+ const retry = await fetch(`${API_BASE}?${params}`, {
98
+ headers: {
99
+ Authorization: `Bearer ${token}`,
100
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
101
+ },
102
+ });
103
+ if (!retry.ok)
104
+ throw new Error(`GraphQL ${operation}: ${retry.status}`);
105
+ return retry.json();
106
+ }
107
+ if (!resp.ok)
108
+ throw new Error(`GraphQL ${operation}: ${resp.status}`);
109
+ return resp.json();
110
+ }
111
+ const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
112
+ export async function getArtistOverview(token, artistId) {
113
+ const result = await gql(token, "queryArtistOverview", {
114
+ uri: `spotify:artist:${artistId}`,
115
+ locale: "en",
116
+ includePrerelease: true,
117
+ });
118
+ const artist = result.data.artistUnion;
119
+ const stats = artist.stats || {};
120
+ const profile = artist.profile || {};
121
+ const visuals = artist.visuals || {};
122
+ const disco = artist.discography || {};
123
+ const related = artist.relatedContent || {};
124
+ // Images: avatar (largest first), header banner, full gallery
125
+ const avatarSources = visuals.avatarImage?.sources || [];
126
+ const profileImage = avatarSources.sort((a, b) => (b.width || 0) - (a.width || 0))[0]?.url || null;
127
+ const headerImage = visuals.headerImage?.sources?.[0]?.url || null;
128
+ const galleryImages = (visuals.gallery?.items || [])
129
+ .map((item) => item.sources?.[0]?.url)
130
+ .filter(Boolean);
131
+ const brandColor = visuals.avatarImage?.extractedColors?.colorRaw?.hex
132
+ || visuals.headerImage?.extractedColors?.colorRaw?.hex
133
+ || null;
134
+ return {
135
+ artist_id: artistId,
136
+ name: profile.name || "",
137
+ verified: profile.verified || false,
138
+ bio: profile.biography?.text || null,
139
+ profile_image: profileImage,
140
+ header_image: headerImage,
141
+ gallery_images: galleryImages,
142
+ brand_color: brandColor,
143
+ monthly_listeners: Number(stats.monthlyListeners) || 0,
144
+ followers: Number(stats.followers) || 0,
145
+ world_rank: Number(stats.worldRank) || 0,
146
+ top_cities: (stats.topCities?.items || []).map((c) => ({
147
+ city: c.city, country: c.country, listeners: Number(c.numberOfListeners) || 0,
148
+ })),
149
+ top_tracks: (disco.topTracks?.items || []).map((item) => {
150
+ const t = item.track || {};
151
+ return {
152
+ track_id: (t.uri || "").split(":").pop() || "",
153
+ title: t.name || "",
154
+ playcount: parseInt(t.playcount || "0", 10),
155
+ artists: (t.artists?.items || []).map((a) => a.profile?.name || "?"),
156
+ };
157
+ }),
158
+ related_artists: (related.relatedArtists?.items || []).map((item) => {
159
+ const a = item.artist || {};
160
+ return { name: a.profile?.name || "?", id: a.id || "", followers: Number(a.stats?.followers) || 0 };
161
+ }).filter((a) => a.name !== "?"),
162
+ discovered_on: (related.discoveredOnV2?.items || [])
163
+ .filter((item) => item.data?.__typename === "Playlist")
164
+ .map((item) => {
165
+ const pl = item.data || {};
166
+ return { name: pl.name || "", owner: pl.ownerV2?.data?.name || "" };
167
+ })
168
+ .filter((d) => d.name),
169
+ external_links: (profile.externalLinks?.items || []).map((l) => ({ name: l.name, url: l.url })),
170
+ latest_release: disco.latest ? {
171
+ name: disco.latest.name || "",
172
+ type: disco.latest.type || "",
173
+ label: disco.latest.label || "",
174
+ date: disco.latest.date?.isoString || String(disco.latest.date?.year || ""),
175
+ } : null,
176
+ singles_count: disco.singles?.totalCount || 0,
177
+ albums_count: disco.albums?.totalCount || 0,
178
+ };
179
+ }
180
+ export async function getAllTracks(token, artistId) {
181
+ const result = await gql(token, "queryArtistDiscographyAll", {
182
+ uri: `spotify:artist:${artistId}`, offset: 0, limit: 100,
183
+ });
184
+ const albums = [];
185
+ for (const item of result.data.artistUnion.discography.all.items) {
186
+ for (const r of item.releases?.items || []) {
187
+ albums.push({
188
+ id: r.id,
189
+ name: r.name,
190
+ type: r.type || "SINGLE",
191
+ date: r.date?.isoString || String(r.date?.year || ""),
192
+ cover: r.coverArt?.sources?.[0]?.url || null,
193
+ });
194
+ }
195
+ }
196
+ const allTracks = [];
197
+ const seen = new Set();
198
+ for (const album of albums) {
199
+ await sleep(300); // Rate limit courtesy
200
+ try {
201
+ const res = await gql(token, "queryAlbumTracks", {
202
+ uri: `spotify:album:${album.id}`, offset: 0, limit: 300,
203
+ });
204
+ for (const tItem of res.data.album.tracks.items) {
205
+ const track = tItem.track || {};
206
+ const tid = (track.uri || "").split(":").pop() || "";
207
+ if (!tid || seen.has(tid))
208
+ continue;
209
+ seen.add(tid);
210
+ const artists = (track.artists?.items || []).map((a) => a.profile?.name || "?");
211
+ const artistIds = (track.artists?.items || []).map((a) => (a.uri || "").split(":").pop() || "");
212
+ const isPrimary = artistIds[0] === artistId;
213
+ const isSolo = artists.length === 1 && isPrimary;
214
+ const isFeat = artistIds.includes(artistId) && !isPrimary;
215
+ let role;
216
+ if (isSolo)
217
+ role = "SOLO";
218
+ else if (isPrimary) {
219
+ const featNames = artists.filter((_, i) => artistIds[i] !== artistId);
220
+ role = `ft. ${featNames.join(" & ")}`;
221
+ }
222
+ else if (isFeat)
223
+ role = `FEAT on ${artists[0]}`;
224
+ else
225
+ role = "OTHER";
226
+ allTracks.push({
227
+ track_id: tid, title: track.name || "", playcount: parseInt(track.playcount || "0", 10),
228
+ popularity: 0, // populated by batch Web API call below
229
+ artists, artist_ids: artistIds, role,
230
+ album_name: album.name, album_id: album.id,
231
+ release_date: album.date, release_type: album.type, cover_art: album.cover,
232
+ });
233
+ }
234
+ }
235
+ catch (e) {
236
+ // Skip failed albums silently, continue with rest
237
+ }
238
+ }
239
+ // Batch fetch popularity scores from Spotify Web API (50 per call)
240
+ const trackIds = allTracks.map(t => t.track_id);
241
+ for (let i = 0; i < trackIds.length; i += 50) {
242
+ const batch = trackIds.slice(i, i + 50);
243
+ try {
244
+ const resp = await fetch(`https://api.spotify.com/v1/tracks?ids=${batch.join(",")}`, {
245
+ headers: { Authorization: `Bearer ${token}` },
246
+ });
247
+ if (resp.ok) {
248
+ const data = await resp.json();
249
+ for (const t of data.tracks || []) {
250
+ if (!t)
251
+ continue;
252
+ const match = allTracks.find(at => at.track_id === t.id);
253
+ if (match)
254
+ match.popularity = Number(t.popularity) || 0;
255
+ }
256
+ }
257
+ await sleep(300);
258
+ }
259
+ catch { }
260
+ }
261
+ return allTracks;
262
+ }
263
+ // ─── SQLite Storage ───
264
+ const SCHEMA = `
265
+ PRAGMA journal_mode = DELETE;
266
+
267
+ CREATE TABLE IF NOT EXISTS artist (
268
+ spotify_id TEXT PRIMARY KEY, name TEXT NOT NULL, verified INTEGER DEFAULT 0,
269
+ bio TEXT, brand_color TEXT, created_at INTEGER DEFAULT (unixepoch())
270
+ );
271
+ CREATE TABLE IF NOT EXISTS daily_stats (
272
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
273
+ metric_name TEXT NOT NULL, metric_value REAL NOT NULL,
274
+ delta_value REAL, delta_pct REAL, scraped_at INTEGER NOT NULL
275
+ );
276
+ CREATE INDEX IF NOT EXISTS idx_daily ON daily_stats(artist_id, metric_name, scraped_at);
277
+
278
+ CREATE TABLE IF NOT EXISTS song_snapshots (
279
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
280
+ track_id TEXT, track_title TEXT NOT NULL, play_count INTEGER NOT NULL,
281
+ popularity INTEGER DEFAULT 0,
282
+ delta_value INTEGER, delta_pct REAL, scraped_at INTEGER NOT NULL
283
+ );
284
+ CREATE INDEX IF NOT EXISTS idx_songs ON song_snapshots(artist_id, track_title, scraped_at);
285
+
286
+ CREATE TABLE IF NOT EXISTS tracks (
287
+ spotify_id TEXT, artist_id TEXT NOT NULL, title TEXT NOT NULL,
288
+ release_type TEXT, release_date TEXT, release_year INTEGER,
289
+ cover_art_url TEXT, role TEXT, UNIQUE(artist_id, title)
290
+ );
291
+ CREATE TABLE IF NOT EXISTS locations (
292
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
293
+ location_type TEXT NOT NULL, location_name TEXT NOT NULL,
294
+ listener_count INTEGER, rank INTEGER, scraped_at INTEGER NOT NULL
295
+ );
296
+ CREATE TABLE IF NOT EXISTS related_artists (
297
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
298
+ related_name TEXT NOT NULL, related_spotify_id TEXT, scraped_at INTEGER NOT NULL
299
+ );
300
+ CREATE TABLE IF NOT EXISTS discovered_on (
301
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
302
+ playlist_name TEXT NOT NULL, playlist_owner TEXT, scraped_at INTEGER NOT NULL
303
+ );
304
+ CREATE TABLE IF NOT EXISTS external_links (
305
+ id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
306
+ link_name TEXT NOT NULL, link_url TEXT NOT NULL, scraped_at INTEGER NOT NULL,
307
+ UNIQUE(artist_id, link_name, link_url)
308
+ );
309
+ `;
310
+ export function storeData(dbPath, overview, tracks) {
311
+ const db = new Database(dbPath);
312
+ db.exec(SCHEMA);
313
+ // Migrate: add columns that may not exist in older DBs
314
+ const cols = db.prepare("PRAGMA table_info(artist)").all();
315
+ const colNames = new Set(cols.map((c) => c.name));
316
+ if (!colNames.has("brand_color"))
317
+ db.exec("ALTER TABLE artist ADD COLUMN brand_color TEXT");
318
+ const snapCols = db.prepare("PRAGMA table_info(song_snapshots)").all();
319
+ const snapColNames = new Set(snapCols.map((c) => c.name));
320
+ if (!snapColNames.has("popularity"))
321
+ db.exec("ALTER TABLE song_snapshots ADD COLUMN popularity INTEGER DEFAULT 0");
322
+ const ts = Math.floor(Date.now() / 1000);
323
+ const aid = overview.artist_id;
324
+ // Artist (including bio + brand_color)
325
+ db.prepare("INSERT OR REPLACE INTO artist (spotify_id, name, verified, bio, brand_color) VALUES (?, ?, ?, ?, ?)")
326
+ .run(aid, overview.name, overview.verified ? 1 : 0, overview.bio, overview.brand_color);
327
+ // Daily stats with delta
328
+ const prevStmt = db.prepare("SELECT metric_value FROM daily_stats WHERE artist_id=? AND metric_name=? ORDER BY scraped_at DESC LIMIT 1");
329
+ const insertStat = db.prepare("INSERT INTO daily_stats (artist_id, metric_name, metric_value, delta_value, delta_pct, scraped_at) VALUES (?,?,?,?,?,?)");
330
+ for (const [metric, value] of [
331
+ ["monthly_listeners", overview.monthly_listeners],
332
+ ["followers", overview.followers],
333
+ ["world_rank", overview.world_rank],
334
+ ]) {
335
+ if (value == null)
336
+ continue;
337
+ const prev = prevStmt.get(aid, metric);
338
+ let delta = null, deltaPct = null;
339
+ if (prev?.metric_value > 0) {
340
+ delta = value - prev.metric_value;
341
+ deltaPct = (delta / prev.metric_value) * 100;
342
+ }
343
+ insertStat.run(aid, metric, value, delta, deltaPct, ts);
344
+ }
345
+ // Cities
346
+ const insertCity = db.prepare("INSERT INTO locations (artist_id, location_type, location_name, listener_count, rank, scraped_at) VALUES (?,?,?,?,?,?)");
347
+ overview.top_cities.forEach((c, i) => insertCity.run(aid, "city", `${c.city}, ${c.country}`, c.listeners, i + 1, ts));
348
+ // Related
349
+ const insertRelated = db.prepare("INSERT INTO related_artists (artist_id, related_name, related_spotify_id, scraped_at) VALUES (?,?,?,?)");
350
+ for (const ra of overview.related_artists) {
351
+ if (ra.name)
352
+ insertRelated.run(aid, ra.name, ra.id, ts);
353
+ }
354
+ // Discovered on playlists
355
+ const insertDiscovered = db.prepare("INSERT INTO discovered_on (artist_id, playlist_name, playlist_owner, scraped_at) VALUES (?,?,?,?)");
356
+ for (const d of overview.discovered_on) {
357
+ if (d.name)
358
+ insertDiscovered.run(aid, d.name, d.owner, ts);
359
+ }
360
+ // External links (INSERT OR IGNORE — only add new ones)
361
+ const insertLink = db.prepare("INSERT OR IGNORE INTO external_links (artist_id, link_name, link_url, scraped_at) VALUES (?,?,?,?)");
362
+ for (const l of overview.external_links) {
363
+ if (l.url)
364
+ insertLink.run(aid, l.name, l.url, ts);
365
+ }
366
+ // Tracks + snapshots
367
+ const upsertTrack = db.prepare("INSERT OR REPLACE INTO tracks (spotify_id, artist_id, title, release_type, release_date, release_year, cover_art_url, role) VALUES (?,?,?,?,?,?,?,?)");
368
+ const prevPlay = db.prepare("SELECT play_count FROM song_snapshots WHERE artist_id=? AND track_title=? ORDER BY scraped_at DESC LIMIT 1");
369
+ const insertSnap = db.prepare("INSERT INTO song_snapshots (artist_id, track_id, track_title, play_count, popularity, delta_value, delta_pct, scraped_at) VALUES (?,?,?,?,?,?,?,?)");
370
+ const insertAll = db.transaction(() => {
371
+ for (const t of tracks) {
372
+ const year = t.release_date?.length >= 4 ? parseInt(t.release_date.substring(0, 4), 10) : null;
373
+ upsertTrack.run(t.track_id, aid, t.title, t.release_type, t.release_date, year, t.cover_art, t.role);
374
+ const prev = prevPlay.get(aid, t.title);
375
+ let delta = null, deltaPct = null;
376
+ if (prev?.play_count > 0) {
377
+ delta = t.playcount - prev.play_count;
378
+ deltaPct = (delta / prev.play_count) * 100;
379
+ }
380
+ insertSnap.run(aid, t.track_id, t.title, t.playcount, t.popularity || 0, delta, deltaPct, ts);
381
+ }
382
+ });
383
+ insertAll();
384
+ db.close();
385
+ }
386
+ // ─── Image Download ───
387
+ async function downloadImage(url, destPath) {
388
+ try {
389
+ const resp = await fetch(url);
390
+ if (!resp.ok)
391
+ return false;
392
+ const buffer = Buffer.from(await resp.arrayBuffer());
393
+ fs.mkdirSync(path.dirname(destPath), { recursive: true });
394
+ fs.writeFileSync(destPath, buffer);
395
+ return true;
396
+ }
397
+ catch {
398
+ return false;
399
+ }
400
+ }
401
+ // Extract Spotify's unique image ID from CDN URL.
402
+ // URL format: https://i.scdn.co/image/ab6761610000e5ebd3bb05bb2490318ab924b02a
403
+ // Falls back to sha256 hash of full URL if format changes.
404
+ function imageIdFromUrl(url) {
405
+ const match = url.match(/\/image\/([a-f0-9]+)/);
406
+ if (match)
407
+ return match[1];
408
+ const crypto = require("crypto");
409
+ return crypto.createHash("sha256").update(url).digest("hex").slice(0, 24);
410
+ }
411
+ function readManifest(manifestPath) {
412
+ try {
413
+ return JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
414
+ }
415
+ catch {
416
+ return { gallery_urls: [], cover_urls: {} };
417
+ }
418
+ }
419
+ function writeManifest(manifestPath, manifest) {
420
+ fs.mkdirSync(path.dirname(manifestPath), { recursive: true });
421
+ fs.writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
422
+ }
423
+ // Artist folder: ~/.artist-os/{Artist Name}/ — everything customer sees lives here
424
+ function artistDir(dataDir, artistName) {
425
+ return path.join(dataDir, artistName);
426
+ }
427
+ export async function downloadImages(artistId, overview, tracks, dataDir) {
428
+ const imgDir = path.join(artistDir(dataDir, overview.name), "images");
429
+ const coversDir = path.join(imgDir, "covers");
430
+ const galleryDir = path.join(imgDir, "gallery");
431
+ const manifestPath = path.join(imgDir, "manifest.json");
432
+ const manifest = readManifest(manifestPath);
433
+ let downloaded = 0, skipped = 0;
434
+ // --- Profile image ---
435
+ // Always profile.jpg = latest. If URL changed, archive old as profile_{image_id}.jpg
436
+ if (overview.profile_image) {
437
+ const dest = path.join(imgDir, "profile.jpg");
438
+ if (manifest.profile_url && manifest.profile_url !== overview.profile_image && fs.existsSync(dest)) {
439
+ // URL changed — archive the old one, then download new
440
+ const oldId = imageIdFromUrl(manifest.profile_url);
441
+ fs.renameSync(dest, path.join(imgDir, `profile_${oldId}.jpg`));
442
+ if (await downloadImage(overview.profile_image, dest))
443
+ downloaded++;
444
+ }
445
+ else if (!fs.existsSync(dest)) {
446
+ if (await downloadImage(overview.profile_image, dest))
447
+ downloaded++;
448
+ }
449
+ else
450
+ skipped++;
451
+ manifest.profile_url = overview.profile_image;
452
+ }
453
+ // --- Header banner ---
454
+ // Same logic: header.jpg = latest, archive old on change
455
+ if (overview.header_image) {
456
+ const dest = path.join(imgDir, "header.jpg");
457
+ if (manifest.header_url && manifest.header_url !== overview.header_image && fs.existsSync(dest)) {
458
+ const oldId = imageIdFromUrl(manifest.header_url);
459
+ fs.renameSync(dest, path.join(imgDir, `header_${oldId}.jpg`));
460
+ if (await downloadImage(overview.header_image, dest))
461
+ downloaded++;
462
+ }
463
+ else if (!fs.existsSync(dest)) {
464
+ if (await downloadImage(overview.header_image, dest))
465
+ downloaded++;
466
+ }
467
+ else
468
+ skipped++;
469
+ manifest.header_url = overview.header_image;
470
+ }
471
+ // --- Gallery ---
472
+ // Use Spotify image ID as filename. Photos accumulate — never deleted.
473
+ // Added photo → new file. Removed photo → old file stays. Reordered → no change.
474
+ for (const url of overview.gallery_images) {
475
+ const id = imageIdFromUrl(url);
476
+ const dest = path.join(galleryDir, `${id}.jpg`);
477
+ if (fs.existsSync(dest)) {
478
+ skipped++;
479
+ continue;
480
+ }
481
+ if (await downloadImage(url, dest))
482
+ downloaded++;
483
+ await sleep(100);
484
+ }
485
+ manifest.gallery_urls = overview.gallery_images;
486
+ // --- Cover arts ---
487
+ // Keyed by track_id — covers don't change after release.
488
+ for (const t of tracks) {
489
+ if (!t.cover_art)
490
+ continue;
491
+ const dest = path.join(coversDir, `${t.track_id}.jpg`);
492
+ if (fs.existsSync(dest)) {
493
+ skipped++;
494
+ continue;
495
+ }
496
+ if (await downloadImage(t.cover_art, dest))
497
+ downloaded++;
498
+ await sleep(100);
499
+ }
500
+ writeManifest(manifestPath, manifest);
501
+ return { downloaded, skipped };
502
+ }
503
+ // ─── Load all historical S4A dumps for an artist ───
504
+ function loadAllDumps(artistId) {
505
+ const dumpRoot = path.join(process.env.HOME || "~", ".artist-os", "s4a-dumps");
506
+ if (!fs.existsSync(dumpRoot))
507
+ return [];
508
+ const dirs = fs.readdirSync(dumpRoot).filter((d) => /^\d{4}-\d{2}-\d{2}$/.test(d)).sort();
509
+ const dumps = [];
510
+ for (const dir of dirs) {
511
+ const file = path.join(dumpRoot, dir, `${artistId}_s4a.json`);
512
+ if (!fs.existsSync(file))
513
+ continue;
514
+ try {
515
+ const data = JSON.parse(fs.readFileSync(file, "utf-8"));
516
+ if (data.perSong && Object.keys(data.perSong).length > 0)
517
+ dumps.push({ date: dir, data });
518
+ }
519
+ catch { }
520
+ }
521
+ return dumps;
522
+ }
523
+ // ─── Master XLSX: one workbook per artist, grows forever ───
524
+ export async function generateWorkbook(dbPath, dataDir, s4a) {
525
+ const db = new Database(dbPath, { readonly: true });
526
+ const artist = db.prepare("SELECT name, spotify_id, verified, bio FROM artist LIMIT 1").get();
527
+ try {
528
+ const bc = db.prepare("SELECT brand_color FROM artist LIMIT 1").get();
529
+ if (bc)
530
+ artist.brand_color = bc.brand_color;
531
+ }
532
+ catch { }
533
+ if (!artist) {
534
+ db.close();
535
+ return;
536
+ }
537
+ const aid = artist.spotify_id;
538
+ const artDir = artistDir(dataDir, artist.name);
539
+ fs.mkdirSync(artDir, { recursive: true });
540
+ const xlsxPath = path.join(artDir, `${artist.name}.xlsx`);
541
+ // JEDEN timestamp per scrape — nikdy nepřepočítávat Date.now() znovu
542
+ const scrapeTs = Math.floor(Date.now() / 1000);
543
+ const now = scrapeTs;
544
+ // today = datum SCRAPU (metadata), NE datum data pointu!
545
+ const today = new Date(scrapeTs * 1000).toISOString().slice(0, 10);
546
+ const wb = new ExcelJS.Workbook();
547
+ wb.creator = "Artist-OS";
548
+ wb.created = new Date();
549
+ const hdrDark = {
550
+ font: { bold: true, color: { argb: "FFFFFFFF" }, size: 11 },
551
+ fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FF1a1a2e" } },
552
+ alignment: { horizontal: "center" },
553
+ };
554
+ const hdrGreen = {
555
+ font: { bold: true, color: { argb: "FFFFFFFF" }, size: 11 },
556
+ fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FF1B5E20" } },
557
+ alignment: { horizontal: "center" },
558
+ };
559
+ const estFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFF5F5DC" } };
560
+ const projFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8EAF6" } };
561
+ const numFmt = "#,##0";
562
+ const applyHdr = (ws, style) => ws.getRow(1).eachCell(c => { Object.assign(c, { style }); });
563
+ const sectionStyle = {
564
+ font: { bold: true, size: 12 },
565
+ fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8EAF6" } },
566
+ alignment: { horizontal: "center", vertical: "middle" },
567
+ };
568
+ const labelStyle = {
569
+ font: { bold: true, color: { argb: "FF333333" } },
570
+ alignment: { horizontal: "center", vertical: "middle" },
571
+ };
572
+ const metaStyle = {
573
+ font: { italic: true, color: { argb: "FF666666" }, size: 9 },
574
+ alignment: { horizontal: "center", vertical: "middle" },
575
+ };
576
+ const addSection = (ws, name) => {
577
+ const row = ws.addRow([`═══ ${name} ═══`, "", ""]);
578
+ row.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
579
+ };
580
+ const addField = (ws, field, value, since) => {
581
+ const row = ws.addRow([field, value ?? "", since]);
582
+ row.getCell(1).style = labelStyle;
583
+ row.getCell(2).alignment = { horizontal: "center", vertical: "middle", wrapText: true };
584
+ row.getCell(3).alignment = { horizontal: "center", vertical: "middle" };
585
+ };
586
+ const addHistoryNote = (ws, label) => {
587
+ const row = ws.addRow([" HISTORY", label, ""]);
588
+ row.eachCell(c => { Object.assign(c, { style: metaStyle }); });
589
+ };
590
+ const addSpacer = (ws) => ws.addRow(["", "", ""]);
591
+ const scrapeDate = today;
592
+ // ═══════════════════════════════════════════
593
+ // SHEET 1: PROFILE (v2 — ACTUAL + HISTORY)
594
+ // ═══════════════════════════════════════════
595
+ const profileSheet = wb.addWorksheet("Profile", { properties: { tabColor: { argb: "FF2E7D32" } } });
596
+ profileSheet.columns = [
597
+ { header: "Field", width: 22 },
598
+ { header: "Value", width: 55 },
599
+ { header: "Since", width: 14 },
600
+ ];
601
+ applyHdr(profileSheet, hdrDark);
602
+ let links = [];
603
+ try {
604
+ links = db.prepare("SELECT link_name, link_url FROM external_links WHERE artist_id = ?").all(aid);
605
+ }
606
+ catch { }
607
+ const trackCountTotal = db.prepare("SELECT COUNT(*) as c FROM tracks WHERE artist_id = ?").get(aid)?.c || 0;
608
+ const latestRel = db.prepare("SELECT title, release_type, release_date FROM tracks WHERE artist_id = ? ORDER BY release_date DESC LIMIT 1").get(aid);
609
+ // Identity
610
+ addSection(profileSheet, "IDENTITY");
611
+ addField(profileSheet, "Spotify ID", aid, "—");
612
+ addField(profileSheet, "Name", artist.name, scrapeDate);
613
+ addHistoryNote(profileSheet, "(no changes)");
614
+ addSpacer(profileSheet);
615
+ addField(profileSheet, "Verified", artist.verified ? "Yes" : "No", scrapeDate);
616
+ addHistoryNote(profileSheet, "(no changes)");
617
+ addSpacer(profileSheet);
618
+ addField(profileSheet, "Bio", artist.bio || "(empty)", scrapeDate);
619
+ addHistoryNote(profileSheet, "(no changes)");
620
+ addSpacer(profileSheet);
621
+ addField(profileSheet, "Brand Color", artist.brand_color || "(not captured)", scrapeDate);
622
+ addHistoryNote(profileSheet, "(no changes)");
623
+ addSpacer(profileSheet);
624
+ // Images
625
+ addSection(profileSheet, "IMAGES");
626
+ const imgDir = path.join(artDir, "images");
627
+ const profileImgPath = path.join(imgDir, "profile.jpg");
628
+ if (fs.existsSync(profileImgPath)) {
629
+ addField(profileSheet, "Profile Image", "See below", scrapeDate);
630
+ const pImgId = wb.addImage({ filename: profileImgPath, extension: "jpeg" });
631
+ const pImgRow = profileSheet.rowCount + 1;
632
+ profileSheet.addRow(["", "", ""]);
633
+ profileSheet.getRow(pImgRow).height = 120;
634
+ profileSheet.addImage(pImgId, { tl: { col: 1, row: pImgRow - 1 }, ext: { width: 120, height: 120 } });
635
+ }
636
+ else {
637
+ addField(profileSheet, "Profile Image", "(not downloaded)", scrapeDate);
638
+ }
639
+ addHistoryNote(profileSheet, "(no changes)");
640
+ addSpacer(profileSheet);
641
+ const headerImgPath = path.join(imgDir, "header.jpg");
642
+ if (fs.existsSync(headerImgPath)) {
643
+ addField(profileSheet, "Header Image", "See below", scrapeDate);
644
+ const hImgId = wb.addImage({ filename: headerImgPath, extension: "jpeg" });
645
+ const hImgRow = profileSheet.rowCount + 1;
646
+ profileSheet.addRow(["", "", ""]);
647
+ profileSheet.getRow(hImgRow).height = 80;
648
+ profileSheet.addImage(hImgId, { tl: { col: 1, row: hImgRow - 1 }, ext: { width: 300, height: 80 } });
649
+ }
650
+ else {
651
+ addField(profileSheet, "Header Image", "(not downloaded)", scrapeDate);
652
+ }
653
+ addHistoryNote(profileSheet, "(no changes)");
654
+ addSpacer(profileSheet);
655
+ const galleryDirPath = path.join(imgDir, "gallery");
656
+ const galFiles = fs.existsSync(galleryDirPath)
657
+ ? fs.readdirSync(galleryDirPath).filter(f => f.endsWith(".jpg")).slice(0, 8) : [];
658
+ addField(profileSheet, "Gallery", `${galFiles.length} photos`, scrapeDate);
659
+ if (galFiles.length > 0) {
660
+ for (let i = 0; i < galFiles.length; i++) {
661
+ const col = profileSheet.getColumn(2 + i);
662
+ if ((col.width || 0) < 16)
663
+ col.width = 16;
664
+ }
665
+ const galRow = profileSheet.rowCount + 1;
666
+ profileSheet.addRow(["", "", ""]);
667
+ profileSheet.getRow(galRow).height = 100;
668
+ for (let i = 0; i < galFiles.length; i++) {
669
+ const gId = wb.addImage({ filename: path.join(galleryDirPath, galFiles[i]), extension: "jpeg" });
670
+ profileSheet.addImage(gId, { tl: { col: 1 + i, row: galRow - 1 }, ext: { width: 100, height: 100 } });
671
+ }
672
+ }
673
+ addHistoryNote(profileSheet, "(no changes)");
674
+ addSpacer(profileSheet);
675
+ // External Links
676
+ addSection(profileSheet, "EXTERNAL LINKS");
677
+ if (links.length) {
678
+ for (const l of links)
679
+ addField(profileSheet, l.link_name, l.link_url, scrapeDate);
680
+ }
681
+ else {
682
+ addField(profileSheet, "(none captured)", "", "");
683
+ }
684
+ addHistoryNote(profileSheet, "(no changes)");
685
+ addSpacer(profileSheet);
686
+ // Release Info
687
+ addSection(profileSheet, "RELEASE INFO");
688
+ if (latestRel) {
689
+ addField(profileSheet, "Latest Release", `${latestRel.title} / ${latestRel.release_type} / ${latestRel.release_date?.slice(0, 10)}`, scrapeDate);
690
+ }
691
+ addField(profileSheet, "Label", "—", scrapeDate);
692
+ addField(profileSheet, "Singles Count", trackCountTotal, scrapeDate);
693
+ addField(profileSheet, "Albums Count", 0, scrapeDate);
694
+ addHistoryNote(profileSheet, "(no changes)");
695
+ addSpacer(profileSheet);
696
// S4A section — fields 14-21 per SPEC-02_DATA.md (from REAL S4A data)
// Emits one "field + history note + spacer" triple per S4A attribute, in
// spec order. Raw JSON payloads are truncated (100/200 chars) so the cell
// stays readable; truncation is cosmetic only — the dump keeps full data.
addSection(profileSheet, "S4A");
if (s4a) {
    // 14: Account Owner — falls back to the literal "Connected" when only
    // the connection (not the owner identity) was captured.
    const owner = s4a.accountOwner;
    addField(profileSheet, "Account Owner", owner?.name ? `${owner.name} (${owner.email || ""})` : "Connected", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 15: Permissions — raw JSON, truncated to 100 chars.
    const perms = s4a.permissions;
    addField(profileSheet, "Permissions", perms ? JSON.stringify(perms).slice(0, 100) : "(not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 16: Canvas Permissions — raw JSON, truncated to 100 chars.
    const canvas = s4a.canvasPermissions;
    addField(profileSheet, "Canvas Permissions", canvas ? JSON.stringify(canvas).slice(0, 100) : "(not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 17: Campaign Eligibility — human-readable Marquee/Showcase summary;
    // falls back to truncated JSON when neither known key is present.
    const campaign = s4a.campaignEligibility;
    if (campaign) {
        const parts = [];
        if (campaign.marquee)
            parts.push(`Marquee: ${campaign.marquee.eligible ? "YES" : "NO"}${campaign.marquee.meetsThreshold ? " (threshold: YES)" : ""}`);
        if (campaign.showcase)
            parts.push(`Showcase: ${campaign.showcase.eligible ? "YES" : "NO"}`);
        addField(profileSheet, "Campaign Eligibility", parts.join(" / ") || JSON.stringify(campaign).slice(0, 100), scrapeDate);
    }
    else {
        addField(profileSheet, "Campaign Eligibility", "(not captured)", scrapeDate);
    }
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 18: Team Membership — tri-state: Yes / No / not captured (null-ish).
    const team = s4a.teamMembership;
    addField(profileSheet, "Team Membership", team?.isArtistTeamMember != null ? (team.isArtistTeamMember ? "Yes" : "No") : "(not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 19: Upcoming Release — from /music/upcoming page (captured in upcoming field)
    addField(profileSheet, "Upcoming Release", s4a.upcoming ? JSON.stringify(s4a.upcoming).slice(0, 200) : "(none or not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
    addSpacer(profileSheet);
    // 20: Pitch Status — part of upcoming data (no separate capture here).
    addField(profileSheet, "Pitch Status", "(see Upcoming Release)", scrapeDate);
    addSpacer(profileSheet);
    // 21: Rights Access — union of distinct rights_access values across all
    // per-song entries (Set deduplicates).
    const allRights = new Set();
    if (s4a.perSong) {
        for (const tData of Object.values(s4a.perSong)) {
            if (tData.rights_access)
                allRights.add(tData.rights_access);
        }
    }
    addField(profileSheet, "Rights Access", allRights.size > 0 ? Array.from(allRights).join(", ") : "(not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
}
else {
    // S4A not connected — emit a status row plus NULL placeholders for all
    // eight S4A fields so the sheet layout stays stable across runs.
    addField(profileSheet, "Status", hasS4ASession() ? "Session expired — run connect_s4a" : "Not connected — run connect_s4a for full data", "—");
    addSpacer(profileSheet);
    for (const f of ["Account Owner", "Permissions", "Canvas Permissions",
        "Campaign Eligibility", "Team Membership", "Upcoming Release",
        "Pitch Status", "Rights Access"]) {
        addField(profileSheet, f, "NULL", "—");
    }
}
762
// ═══════════════════════════════════════════
// SHEET 2: STREAMS (v2 — per track per day)
// ═══════════════════════════════════════════
// Track rows from the LATEST snapshot only (correlated subquery pins
// scraped_at to MAX for this artist); ordered by play_count so sheet
// columns go from biggest to smallest track.
const latestTracksForStreams = db.prepare(`
    SELECT t.title, t.spotify_id as track_id, t.release_date, t.role, t.release_type, s.play_count
    FROM tracks t
    JOIN song_snapshots s ON s.track_id = t.spotify_id AND s.artist_id = t.artist_id
    WHERE t.artist_id = ? AND s.scraped_at = (SELECT MAX(scraped_at) FROM song_snapshots WHERE artist_id = ?)
    ORDER BY s.play_count DESC
`).all(aid, aid);
const streamTrackNames = latestTracksForStreams.map((t) => t.title);
const s4aFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8F5E9" } };
const dayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
// Build track ID → name mapping for S4A perSong lookup (and the inverse,
// used when walking sheet columns by title).
const trackIdToName = {};
const trackNameToId = {};
for (const t of latestTracksForStreams) {
    trackIdToName[t.track_id] = t.title;
    trackNameToId[t.title] = t.track_id;
}
// Scraped cumulative play counts by date (from DB — only for Streams sheet)
// Shape: streamsByDate[isoDate][trackTitle] = cumulative play_count.
// Later scrapes on the same date overwrite earlier ones (ASC order).
const allSnaps = db.prepare("SELECT date(s.scraped_at, 'unixepoch') as date, s.track_title, s.play_count FROM song_snapshots s WHERE s.artist_id = ? ORDER BY s.scraped_at ASC").all(aid);
const streamsByDate = {};
for (const r of allSnaps) {
    if (!streamsByDate[r.date])
        streamsByDate[r.date] = {};
    streamsByDate[r.date][r.track_title] = r.play_count;
}
// Velocity for estimation (streams only): lifetime average streams/day =
// total play_count / days since release (floored at 1 day).
// NOTE(review): assumes `now` is a unix timestamp in SECONDS (it is
// compared against getTime()/1000) — confirm against where `now` is set.
const vel = {};
for (const t of latestTracksForStreams) {
    if (!t.release_date)
        continue;
    const days = Math.max(1, Math.floor((now - new Date(t.release_date).getTime() / 1000) / 86400));
    vel[t.title] = t.play_count / days;
}
// Earliest release date (ISO yyyy-mm-dd) across tracks — anchors the
// pre-S4A estimation window in the metric-sheet loop below.
const sReleaseDates = latestTracksForStreams.map((t) => t.release_date?.slice(0, 10)).filter(Boolean).sort();
const sEarliestRelease = sReleaseDates[0];
// ── Helper: generate a metric sheet (365d daily + S4A_PREV + YoY + summary) ──
// One worksheet per entry; `key` indexes into s4a.perSong[tid][key],
// `hasEst` gates the pre-S4A EST rows (only Streams has a cumulative
// counter to disaggregate).
const metricSheets = [
    { name: "Streams", key: "streams", color: "FFff6b6b", numFormat: "#,##0", hasEst: true },
    { name: "Listeners", key: "listeners", color: "FF42A5F5", numFormat: "#,##0", hasEst: false },
    { name: "Saves", key: "saves", color: "FF66BB6A", numFormat: "#,##0", hasEst: false },
    { name: "Playlist Adds", key: "playlist_adds", color: "FFFFCA28", numFormat: "#,##0", hasEst: false },
    { name: "Streams per Listener", key: "streams_per_listener", color: "FFAB47BC", numFormat: "#,##0.00", hasEst: false },
];
808
// One worksheet per metric. Row sources, in append order:
//   EST      — disaggregated pre-S4A estimate (Streams only)
//   S4A_PREV — disaggregated previous-12m S4A aggregate
//   S4A      — exact 365d daily timeseries from S4A
//   SCR      — scraped cumulative play counts (Streams only)
//   PROJ     — 180d forward projection
// followed by YoY / velocity / share / cumulative summary rows.
for (const metric of metricSheets) {
    const ws = wb.addWorksheet(metric.name, { properties: { tabColor: { argb: metric.color } } });
    ws.columns = [
        { header: "Date", width: 12 }, { header: "Day", width: 5 }, { header: "Source", width: 8 },
        ...streamTrackNames.map((n) => ({ header: n, width: 13 })),
        { header: "TOTAL", width: 14 },
    ];
    applyHdr(ws, hdrDark);
    // Metadata rows: release date and role per track column.
    const relRow = ws.addRow(["Release", "", "", ...streamTrackNames.map((n) => {
        const t = latestTracksForStreams.find((x) => x.title === n);
        return t?.release_date?.slice(0, 10) || "";
    }), ""]);
    relRow.eachCell(c => { Object.assign(c, { style: metaStyle }); });
    const roleRow = ws.addRow(["Role", "", "", ...streamTrackNames.map((n) => {
        const t = latestTracksForStreams.find((x) => x.title === n);
        return t?.role || "";
    }), ""]);
    roleRow.eachCell(c => { Object.assign(c, { style: metaStyle }); });
    ws.addRow([""]);
    // Extract S4A 365d daily timeseries for this metric.
    // Shape: s4aMetricDaily[isoDate][trackName] = daily value.
    // streams_per_listener is a float ratio; everything else is an int.
    const s4aMetricDaily = {};
    if (s4a?.perSong) {
        for (const [tid, tData] of Object.entries(s4a.perSong)) {
            const name = trackIdToName[tid] || tData.metadata?.name;
            if (!name)
                continue;
            const ts = tData[metric.key]?.current_period_timeseries;
            if (!ts || !Array.isArray(ts))
                continue;
            for (const pt of ts) {
                if (!pt.x)
                    continue;
                if (!s4aMetricDaily[pt.x])
                    s4aMetricDaily[pt.x] = {};
                s4aMetricDaily[pt.x][name] = metric.key === "streams_per_listener"
                    ? parseFloat(pt.y) || 0
                    : parseInt(pt.y) || 0;
            }
        }
    }
    const s4aDates = Object.keys(s4aMetricDaily).sort();
    // 1+2. EST + S4A_PREV — constrained disaggregation from release to S4A start
    // Uses constrainedDisaggregate() for smooth, sum-preserving daily estimates.
    const s4aPrevFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFFFF3E0" } };
    if (s4aDates.length > 0) {
        const s4aStartDate = new Date(s4aDates[0]);
        const s4aPrevBoundary = new Date(s4aStartDate.getTime() - 365 * 86400000);
        // UTC! new Date(y,m,1) is LOCAL timezone — April 1 CET = March 31 UTC,
        // causing off-by-one in day indices vs ISO string comparisons.
        const _earliest = new Date(sEarliestRelease || s4aDates[0]);
        const preS4aStartDate = new Date(Date.UTC(_earliest.getUTCFullYear(), _earliest.getUTCMonth(), 1));
        const totalPreS4aDays = Math.max(0, Math.floor((s4aStartDate.getTime() - preS4aStartDate.getTime()) / 86400000));
        if (totalPreS4aDays > 0 && sEarliestRelease) {
            // Day index where the S4A previous-period (last 365d before S4A
            // start) begins; days before it are EST, days at/after S4A_PREV.
            const boundaryDayIdx = Math.floor((s4aPrevBoundary.getTime() - preS4aStartDate.getTime()) / 86400000);
            // Per-track disaggregated daily values
            const perTrackDaily = {};
            for (const t of latestTracksForStreams) {
                if (!t.release_date)
                    continue;
                const tid = trackNameToId[t.title];
                const prevAgg = Number(tid && s4a?.perSong && s4a.perSong[tid]?.[metric.key]?.previous_period_agg) || 0;
                const curAgg = Number(tid && s4a?.perSong && s4a.perSong[tid]?.[metric.key]?.current_period_agg) || 0;
                // estTotal = streams that happened BEFORE the 24m covered by
                // cur+prev aggregates (only derivable for streams, from the
                // scraped cumulative counter).
                let estTotal;
                if (metric.key === "streams") {
                    estTotal = Math.max(0, t.play_count - curAgg - prevAgg);
                }
                else {
                    estTotal = 0;
                }
                const firstS4aVal = Number(s4aMetricDaily[s4aDates[0]]?.[t.title]) || 0;
                // Compute tailAvg from last 14 S4A data points for this track
                const last14 = s4aDates.slice(-14);
                let tailSum = 0, tailCount = 0;
                for (const d of last14) {
                    const v = Number(s4aMetricDaily[d]?.[t.title]) || 0;
                    if (v > 0) {
                        tailSum += v;
                        tailCount++;
                    }
                }
                const tailAvg = tailCount > 0 ? tailSum / tailCount : (prevAgg / Math.max(1, totalPreS4aDays - boundaryDayIdx));
                // streams_per_listener: prev_agg is cumulative period ratio (total streams / unique listeners)
                // NOT comparable to daily ratio — skip EST/S4A_PREV timeline entirely.
                // The prev_agg is shown as summary info row in the summary section below.
                if (metric.key === "streams_per_listener") {
                    continue;
                }
                // Per-metric spike params (release-day spike amplitude and
                // exponential decay rate fed to constrainedDisaggregate).
                let spikeA, spikeLambda;
                switch (metric.key) {
                    case "streams":
                        spikeA = tailAvg * 3;
                        spikeLambda = 0.06;
                        break;
                    case "listeners":
                        spikeA = tailAvg * 2.5;
                        spikeLambda = 0.08;
                        break;
                    case "saves":
                        spikeA = tailAvg * 5;
                        spikeLambda = 0.12;
                        break;
                    case "playlist_adds":
                        spikeA = tailAvg * 8;
                        spikeLambda = 0.50;
                        break;
                    default:
                        spikeA = tailAvg * 3;
                        spikeLambda = 0.06;
                }
                const releaseDayIdx = Math.floor((new Date(t.release_date).getTime() - preS4aStartDate.getTime()) / 86400000);
                // Listeners: prev_agg = UNIQUE listeners (not sum of daily values).
                // Daily listener values overlap (same person on multiple days), so
                // sum(daily) >> unique_count. Compute overlap ratio from S4A current
                // period and adjust prevAgg to represent listener-DAYS for disagg.
                let disaggPrevAgg = prevAgg;
                if (metric.key === "listeners" && prevAgg > 0 && curAgg > 0) {
                    let dailyListenerSum = 0;
                    for (const d of s4aDates) {
                        dailyListenerSum += Number(s4aMetricDaily[d]?.[t.title]) || 0;
                    }
                    const overlapRatio = dailyListenerSum / curAgg;
                    if (overlapRatio > 1) {
                        disaggPrevAgg = Math.round(prevAgg * overlapRatio);
                    }
                }
                const result = constrainedDisaggregate({
                    totalDays: totalPreS4aDays,
                    boundaryIdx: boundaryDayIdx,
                    estTotal,
                    prevAgg: disaggPrevAgg,
                    firstS4aVal,
                    releaseDayIdx,
                    spikeA,
                    spikeLambda,
                    baseline: tailAvg,
                });
                // Round with anchor: Largest Remainder (Hamilton) with pinned endpoint.
                // Pass explicit target (estTotal + disaggPrevAgg) so rounding hits the
                // KNOWN total, not the float sum which may differ due to blend absorption.
                const anchorIdx = totalPreS4aDays - 1;
                const intTarget = Math.round(estTotal + disaggPrevAgg);
                const rounded = roundWithAnchor(result.daily, anchorIdx, Math.round(firstS4aVal), intTarget);
                // Zero out days before release (shape already has zeros, but safety net)
                const releaseStr = t.release_date.slice(0, 10);
                for (let day = 0; day < totalPreS4aDays; day++) {
                    const dd = new Date(preS4aStartDate.getTime() + day * 86400000);
                    if (dd.toISOString().slice(0, 10) < releaseStr)
                        rounded[day] = 0;
                }
                perTrackDaily[t.title] = rounded;
            }
            // Write daily rows — EST and S4A_PREV with appropriate colors
            for (let day = 0; day < totalPreS4aDays; day++) {
                const dd = new Date(preS4aStartDate.getTime() + day * 86400000);
                const d = dd.toISOString().slice(0, 10);
                const dow = dayNames[dd.getDay()];
                const isS4aPrev = day >= boundaryDayIdx;
                // Metrics without a cumulative counter get no EST rows.
                if (!isS4aPrev && !metric.hasEst)
                    continue;
                const source = isS4aPrev ? "S4A_PREV" : "EST";
                const fill = isS4aPrev ? s4aPrevFill : estFill;
                const values = [];
                for (const t of latestTracksForStreams) {
                    const daily = perTrackDaily[t.title];
                    if (!daily || !t.release_date || d < t.release_date.slice(0, 10)) {
                        values.push(null);
                        continue;
                    }
                    const v = daily[day];
                    // Pre-release: null (track didn't exist). Post-release: 0 is a real value.
                    if (metric.key === "streams_per_listener") {
                        values.push(v > 0 ? Math.round(v * 100) / 100 : (v === 0 ? 0 : null));
                    }
                    else {
                        values.push(v != null ? v : null);
                    }
                }
                const total = values.reduce((s, v) => s + (v || 0), 0);
                const r = ws.addRow([d, dow, source, ...values, total]);
                r.eachCell(c => { if (typeof c.value === "number") {
                    c.numFmt = metric.numFormat;
                    c.fill = fill;
                } });
            }
        }
    }
    // 3. S4A rows — 365d daily (exact values straight from the timeseries).
    for (const d of s4aDates) {
        const dow = dayNames[new Date(d).getDay()];
        const values = [];
        let total = 0;
        for (const t of latestTracksForStreams) {
            const v = s4aMetricDaily[d]?.[t.title];
            if (v != null) {
                values.push(v);
                total += v;
            }
            else {
                values.push(null);
            }
        }
        // Summing per-track ratios is meaningless — leave TOTAL blank for
        // streams_per_listener.
        const row = ws.addRow([d, dow, "S4A", ...values, metric.key === "streams_per_listener" ? "" : total]);
        // NOTE: only numFmt is guarded by the if; the fill intentionally
        // applies to EVERY cell in the row (green S4A background).
        row.eachCell(c => { if (typeof c.value === "number")
            c.numFmt = metric.numFormat; c.fill = s4aFill; });
    }
    // 4. SCR rows (only Streams — cumulative play counts from GraphQL)
    // Offset rule: SCR date = scrape date - 1 day (S4A data ends yesterday)
    if (metric.key === "streams") {
        // NOTE(review): loop variable shadows the outer `scrapeDate` used by
        // the Profile sheet above — intentional-looking but worth renaming.
        for (const [scrapeDate, tData] of Object.entries(streamsByDate).sort()) {
            const offsetDate = new Date(new Date(scrapeDate).getTime() - 86400000).toISOString().slice(0, 10);
            const dow = dayNames[new Date(offsetDate).getDay()];
            const values = streamTrackNames.map((n) => tData[n] ?? null);
            const total = values.reduce((s, v) => s + (v || 0), 0);
            const row = ws.addRow([offsetDate, dow, "SCR", ...values, total]);
            row.eachCell(c => { if (typeof c.value === "number")
                c.numFmt = metric.numFormat; });
        }
    }
    // 5. PROJ rows (6 months forward projection)
    // Uses YoY% (period_change_pct) as long-term trend + seasonal patterns from 365d data.
    // - YoY% = verified 12m growth rate, immune to short-term dips/spikes
    // - Weekday pattern = Mon-Sun factors from 365d average (e.g. Fri release day spike)
    // - Monthly pattern = Jan-Dec factors from 365d average (summer vs winter)
    // Track age < 60d: decay curve from initial spike (YoY not meaningful)
    // Streams/Listener: no projection (ratio, not estimable)
    if (s4aDates.length > 0 && metric.key !== "streams_per_listener") {
        const projStart = new Date(s4aDates[s4aDates.length - 1]);
        projStart.setDate(projStart.getDate() + 1);
        // Extract seasonal patterns from 365d S4A data (across all tracks combined)
        // Weekday factors: average streams per weekday relative to overall mean
        const dowSums = [0, 0, 0, 0, 0, 0, 0]; // Sun-Sat
        const dowCounts = [0, 0, 0, 0, 0, 0, 0];
        // Monthly factors: average streams per month relative to overall mean
        const monthSums = new Array(12).fill(0);
        const monthCounts = new Array(12).fill(0);
        let grandTotal = 0, grandCount = 0;
        for (const d of s4aDates) {
            // Noon UTC avoids DST/day-boundary flips when reading UTC fields.
            const dt = new Date(d + "T12:00:00Z");
            const dow = dt.getUTCDay();
            const month = dt.getUTCMonth();
            let daySum = 0, dayN = 0;
            for (const t of latestTracksForStreams) {
                const v = s4aMetricDaily[d]?.[t.title];
                if (v != null && v > 0) {
                    daySum += v;
                    dayN++;
                }
            }
            if (dayN > 0) {
                dowSums[dow] += daySum;
                dowCounts[dow]++;
                monthSums[month] += daySum;
                monthCounts[month]++;
                grandTotal += daySum;
                grandCount++;
            }
        }
        const grandAvg = grandCount > 0 ? grandTotal / grandCount : 1;
        // Weekday factor: ratio of weekday avg to grand avg (1.0 = average day)
        const dowFactors = dowSums.map((s, i) => dowCounts[i] > 0 ? (s / dowCounts[i]) / grandAvg : 1);
        // Monthly factor: ratio of month avg to grand avg
        const monthFactors = monthSums.map((s, i) => monthCounts[i] > 0 ? (s / monthCounts[i]) / grandAvg : 1);
        const projParams = {};
        for (const t of latestTracksForStreams) {
            if (!t.release_date)
                continue;
            const daysSinceRelease = Math.max(1, Math.floor((now - new Date(t.release_date).getTime() / 1000) / 86400));
            const tid = trackNameToId[t.title];
            const vals = [];
            for (const d of s4aDates) {
                const v = s4aMetricDaily[d]?.[t.title];
                if (v != null)
                    vals.push(v);
            }
            // Need at least a week of data to fit anything.
            if (vals.length < 7)
                continue;
            const lastVal = vals[vals.length - 1];
            if (daysSinceRelease < 60) {
                // Young track: still decaying — use exponential decay from spike
                const last90 = vals.slice(-Math.min(90, vals.length));
                const B = last90.reduce((a, b) => a + b, 0) / last90.length;
                const early = vals.slice(0, 14);
                const earlyAvg = early.reduce((a, b) => a + b, 0) / early.length;
                const A = Math.max(0, earlyAvg - B);
                const lambda = A > 0 ? 0.04 : 0;
                projParams[t.title] = { lastVal, dailyRate: 0, isDecay: true, A, lambda, tOffset: daysSinceRelease, B };
            }
            else {
                // Determine trend source: YoY vs median slope
                let dailyRate = 0;
                const prevAggVal = Number(tid && s4a.perSong[tid]?.[metric.key]?.previous_period_agg) || 0;
                const yoyPct = Number(tid && s4a.perSong[tid]?.[metric.key]?.period_change_pct) || 0;
                // YoY is trustworthy only when: track > 15 months AND prev_period_agg is substantial
                const yoyReliable = daysSinceRelease > 450 && prevAggVal > 10;
                if (yoyReliable) {
                    // Mature track with reliable YoY: cap to [-50%, +200%] annually
                    const cappedYoy = Math.max(-50, Math.min(200, yoyPct));
                    const annualMultiplier = 1 + cappedYoy / 100;
                    dailyRate = annualMultiplier > 0 ? Math.pow(annualMultiplier, 1 / 365) - 1 : 0;
                }
                else {
                    // Track 60d-450d OR unreliable YoY: use median-based slope from last 90d
                    // Median slope is robust to spikes, bot removals, and seasonal noise
                    const window = vals.slice(-Math.min(90, vals.length));
                    if (window.length >= 14) {
                        // Split into two halves, compare medians for robust trend
                        const half = Math.floor(window.length / 2);
                        const firstHalf = [...window.slice(0, half)].sort((a, b) => a - b);
                        const secondHalf = [...window.slice(half)].sort((a, b) => a - b);
                        const medFirst = firstHalf[Math.floor(firstHalf.length / 2)];
                        const medSecond = secondHalf[Math.floor(secondHalf.length / 2)];
                        if (medFirst > 0) {
                            const slopePct = (medSecond - medFirst) / medFirst;
                            // Annualize the half-window slope, then cap
                            const halfDays = window.length / 2;
                            const annualized = Math.pow(1 + slopePct, 365 / halfDays) - 1;
                            const capped = Math.max(-0.5, Math.min(2.0, annualized));
                            dailyRate = Math.pow(1 + capped, 1 / 365) - 1;
                        }
                    }
                }
                projParams[t.title] = { lastVal, dailyRate, isDecay: false, A: 0, lambda: 0, tOffset: 0, B: lastVal };
            }
        }
        // Emit 180 projected daily rows starting the day after the last S4A date.
        for (let day = 0; day < 180; day++) {
            const d = new Date(projStart.getTime() + day * 86400000);
            const dateStr = d.toISOString().slice(0, 10);
            const dow = dayNames[d.getDay()];
            const dowIdx = d.getDay();
            const monthIdx = d.getMonth();
            const values = [];
            let total = 0;
            for (const t of latestTracksForStreams) {
                const p = projParams[t.title];
                if (!p) {
                    values.push(null);
                    continue;
                }
                let proj;
                if (p.isDecay) {
                    // Young track: decay curve continuing
                    proj = p.A * Math.exp(-p.lambda * (p.tOffset + day)) + p.B;
                }
                else {
                    // Mature track: compound growth × seasonal factors
                    const trendVal = p.lastVal * Math.pow(1 + p.dailyRate, day + 1);
                    proj = trendVal * dowFactors[dowIdx] * monthFactors[monthIdx];
                    // Guard rails: floor at 10% of last value, ceiling at 5× last value
                    proj = Math.max(p.lastVal * 0.1, Math.min(p.lastVal * 5, proj));
                }
                const rounded = Math.round(proj);
                values.push(rounded);
                total += rounded;
            }
            const row = ws.addRow([dateStr, dow, "PROJ", ...values, total]);
            row.eachCell(c => {
                if (typeof c.value === "number")
                    c.numFmt = metric.numFormat;
                // Fill applies to every cell in PROJ rows, numbers or not.
                c.fill = projFill;
            });
        }
    }
    // ── Summary section ──
    ws.addRow([""]);
    // YoY % — period_change_pct per track, rounded to one decimal.
    if (s4a?.perSong) {
        const yoyHdr = ws.addRow(["YoY %", "", "", ...streamTrackNames.map(() => ""), ""]);
        yoyHdr.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
        const yoyVals = streamTrackNames.map((n) => {
            const tid = trackNameToId[n];
            const pct = Number(tid && s4a.perSong[tid]?.[metric.key]?.period_change_pct);
            return !isNaN(pct) ? Math.round(pct * 10) / 10 : null;
        });
        ws.addRow(["YoY change", "", "", ...yoyVals, ""]).eachCell(c => { if (typeof c.value === "number")
            c.numFmt = "+0.0\"%\";-0.0\"%\""; });
        // Prev + Cur 12m totals
        const prevVals = streamTrackNames.map((n) => {
            const tid = trackNameToId[n];
            const v = Number(tid && s4a.perSong[tid]?.[metric.key]?.previous_period_agg) || 0;
            return v != null ? (metric.key === "streams_per_listener" ? Math.round(v * 100) / 100 : Math.round(v)) : null;
        });
        const prevLabel = metric.key === "streams_per_listener" ? "Prev 12m avg ratio" : "Prev 12m total";
        ws.addRow([prevLabel, "", "", ...prevVals, ""]).eachCell(c => { if (typeof c.value === "number")
            c.numFmt = metric.numFormat; });
        const curVals = streamTrackNames.map((n) => {
            const tid = trackNameToId[n];
            const v = Number(tid && s4a.perSong[tid]?.[metric.key]?.current_period_agg) || 0;
            return v > 0 ? (metric.key === "streams_per_listener" ? Math.round(v * 100) / 100 : Math.round(v)) : null;
        });
        const curLabel = metric.key === "streams_per_listener" ? "Cur 12m avg ratio" : "Cur 12m total";
        ws.addRow([curLabel, "", "", ...curVals, ""]).eachCell(c => { if (typeof c.value === "number")
            c.numFmt = metric.numFormat; });
    }
    // Velocity + Share % (only Streams)
    if (metric.key === "streams") {
        ws.addRow([""]);
        const vHdr = ws.addRow(["VELOCITY (streams/day)", "", "", ...streamTrackNames.map(() => ""), ""]);
        vHdr.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
        const vVals = streamTrackNames.map((n) => vel[n] ? Math.round(vel[n] * 10) / 10 : null);
        ws.addRow(["Lifetime avg", "", "", ...vVals, ""]).eachCell(c => { if (typeof c.value === "number")
            c.numFmt = "#,##0.0"; });
        ws.addRow([""]);
        const shHdr = ws.addRow(["SHARE %", "", "", ...streamTrackNames.map(() => ""), ""]);
        shHdr.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
        // Share % of the latest cumulative play counts per track.
        const totStr = latestTracksForStreams.reduce((s, t) => s + (t.play_count || 0), 0);
        const shVals = streamTrackNames.map((n) => {
            const t = latestTracksForStreams.find((x) => x.title === n);
            return t?.play_count && totStr ? Math.round((t.play_count / totStr) * 1000) / 10 : null;
        });
        ws.addRow(["Current", "", "", ...shVals, "100%"]).eachCell(c => { if (typeof c.value === "number")
            c.numFmt = "0.0\"%\""; });
        // Cumulative running total (Streams only — daily values summed over time)
        ws.addRow([""]);
        const cumHdr = ws.addRow(["CUMULATIVE TOTAL", "", "", ...streamTrackNames.map(() => ""), ""]);
        cumHdr.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
        // Latest SCR value = current known cumulative total per track
        const latestTotal = {};
        for (const t of latestTracksForStreams)
            latestTotal[t.title] = t.play_count;
        const cumRow = ws.addRow(["Latest (SCR)", "", "", ...streamTrackNames.map((n) => latestTotal[n] || null),
            Object.values(latestTotal).reduce((a, b) => a + b, 0)]);
        cumRow.eachCell(c => { if (typeof c.value === "number")
            c.numFmt = numFmt; });
        // Total stream count from S4A (all-time per track)
        if (s4a?.perSong) {
            const s4aTotals = streamTrackNames.map((n) => {
                const tid = trackNameToId[n];
                const v = tid && s4a.perSong[tid]?.total_stream_count;
                return v ? parseInt(v) : null;
            });
            ws.addRow(["S4A all-time", "", "", ...s4aTotals, ""]).eachCell(c => { if (typeof c.value === "number")
                c.numFmt = numFmt; });
        }
    }
} // end metricSheets loop
1245
// ─────────────────────────────────────────────
// D8: Countries — daily timeline (tracks side by side)
// ─────────────────────────────────────────────
// Merges ALL historical dumps to build complete picture:
// - Timeline countries = UNION of all countries ever in top 3 (across all dumps)
// - 28d snapshot shares = latest available per country
// - Daily values: TIMELINE (exact) for known countries, DISAGG (estimated) for rest
// Columns grow over time as new dumps add countries to timeline or snapshots.
if (s4a?.perSong) {
    const ws8 = wb.addWorksheet("Countries", { properties: { tabColor: { argb: "FF26A69A" } } });
    const dn8 = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
    const disaggFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFF5F5DC" } };
    // Load ALL historical dumps
    const allDumps = loadAllDumps(aid);
    // Build merged data per track across ALL dumps.
    // Per track: timelinePts[cc][date] exact values, snapShares[cc] relative
    // 28d shares for NON-timeline countries, dailyTotals[date] total streams.
    const trackMap = new Map();
    const allDates8 = new Set();
    // Process each dump (oldest first → newest overwrites)
    for (const dump of allDumps) {
        if (!dump.data.perSong)
            continue;
        for (const [tid, d] of Object.entries(dump.data.perSong)) {
            const nm = d.metadata?.name || d.name || tid;
            if (!trackMap.has(tid)) {
                trackMap.set(tid, {
                    name: nm, allCodes: [], timelineCodes: new Set(),
                    timelinePts: {}, snapShares: {}, dailyTotals: {},
                });
            }
            const t = trackMap.get(tid);
            // Merge timeline data (UNION of countries, latest daily values win)
            for (const ct of (d.countryTimelines || [])) {
                const cc = ct.countryCode || "??";
                t.timelineCodes.add(cc); // once in top 3, always a timeline column
                if (!t.timelinePts[cc])
                    t.timelinePts[cc] = {};
                for (const p of (ct.timelinePoint || [])) {
                    t.timelinePts[cc][p.date] = Number(p.num) || 0;
                    allDates8.add(p.date);
                }
            }
            // Merge 28d snapshot (latest wins); falls back to geography rows
            // when perSongCountries is absent.
            let raw = d.perSongCountries || [];
            if (!raw.length && d.geography?.length && d.geography[0]?.localized_country)
                raw = d.geography;
            if (raw.length > 0) {
                const snap = {};
                for (const c of raw)
                    snap[c.name] = Number(c.num) || 0;
                // NOTE(review): tlTotal is computed but never used below.
                const tlTotal = [...t.timelineCodes].reduce((s, cc) => s + (snap[cc] || 0), 0);
                const nonTlTotal = Object.entries(snap).filter(([cc]) => !t.timelineCodes.has(cc)).reduce((s, [, v]) => s + v, 0);
                // Shares are relative to the NON-timeline total only — they
                // later split the daily remainder after timeline countries.
                for (const [cc, v] of Object.entries(snap)) {
                    if (!t.timelineCodes.has(cc) && nonTlTotal > 0)
                        t.snapShares[cc] = v / nonTlTotal;
                }
            }
            // Merge daily totals (latest timeseries wins)
            const streamTs = d.streams?.current_period_timeseries;
            if (Array.isArray(streamTs)) {
                for (const pt of streamTs) {
                    if (pt.x)
                        t.dailyTotals[pt.x] = Number(pt.y) || 0;
                }
            }
        }
    }
    // Build allCodes per track: timeline countries first, then snapshot countries, sorted by total desc
    const tracks8 = [];
    for (const [, t] of [...trackMap.entries()].sort((a, b) => {
        const totA = Object.values(a[1].dailyTotals).reduce((s, v) => s + v, 0);
        const totB = Object.values(b[1].dailyTotals).reduce((s, v) => s + v, 0);
        return totB - totA;
    })) {
        // Timeline codes sorted by total timeline streams desc
        const tlSorted = [...t.timelineCodes].sort((a, b) => {
            const sa = Object.values(t.timelinePts[a] || {}).reduce((s, v) => s + v, 0);
            const sb = Object.values(t.timelinePts[b] || {}).reduce((s, v) => s + v, 0);
            return sb - sa;
        });
        // Non-timeline codes sorted by share desc
        const nonTlSorted = Object.entries(t.snapShares)
            .filter(([cc]) => !t.timelineCodes.has(cc))
            .sort((a, b) => b[1] - a[1])
            .map(([cc]) => cc);
        t.allCodes = [...tlSorted, ...nonTlSorted];
        if (t.allCodes.length > 0)
            tracks8.push(t);
    }
    const dates8 = [...allDates8].sort();
    // Row 1: track names (one label cell per track block, rest blank)
    const r1 = ["", ""];
    for (const t of tracks8) {
        r1.push(t.name);
        for (let i = 1; i < t.allCodes.length; i++)
            r1.push("");
        r1.push("");
    }
    const row1 = ws8.addRow(r1);
    let ci8 = 3;
    for (const t of tracks8) {
        row1.getCell(ci8).font = { bold: true, size: 11 };
        ci8 += t.allCodes.length + 1; // +1 for the TOTAL column per track
    }
    // Row 2: country codes + TOTAL
    const r2 = ["Date", "Day"];
    for (const t of tracks8)
        r2.push(...t.allCodes, "TOTAL");
    ws8.addRow(r2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
    // Row 3: source indicator (S4A = timeline exact, DISAGG = estimated)
    const r3 = ["Source", ""];
    for (const t of tracks8) {
        r3.push(...t.allCodes.map(cc => t.timelineCodes.has(cc) ? "S4A" : "DISAGG"), "");
    }
    ws8.addRow(r3).eachCell(c => { c.font = { size: 8, italic: true, color: { argb: "FF999999" } }; });
    // 365d daily rows: exact timeline values, then the leftover of the daily
    // total split over non-timeline countries by their 28d shares.
    for (const date of dates8) {
        const d = new Date(date);
        const row = [date, dn8[d.getUTCDay()]];
        for (const t of tracks8) {
            const timelineSum = [...t.timelineCodes].reduce((s, cc) => s + (t.timelinePts[cc]?.[date] || 0), 0);
            const dailyTotal = t.dailyTotals[date] || 0;
            const remainder = Math.max(0, dailyTotal - timelineSum);
            const vals = [];
            for (const cc of t.allCodes) {
                if (t.timelineCodes.has(cc)) {
                    vals.push(t.timelinePts[cc]?.[date] || 0);
                }
                else {
                    vals.push(Math.round(remainder * (t.snapShares[cc] || 0)));
                }
            }
            row.push(...vals, vals.reduce((a, b) => a + b, 0));
        }
        const xlRow = ws8.addRow(row);
        // Walk column offsets per track block to tint only DISAGG columns.
        xlRow.eachCell((c, colNumber) => {
            if (typeof c.value === "number") {
                c.numFmt = numFmt;
                if (colNumber > 2) {
                    let off = colNumber - 2;
                    for (const t of tracks8) {
                        if (off <= t.allCodes.length) {
                            const cc = t.allCodes[off - 1];
                            if (cc && !t.timelineCodes.has(cc))
                                c.fill = disaggFill;
                            break;
                        }
                        off -= t.allCodes.length + 1;
                    }
                }
            }
        });
    }
    // Summary: 12m totals per country column (exact sums for timeline
    // columns, re-derived disaggregated sums for the rest).
    const totR = ["TOTAL (12m)", ""];
    for (const t of tracks8) {
        const sums = t.allCodes.map(cc => {
            if (t.timelineCodes.has(cc))
                return Object.values(t.timelinePts[cc] || {}).reduce((a, b) => a + b, 0);
            let s = 0;
            for (const date of dates8) {
                const tlSum = [...t.timelineCodes].reduce((s2, cc2) => s2 + (t.timelinePts[cc2]?.[date] || 0), 0);
                s += Math.round(Math.max(0, (t.dailyTotals[date] || 0) - tlSum) * (t.snapShares[cc] || 0));
            }
            return s;
        });
        totR.push(...sums, sums.reduce((a, b) => a + b, 0));
    }
    ws8.addRow(totR).eachCell(c => { c.font = { bold: true }; if (typeof c.value === "number")
        c.numFmt = numFmt; });
    // Column widths + freeze panes (first two columns, three header rows).
    ws8.getColumn(1).width = 12;
    ws8.getColumn(2).width = 5;
    const tc8 = 2 + tracks8.reduce((s, t) => s + t.allCodes.length + 1, 0);
    for (let i = 3; i <= tc8; i++)
        ws8.getColumn(i).width = 10;
    ws8.views = [{ state: "frozen", xSplit: 2, ySplit: 3, topLeftCell: "C4", activeCell: "C4" }];
}
1421
+ // ─────────────────────────────────────────────
1422
+ // D8b: Country Snapshots (accumulated across ALL scrapes)
1423
+ // ─────────────────────────────────────────────
1424
+ // Each scrape date = one block of rows. Grows with every new dump.
1425
+ // Layout: Scrape Date | Country | Track1 streams | Track2 streams | ...
1426
+ // This is the append-only log that feeds delta computation.
1427
+ {
1428
+ const wsSn = wb.addWorksheet("Country Snapshots", { properties: { tabColor: { argb: "FF80CBC4" } } });
1429
+ const allDumps = loadAllDumps(aid);
1430
+ // Collect track names (sorted by total streams in latest dump)
1431
+ const trackOrder = [];
1432
+ if (s4a?.perSong) {
1433
+ for (const [tid, d] of Object.entries(s4a.perSong)
1434
+ .sort((a, b) => Number(b[1].total_stream_count || 0) - Number(a[1].total_stream_count || 0))) {
1435
+ trackOrder.push({ tid, name: d.metadata?.name || d.name || tid });
1436
+ }
1437
+ }
1438
+ // Header
1439
+ const h1 = ["Scrape Date", "Country"];
1440
+ for (const t of trackOrder)
1441
+ h1.push(t.name);
1442
+ wsSn.addRow(h1);
1443
+ for (let i = 3; i <= h1.length; i++)
1444
+ wsSn.getRow(1).getCell(i).font = { bold: true, size: 10 };
1445
+ const h2 = ["", ""];
1446
+ for (const t of trackOrder)
1447
+ h2.push("28d Streams");
1448
+ wsSn.addRow(h2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
1449
+ // One block per dump date
1450
+ for (const dump of allDumps) {
1451
+ if (!dump.data.perSong)
1452
+ continue;
1453
+ // Collect all countries from this dump
1454
+ const countriesInDump = new Set();
1455
+ const snapByTrack = {};
1456
+ for (const [tid, d] of Object.entries(dump.data.perSong)) {
1457
+ let raw = d.perSongCountries || [];
1458
+ if (!raw.length && d.geography?.length && d.geography[0]?.localized_country)
1459
+ raw = d.geography;
1460
+ snapByTrack[tid] = {};
1461
+ for (const c of raw) {
1462
+ const code = c.name;
1463
+ countriesInDump.add(code);
1464
+ snapByTrack[tid][code] = Number(c.num) || 0;
1465
+ }
1466
+ }
1467
+ const countries = [...countriesInDump].sort();
1468
+ for (const code of countries) {
1469
+ const row = [dump.date, code];
1470
+ for (const t of trackOrder) {
1471
+ row.push(snapByTrack[t.tid]?.[code] ?? null);
1472
+ }
1473
+ wsSn.addRow(row).eachCell(c => { if (typeof c.value === "number")
1474
+ c.numFmt = numFmt; });
1475
+ }
1476
+ // Separator between dump dates
1477
+ wsSn.addRow([]);
1478
+ }
1479
+ wsSn.getColumn(1).width = 12;
1480
+ wsSn.getColumn(2).width = 8;
1481
+ for (let i = 3; i <= 2 + trackOrder.length; i++)
1482
+ wsSn.getColumn(i).width = 10;
1483
+ wsSn.views = [{ state: "frozen", xSplit: 2, ySplit: 2, topLeftCell: "C3", activeCell: "C3" }];
1484
+ }
1485
+ // ─────────────────────────────────────────────
1486
+ // D9: Cities Snapshots (28d per scrape, per-song)
1487
+ // ─────────────────────────────────────────────
1488
+ // Same concept as Country Snapshots but for cities.
1489
+ if (s4a?.perSong) {
1490
+ const wsCi = wb.addWorksheet("City Snapshots", { properties: { tabColor: { argb: "FF7986CB" } } });
1491
+ const sortedTracks = Object.entries(s4a.perSong)
1492
+ .sort((a, b) => Number(b[1].total_stream_count || 0) - Number(a[1].total_stream_count || 0));
1493
+ // Collect all unique cities
1494
+ const allCities = new Map();
1495
+ const trackCitySnaps = [];
1496
+ for (const [, d] of sortedTracks) {
1497
+ const nm = d.metadata?.name || d.name || "?";
1498
+ let raw = d.perSongCities || [];
1499
+ if (!raw.length && d.geography?.length && d.geography[0]?.region)
1500
+ raw = d.geography;
1501
+ const map = {};
1502
+ for (const x of raw) {
1503
+ const key = `${x.name}|${x.country || ""}`;
1504
+ allCities.set(key, { country: x.country || "", region: x.region || "" });
1505
+ map[key] = Number(x.num) || 0;
1506
+ }
1507
+ trackCitySnaps.push({ name: nm, data: map });
1508
+ }
1509
+ // Sort cities by total streams across all tracks
1510
+ const cityTotals = {};
1511
+ for (const t of trackCitySnaps)
1512
+ for (const [k, v] of Object.entries(t.data))
1513
+ cityTotals[k] = (cityTotals[k] || 0) + v;
1514
+ const citiesSorted = Object.entries(cityTotals).sort((a, b) => b[1] - a[1]).map(([k]) => k);
1515
+ // Header 1: track names
1516
+ const h1 = ["City", "Country"];
1517
+ for (const t of trackCitySnaps)
1518
+ h1.push(t.name);
1519
+ wsCi.addRow(h1);
1520
+ for (let i = 3; i <= h1.length; i++)
1521
+ wsCi.getRow(1).getCell(i).font = { bold: true, size: 10 };
1522
+ // Header 2
1523
+ const h2 = ["", ""];
1524
+ for (const t of trackCitySnaps)
1525
+ h2.push("Streams");
1526
+ wsCi.addRow(h2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
1527
+ // One row per city
1528
+ for (const key of citiesSorted) {
1529
+ const [cityName] = key.split("|");
1530
+ const meta = allCities.get(key);
1531
+ const row = [cityName, meta.country];
1532
+ for (const t of trackCitySnaps)
1533
+ row.push(t.data[key] || null);
1534
+ wsCi.addRow(row).eachCell(c => { if (typeof c.value === "number")
1535
+ c.numFmt = numFmt; });
1536
+ }
1537
+ wsCi.getColumn(1).width = 20;
1538
+ wsCi.getColumn(2).width = 6;
1539
+ for (let i = 3; i <= 2 + trackCitySnaps.length; i++)
1540
+ wsCi.getColumn(i).width = 10;
1541
+ wsCi.views = [{ state: "frozen", xSplit: 2, ySplit: 2, topLeftCell: "C3", activeCell: "C3" }];
1542
+ }
1543
+ await wb.xlsx.writeFile(xlsxPath);
1544
+ db.close();
1545
+ }
1546
// ─── S4A Authentication ───
// Directory holding the persisted S4A login session (cookies.json + sp_dc.txt),
// written by connectS4A below and read by getSpDc / scrapeS4ADirect / scrapeS4A.
// NOTE(review): the "~" fallback is a literal path segment (path.join does not
// shell-expand it); on systems without $HOME this resolves relative to the CWD
// — confirm that is intended.
const S4A_SESSION_DIR = path.join(process.env.HOME || "~", ".artist-os", ".s4a-session");
/**
 * Interactive S4A login flow.
 *
 * Relaunches the system Chrome with a dedicated debug profile and a remote
 * debugging port, opens the Spotify for Artists home page, waits (up to 5
 * minutes) for the user to complete login (including SMS challenge), then
 * persists the session cookies — notably sp_dc — into S4A_SESSION_DIR for the
 * headless scrapers.
 *
 * @param {string} artistId - Spotify artist id; used to open the S4A home page.
 * @returns {Promise<{success: boolean, name?: string, error?: string}>}
 *          success:true with `name` (currently the artistId), or success:false
 *          with an `error` message on login timeout.
 */
export async function connectS4A(artistId) {
    const chromePath = process.env.CHROME_PATH || findChrome();
    const CDP_PORT = 9222;
    const CHROME_DEBUG_DIR = path.join(process.env.HOME || "~", ".artist-os", ".chrome-debug");
    const systemChromeProfile = path.join(process.env.HOME || "~", "Library", "Application Support", "Google", "Chrome");
    fs.mkdirSync(S4A_SESSION_DIR, { recursive: true });
    // BUGFIX: this file is an ES module (it uses top-level `import`), so the
    // CommonJS `require()` used here previously is not defined at runtime and
    // threw ReferenceError. A dynamic import inside this async function brings
    // execSync/spawn into scope without touching the top-of-file imports.
    const { execSync, spawn } = await import("child_process");
    // Step 1: Copy Chrome profile (one-time) — has saved passwords, Google account, autofill
    if (!fs.existsSync(path.join(CHROME_DEBUG_DIR, "Default"))) {
        fs.mkdirSync(CHROME_DEBUG_DIR, { recursive: true });
        execSync(`cp -r "${path.join(systemChromeProfile, "Default")}" "${CHROME_DEBUG_DIR}/Default"`, { stdio: "ignore" });
        try {
            execSync(`cp "${path.join(systemChromeProfile, "Local State")}" "${CHROME_DEBUG_DIR}/"`, { stdio: "ignore" });
        }
        catch { } // "Local State" copy is best-effort
    }
    // Step 2: Clean crash state in debug profile so Chrome doesn't open with a
    // "restore pages?" prompt that would block the login page.
    const defaultDir = path.join(CHROME_DEBUG_DIR, "Default");
    for (const f of ["Last Session", "Last Tabs", "Current Session", "Current Tabs"]) {
        try {
            fs.unlinkSync(path.join(defaultDir, f));
        }
        catch { } // file may simply not exist
    }
    try {
        const prefsPath = path.join(defaultDir, "Preferences");
        const p = JSON.parse(fs.readFileSync(prefsPath, "utf-8"));
        if (!p.profile)
            p.profile = {};
        p.profile.exit_type = "Normal";
        p.profile.exited_cleanly = true;
        fs.writeFileSync(prefsPath, JSON.stringify(p));
    }
    catch { } // missing/corrupt Preferences is non-fatal
    // Step 3: Kill Chrome, relaunch with debug profile + debugging port
    try {
        execSync("pkill -f 'Google Chrome'", { stdio: "ignore" });
    }
    catch { } // pkill exits non-zero when nothing matched
    await new Promise(r => setTimeout(r, 2000));
    const chromeProc = spawn(chromePath, [
        `--user-data-dir=${CHROME_DEBUG_DIR}`,
        `--remote-debugging-port=${CDP_PORT}`,
        "--no-first-run",
        `https://artists.spotify.com/c/artist/${artistId}/home`,
    ], { detached: true, stdio: "ignore" });
    chromeProc.unref(); // Chrome must outlive this Node process
    await new Promise(r => setTimeout(r, 5000));
    // Step 4: Connect via CDP
    const browser = await chromium.connectOverCDP(`http://localhost:${CDP_PORT}`);
    const context = browser.contexts()[0];
    // Step 5: Wait for S4A dashboard (max 5 min — user may need to enter SMS code)
    const deadline = Date.now() + 5 * 60 * 1000;
    let loggedIn = false;
    while (Date.now() < deadline) {
        const pages = context.pages();
        // Logged in = a tab sitting on the S4A /home page that is NOT one of
        // the auth/challenge hosts.
        const s4aPage = pages.find(p => {
            const u = p.url();
            return u.includes("artists.spotify.com") && u.includes("/home") &&
                !u.includes("accounts.spotify.com") && !u.includes("challenge.spotify.com");
        });
        if (s4aPage) {
            loggedIn = true;
            break;
        }
        await new Promise(r => setTimeout(r, 2000));
    }
    if (!loggedIn) {
        await browser.close();
        return { success: false, error: "Login timed out after 5 minutes. Run connect_s4a again." };
    }
    // Step 6: Extract and save cookies (sp_dc is the long-lived auth cookie)
    let name = artistId;
    try {
        const cookies = await context.cookies("https://artists.spotify.com");
        fs.writeFileSync(path.join(S4A_SESSION_DIR, "cookies.json"), JSON.stringify(cookies, null, 2));
        const spDc = cookies.find((c) => c.name === "sp_dc");
        if (spDc)
            fs.writeFileSync(path.join(S4A_SESSION_DIR, "sp_dc.txt"), spDc.value);
    }
    catch { } // cookie persistence is best-effort; login itself succeeded
    // Step 7: Close only S4A tabs, Chrome stays open for the user
    try {
        const s4aPages = context.pages().filter(p => p.url().includes("artists.spotify.com"));
        for (const p of s4aPages)
            await p.close();
    }
    catch { }
    await browser.close(); // Disconnect CDP — Chrome stays running
    return { success: true, name };
}
/** True when a saved sp_dc cookie exists, i.e. connectS4A completed at least once. */
export function hasS4ASession() {
    const spDcFile = path.join(S4A_SESSION_DIR, "sp_dc.txt");
    return fs.existsSync(spDcFile);
}
/**
 * Read the saved sp_dc cookie value from the session directory.
 * Returns null when the session file is missing or unreadable.
 */
function getSpDc() {
    const spDcFile = path.join(S4A_SESSION_DIR, "sp_dc.txt");
    try {
        const raw = fs.readFileSync(spDcFile, "utf-8");
        return raw.trim();
    }
    catch {
        return null;
    }
}
// Get S4A bearer token from sp_dc cookie (valid ~1 hour).
// Exchanges the long-lived sp_dc cookie for a short-lived bearer token via the
// creator-auth proxy; throws on any non-OK HTTP status.
async function getS4ABearerToken(spDc) {
    const headers = {
        "Cookie": `sp_dc=${spDc}`,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    };
    const resp = await fetch("https://generic.wg.spotify.com/creator-auth-proxy/v1/web/token", { headers });
    if (!resp.ok)
        throw new Error(`S4A token failed: ${resp.status}`);
    const body = await resp.json();
    // Response casing has varied between accessToken and access_token.
    return body.accessToken || body.access_token;
}
// S4A API call helper — GET an endpoint under generic.wg.spotify.com with a
// bearer token. Resolves to the parsed JSON body, or null on any non-OK status.
async function s4aApi(token, endpoint) {
    const url = `https://generic.wg.spotify.com/${endpoint}`;
    const resp = await fetch(url, {
        headers: {
            "Authorization": `Bearer ${token}`,
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept": "application/json",
        },
    });
    return resp.ok ? resp.json() : null;
}
// Scrape S4A via headless browser (Spotify blocks direct API calls via TLS fingerprinting)
/**
 * Headless S4A scrape for one artist.
 *
 * Launches a headless Chrome with the saved session cookies injected, walks
 * the artist-level dashboard pages and then every per-song tab (stats /
 * source-of-stream / location / demographics / playlists), capturing the JSON
 * API responses fired by those pages. Captured responses are folded into a
 * single `data` object, dumped to ~/.artist-os/s4a-dumps/<date>/<aid>_s4a.json.
 *
 * @param {string} artistId - Spotify artist id.
 * @returns {Promise<object|null>} parsed data object, or null when there is no
 *          saved session or nothing was captured.
 */
export async function scrapeS4ADirect(artistId) {
    const spDc = getSpDc();
    if (!spDc)
        return null;
    // Launch headless browser with injected cookies (proven working approach)
    const chromePath = process.env.CHROME_PATH || findChrome();
    const browser = await chromium.launch({ executablePath: chromePath, headless: true });
    const context = await browser.newContext({
        userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    });
    // Inject all saved cookies
    try {
        const savedCookies = JSON.parse(fs.readFileSync(path.join(S4A_SESSION_DIR, "cookies.json"), "utf-8"));
        const validCookies = savedCookies.filter((c) => c.name && c.value && c.domain).map((c) => ({
            name: c.name, value: c.value, domain: c.domain, path: c.path || "/",
            httpOnly: c.httpOnly ?? true, secure: c.secure ?? true, sameSite: "None",
        }));
        await context.addCookies(validCookies);
    }
    catch {
        // Fallback: inject just sp_dc
        await context.addCookies([
            { name: "sp_dc", value: spDc, domain: ".spotify.com", path: "/", httpOnly: true, secure: true, sameSite: "None" },
        ]);
    }
    const data = {};
    const aid = artistId;
    const page = await context.newPage();
    // captured: URL -> parsed JSON body; parseCaptured() re-reads the whole map
    // each time it runs, so later responses for the same URL overwrite earlier ones.
    const captured = {};
    // Intercept API responses
    page.on("response", async (resp) => {
        const url = resp.url();
        if (!url.includes("spotify.com") || resp.status() !== 200)
            return;
        const patterns = ["audience-engagement", "catalog-view", "fanatic-audience", "s4x-insights-api", "song-stats", "entity-comparison", "source-of-stream", "s4x-me", "canvaz-view", "buyerxp-campaign-view", "app-manager", "artist-videos"];
        if (!patterns.some(p => url.includes(p)))
            return;
        try {
            captured[url] = await resp.json();
        }
        catch { } // non-JSON bodies are ignored
    });
    // RULE: these dates are for the URL REQUEST only, not for storage.
    // The real data-point dates come FROM THE RESPONSE (the timeseries `x` field).
    // S4A returns data ending YESTERDAY, not today.
    // NOTE(review): requestToDate / requestFromDate12m are computed but never
    // referenced below — dead code, or a date-range feature was removed. Confirm.
    const requestToDate = new Date().toISOString().slice(0, 10);
    const requestFromDate12m = new Date(Date.now() - 365 * 86400000).toISOString().slice(0, 10);
    // Artist-level sections
    // NOTE: audience/stats, demographic, location — NO custom date params!
    // S4A API uses from_date/to_date (snake_case) internally. Passing fromDate (camelCase)
    // in the PAGE URL causes the internal API call to fail with 400.
    // Let S4A choose its own date range — it defaults to 28d which is what we get.
    const sections = [
        `/c/artist/${aid}/home`,
        `/c/artist/${aid}/audience/stats`,
        `/c/artist/${aid}/audience/demographic`,
        `/c/artist/${aid}/audience/location`,
        `/c/artist/${aid}/audience/segments`,
        `/c/artist/${aid}/music/songs?time-filter=last12months`,
        `/c/artist/${aid}/music/playlists?time-filter=last12months`,
        `/c/artist/${aid}/music/upcoming`,
        `/c/artist/${aid}/music/videos`,
    ];
    for (const section of sections) {
        try {
            await page.goto(`https://artists.spotify.com${section}`, { waitUntil: "networkidle", timeout: 25000 });
            await sleep(2000);
        }
        catch { } // slow sections time out; whatever responses fired are still captured
    }
    // Parse captured responses into data object (helper — called multiple times).
    // Dispatches on URL substrings: artist-level branches first, then per-recording
    // branches keyed by the id extracted via /recording[_/]([a-zA-Z0-9]+)/.
    const parseCaptured = () => {
        data.perSong = data.perSong || {};
        for (const [url, resp] of Object.entries(captured)) {
            // NOTE(review): this first branch matches BOTH artist-level and
            // per-recording gender-by-age URLs, so the per-recording
            // `perSongGenderByAge` branch further down appears unreachable (and
            // a per-song response can clobber data.genderByAge). Same for the
            // `/gender` branch vs `perSongGender`. Confirm and tighten with
            // `!url.includes("recording")` if so.
            if (url.includes("gender-by-age"))
                data.genderByAge = resp;
            else if (url.includes("/gender") && !url.includes("gender-by-age"))
                data.gender = resp;
            else if (url.includes("top-cities") && !url.includes("recording"))
                data.topCities = resp;
            else if (url.includes("/locations") && !url.includes("recording"))
                data.locations = resp;
            else if (url.includes("fanatic-audience-segments"))
                data.segments = resp;
            else if (url.includes("audience-engagement") && url.includes("stats") && !url.includes("recording"))
                data.audienceStats = resp;
            else if (url.includes("catalog-view") && url.includes("songs"))
                data.catalog = resp;
            else if (url.includes("playlists/curated")) {
                if (!data.playlists)
                    data.playlists = {};
                data.playlists.curated = resp;
            }
            else if (url.includes("playlists/listener")) {
                if (!data.playlists)
                    data.playlists = {};
                data.playlists.listener = resp;
            }
            else if (url.includes("playlists/personalized")) {
                if (!data.playlists)
                    data.playlists = {};
                data.playlists.personalized = resp;
            }
            else if (url.includes("artist-videos"))
                data.videos = resp;
            // Profile fields 14-21
            else if (url.includes("s4x-me") && url.includes("/me") && !url.includes("recent"))
                data.accountOwner = resp;
            else if (url.includes("s4x-me") && url.includes("artists/"))
                data.permissions = resp;
            else if (url.includes("canvaz-view") && url.includes("permissions"))
                data.canvasPermissions = resp;
            else if (url.includes("buyerxp-campaign-view") && url.includes("eligibility"))
                data.campaignEligibility = resp;
            else if (url.includes("app-manager") && url.includes("banner-metadata"))
                data.teamMembership = resp;
            // Per-song location data (top-cities and locations per recording)
            else if (url.includes("top-cities") && url.includes("recording")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    data.perSong[m[1]].topCities = resp;
                }
            }
            else if (url.includes("/locations") && url.includes("recording")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    data.perSong[m[1]].topCountries = resp;
                }
            }
            // Per-song demographics (gender, gender-by-age per recording)
            else if (url.includes("gender-by-age") && url.includes("recording")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    data.perSong[m[1]].perSongGenderByAge = resp;
                }
            }
            else if (url.includes("/gender") && !url.includes("gender-by-age") && url.includes("recording")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    data.perSong[m[1]].perSongGender = resp;
                }
            }
            // Per-song source of streams (28d aggregate — editorial/algorithmic/user/other)
            else if (url.includes("source-of-stream") && url.includes("recording")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    data.perSong[m[1]].sourceOfStreams = resp;
                }
            }
            // Per-song country comparison timeseries (365d daily per selected country)
            else if (url.includes("audience-engagement") && url.includes("recording") && url.includes("stats")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    if (!data.perSong[m[1]].countryTimeline)
                        data.perSong[m[1]].countryTimeline = {};
                    // Store keyed by country code if available in response, or accumulate
                    data.perSong[m[1]].countryTimeline._raw = resp;
                }
            }
            else if (url.includes("song-stats")) {
                const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
                if (m) {
                    if (!data.perSong[m[1]])
                        data.perSong[m[1]] = {};
                    const existing = data.perSong[m[1]];
                    // Before merge: protect geography data by saving cities vs countries separately
                    if (resp.geography && Array.isArray(resp.geography) && resp.geography.length > 0) {
                        const first = resp.geography[0];
                        if (first.region) {
                            // Has region = cities data (e.g. Prague, CZ, region=10)
                            existing.perSongCities = resp.geography;
                        }
                        else if (first.localized_country) {
                            // Has localized_country = countries data (e.g. CZ, Czech Republic)
                            existing.perSongCountries = resp.geography;
                        }
                    }
                    // Protect countryTimelines from being overwritten
                    if (resp.countryTimelines && Array.isArray(resp.countryTimelines)) {
                        existing.countryTimelines = resp.countryTimelines;
                    }
                    // Prefer longer timeseries — 365d response must not be overwritten by 28d
                    const existingLen = existing.streams?.current_period_timeseries?.length || 0;
                    const newLen = resp.streams?.current_period_timeseries?.length || 0;
                    if (newLen > 0 && existingLen > newLen) {
                        // Keep existing longer timeseries, only merge non-timeseries fields
                        const { streams, listeners, saves, playlist_adds, streams_per_listener, geography, countryTimelines, ...rest } = resp;
                        Object.assign(existing, rest);
                    }
                    else {
                        // NOTE(review): this branch also runs when newLen equals
                        // existingLen (or newLen is 0 but resp.streams exists),
                        // so an equal-length — and potentially empty — response
                        // can overwrite the kept metrics. Confirm acceptable.
                        const { geography, countryTimelines, ...rest } = resp;
                        Object.assign(existing, rest);
                        // Only merge timeseries metrics if new data is longer
                        if (resp.streams)
                            existing.streams = resp.streams;
                        if (resp.listeners)
                            existing.listeners = resp.listeners;
                        if (resp.saves)
                            existing.saves = resp.saves;
                        if (resp.playlist_adds)
                            existing.playlist_adds = resp.playlist_adds;
                        if (resp.streams_per_listener)
                            existing.streams_per_listener = resp.streams_per_listener;
                    }
                }
            }
        }
    };
    // Parse artist-level data BEFORE per-song loop so we have trackIds
    parseCaptured();
    // Per-song pages — each track × each metric tab = 12 months of daily data
    const trackIds = [];
    if (data.catalog?.songs) {
        for (const s of data.catalog.songs)
            trackIds.push(s.id);
    }
    if (trackIds.length === 0) {
        console.error(`[S4A] WARNING: No track IDs from catalog (catalog songs: ${data.catalog?.songs?.length ?? 'none'}). Per-song scrape skipped.`);
    }
    else {
        console.log(`[S4A] Scraping per-song data for ${trackIds.length} tracks...`);
    }
    // Per-song tabs:
    // 1. Overview (stats) — ONE call returns ALL 5 metrics (365 days daily each)
    // 2. Source of stream — user/personalized/catalog/editorial/network/other
    // 3. Location — top countries + cities + country comparison timeline
    // 4. Demographics — gender + gender×age per track
    // 5. Playlists — top playlists per track
    // 6. SongDNA — TODO (new feature)
    // NOTE: URL is "source-of-stream" (singular!), not "source-of-streams"
    // Per-song: navigate to stats, click "12 months" → S4A picks correct date range.
    // Don't guess toDate — Spotify updates when it wants. Let S4A decide.
    // ONE stats call returns ALL 5 metrics × 365d daily + previous_period_agg.
    // Then source-of-stream and playlists tabs for additional data.
    for (let i = 0; i < trackIds.length; i++) {
        const tid = trackIds[i];
        console.log(`[S4A] Track ${i + 1}/${trackIds.length}: ${tid}`);
        // Stats — load default, click "12 months"
        try {
            await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/stats`, { waitUntil: "networkidle", timeout: 20000 });
            await sleep(1500);
            await page.evaluate(`(() => {
                const b = Array.from(document.querySelectorAll("button")).find(b => b.textContent?.trim() === "12 months");
                if (b) b.click();
            })()`);
            await sleep(3000);
        }
        catch { }
        // Source of streams
        try {
            await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/source-of-stream`, { waitUntil: "networkidle", timeout: 20000 });
            await sleep(1500);
        }
        catch { }
        // Location — top countries (28d, all countries) + top cities (28d, top 50) + country comparison timeline (365d, default top 3)
        try {
            await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/location`, { waitUntil: "networkidle", timeout: 20000 });
            await sleep(1500);
            // Click "12 months" for country comparison timeline (365d daily data)
            await page.evaluate(`(() => {
                const b = Array.from(document.querySelectorAll("button")).find(b => b.textContent?.trim() === "12 months");
                if (b) b.click();
            })()`);
            await sleep(2000);
        }
        catch { }
        // Demographics — per-song gender + gender×age breakdown
        try {
            await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/demographic`, { waitUntil: "networkidle", timeout: 20000 });
            await sleep(1500);
        }
        catch { }
        // Playlists
        try {
            await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/playlists`, { waitUntil: "networkidle", timeout: 20000 });
            await sleep(1500);
        }
        catch { }
    }
    await browser.close();
    // Final parse — picks up per-song responses
    parseCaptured();
    // Save raw dump (date-keyed directory; fullScrape writes its validation report here too)
    const dumpDir = path.join(process.env.HOME || "~", ".artist-os", "s4a-dumps", new Date().toISOString().slice(0, 10));
    fs.mkdirSync(dumpDir, { recursive: true });
    fs.writeFileSync(path.join(dumpDir, `${aid}_s4a.json`), JSON.stringify(data, null, 2));
    return Object.keys(data).length > 0 ? data : null;
}
/**
 * Legacy S4A section scrape: walks the main dashboard pages in a headless
 * persistent-profile browser, captures interesting JSON API responses, and
 * writes each one (plus a manifest) into outputDir.
 *
 * @param {string} artistId - Spotify artist id.
 * @param {string} outputDir - Directory receiving one JSON file per captured response.
 * @returns {Promise<{files: number}>} number of response files written.
 * @throws {Error} when no S4A session has been saved yet.
 */
export async function scrapeS4A(artistId, outputDir) {
    if (!hasS4ASession())
        throw new Error("Not connected to S4A. Use connect_s4a first.");
    const chromePath = process.env.CHROME_PATH || findChrome();
    const browser = await chromium.launchPersistentContext(S4A_SESSION_DIR, {
        executablePath: chromePath,
        headless: true,
    });
    const captured = {};
    const page = browser.pages()[0] || await browser.newPage();
    // Record every successful spotify.com JSON response whose URL mentions one
    // of the known S4A backend services.
    const interesting = ["audience-engagement", "catalog-view", "s4x-insights", "fanatic",
        "song-stats", "entity-comparison", "canvaz-view", "buyerxp", "app-manager", "s4x-me"];
    page.on("response", async (resp) => {
        const url = resp.url();
        if (!url.includes("spotify.com") || resp.status() !== 200)
            return;
        if (!interesting.some(k => url.includes(k)))
            return;
        try {
            const data = await resp.json();
            // Filesystem-safe key derived from the URL path, capped at 120 chars.
            const key = url.split("spotify.com/")[1]?.replace(/[/?&=]/g, "_").slice(0, 120) || "unknown";
            captured[key] = { url, data };
        }
        catch { } // non-JSON bodies are skipped
    });
    // Navigate through S4A sections; the response hook above does the capturing.
    const sections = [
        `/c/artist/${artistId}/home`,
        `/c/artist/${artistId}/audience/stats`,
        `/c/artist/${artistId}/audience/demographic`,
        `/c/artist/${artistId}/audience/location`,
        `/c/artist/${artistId}/audience/segments`,
        `/c/artist/${artistId}/music/songs`,
        `/c/artist/${artistId}/music/releases`,
        `/c/artist/${artistId}/music/upcoming`,
        `/c/artist/${artistId}/music/playlists`,
    ];
    for (const section of sections) {
        try {
            await page.goto(`https://artists.spotify.com${section}`, { waitUntil: "networkidle", timeout: 30000 });
            await new Promise((resolve) => setTimeout(resolve, 2000));
        }
        catch { } // timeouts on slow sections are tolerated
    }
    await browser.close();
    // Persist every captured payload, one pretty-printed JSON file per response.
    fs.mkdirSync(outputDir, { recursive: true });
    const entries = Object.entries(captured);
    for (const [key, entry] of entries) {
        fs.writeFileSync(path.join(outputDir, `${key}.json`), JSON.stringify(entry.data, null, 2));
    }
    const count = entries.length;
    // Manifest records what was scraped and when, for downstream tooling.
    fs.writeFileSync(path.join(outputDir, "_manifest.json"), JSON.stringify({
        artist_id: artistId,
        scraped_at: new Date().toISOString(),
        files: count,
    }, null, 2));
    return { files: count };
}
/**
 * End-to-end scrape for one artist: anonymous GraphQL (overview + all tracks),
 * optional S4A scrape when a session exists, image download, DB storage, S4A
 * validation report, and workbook generation.
 *
 * @param {string} artistId - Spotify artist id.
 * @param {string} [dbPath] - SQLite db path; when omitted, DB storage and
 *        workbook generation are skipped and dataDir defaults to ~/.artist-os.
 * @returns {Promise<{overview, tracks, total_streams, images, s4a}>}
 */
export async function fullScrape(artistId, dbPath) {
    const token = await getAnonymousToken(artistId);
    const overview = await getArtistOverview(token, artistId);
    const tracks = await getAllTracks(token, artistId);
    const total_streams = tracks.reduce((sum, t) => sum + t.playcount, 0);
    const dataDir = dbPath ? path.dirname(dbPath) : path.join(process.env.HOME || "~", ".artist-os");
    // S4A scrape — automatic if session exists. Best-effort: a failure here must
    // not abort the anonymous scrape, but it should not vanish silently either
    // (the old bare `catch { }` hid real breakage like expired sessions).
    let s4a = null;
    if (hasS4ASession()) {
        try {
            s4a = await scrapeS4ADirect(artistId);
        }
        catch (err) {
            console.error(`[S4A] scrape failed, continuing without S4A data: ${err?.message ?? err}`);
        }
    }
    if (dbPath) {
        storeData(dbPath, overview, tracks);
    }
    const images = await downloadImages(artistId, overview, tracks, dataDir);
    // Validate S4A data before workbook generation
    if (s4a) {
        const report = validateS4AData(s4a);
        printValidationReport(report);
        // Save report alongside today's raw dump (same date-keyed directory
        // that scrapeS4ADirect writes to).
        const dumpDir = path.join(process.env.HOME || "~", ".artist-os", "s4a-dumps", new Date().toISOString().slice(0, 10));
        if (fs.existsSync(dumpDir)) {
            fs.writeFileSync(path.join(dumpDir, `${artistId}_validation.json`), JSON.stringify(report, null, 2));
        }
    }
    if (dbPath) {
        await generateWorkbook(dbPath, dataDir, s4a);
    }
    return { overview, tracks, total_streams, images, s4a };
}