deaf-intelligence 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +77 -0
- package/README.md +75 -0
- package/dist/constrained-disagg.d.ts +76 -0
- package/dist/constrained-disagg.js +499 -0
- package/dist/db.d.ts +34 -0
- package/dist/db.js +65 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +450 -0
- package/dist/scraper.d.ts +111 -0
- package/dist/scraper.js +2072 -0
- package/dist/validation.d.ts +43 -0
- package/dist/validation.js +138 -0
- package/package.json +52 -0
package/dist/scraper.js
ADDED
|
@@ -0,0 +1,2072 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spotify GraphQL scraper — pure Node.js, minimal dependencies.
|
|
3
|
+
*
|
|
4
|
+
* Uses playwright-core (4MB) + system Chrome. No Python. No browser download.
|
|
5
|
+
*
|
|
6
|
+
* VERIFIED 2026-03-22:
|
|
7
|
+
* - KAYO: 38 songs, 982K streams, 16,429 ML, 535 followers, top 5 cities
|
|
8
|
+
* - ABDE: 65 songs, 49.8M streams, 99K ML, solo/ft/feat roles
|
|
9
|
+
*
|
|
10
|
+
* GOTCHAS:
|
|
11
|
+
* - get_access_token returns 403 from pure HTTP (TLS fingerprint check)
|
|
12
|
+
* → SOLUTION: playwright-core launches system Chrome, captures token from network
|
|
13
|
+
* - Track pages don't show play counts without login
|
|
14
|
+
* → SOLUTION: queryAlbumTracks GraphQL endpoint returns them
|
|
15
|
+
* - Rate limiting: ~50 fast calls triggers 429
|
|
16
|
+
* → SOLUTION: 300ms delay between album queries, retry with 5s backoff
|
|
17
|
+
* - GraphQL hashes may change when Spotify updates their client
|
|
18
|
+
* → SOLUTION: hashes from Spicetify community, monitor for failures
|
|
19
|
+
*/
|
|
20
|
+
import { createHash } from "crypto";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import { chromium } from "playwright-core";
import ExcelJS from "exceljs";
import Database from "./db.js";
import { constrainedDisaggregate, roundWithAnchor } from "./constrained-disagg.js";
import { validateS4AData, printValidationReport } from "./validation.js";
|
|
27
|
+
// GraphQL operation hashes (from Spotify web client / Spicetify).
// Keyed by operation name; gql() looks the hash up by the operation it is
// asked to run and sends it as the persisted-query sha256Hash.
const HASHES = {
    queryArtistOverview: "35648a112beb1794e39ab931365f6ae4a8d45e65396d641eeda94e4003d41497",
    queryArtistDiscographyAll: "9380995a9d4663cbcb5113fef3c6aabf70ae6d407ba61793fd01e2a1dd6929b0",
    queryAlbumTracks: "3ea563e1d68f486d8df30f69de9dcedae74c77e684b889ba7408c589d30f7f2e",
};
// Endpoint every persisted GraphQL query is sent to (as a GET with query params).
const API_BASE = "https://api-partner.spotify.com/pathfinder/v1/query";
|
|
34
|
+
// Find system Chrome/Edge
|
|
35
|
+
/**
 * Locate a Chromium-based browser in one of the well-known install locations
 * (macOS, Windows, Linux), checked in the order listed.
 *
 * @returns {string} Path of the first candidate that exists on disk.
 * @throws {Error} When none of the candidate paths exists.
 */
function findChrome() {
    const candidates = [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
        "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
    ];
    const installed = candidates.find((candidate) => fs.existsSync(candidate));
    if (installed === undefined) {
        throw new Error("Chrome/Edge not found. Install Chrome or set CHROME_PATH env var.");
    }
    return installed;
}
|
|
54
|
+
// ─── Token ───
|
|
55
|
+
/**
 * Capture an anonymous Spotify web-player access token.
 *
 * Launches the system browser headlessly (CHROME_PATH env var, else
 * findChrome()), opens the public artist page and scans every network
 * response body for an `"accessToken":"…"` field.
 *
 * @param {string} artistId Spotify artist ID used to build the page URL.
 * @returns {Promise<string>} The captured token.
 * @throws {Error} If navigation fails before a token was seen, or no
 *   response contained a token.
 */
export async function getAnonymousToken(artistId) {
    const chromePath = process.env.CHROME_PATH || findChrome();
    const browser = await chromium.launch({ executablePath: chromePath, headless: true });
    let token = null;
    // try/finally: the browser process must be closed even when context
    // creation or navigation throws, otherwise Chrome is left running.
    try {
        const ctx = await browser.newContext({
            userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        });
        const page = await ctx.newPage();
        page.on("response", async (resp) => {
            if (token) {
                return;
            }
            try {
                const body = await resp.text();
                const m = body.match(/"accessToken":"([^"]+)"/);
                if (m) {
                    token = m[1];
                }
            }
            catch {
                // Some responses have no readable body (redirects, aborted
                // requests); they cannot carry the token, so skip them.
            }
        });
        try {
            await page.goto(`https://open.spotify.com/artist/${artistId}`, { waitUntil: "networkidle", timeout: 30000 });
        }
        catch (err) {
            // A navigation timeout is irrelevant once the token has already
            // been captured; only propagate when we have nothing to return.
            if (!token) {
                throw err;
            }
        }
        if (!token) {
            // Grace period for response handlers that are still reading bodies.
            await new Promise((r) => setTimeout(r, 3000));
        }
    }
    finally {
        await browser.close();
    }
    if (!token) {
        throw new Error("Failed to capture Spotify token");
    }
    return token;
}
|
|
81
|
+
// ─── GraphQL ───
|
|
82
|
+
/**
 * Run one persisted GraphQL query against the Spotify partner API.
 *
 * @param {string} token Bearer token sent in the Authorization header.
 * @param {string} operation Operation name; must be a key of HASHES.
 * @param {object} variables Query variables, JSON-encoded into the URL.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} If the operation has no known hash, or the (possibly
 *   retried) response status is not OK.
 */
async function gql(token, operation, variables) {
    const sha256Hash = HASHES[operation];
    if (typeof sha256Hash !== "string") {
        // Fail before hitting the network instead of sending `sha256Hash: undefined`.
        throw new Error(`GraphQL ${operation}: unknown operation (no persisted-query hash)`);
    }
    const params = new URLSearchParams({
        operationName: operation,
        variables: JSON.stringify(variables),
        extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash } }),
    });
    // Single definition of the request so the retry cannot drift from the first attempt.
    const send = () => fetch(`${API_BASE}?${params}`, {
        headers: {
            Authorization: `Bearer ${token}`,
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        },
    });
    let resp = await send();
    if (resp.status === 429) {
        // Rate limited — wait 5s and retry exactly once.
        await new Promise((r) => setTimeout(r, 5000));
        resp = await send();
    }
    if (!resp.ok) {
        throw new Error(`GraphQL ${operation}: ${resp.status}`);
    }
    return resp.json();
}
|
|
111
|
+
/**
 * Pause helper: returns a promise that resolves (with no value) once a
 * `setTimeout` of `ms` milliseconds fires.
 */
const sleep = (ms) => {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
};
|
|
112
|
+
/**
 * Fetch the `queryArtistOverview` result for an artist and flatten it into a
 * plain record (profile, images, stats, top tracks, related artists,
 * playlists, links, latest release, release counts).
 *
 * Missing sections of the response fall back to empty values ("" / null /
 * 0 / []), so the returned object always has every key.
 *
 * @param {string} token Bearer token passed through to gql().
 * @param {string} artistId Spotify artist ID.
 * @returns {Promise<object>} Flattened overview record.
 * @throws {Error} If the request fails or the response has no `artistUnion`.
 */
export async function getArtistOverview(token, artistId) {
    const result = await gql(token, "queryArtistOverview", {
        uri: `spotify:artist:${artistId}`,
        locale: "en",
        includePrerelease: true,
    });
    const artist = result?.data?.artistUnion;
    if (!artist) {
        // Without this guard the failure is an opaque "cannot read properties of undefined".
        throw new Error(`GraphQL queryArtistOverview: no artist data returned for ${artistId}`);
    }
    const stats = artist.stats || {};
    const profile = artist.profile || {};
    const visuals = artist.visuals || {};
    const disco = artist.discography || {};
    const related = artist.relatedContent || {};
    // Images: avatar (largest first), header banner, full gallery.
    // Sort a copy so the response object is not reordered in place.
    const avatarSources = visuals.avatarImage?.sources || [];
    const profileImage = [...avatarSources].sort((a, b) => (b.width || 0) - (a.width || 0))[0]?.url || null;
    const headerImage = visuals.headerImage?.sources?.[0]?.url || null;
    const galleryImages = (visuals.gallery?.items || [])
        .map((item) => item.sources?.[0]?.url)
        .filter(Boolean);
    const brandColor = visuals.avatarImage?.extractedColors?.colorRaw?.hex
        || visuals.headerImage?.extractedColors?.colorRaw?.hex
        || null;
    return {
        artist_id: artistId,
        name: profile.name || "",
        verified: profile.verified || false,
        bio: profile.biography?.text || null,
        profile_image: profileImage,
        header_image: headerImage,
        gallery_images: galleryImages,
        brand_color: brandColor,
        monthly_listeners: Number(stats.monthlyListeners) || 0,
        followers: Number(stats.followers) || 0,
        world_rank: Number(stats.worldRank) || 0,
        top_cities: (stats.topCities?.items || []).map((c) => ({
            city: c.city, country: c.country, listeners: Number(c.numberOfListeners) || 0,
        })),
        top_tracks: (disco.topTracks?.items || []).map((item) => {
            const t = item.track || {};
            return {
                track_id: (t.uri || "").split(":").pop() || "",
                title: t.name || "",
                // `|| 0` keeps a non-numeric playcount from turning into NaN.
                playcount: Number.parseInt(t.playcount || "0", 10) || 0,
                artists: (t.artists?.items || []).map((a) => a.profile?.name || "?"),
            };
        }),
        // Entries without a profile name get the "?" placeholder and are dropped.
        related_artists: (related.relatedArtists?.items || []).map((item) => {
            const a = item.artist || {};
            return { name: a.profile?.name || "?", id: a.id || "", followers: Number(a.stats?.followers) || 0 };
        }).filter((a) => a.name !== "?"),
        discovered_on: (related.discoveredOnV2?.items || [])
            .filter((item) => item.data?.__typename === "Playlist")
            .map((item) => {
                const pl = item.data || {};
                return { name: pl.name || "", owner: pl.ownerV2?.data?.name || "" };
            })
            .filter((d) => d.name),
        external_links: (profile.externalLinks?.items || []).map((l) => ({ name: l.name, url: l.url })),
        latest_release: disco.latest ? {
            name: disco.latest.name || "",
            type: disco.latest.type || "",
            label: disco.latest.label || "",
            date: disco.latest.date?.isoString || String(disco.latest.date?.year || ""),
        } : null,
        singles_count: disco.singles?.totalCount || 0,
        albums_count: disco.albums?.totalCount || 0,
    };
}
|
|
180
|
+
/**
 * Collect every track of an artist's discography with play counts, the
 * artist's role on each track, and the Web API popularity score.
 *
 * Steps: (1) list releases via `queryArtistDiscographyAll`, (2) fetch each
 * release's tracks via `queryAlbumTracks` (300ms apart), de-duplicating by
 * track ID, (3) fill in `popularity` from `/v1/tracks` in batches of 50.
 * Steps 2 and 3 are best-effort: a failing album or batch is skipped.
 *
 * @param {string} token Bearer token used for all requests.
 * @param {string} artistId Spotify artist ID.
 * @returns {Promise<object[]>} One record per unique track.
 * @throws {Error} If the discography query itself fails.
 */
export async function getAllTracks(token, artistId) {
    // NOTE(review): only one page is requested (offset 0, limit 100); releases
    // beyond the first 100 entries would not be seen — confirm whether paging is needed.
    const result = await gql(token, "queryArtistDiscographyAll", {
        uri: `spotify:artist:${artistId}`, offset: 0, limit: 100,
    });
    const albums = [];
    for (const item of result.data.artistUnion.discography.all.items) {
        for (const r of item.releases?.items || []) {
            albums.push({
                id: r.id,
                name: r.name,
                type: r.type || "SINGLE",
                date: r.date?.isoString || String(r.date?.year || ""),
                cover: r.coverArt?.sources?.[0]?.url || null,
            });
        }
    }
    const allTracks = [];
    const seen = new Set();
    for (const album of albums) {
        await sleep(300); // Rate limit courtesy
        try {
            const res = await gql(token, "queryAlbumTracks", {
                uri: `spotify:album:${album.id}`, offset: 0, limit: 300,
            });
            for (const tItem of res.data.album.tracks.items) {
                const track = tItem.track || {};
                const tid = (track.uri || "").split(":").pop() || "";
                if (!tid || seen.has(tid)) {
                    continue;
                }
                seen.add(tid);
                const artists = (track.artists?.items || []).map((a) => a.profile?.name || "?");
                const artistIds = (track.artists?.items || []).map((a) => (a.uri || "").split(":").pop() || "");
                // Role is derived from where the requested artist appears in the credit list.
                const isPrimary = artistIds[0] === artistId;
                const isSolo = artists.length === 1 && isPrimary;
                const isFeat = artistIds.includes(artistId) && !isPrimary;
                let role;
                if (isSolo) {
                    role = "SOLO";
                }
                else if (isPrimary) {
                    const featNames = artists.filter((_, i) => artistIds[i] !== artistId);
                    role = `ft. ${featNames.join(" & ")}`;
                }
                else if (isFeat) {
                    role = `FEAT on ${artists[0]}`;
                }
                else {
                    role = "OTHER";
                }
                allTracks.push({
                    track_id: tid,
                    title: track.name || "",
                    // `|| 0` keeps a non-numeric playcount from turning into NaN.
                    playcount: Number.parseInt(track.playcount || "0", 10) || 0,
                    popularity: 0, // populated by batch Web API call below
                    artists, artist_ids: artistIds, role,
                    album_name: album.name, album_id: album.id,
                    release_date: album.date, release_type: album.type, cover_art: album.cover,
                });
            }
        }
        catch {
            // Deliberately best-effort: skip a failed album, continue with the rest.
        }
    }
    // Batch fetch popularity scores from Spotify Web API (50 per call).
    // Track IDs are unique (see `seen`), so a Map gives O(1) lookups in place
    // of scanning allTracks for every returned track.
    const tracksById = new Map(allTracks.map((t) => [t.track_id, t]));
    const trackIds = allTracks.map((t) => t.track_id);
    for (let i = 0; i < trackIds.length; i += 50) {
        const batch = trackIds.slice(i, i + 50);
        try {
            const resp = await fetch(`https://api.spotify.com/v1/tracks?ids=${batch.join(",")}`, {
                headers: { Authorization: `Bearer ${token}` },
            });
            if (resp.ok) {
                const data = await resp.json();
                for (const t of data.tracks || []) {
                    if (!t) {
                        continue;
                    }
                    const match = tracksById.get(t.id);
                    if (match) {
                        match.popularity = Number(t.popularity) || 0;
                    }
                }
            }
            await sleep(300);
        }
        catch {
            // Popularity is optional enrichment; a failed batch leaves it at 0.
        }
    }
    return allTracks;
}
|
|
263
|
+
// ─── SQLite Storage ───
|
|
264
|
+
const SCHEMA = `
|
|
265
|
+
PRAGMA journal_mode = DELETE;
|
|
266
|
+
|
|
267
|
+
CREATE TABLE IF NOT EXISTS artist (
|
|
268
|
+
spotify_id TEXT PRIMARY KEY, name TEXT NOT NULL, verified INTEGER DEFAULT 0,
|
|
269
|
+
bio TEXT, brand_color TEXT, created_at INTEGER DEFAULT (unixepoch())
|
|
270
|
+
);
|
|
271
|
+
CREATE TABLE IF NOT EXISTS daily_stats (
|
|
272
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
273
|
+
metric_name TEXT NOT NULL, metric_value REAL NOT NULL,
|
|
274
|
+
delta_value REAL, delta_pct REAL, scraped_at INTEGER NOT NULL
|
|
275
|
+
);
|
|
276
|
+
CREATE INDEX IF NOT EXISTS idx_daily ON daily_stats(artist_id, metric_name, scraped_at);
|
|
277
|
+
|
|
278
|
+
CREATE TABLE IF NOT EXISTS song_snapshots (
|
|
279
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
280
|
+
track_id TEXT, track_title TEXT NOT NULL, play_count INTEGER NOT NULL,
|
|
281
|
+
popularity INTEGER DEFAULT 0,
|
|
282
|
+
delta_value INTEGER, delta_pct REAL, scraped_at INTEGER NOT NULL
|
|
283
|
+
);
|
|
284
|
+
CREATE INDEX IF NOT EXISTS idx_songs ON song_snapshots(artist_id, track_title, scraped_at);
|
|
285
|
+
|
|
286
|
+
CREATE TABLE IF NOT EXISTS tracks (
|
|
287
|
+
spotify_id TEXT, artist_id TEXT NOT NULL, title TEXT NOT NULL,
|
|
288
|
+
release_type TEXT, release_date TEXT, release_year INTEGER,
|
|
289
|
+
cover_art_url TEXT, role TEXT, UNIQUE(artist_id, title)
|
|
290
|
+
);
|
|
291
|
+
CREATE TABLE IF NOT EXISTS locations (
|
|
292
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
293
|
+
location_type TEXT NOT NULL, location_name TEXT NOT NULL,
|
|
294
|
+
listener_count INTEGER, rank INTEGER, scraped_at INTEGER NOT NULL
|
|
295
|
+
);
|
|
296
|
+
CREATE TABLE IF NOT EXISTS related_artists (
|
|
297
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
298
|
+
related_name TEXT NOT NULL, related_spotify_id TEXT, scraped_at INTEGER NOT NULL
|
|
299
|
+
);
|
|
300
|
+
CREATE TABLE IF NOT EXISTS discovered_on (
|
|
301
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
302
|
+
playlist_name TEXT NOT NULL, playlist_owner TEXT, scraped_at INTEGER NOT NULL
|
|
303
|
+
);
|
|
304
|
+
CREATE TABLE IF NOT EXISTS external_links (
|
|
305
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT, artist_id TEXT NOT NULL,
|
|
306
|
+
link_name TEXT NOT NULL, link_url TEXT NOT NULL, scraped_at INTEGER NOT NULL,
|
|
307
|
+
UNIQUE(artist_id, link_name, link_url)
|
|
308
|
+
);
|
|
309
|
+
`;
|
|
310
|
+
/**
 * Persist one scrape (artist overview + track list) into the SQLite database.
 *
 * Creates/migrates the schema, upserts the artist row, and appends
 * time-series rows (daily stats, cities, related artists, playlists, song
 * snapshots) stamped with a single scrape timestamp. Deltas are computed
 * against the most recent earlier value when that value is > 0.
 *
 * @param {string} dbPath Path of the SQLite file.
 * @param {object} overview Record as returned by getArtistOverview().
 * @param {object[]} tracks Records as returned by getAllTracks().
 * @returns {void}
 */
export function storeData(dbPath, overview, tracks) {
    const db = new Database(dbPath);
    // try/finally: release the database handle even if a statement throws.
    try {
        db.exec(SCHEMA);
        // Migrate: add columns that may not exist in older DBs
        const cols = db.prepare("PRAGMA table_info(artist)").all();
        const colNames = new Set(cols.map((c) => c.name));
        if (!colNames.has("brand_color")) {
            db.exec("ALTER TABLE artist ADD COLUMN brand_color TEXT");
        }
        const snapCols = db.prepare("PRAGMA table_info(song_snapshots)").all();
        const snapColNames = new Set(snapCols.map((c) => c.name));
        if (!snapColNames.has("popularity")) {
            db.exec("ALTER TABLE song_snapshots ADD COLUMN popularity INTEGER DEFAULT 0");
        }
        const ts = Math.floor(Date.now() / 1000);
        const aid = overview.artist_id;
        // Artist (including bio + brand_color)
        db.prepare("INSERT OR REPLACE INTO artist (spotify_id, name, verified, bio, brand_color) VALUES (?, ?, ?, ?, ?)")
            .run(aid, overview.name, overview.verified ? 1 : 0, overview.bio, overview.brand_color);
        // Daily stats with delta
        const prevStmt = db.prepare("SELECT metric_value FROM daily_stats WHERE artist_id=? AND metric_name=? ORDER BY scraped_at DESC LIMIT 1");
        const insertStat = db.prepare("INSERT INTO daily_stats (artist_id, metric_name, metric_value, delta_value, delta_pct, scraped_at) VALUES (?,?,?,?,?,?)");
        for (const [metric, value] of [
            ["monthly_listeners", overview.monthly_listeners],
            ["followers", overview.followers],
            ["world_rank", overview.world_rank],
        ]) {
            if (value == null) {
                continue;
            }
            const prev = prevStmt.get(aid, metric);
            let delta = null;
            let deltaPct = null;
            if (prev?.metric_value > 0) {
                delta = value - prev.metric_value;
                deltaPct = (delta / prev.metric_value) * 100;
            }
            insertStat.run(aid, metric, value, delta, deltaPct, ts);
        }
        // Cities (rank = 1-based position in the list)
        const insertCity = db.prepare("INSERT INTO locations (artist_id, location_type, location_name, listener_count, rank, scraped_at) VALUES (?,?,?,?,?,?)");
        overview.top_cities.forEach((c, i) => insertCity.run(aid, "city", `${c.city}, ${c.country}`, c.listeners, i + 1, ts));
        // Related
        const insertRelated = db.prepare("INSERT INTO related_artists (artist_id, related_name, related_spotify_id, scraped_at) VALUES (?,?,?,?)");
        for (const ra of overview.related_artists) {
            if (ra.name) {
                insertRelated.run(aid, ra.name, ra.id, ts);
            }
        }
        // Discovered on playlists
        const insertDiscovered = db.prepare("INSERT INTO discovered_on (artist_id, playlist_name, playlist_owner, scraped_at) VALUES (?,?,?,?)");
        for (const d of overview.discovered_on) {
            if (d.name) {
                insertDiscovered.run(aid, d.name, d.owner, ts);
            }
        }
        // External links (INSERT OR IGNORE — only add new ones)
        const insertLink = db.prepare("INSERT OR IGNORE INTO external_links (artist_id, link_name, link_url, scraped_at) VALUES (?,?,?,?)");
        for (const l of overview.external_links) {
            if (l.url) {
                insertLink.run(aid, l.name, l.url, ts);
            }
        }
        // Tracks + snapshots
        const upsertTrack = db.prepare("INSERT OR REPLACE INTO tracks (spotify_id, artist_id, title, release_type, release_date, release_year, cover_art_url, role) VALUES (?,?,?,?,?,?,?,?)");
        // Previous play count is matched by track ID first. Matching by title
        // alone mixes up distinct tracks that share a title (e.g. single vs.
        // album version): the second one would be compared against the row
        // the first one just inserted in this very scrape.
        const prevPlayById = db.prepare("SELECT play_count FROM song_snapshots WHERE artist_id=? AND track_id=? ORDER BY scraped_at DESC LIMIT 1");
        // Fallback for snapshot rows stored without a track ID.
        const prevPlayByTitle = db.prepare("SELECT play_count FROM song_snapshots WHERE artist_id=? AND track_title=? AND (track_id IS NULL OR track_id='') ORDER BY scraped_at DESC LIMIT 1");
        const insertSnap = db.prepare("INSERT INTO song_snapshots (artist_id, track_id, track_title, play_count, popularity, delta_value, delta_pct, scraped_at) VALUES (?,?,?,?,?,?,?,?)");
        const insertAll = db.transaction(() => {
            for (const t of tracks) {
                const year = t.release_date?.length >= 4 ? parseInt(t.release_date.substring(0, 4), 10) : null;
                upsertTrack.run(t.track_id, aid, t.title, t.release_type, t.release_date, year, t.cover_art, t.role);
                const prev = (t.track_id ? prevPlayById.get(aid, t.track_id) : undefined)
                    || prevPlayByTitle.get(aid, t.title);
                let delta = null;
                let deltaPct = null;
                if (prev?.play_count > 0) {
                    delta = t.playcount - prev.play_count;
                    deltaPct = (delta / prev.play_count) * 100;
                }
                insertSnap.run(aid, t.track_id, t.title, t.playcount, t.popularity || 0, delta, deltaPct, ts);
            }
        });
        insertAll();
    }
    finally {
        db.close();
    }
}
|
|
386
|
+
// ─── Image Download ───
|
|
387
|
+
/**
 * Fetch `url` and write the response body to `destPath`, creating parent
 * directories as needed.
 *
 * Never throws: any failure (bad URL, network error, non-OK status, write
 * error) is reported as `false`.
 *
 * @param {string} url Image URL.
 * @param {string} destPath Target file path.
 * @returns {Promise<boolean>} `true` only if the file was written.
 */
async function downloadImage(url, destPath) {
    try {
        const response = await fetch(url);
        if (response.ok) {
            const bytes = Buffer.from(await response.arrayBuffer());
            const parentDir = path.dirname(destPath);
            fs.mkdirSync(parentDir, { recursive: true });
            fs.writeFileSync(destPath, bytes);
            return true;
        }
    }
    catch {
        // Best-effort download: fall through to the shared failure result.
    }
    return false;
}
|
|
401
|
+
// Extract Spotify's unique image ID from CDN URL.
|
|
402
|
+
// URL format: https://i.scdn.co/image/ab6761610000e5ebd3bb05bb2490318ab924b02a
|
|
403
|
+
// Falls back to sha256 hash of full URL if format changes.
|
|
404
|
+
/**
 * Derive a stable file-name ID for an image URL.
 *
 * Returns the run of lowercase hex characters that follows `/image/` in the
 * URL. When the URL has no such segment, returns the first 24 hex characters
 * of the SHA-256 of the whole URL.
 *
 * @param {string} url Image URL.
 * @returns {string} Hex identifier.
 */
function imageIdFromUrl(url) {
    const match = url.match(/\/image\/([a-f0-9]+)/);
    if (match) {
        return match[1];
    }
    // This file is an ES module, where `require` is not defined; the hash
    // function comes from the top-level `crypto` import instead.
    return createHash("sha256").update(url).digest("hex").slice(0, 24);
}
|
|
411
|
+
/**
 * Load the image manifest JSON from `manifestPath`.
 *
 * @param {string} manifestPath Path of the manifest file.
 * @returns {object} The parsed manifest, or a fresh empty manifest
 *   (`gallery_urls: []`, `cover_urls: {}`) if the file cannot be read or parsed.
 */
function readManifest(manifestPath) {
    let parsed;
    try {
        const text = fs.readFileSync(manifestPath, "utf-8");
        parsed = JSON.parse(text);
    }
    catch {
        parsed = { gallery_urls: [], cover_urls: {} };
    }
    return parsed;
}
|
|
419
|
+
/**
 * Write `manifest` to `manifestPath` as 2-space-indented JSON, creating the
 * parent directory first if it does not exist.
 *
 * @param {string} manifestPath Target file path.
 * @param {object} manifest JSON-serialisable manifest object.
 * @returns {void}
 */
function writeManifest(manifestPath, manifest) {
    const parentDir = path.dirname(manifestPath);
    fs.mkdirSync(parentDir, { recursive: true });
    const serialized = JSON.stringify(manifest, null, 2);
    fs.writeFileSync(manifestPath, serialized);
}
|
|
423
|
+
// Artist folder: ~/.artist-os/{Artist Name}/ — everything customer sees lives here
|
|
424
|
+
/**
 * Build the per-artist folder path `{dataDir}/{artistName}`.
 *
 * The artist name originates from the remote API, so it is reduced to a
 * single path segment: path separators (`/`, `\`) and NUL become `_`, and a
 * name that is exactly `.` or `..` becomes `_`. Names without those
 * characters map to the same path as before.
 *
 * @param {string} dataDir Root data directory.
 * @param {string} artistName Artist display name.
 * @returns {string} Path of the artist's folder inside `dataDir`.
 */
function artistDir(dataDir, artistName) {
    const flattened = artistName.replace(/[/\\\0]/g, "_");
    // "." / ".." would resolve to dataDir itself or its parent.
    const segment = flattened === "." || flattened === ".." ? "_" : flattened;
    return path.join(dataDir, segment);
}
|
|
427
|
+
/**
 * Mirror an artist's images into `{artist folder}/images` and record the
 * source URLs in `manifest.json`.
 *
 * - profile.jpg / header.jpg always hold the latest image; when the URL
 *   recorded in the manifest differs, the existing file is first renamed to
 *   `{profile|header}_{old image id}.jpg`.
 * - gallery/{image id}.jpg and covers/{track_id}.jpg are downloaded only if
 *   the file does not exist yet; nothing is ever deleted.
 *
 * @param {string} artistId Not used by this function.
 * @param {object} overview Needs `name`, `profile_image`, `header_image`, `gallery_images`.
 * @param {object[]} tracks Needs `track_id` and `cover_art` on each entry.
 * @param {string} dataDir Root data directory.
 * @returns {Promise<{downloaded: number, skipped: number}>} Counters for this run.
 */
export async function downloadImages(artistId, overview, tracks, dataDir) {
    const imgDir = path.join(artistDir(dataDir, overview.name), "images");
    const coversDir = path.join(imgDir, "covers");
    const galleryDir = path.join(imgDir, "gallery");
    const manifestPath = path.join(imgDir, "manifest.json");
    const manifest = readManifest(manifestPath);
    const tally = { downloaded: 0, skipped: 0 };

    // Download and count a success; failures are not counted at all.
    const fetchAndCount = async (url, dest) => {
        const ok = await downloadImage(url, dest);
        if (ok) {
            tally.downloaded += 1;
        }
    };

    // "Latest wins" images (profile, header): archive the old file when the
    // URL changed, download when the file is missing, otherwise skip.
    const syncLatest = async (currentUrl, manifestKey, baseName) => {
        if (!currentUrl) {
            return;
        }
        const dest = path.join(imgDir, `${baseName}.jpg`);
        const recordedUrl = manifest[manifestKey];
        const alreadyOnDisk = fs.existsSync(dest);
        const urlChanged = Boolean(recordedUrl) && recordedUrl !== currentUrl;
        if (urlChanged && alreadyOnDisk) {
            const oldId = imageIdFromUrl(recordedUrl);
            fs.renameSync(dest, path.join(imgDir, `${baseName}_${oldId}.jpg`));
            await fetchAndCount(currentUrl, dest);
        }
        else if (!alreadyOnDisk) {
            await fetchAndCount(currentUrl, dest);
        }
        else {
            tally.skipped += 1;
        }
        manifest[manifestKey] = currentUrl;
    };

    // Accumulating images (gallery, covers): existing files are never re-fetched.
    const fetchIfMissing = async (url, dest) => {
        if (fs.existsSync(dest)) {
            tally.skipped += 1;
            return;
        }
        await fetchAndCount(url, dest);
        await sleep(100);
    };

    // --- Profile image, then header banner ---
    await syncLatest(overview.profile_image, "profile_url", "profile");
    await syncLatest(overview.header_image, "header_url", "header");

    // --- Gallery: file name is the image ID taken from the URL ---
    for (const galleryUrl of overview.gallery_images) {
        await fetchIfMissing(galleryUrl, path.join(galleryDir, `${imageIdFromUrl(galleryUrl)}.jpg`));
    }
    manifest.gallery_urls = overview.gallery_images;

    // --- Cover arts: one file per track_id ---
    for (const track of tracks) {
        if (track.cover_art) {
            await fetchIfMissing(track.cover_art, path.join(coversDir, `${track.track_id}.jpg`));
        }
    }

    writeManifest(manifestPath, manifest);
    return { downloaded: tally.downloaded, skipped: tally.skipped };
}
|
|
503
|
+
// ─── Load all historical S4A dumps for an artist ───
|
|
504
|
+
/**
 * Load every stored S4A dump for an artist, oldest first.
 *
 * Looks in `{home}/.artist-os/s4a-dumps/{YYYY-MM-DD}/{artistId}_s4a.json`.
 * `{home}` is the HOME env var, falling back to `os.homedir()` when HOME is
 * unset (a literal "~" is not expanded by the fs/path APIs).
 * Dumps that are missing, unparsable, or have an empty/missing `perSong`
 * object are skipped.
 *
 * @param {string} artistId Spotify artist ID used in the dump file name.
 * @returns {{date: string, data: object}[]} Dumps sorted by directory name (date).
 */
function loadAllDumps(artistId) {
    const homeDir = process.env.HOME || os.homedir();
    const dumpRoot = path.join(homeDir, ".artist-os", "s4a-dumps");
    if (!fs.existsSync(dumpRoot)) {
        return [];
    }
    // Date-named directories sort chronologically as plain strings.
    const dirs = fs.readdirSync(dumpRoot).filter((d) => /^\d{4}-\d{2}-\d{2}$/.test(d)).sort();
    const dumps = [];
    for (const dir of dirs) {
        const file = path.join(dumpRoot, dir, `${artistId}_s4a.json`);
        if (!fs.existsSync(file)) {
            continue;
        }
        try {
            const data = JSON.parse(fs.readFileSync(file, "utf-8"));
            if (data.perSong && Object.keys(data.perSong).length > 0) {
                dumps.push({ date: dir, data });
            }
        }
        catch {
            // Best-effort: an unreadable or corrupt dump is skipped, the rest still load.
        }
    }
    return dumps;
}
|
|
523
|
+
// ─── Master XLSX: one workbook per artist, grows forever ───
|
|
524
|
+
export async function generateWorkbook(dbPath, dataDir, s4a) {
|
|
525
|
+
const db = new Database(dbPath, { readonly: true });
|
|
526
|
+
const artist = db.prepare("SELECT name, spotify_id, verified, bio FROM artist LIMIT 1").get();
|
|
527
|
+
try {
|
|
528
|
+
const bc = db.prepare("SELECT brand_color FROM artist LIMIT 1").get();
|
|
529
|
+
if (bc)
|
|
530
|
+
artist.brand_color = bc.brand_color;
|
|
531
|
+
}
|
|
532
|
+
catch { }
|
|
533
|
+
if (!artist) {
|
|
534
|
+
db.close();
|
|
535
|
+
return;
|
|
536
|
+
}
|
|
537
|
+
const aid = artist.spotify_id;
|
|
538
|
+
const artDir = artistDir(dataDir, artist.name);
|
|
539
|
+
fs.mkdirSync(artDir, { recursive: true });
|
|
540
|
+
const xlsxPath = path.join(artDir, `${artist.name}.xlsx`);
|
|
541
|
+
// JEDEN timestamp per scrape — nikdy nepřepočítávat Date.now() znovu
|
|
542
|
+
const scrapeTs = Math.floor(Date.now() / 1000);
|
|
543
|
+
const now = scrapeTs;
|
|
544
|
+
// today = datum SCRAPU (metadata), NE datum data pointu!
|
|
545
|
+
const today = new Date(scrapeTs * 1000).toISOString().slice(0, 10);
|
|
546
|
+
const wb = new ExcelJS.Workbook();
|
|
547
|
+
wb.creator = "Artist-OS";
|
|
548
|
+
wb.created = new Date();
|
|
549
|
+
const hdrDark = {
|
|
550
|
+
font: { bold: true, color: { argb: "FFFFFFFF" }, size: 11 },
|
|
551
|
+
fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FF1a1a2e" } },
|
|
552
|
+
alignment: { horizontal: "center" },
|
|
553
|
+
};
|
|
554
|
+
const hdrGreen = {
|
|
555
|
+
font: { bold: true, color: { argb: "FFFFFFFF" }, size: 11 },
|
|
556
|
+
fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FF1B5E20" } },
|
|
557
|
+
alignment: { horizontal: "center" },
|
|
558
|
+
};
|
|
559
|
+
const estFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFF5F5DC" } };
|
|
560
|
+
const projFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8EAF6" } };
|
|
561
|
+
const numFmt = "#,##0";
|
|
562
|
+
const applyHdr = (ws, style) => ws.getRow(1).eachCell(c => { Object.assign(c, { style }); });
|
|
563
|
+
const sectionStyle = {
|
|
564
|
+
font: { bold: true, size: 12 },
|
|
565
|
+
fill: { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8EAF6" } },
|
|
566
|
+
alignment: { horizontal: "center", vertical: "middle" },
|
|
567
|
+
};
|
|
568
|
+
const labelStyle = {
|
|
569
|
+
font: { bold: true, color: { argb: "FF333333" } },
|
|
570
|
+
alignment: { horizontal: "center", vertical: "middle" },
|
|
571
|
+
};
|
|
572
|
+
const metaStyle = {
|
|
573
|
+
font: { italic: true, color: { argb: "FF666666" }, size: 9 },
|
|
574
|
+
alignment: { horizontal: "center", vertical: "middle" },
|
|
575
|
+
};
|
|
576
|
+
const addSection = (ws, name) => {
|
|
577
|
+
const row = ws.addRow([`═══ ${name} ═══`, "", ""]);
|
|
578
|
+
row.eachCell(c => { Object.assign(c, { style: sectionStyle }); });
|
|
579
|
+
};
|
|
580
|
+
const addField = (ws, field, value, since) => {
|
|
581
|
+
const row = ws.addRow([field, value ?? "", since]);
|
|
582
|
+
row.getCell(1).style = labelStyle;
|
|
583
|
+
row.getCell(2).alignment = { horizontal: "center", vertical: "middle", wrapText: true };
|
|
584
|
+
row.getCell(3).alignment = { horizontal: "center", vertical: "middle" };
|
|
585
|
+
};
|
|
586
|
+
const addHistoryNote = (ws, label) => {
|
|
587
|
+
const row = ws.addRow([" HISTORY", label, ""]);
|
|
588
|
+
row.eachCell(c => { Object.assign(c, { style: metaStyle }); });
|
|
589
|
+
};
|
|
590
|
+
const addSpacer = (ws) => ws.addRow(["", "", ""]);
|
|
591
|
+
const scrapeDate = today;
|
|
592
|
+
// ═══════════════════════════════════════════
|
|
593
|
+
// SHEET 1: PROFILE (v2 — ACTUAL + HISTORY)
|
|
594
|
+
// ═══════════════════════════════════════════
|
|
595
|
+
const profileSheet = wb.addWorksheet("Profile", { properties: { tabColor: { argb: "FF2E7D32" } } });
|
|
596
|
+
profileSheet.columns = [
|
|
597
|
+
{ header: "Field", width: 22 },
|
|
598
|
+
{ header: "Value", width: 55 },
|
|
599
|
+
{ header: "Since", width: 14 },
|
|
600
|
+
];
|
|
601
|
+
applyHdr(profileSheet, hdrDark);
|
|
602
|
+
let links = [];
|
|
603
|
+
try {
|
|
604
|
+
links = db.prepare("SELECT link_name, link_url FROM external_links WHERE artist_id = ?").all(aid);
|
|
605
|
+
}
|
|
606
|
+
catch { }
|
|
607
|
+
const trackCountTotal = db.prepare("SELECT COUNT(*) as c FROM tracks WHERE artist_id = ?").get(aid)?.c || 0;
|
|
608
|
+
const latestRel = db.prepare("SELECT title, release_type, release_date FROM tracks WHERE artist_id = ? ORDER BY release_date DESC LIMIT 1").get(aid);
|
|
609
|
+
// Identity
|
|
610
|
+
addSection(profileSheet, "IDENTITY");
|
|
611
|
+
addField(profileSheet, "Spotify ID", aid, "—");
|
|
612
|
+
addField(profileSheet, "Name", artist.name, scrapeDate);
|
|
613
|
+
addHistoryNote(profileSheet, "(no changes)");
|
|
614
|
+
addSpacer(profileSheet);
|
|
615
|
+
addField(profileSheet, "Verified", artist.verified ? "Yes" : "No", scrapeDate);
|
|
616
|
+
addHistoryNote(profileSheet, "(no changes)");
|
|
617
|
+
addSpacer(profileSheet);
|
|
618
|
+
addField(profileSheet, "Bio", artist.bio || "(empty)", scrapeDate);
|
|
619
|
+
addHistoryNote(profileSheet, "(no changes)");
|
|
620
|
+
addSpacer(profileSheet);
|
|
621
|
+
addField(profileSheet, "Brand Color", artist.brand_color || "(not captured)", scrapeDate);
|
|
622
|
+
addHistoryNote(profileSheet, "(no changes)");
|
|
623
|
+
addSpacer(profileSheet);
|
|
624
|
+
// Images: profile picture, header banner and up to eight gallery photos are
// embedded from the artist's local "images" directory when the files exist.
addSection(profileSheet, "IMAGES");
const imgDir = path.join(artDir, "images");
// Writes the label row, then either embeds the JPEG on a dedicated row sized to
// the image height, or records that the file was not downloaded.
const embedProfileSheetImage = (label, imagePath, width, height) => {
    if (!fs.existsSync(imagePath)) {
        addField(profileSheet, label, "(not downloaded)", scrapeDate);
        return;
    }
    addField(profileSheet, label, "See below", scrapeDate);
    const imageId = wb.addImage({ filename: imagePath, extension: "jpeg" });
    const imageRow = profileSheet.rowCount + 1;
    profileSheet.addRow(["", "", ""]);
    profileSheet.getRow(imageRow).height = height;
    // The image anchor uses zero-based coordinates, hence `imageRow - 1`.
    profileSheet.addImage(imageId, { tl: { col: 1, row: imageRow - 1 }, ext: { width, height } });
};
const profileImgPath = path.join(imgDir, "profile.jpg");
embedProfileSheetImage("Profile Image", profileImgPath, 120, 120);
addHistoryNote(profileSheet, "(no changes)");
addSpacer(profileSheet);
const headerImgPath = path.join(imgDir, "header.jpg");
embedProfileSheetImage("Header Image", headerImgPath, 300, 80);
addHistoryNote(profileSheet, "(no changes)");
addSpacer(profileSheet);
const galleryDirPath = path.join(imgDir, "gallery");
const galFiles = !fs.existsSync(galleryDirPath)
    ? []
    : fs.readdirSync(galleryDirPath).filter((file) => file.endsWith(".jpg")).slice(0, 8);
addField(profileSheet, "Gallery", `${galFiles.length} photos`, scrapeDate);
if (galFiles.length > 0) {
    // Widen every column that will hold a thumbnail to at least 16.
    galFiles.forEach((_file, i) => {
        const column = profileSheet.getColumn(2 + i);
        if ((column.width || 0) < 16) {
            column.width = 16;
        }
    });
    const galRow = profileSheet.rowCount + 1;
    profileSheet.addRow(["", "", ""]);
    profileSheet.getRow(galRow).height = 100;
    // One 100x100 thumbnail per column, all on the same row.
    galFiles.forEach((file, i) => {
        const thumbId = wb.addImage({ filename: path.join(galleryDirPath, file), extension: "jpeg" });
        profileSheet.addImage(thumbId, { tl: { col: 1 + i, row: galRow - 1 }, ext: { width: 100, height: 100 } });
    });
}
addHistoryNote(profileSheet, "(no changes)");
addSpacer(profileSheet);
|
|
675
|
+
// External Links: one row per captured link, or a single placeholder row.
addSection(profileSheet, "EXTERNAL LINKS");
if (links.length === 0) {
    addField(profileSheet, "(none captured)", "", "");
}
else {
    links.forEach(({ link_name, link_url }) => {
        addField(profileSheet, link_name, link_url, scrapeDate);
    });
}
addHistoryNote(profileSheet, "(no changes)");
addSpacer(profileSheet);
// Release Info: latest release (when the query found one) plus fixed label/count rows.
addSection(profileSheet, "RELEASE INFO");
if (latestRel) {
    const { title, release_type: releaseType, release_date: releaseDate } = latestRel;
    const latestSummary = `${title} / ${releaseType} / ${releaseDate?.slice(0, 10)}`;
    addField(profileSheet, "Latest Release", latestSummary, scrapeDate);
}
const releaseInfoRows = [
    ["Label", "—"],
    ["Singles Count", trackCountTotal],
    ["Albums Count", 0],
];
for (const [label, value] of releaseInfoRows) {
    addField(profileSheet, label, value, scrapeDate);
}
addHistoryNote(profileSheet, "(no changes)");
addSpacer(profileSheet);
|
|
696
|
+
// S4A section — fields 14-21 per SPEC-02_DATA.md (from REAL S4A data).
// With S4A data present each field is written from the `s4a` object; otherwise a
// status row plus "NULL" placeholders for the same eight fields are written.
addSection(profileSheet, "S4A");
if (s4a) {
    // Most fields are followed by a "(no changes)" history note and a spacer row.
    const closeField = () => {
        addHistoryNote(profileSheet, "(no changes)");
        addSpacer(profileSheet);
    };
    // Truncated JSON preview for structured values.
    const jsonPreview = (value, maxLen) => JSON.stringify(value).slice(0, maxLen);
    // 14: Account Owner
    const owner = s4a.accountOwner;
    let ownerLabel = "Connected";
    if (owner?.name) {
        // Append the e-mail only when there is one; previously a missing e-mail
        // produced a dangling "Name ()".
        ownerLabel = owner.email ? `${owner.name} (${owner.email})` : owner.name;
    }
    addField(profileSheet, "Account Owner", ownerLabel, scrapeDate);
    closeField();
    // 15: Permissions
    const perms = s4a.permissions;
    addField(profileSheet, "Permissions", perms ? jsonPreview(perms, 100) : "(not captured)", scrapeDate);
    closeField();
    // 16: Canvas Permissions
    const canvas = s4a.canvasPermissions;
    addField(profileSheet, "Canvas Permissions", canvas ? jsonPreview(canvas, 100) : "(not captured)", scrapeDate);
    closeField();
    // 17: Campaign Eligibility — readable summary when marquee/showcase are present,
    // otherwise a truncated JSON dump of whatever was captured.
    const campaign = s4a.campaignEligibility;
    if (campaign) {
        const parts = [];
        if (campaign.marquee) {
            const thresholdNote = campaign.marquee.meetsThreshold ? " (threshold: YES)" : "";
            parts.push(`Marquee: ${campaign.marquee.eligible ? "YES" : "NO"}${thresholdNote}`);
        }
        if (campaign.showcase) {
            parts.push(`Showcase: ${campaign.showcase.eligible ? "YES" : "NO"}`);
        }
        addField(profileSheet, "Campaign Eligibility", parts.join(" / ") || jsonPreview(campaign, 100), scrapeDate);
    }
    else {
        addField(profileSheet, "Campaign Eligibility", "(not captured)", scrapeDate);
    }
    closeField();
    // 18: Team Membership
    const team = s4a.teamMembership;
    let teamLabel = "(not captured)";
    if (team?.isArtistTeamMember != null) {
        teamLabel = team.isArtistTeamMember ? "Yes" : "No";
    }
    addField(profileSheet, "Team Membership", teamLabel, scrapeDate);
    closeField();
    // 19: Upcoming Release — from /music/upcoming page (captured in upcoming field)
    addField(profileSheet, "Upcoming Release", s4a.upcoming ? jsonPreview(s4a.upcoming, 200) : "(none or not captured)", scrapeDate);
    closeField();
    // 20: Pitch Status — part of upcoming data (spacer only, no history note)
    addField(profileSheet, "Pitch Status", "(see Upcoming Release)", scrapeDate);
    addSpacer(profileSheet);
    // 21: Rights Access — distinct values collected from perSong data
    const allRights = new Set();
    if (s4a.perSong) {
        for (const tData of Object.values(s4a.perSong)) {
            if (tData.rights_access) {
                allRights.add(tData.rights_access);
            }
        }
    }
    addField(profileSheet, "Rights Access", allRights.size > 0 ? Array.from(allRights).join(", ") : "(not captured)", scrapeDate);
    addHistoryNote(profileSheet, "(no changes)");
}
else {
    // S4A not connected
    const statusText = hasS4ASession()
        ? "Session expired — run connect_s4a"
        : "Not connected — run connect_s4a for full data";
    addField(profileSheet, "Status", statusText, "—");
    addSpacer(profileSheet);
    const placeholderFields = [
        "Account Owner", "Permissions", "Canvas Permissions",
        "Campaign Eligibility", "Team Membership", "Upcoming Release",
        "Pitch Status", "Rights Access",
    ];
    for (const f of placeholderFields) {
        addField(profileSheet, f, "NULL", "—");
    }
}
|
|
762
|
+
// ═══════════════════════════════════════════
// SHEET 2: STREAMS (v2 — per track per day)
// ═══════════════════════════════════════════
// Tracks joined with their play count from the artist's most recent snapshot,
// highest play count first. This order fixes the per-track column order.
const latestSnapshotSql = `
SELECT t.title, t.spotify_id as track_id, t.release_date, t.role, t.release_type, s.play_count
FROM tracks t
JOIN song_snapshots s ON s.track_id = t.spotify_id AND s.artist_id = t.artist_id
WHERE t.artist_id = ? AND s.scraped_at = (SELECT MAX(scraped_at) FROM song_snapshots WHERE artist_id = ?)
ORDER BY s.play_count DESC
`;
const latestTracksForStreams = db.prepare(latestSnapshotSql).all(aid, aid);
const streamTrackNames = latestTracksForStreams.map(({ title }) => title);
const s4aFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFE8F5E9" } };
const dayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
// Track ID ↔ title lookups for the S4A perSong data (later rows win on duplicates).
const trackIdToName = {};
const trackNameToId = {};
latestTracksForStreams.forEach(({ track_id, title }) => {
    trackIdToName[track_id] = title;
    trackNameToId[title] = track_id;
});
// Scraped cumulative play counts grouped by snapshot date, then by track title
// (from DB — only for Streams sheet).
const allSnaps = db
    .prepare("SELECT date(s.scraped_at, 'unixepoch') as date, s.track_title, s.play_count FROM song_snapshots s WHERE s.artist_id = ? ORDER BY s.scraped_at ASC")
    .all(aid);
const streamsByDate = {};
for (const snap of allSnaps) {
    const bucket = streamsByDate[snap.date] || (streamsByDate[snap.date] = {});
    bucket[snap.track_title] = snap.play_count;
}
// Lifetime velocity (plays per day since release), at least one day of age.
const vel = {};
for (const { title, release_date, play_count } of latestTracksForStreams) {
    if (release_date) {
        const releasedAtSec = new Date(release_date).getTime() / 1000;
        const ageDays = Math.max(1, Math.floor((now - releasedAtSec) / 86400));
        vel[title] = play_count / ageDays;
    }
}
// Earliest release date (YYYY-MM-DD) across tracks; undefined when none has one.
const sReleaseDates = [];
for (const track of latestTracksForStreams) {
    const day = track.release_date?.slice(0, 10);
    if (day) {
        sReleaseDates.push(day);
    }
}
sReleaseDates.sort();
const sEarliestRelease = sReleaseDates[0];
// ── Helper: generate a metric sheet (365d daily + S4A_PREV + YoY + summary) ──
const metricSheets = [
    ["Streams", "streams", "FFff6b6b", "#,##0", true],
    ["Listeners", "listeners", "FF42A5F5", "#,##0", false],
    ["Saves", "saves", "FF66BB6A", "#,##0", false],
    ["Playlist Adds", "playlist_adds", "FFFFCA28", "#,##0", false],
    ["Streams per Listener", "streams_per_listener", "FFAB47BC", "#,##0.00", false],
].map(([name, key, color, numFormat, hasEst]) => ({ name, key, color, numFormat, hasEst }));
|
|
808
|
+
for (const metric of metricSheets) {
|
|
809
|
+
// One worksheet per metric: Date / Day / Source, one column per track, then TOTAL.
const ws = wb.addWorksheet(metric.name, { properties: { tabColor: { argb: metric.color } } });
const trackColumns = streamTrackNames.map((title) => ({ header: title, width: 13 }));
ws.columns = [
    { header: "Date", width: 12 },
    { header: "Day", width: 5 },
    { header: "Source", width: 8 },
    ...trackColumns,
    { header: "TOTAL", width: 14 },
];
applyHdr(ws, hdrDark);
// Metadata rows: first track found for each title supplies release date and role.
const firstTrackByTitle = new Map();
for (const track of latestTracksForStreams) {
    if (!firstTrackByTitle.has(track.title)) {
        firstTrackByTitle.set(track.title, track);
    }
}
const applyMetaStyle = (cell) => { cell.style = metaStyle; };
const releaseCells = streamTrackNames.map((title) => firstTrackByTitle.get(title)?.release_date?.slice(0, 10) || "");
const relRow = ws.addRow(["Release", "", "", ...releaseCells, ""]);
relRow.eachCell(applyMetaStyle);
const roleCells = streamTrackNames.map((title) => firstTrackByTitle.get(title)?.role || "");
const roleRow = ws.addRow(["Role", "", "", ...roleCells, ""]);
roleRow.eachCell(applyMetaStyle);
ws.addRow([""]);
|
|
828
|
+
// Extract S4A 365d daily timeseries for this metric.
// Result shape: s4aMetricDaily[date][trackTitle] = numeric value, where `date` is the
// point's `x` and the value is parsed from its `y`. Tracks are named via the DB
// lookup first, then via the S4A metadata name; unnamed tracks are skipped.
const s4aMetricDaily = {};
if (s4a?.perSong) {
    const isRatioMetric = metric.key === "streams_per_listener";
    for (const [tid, tData] of Object.entries(s4a.perSong)) {
        const name = trackIdToName[tid] || tData.metadata?.name;
        if (!name) {
            continue;
        }
        const ts = tData[metric.key]?.current_period_timeseries;
        if (!Array.isArray(ts)) {
            continue;
        }
        for (const pt of ts) {
            if (!pt.x) {
                continue;
            }
            if (!s4aMetricDaily[pt.x]) {
                s4aMetricDaily[pt.x] = {};
            }
            // Ratios keep their fraction; counts are parsed as base-10 integers
            // (explicit radix so a "0x…"-style string can never be read as hex).
            // Unparseable values fall back to 0.
            s4aMetricDaily[pt.x][name] = isRatioMetric
                ? Number.parseFloat(pt.y) || 0
                : Number.parseInt(pt.y, 10) || 0;
        }
    }
}
const s4aDates = Object.keys(s4aMetricDaily).sort();
|
|
850
|
+
// 1+2. EST + S4A_PREV — constrained disaggregation from release to S4A start
// Uses constrainedDisaggregate() for smooth, sum-preserving daily estimates.
// Timeline: starts on the 1st (UTC) of the month of the earliest release and ends the
// day before the first S4A date. Days from `boundaryDayIdx` on (365 days before the
// S4A start) are labelled S4A_PREV, earlier days EST.
const s4aPrevFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFFFF3E0" } };
if (s4aDates.length > 0) {
    const DAY_MS = 86400000;
    const s4aStartDate = new Date(s4aDates[0]);
    const s4aPrevBoundary = new Date(s4aStartDate.getTime() - 365 * DAY_MS);
    // UTC! new Date(y,m,1) is LOCAL timezone — April 1 CET = March 31 UTC,
    // causing off-by-one in day indices vs ISO string comparisons.
    const _earliest = new Date(sEarliestRelease || s4aDates[0]);
    const preS4aStartDate = new Date(Date.UTC(_earliest.getUTCFullYear(), _earliest.getUTCMonth(), 1));
    const totalPreS4aDays = Math.max(0, Math.floor((s4aStartDate.getTime() - preS4aStartDate.getTime()) / DAY_MS));
    if (totalPreS4aDays > 0 && sEarliestRelease) {
        const dateForDay = (day) => new Date(preS4aStartDate.getTime() + day * DAY_MS);
        const boundaryDayIdx = Math.floor((s4aPrevBoundary.getTime() - preS4aStartDate.getTime()) / DAY_MS);
        // Per-metric spike params: amplitude as a multiple of the tail average, and decay rate.
        const spikeByMetric = {
            streams: { factor: 3, lambda: 0.06 },
            listeners: { factor: 2.5, lambda: 0.08 },
            saves: { factor: 5, lambda: 0.12 },
            playlist_adds: { factor: 8, lambda: 0.50 },
        };
        const spike = spikeByMetric[metric.key] ?? { factor: 3, lambda: 0.06 };
        // streams_per_listener: prev_agg is cumulative period ratio (total streams / unique listeners)
        // NOT comparable to daily ratio — no per-track estimates are produced for it.
        // The prev_agg is shown as summary info row in the summary section below.
        // NOTE(review): the S4A_PREV date rows are still written for this metric (empty
        // track cells, total 0) — confirm whether they should be suppressed as well.
        const skipEstimates = metric.key === "streams_per_listener";
        // Per-track disaggregated daily values
        const perTrackDaily = {};
        for (const t of latestTracksForStreams) {
            if (skipEstimates || !t.release_date) {
                continue;
            }
            const tid = trackNameToId[t.title];
            const prevAgg = Number(tid && s4a?.perSong && s4a.perSong[tid]?.[metric.key]?.previous_period_agg) || 0;
            const curAgg = Number(tid && s4a?.perSong && s4a.perSong[tid]?.[metric.key]?.current_period_agg) || 0;
            // Only streams have a lifetime total to back out a pre-S4A_PREV remainder from.
            const estTotal = metric.key === "streams"
                ? Math.max(0, t.play_count - curAgg - prevAgg)
                : 0;
            const firstS4aVal = Number(s4aMetricDaily[s4aDates[0]]?.[t.title]) || 0;
            // Compute tailAvg from last 14 S4A data points for this track (positive values only);
            // fall back to the previous-period aggregate spread over the S4A_PREV days.
            let tailSum = 0;
            let tailCount = 0;
            for (const d of s4aDates.slice(-14)) {
                const v = Number(s4aMetricDaily[d]?.[t.title]) || 0;
                if (v > 0) {
                    tailSum += v;
                    tailCount++;
                }
            }
            const tailAvg = tailCount > 0
                ? tailSum / tailCount
                : prevAgg / Math.max(1, totalPreS4aDays - boundaryDayIdx);
            const spikeA = tailAvg * spike.factor;
            const spikeLambda = spike.lambda;
            const releaseDayIdx = Math.floor((new Date(t.release_date).getTime() - preS4aStartDate.getTime()) / DAY_MS);
            // Listeners: prev_agg = UNIQUE listeners (not sum of daily values).
            // Daily listener values overlap (same person on multiple days), so
            // sum(daily) >> unique_count. Compute overlap ratio from S4A current
            // period and adjust prevAgg to represent listener-DAYS for disagg.
            let disaggPrevAgg = prevAgg;
            if (metric.key === "listeners" && prevAgg > 0 && curAgg > 0) {
                let dailyListenerSum = 0;
                for (const d of s4aDates) {
                    dailyListenerSum += Number(s4aMetricDaily[d]?.[t.title]) || 0;
                }
                const overlapRatio = dailyListenerSum / curAgg;
                if (overlapRatio > 1) {
                    disaggPrevAgg = Math.round(prevAgg * overlapRatio);
                }
            }
            const result = constrainedDisaggregate({
                totalDays: totalPreS4aDays,
                boundaryIdx: boundaryDayIdx,
                estTotal,
                prevAgg: disaggPrevAgg,
                firstS4aVal,
                releaseDayIdx,
                spikeA,
                spikeLambda,
                baseline: tailAvg,
            });
            // Round with anchor: Largest Remainder (Hamilton) with pinned endpoint.
            // Pass explicit target (estTotal + disaggPrevAgg) so rounding hits the
            // KNOWN total, not the float sum which may differ due to blend absorption.
            const anchorIdx = totalPreS4aDays - 1;
            const intTarget = Math.round(estTotal + disaggPrevAgg);
            const rounded = roundWithAnchor(result.daily, anchorIdx, Math.round(firstS4aVal), intTarget);
            // Zero out days before release (shape already has zeros, but safety net)
            const releaseStr = t.release_date.slice(0, 10);
            for (let day = 0; day < totalPreS4aDays; day++) {
                if (dateForDay(day).toISOString().slice(0, 10) < releaseStr) {
                    rounded[day] = 0;
                }
            }
            perTrackDaily[t.title] = rounded;
        }
        // Write daily rows — EST and S4A_PREV with appropriate colors
        for (let day = 0; day < totalPreS4aDays; day++) {
            const dd = dateForDay(day);
            const d = dd.toISOString().slice(0, 10);
            // `dd` is a UTC-midnight instant, so the weekday must be read in UTC to match
            // the ISO date string; local getDay() reports the previous day west of UTC.
            const dow = dayNames[dd.getUTCDay()];
            const isS4aPrev = day >= boundaryDayIdx;
            if (!isS4aPrev && !metric.hasEst) {
                continue;
            }
            const source = isS4aPrev ? "S4A_PREV" : "EST";
            const fill = isS4aPrev ? s4aPrevFill : estFill;
            // Pre-release (or no estimate for the track): null. Post-release: 0 is a real value.
            const values = latestTracksForStreams.map((t) => {
                const daily = perTrackDaily[t.title];
                if (!daily || !t.release_date || d < t.release_date.slice(0, 10)) {
                    return null;
                }
                return daily[day] ?? null;
            });
            const total = values.reduce((s, v) => s + (v || 0), 0);
            const r = ws.addRow([d, dow, source, ...values, total]);
            r.eachCell((c) => {
                if (typeof c.value === "number") {
                    c.numFmt = metric.numFormat;
                    c.fill = fill;
                }
            });
        }
    }
}
|
|
996
|
+
// 3. S4A rows — 365d daily
// One row per S4A date with each track's value (null when the track has no point
// that day). TOTAL is left blank for the ratio metric, where a sum is meaningless.
for (const d of s4aDates) {
    // Date-only ISO strings parse as UTC midnight, so the weekday has to be read with
    // getUTCDay(); local getDay() returns the previous weekday in zones west of UTC.
    const dow = dayNames[new Date(d).getUTCDay()];
    const values = [];
    let total = 0;
    for (const t of latestTracksForStreams) {
        const v = s4aMetricDaily[d]?.[t.title];
        if (v != null) {
            values.push(v);
            total += v;
        }
        else {
            values.push(null);
        }
    }
    const row = ws.addRow([d, dow, "S4A", ...values, metric.key === "streams_per_listener" ? "" : total]);
    row.eachCell((c) => {
        // Number format applies to numeric cells only; the S4A fill applies to every cell.
        if (typeof c.value === "number") {
            c.numFmt = metric.numFormat;
        }
        c.fill = s4aFill;
    });
}
|
|
1015
|
+
// 4. SCR rows (only Streams — cumulative play counts from GraphQL)
// Offset rule: SCR date = scrape date - 1 day (S4A data ends yesterday)
if (metric.key === "streams") {
    // Order by snapshot date explicitly rather than relying on array-to-string coercion.
    const snapshotsByDate = Object.entries(streamsByDate)
        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    for (const [snapDate, tData] of snapshotsByDate) {
        const offset = new Date(new Date(snapDate).getTime() - 86400000);
        const offsetDate = offset.toISOString().slice(0, 10);
        // `offset` is a UTC-midnight instant; read the weekday in UTC so it matches
        // `offsetDate` (local getDay() is a day behind in zones west of UTC).
        const dow = dayNames[offset.getUTCDay()];
        const values = streamTrackNames.map((n) => tData[n] ?? null);
        const total = values.reduce((s, v) => s + (v || 0), 0);
        const row = ws.addRow([offsetDate, dow, "SCR", ...values, total]);
        row.eachCell((c) => {
            if (typeof c.value === "number") {
                c.numFmt = metric.numFormat;
            }
        });
    }
}
|
|
1028
|
+
// 5. PROJ rows (6 months forward projection)
// Uses YoY% (period_change_pct) as long-term trend + seasonal patterns from 365d data.
// - YoY% = verified 12m growth rate, immune to short-term dips/spikes
// - Weekday pattern = Mon-Sun factors from 365d average (e.g. Fri release day spike)
// - Monthly pattern = Jan-Dec factors from 365d average (summer vs winter)
// Track age < 60d: decay curve from initial spike (YoY not meaningful)
// Streams/Listener: no projection (ratio, not estimable)
// All calendar arithmetic below is done in UTC: S4A dates are date-only strings (parsed
// as UTC midnight) and rows are labelled via toISOString(), so local-time setDate /
// getDay / getMonth could shift or repeat a day across a DST change and would index
// the UTC-derived seasonal factors with the wrong weekday/month.
if (s4aDates.length > 0 && metric.key !== "streams_per_listener") {
    const DAY_MS = 86400000;
    const projStart = new Date(s4aDates[s4aDates.length - 1]);
    projStart.setUTCDate(projStart.getUTCDate() + 1);
    // Extract seasonal patterns from 365d S4A data (across all tracks combined)
    // Weekday factors: average streams per weekday relative to overall mean
    const dowSums = [0, 0, 0, 0, 0, 0, 0]; // Sun-Sat
    const dowCounts = [0, 0, 0, 0, 0, 0, 0];
    // Monthly factors: average streams per month relative to overall mean
    const monthSums = new Array(12).fill(0);
    const monthCounts = new Array(12).fill(0);
    let grandTotal = 0;
    let grandCount = 0;
    for (const d of s4aDates) {
        const dt = new Date(d + "T12:00:00Z");
        const dow = dt.getUTCDay();
        const month = dt.getUTCMonth();
        let daySum = 0;
        let dayN = 0;
        for (const t of latestTracksForStreams) {
            const v = s4aMetricDaily[d]?.[t.title];
            if (v != null && v > 0) {
                daySum += v;
                dayN++;
            }
        }
        // Days on which no track had a positive value do not count towards the averages.
        if (dayN > 0) {
            dowSums[dow] += daySum;
            dowCounts[dow]++;
            monthSums[month] += daySum;
            monthCounts[month]++;
            grandTotal += daySum;
            grandCount++;
        }
    }
    const grandAvg = grandCount > 0 ? grandTotal / grandCount : 1;
    // Weekday factor: ratio of weekday avg to grand avg (1.0 = average day)
    const dowFactors = dowSums.map((s, i) => (dowCounts[i] > 0 ? (s / dowCounts[i]) / grandAvg : 1));
    // Monthly factor: ratio of month avg to grand avg
    const monthFactors = monthSums.map((s, i) => (monthCounts[i] > 0 ? (s / monthCounts[i]) / grandAvg : 1));
    // Per-track projection parameters; tracks without a release date or with fewer
    // than 7 S4A points get none and project as null.
    const projParams = {};
    for (const t of latestTracksForStreams) {
        if (!t.release_date) {
            continue;
        }
        const daysSinceRelease = Math.max(1, Math.floor((now - new Date(t.release_date).getTime() / 1000) / 86400));
        const tid = trackNameToId[t.title];
        const vals = [];
        for (const d of s4aDates) {
            const v = s4aMetricDaily[d]?.[t.title];
            if (v != null) {
                vals.push(v);
            }
        }
        if (vals.length < 7) {
            continue;
        }
        const lastVal = vals[vals.length - 1];
        if (daysSinceRelease < 60) {
            // Young track: still decaying — use exponential decay from spike.
            // B = mean of the most recent (up to 90) points, A = early excess over B.
            const recent = vals.slice(-Math.min(90, vals.length));
            const B = recent.reduce((a, b) => a + b, 0) / recent.length;
            const early = vals.slice(0, 14);
            const earlyAvg = early.reduce((a, b) => a + b, 0) / early.length;
            const A = Math.max(0, earlyAvg - B);
            const lambda = A > 0 ? 0.04 : 0;
            projParams[t.title] = { lastVal, dailyRate: 0, isDecay: true, A, lambda, tOffset: daysSinceRelease, B };
            continue;
        }
        // Determine trend source: YoY vs median slope
        let dailyRate = 0;
        const prevAggVal = Number(tid && s4a.perSong[tid]?.[metric.key]?.previous_period_agg) || 0;
        const yoyPct = Number(tid && s4a.perSong[tid]?.[metric.key]?.period_change_pct) || 0;
        // YoY is trustworthy only when: track > 15 months AND prev_period_agg is substantial
        const yoyReliable = daysSinceRelease > 450 && prevAggVal > 10;
        if (yoyReliable) {
            // Mature track with reliable YoY: cap to [-50%, +200%] annually
            const cappedYoy = Math.max(-50, Math.min(200, yoyPct));
            const annualMultiplier = 1 + cappedYoy / 100;
            dailyRate = annualMultiplier > 0 ? Math.pow(annualMultiplier, 1 / 365) - 1 : 0;
        }
        else {
            // Track 60d-450d OR unreliable YoY: use median-based slope from last 90d
            // Median slope is robust to spikes, bot removals, and seasonal noise
            const recent = vals.slice(-Math.min(90, vals.length));
            if (recent.length >= 14) {
                // Split into two halves, compare medians for robust trend
                const half = Math.floor(recent.length / 2);
                const firstHalf = [...recent.slice(0, half)].sort((a, b) => a - b);
                const secondHalf = [...recent.slice(half)].sort((a, b) => a - b);
                const medFirst = firstHalf[Math.floor(firstHalf.length / 2)];
                const medSecond = secondHalf[Math.floor(secondHalf.length / 2)];
                if (medFirst > 0) {
                    const slopePct = (medSecond - medFirst) / medFirst;
                    // Annualize the half-window slope, then cap
                    const halfDays = recent.length / 2;
                    const annualized = Math.pow(1 + slopePct, 365 / halfDays) - 1;
                    const capped = Math.max(-0.5, Math.min(2.0, annualized));
                    dailyRate = Math.pow(1 + capped, 1 / 365) - 1;
                }
            }
        }
        projParams[t.title] = { lastVal, dailyRate, isDecay: false, A: 0, lambda: 0, tOffset: 0, B: lastVal };
    }
    for (let day = 0; day < 180; day++) {
        const d = new Date(projStart.getTime() + day * DAY_MS);
        const dateStr = d.toISOString().slice(0, 10);
        const dowIdx = d.getUTCDay();
        const monthIdx = d.getUTCMonth();
        const dow = dayNames[dowIdx];
        const values = [];
        let total = 0;
        for (const t of latestTracksForStreams) {
            const p = projParams[t.title];
            if (!p) {
                values.push(null);
                continue;
            }
            let proj;
            if (p.isDecay) {
                // Young track: decay curve continuing
                proj = p.A * Math.exp(-p.lambda * (p.tOffset + day)) + p.B;
            }
            else {
                // Mature track: compound growth × seasonal factors
                const trendVal = p.lastVal * Math.pow(1 + p.dailyRate, day + 1);
                proj = trendVal * dowFactors[dowIdx] * monthFactors[monthIdx];
                // Guard rails: floor at 10% of last value, ceiling at 5× last value
                proj = Math.max(p.lastVal * 0.1, Math.min(p.lastVal * 5, proj));
            }
            const rounded = Math.round(proj);
            values.push(rounded);
            total += rounded;
        }
        const row = ws.addRow([dateStr, dow, "PROJ", ...values, total]);
        row.eachCell((c) => {
            // Number format on numeric cells only; projection fill on every cell.
            if (typeof c.value === "number") {
                c.numFmt = metric.numFormat;
            }
            c.fill = projFill;
        });
    }
}
|
|
1172
|
+
// ── Summary section ──
// Blank separator, then (when S4A per-song data exists) a "YoY %" header followed by
// the per-track YoY change, previous-12-month and current-12-month aggregate rows.
ws.addRow([""]);
if (s4a?.perSong) {
    const isRatioMetric = metric.key === "streams_per_listener";
    // Reads one numeric field of this metric for the track with the given title.
    const readSongNumber = (title, field) => {
        const tid = trackNameToId[title];
        return Number(tid && s4a.perSong[tid]?.[metric.key]?.[field]);
    };
    // Ratios keep two decimals, counts are rounded to integers.
    const roundForMetric = (v) => (isRatioMetric ? Math.round(v * 100) / 100 : Math.round(v));
    // Adds a row and applies `format` to its numeric cells only.
    const addFormattedRow = (cells, format) => {
        ws.addRow(cells).eachCell((cell) => {
            if (typeof cell.value === "number") {
                cell.numFmt = format;
            }
        });
    };
    // YoY %
    const yoyHdr = ws.addRow(["YoY %", "", "", ...streamTrackNames.map(() => ""), ""]);
    yoyHdr.eachCell((cell) => { cell.style = sectionStyle; });
    const yoyVals = streamTrackNames.map((title) => {
        const pct = readSongNumber(title, "period_change_pct");
        return Number.isNaN(pct) ? null : Math.round(pct * 10) / 10;
    });
    addFormattedRow(["YoY change", "", "", ...yoyVals, ""], "+0.0\"%\";-0.0\"%\"");
    // Prev + Cur 12m totals. A missing previous aggregate is written as 0,
    // a missing or zero current aggregate as an empty cell.
    const prevVals = streamTrackNames.map((title) => roundForMetric(readSongNumber(title, "previous_period_agg") || 0));
    const prevLabel = isRatioMetric ? "Prev 12m avg ratio" : "Prev 12m total";
    addFormattedRow([prevLabel, "", "", ...prevVals, ""], metric.numFormat);
    const curVals = streamTrackNames.map((title) => {
        const v = readSongNumber(title, "current_period_agg") || 0;
        return v > 0 ? roundForMetric(v) : null;
    });
    const curLabel = isRatioMetric ? "Cur 12m avg ratio" : "Cur 12m total";
    addFormattedRow([curLabel, "", "", ...curVals, ""], metric.numFormat);
}
|
|
1203
|
+
// Velocity + Share % (only Streams)
// Three summary sections on the Streams sheet: lifetime velocity, share of total
// plays, and cumulative totals (scraped, plus S4A all-time when available).
if (metric.key === "streams") {
    const blankPerTrack = streamTrackNames.map(() => "");
    // Section header row styled with `sectionStyle`.
    const addSectionHeader = (label) => {
        const hdr = ws.addRow([label, "", "", ...blankPerTrack, ""]);
        hdr.eachCell((cell) => { cell.style = sectionStyle; });
    };
    // Applies `format` to the numeric cells of `row`.
    const formatNumbers = (row, format) => {
        row.eachCell((cell) => {
            if (typeof cell.value === "number") {
                cell.numFmt = format;
            }
        });
    };
    ws.addRow([""]);
    addSectionHeader("VELOCITY (streams/day)");
    const vVals = streamTrackNames.map((title) => (vel[title] ? Math.round(vel[title] * 10) / 10 : null));
    formatNumbers(ws.addRow(["Lifetime avg", "", "", ...vVals, ""]), "#,##0.0");
    ws.addRow([""]);
    addSectionHeader("SHARE %");
    let totStr = 0;
    for (const track of latestTracksForStreams) {
        totStr += track.play_count || 0;
    }
    const shVals = streamTrackNames.map((title) => {
        const match = latestTracksForStreams.find((x) => x.title === title);
        if (match?.play_count && totStr) {
            return Math.round((match.play_count / totStr) * 1000) / 10;
        }
        return null;
    });
    formatNumbers(ws.addRow(["Current", "", "", ...shVals, "100%"]), "0.0\"%\"");
    // Cumulative running total (Streams only — daily values summed over time)
    ws.addRow([""]);
    addSectionHeader("CUMULATIVE TOTAL");
    // Latest SCR value = current known cumulative total per track
    const latestTotal = {};
    for (const track of latestTracksForStreams) {
        latestTotal[track.title] = track.play_count;
    }
    const latestCells = streamTrackNames.map((title) => latestTotal[title] || null);
    const latestSum = Object.values(latestTotal).reduce((a, b) => a + b, 0);
    formatNumbers(ws.addRow(["Latest (SCR)", "", "", ...latestCells, latestSum]), numFmt);
    // Total stream count from S4A (all-time per track)
    if (s4a?.perSong) {
        const s4aTotals = streamTrackNames.map((title) => {
            const tid = trackNameToId[title];
            const raw = tid && s4a.perSong[tid]?.total_stream_count;
            return raw ? parseInt(raw) : null;
        });
        formatNumbers(ws.addRow(["S4A all-time", "", "", ...s4aTotals, ""]), numFmt);
    }
}
|
|
1244
|
+
} // end metricSheets loop
|
|
1245
|
+
// ─────────────────────────────────────────────
|
|
1246
|
+
// D8: Countries — daily timeline (tracks side by side)
|
|
1247
|
+
// ─────────────────────────────────────────────
|
|
1248
|
+
// Merges ALL historical dumps to build complete picture:
|
|
1249
|
+
// - Timeline countries = UNION of all countries ever in top 3 (across all dumps)
|
|
1250
|
+
// - 28d snapshot shares = latest available per country
|
|
1251
|
+
// - Daily values: TIMELINE (exact) for known countries, DISAGG (estimated) for rest
|
|
1252
|
+
// Columns grow over time as new dumps add countries to timeline or snapshots.
|
|
1253
|
+
if (s4a?.perSong) {
|
|
1254
|
+
const ws8 = wb.addWorksheet("Countries", { properties: { tabColor: { argb: "FF26A69A" } } });
|
|
1255
|
+
const dn8 = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
|
|
1256
|
+
const disaggFill = { type: "pattern", pattern: "solid", fgColor: { argb: "FFF5F5DC" } };
|
|
1257
|
+
// Load ALL historical dumps
|
|
1258
|
+
const allDumps = loadAllDumps(aid);
|
|
1259
|
+
// Build merged data per track across ALL dumps
|
|
1260
|
+
const trackMap = new Map();
|
|
1261
|
+
const allDates8 = new Set();
|
|
1262
|
+
// Process each dump (oldest first → newest overwrites)
|
|
1263
|
+
for (const dump of allDumps) {
|
|
1264
|
+
if (!dump.data.perSong)
|
|
1265
|
+
continue;
|
|
1266
|
+
for (const [tid, d] of Object.entries(dump.data.perSong)) {
|
|
1267
|
+
const nm = d.metadata?.name || d.name || tid;
|
|
1268
|
+
if (!trackMap.has(tid)) {
|
|
1269
|
+
trackMap.set(tid, {
|
|
1270
|
+
name: nm, allCodes: [], timelineCodes: new Set(),
|
|
1271
|
+
timelinePts: {}, snapShares: {}, dailyTotals: {},
|
|
1272
|
+
});
|
|
1273
|
+
}
|
|
1274
|
+
const t = trackMap.get(tid);
|
|
1275
|
+
// Merge timeline data (UNION of countries, latest daily values win)
|
|
1276
|
+
for (const ct of (d.countryTimelines || [])) {
|
|
1277
|
+
const cc = ct.countryCode || "??";
|
|
1278
|
+
t.timelineCodes.add(cc); // once in top 3, always a timeline column
|
|
1279
|
+
if (!t.timelinePts[cc])
|
|
1280
|
+
t.timelinePts[cc] = {};
|
|
1281
|
+
for (const p of (ct.timelinePoint || [])) {
|
|
1282
|
+
t.timelinePts[cc][p.date] = Number(p.num) || 0;
|
|
1283
|
+
allDates8.add(p.date);
|
|
1284
|
+
}
|
|
1285
|
+
}
|
|
1286
|
+
// Merge 28d snapshot (latest wins)
|
|
1287
|
+
let raw = d.perSongCountries || [];
|
|
1288
|
+
if (!raw.length && d.geography?.length && d.geography[0]?.localized_country)
|
|
1289
|
+
raw = d.geography;
|
|
1290
|
+
if (raw.length > 0) {
|
|
1291
|
+
const snap = {};
|
|
1292
|
+
for (const c of raw)
|
|
1293
|
+
snap[c.name] = Number(c.num) || 0;
|
|
1294
|
+
const tlTotal = [...t.timelineCodes].reduce((s, cc) => s + (snap[cc] || 0), 0);
|
|
1295
|
+
const nonTlTotal = Object.entries(snap).filter(([cc]) => !t.timelineCodes.has(cc)).reduce((s, [, v]) => s + v, 0);
|
|
1296
|
+
for (const [cc, v] of Object.entries(snap)) {
|
|
1297
|
+
if (!t.timelineCodes.has(cc) && nonTlTotal > 0)
|
|
1298
|
+
t.snapShares[cc] = v / nonTlTotal;
|
|
1299
|
+
}
|
|
1300
|
+
}
|
|
1301
|
+
// Merge daily totals (latest timeseries wins)
|
|
1302
|
+
const streamTs = d.streams?.current_period_timeseries;
|
|
1303
|
+
if (Array.isArray(streamTs)) {
|
|
1304
|
+
for (const pt of streamTs) {
|
|
1305
|
+
if (pt.x)
|
|
1306
|
+
t.dailyTotals[pt.x] = Number(pt.y) || 0;
|
|
1307
|
+
}
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
}
|
|
1311
|
+
// Build allCodes per track: timeline countries first, then snapshot countries, sorted by total desc
|
|
1312
|
+
const tracks8 = [];
|
|
1313
|
+
for (const [, t] of [...trackMap.entries()].sort((a, b) => {
|
|
1314
|
+
const totA = Object.values(a[1].dailyTotals).reduce((s, v) => s + v, 0);
|
|
1315
|
+
const totB = Object.values(b[1].dailyTotals).reduce((s, v) => s + v, 0);
|
|
1316
|
+
return totB - totA;
|
|
1317
|
+
})) {
|
|
1318
|
+
// Timeline codes sorted by total timeline streams desc
|
|
1319
|
+
const tlSorted = [...t.timelineCodes].sort((a, b) => {
|
|
1320
|
+
const sa = Object.values(t.timelinePts[a] || {}).reduce((s, v) => s + v, 0);
|
|
1321
|
+
const sb = Object.values(t.timelinePts[b] || {}).reduce((s, v) => s + v, 0);
|
|
1322
|
+
return sb - sa;
|
|
1323
|
+
});
|
|
1324
|
+
// Non-timeline codes sorted by share desc
|
|
1325
|
+
const nonTlSorted = Object.entries(t.snapShares)
|
|
1326
|
+
.filter(([cc]) => !t.timelineCodes.has(cc))
|
|
1327
|
+
.sort((a, b) => b[1] - a[1])
|
|
1328
|
+
.map(([cc]) => cc);
|
|
1329
|
+
t.allCodes = [...tlSorted, ...nonTlSorted];
|
|
1330
|
+
if (t.allCodes.length > 0)
|
|
1331
|
+
tracks8.push(t);
|
|
1332
|
+
}
|
|
1333
|
+
const dates8 = [...allDates8].sort();
|
|
1334
|
+
// Row 1: track names
|
|
1335
|
+
const r1 = ["", ""];
|
|
1336
|
+
for (const t of tracks8) {
|
|
1337
|
+
r1.push(t.name);
|
|
1338
|
+
for (let i = 1; i < t.allCodes.length; i++)
|
|
1339
|
+
r1.push("");
|
|
1340
|
+
r1.push("");
|
|
1341
|
+
}
|
|
1342
|
+
const row1 = ws8.addRow(r1);
|
|
1343
|
+
let ci8 = 3;
|
|
1344
|
+
for (const t of tracks8) {
|
|
1345
|
+
row1.getCell(ci8).font = { bold: true, size: 11 };
|
|
1346
|
+
ci8 += t.allCodes.length + 1;
|
|
1347
|
+
}
|
|
1348
|
+
// Row 2: country codes + TOTAL
|
|
1349
|
+
const r2 = ["Date", "Day"];
|
|
1350
|
+
for (const t of tracks8)
|
|
1351
|
+
r2.push(...t.allCodes, "TOTAL");
|
|
1352
|
+
ws8.addRow(r2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
|
|
1353
|
+
// Row 3: source indicator (S4A = timeline exact, DISAGG = estimated)
|
|
1354
|
+
const r3 = ["Source", ""];
|
|
1355
|
+
for (const t of tracks8) {
|
|
1356
|
+
r3.push(...t.allCodes.map(cc => t.timelineCodes.has(cc) ? "S4A" : "DISAGG"), "");
|
|
1357
|
+
}
|
|
1358
|
+
ws8.addRow(r3).eachCell(c => { c.font = { size: 8, italic: true, color: { argb: "FF999999" } }; });
|
|
1359
|
+
// 365d daily rows
|
|
1360
|
+
for (const date of dates8) {
|
|
1361
|
+
const d = new Date(date);
|
|
1362
|
+
const row = [date, dn8[d.getUTCDay()]];
|
|
1363
|
+
for (const t of tracks8) {
|
|
1364
|
+
const timelineSum = [...t.timelineCodes].reduce((s, cc) => s + (t.timelinePts[cc]?.[date] || 0), 0);
|
|
1365
|
+
const dailyTotal = t.dailyTotals[date] || 0;
|
|
1366
|
+
const remainder = Math.max(0, dailyTotal - timelineSum);
|
|
1367
|
+
const vals = [];
|
|
1368
|
+
for (const cc of t.allCodes) {
|
|
1369
|
+
if (t.timelineCodes.has(cc)) {
|
|
1370
|
+
vals.push(t.timelinePts[cc]?.[date] || 0);
|
|
1371
|
+
}
|
|
1372
|
+
else {
|
|
1373
|
+
vals.push(Math.round(remainder * (t.snapShares[cc] || 0)));
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
row.push(...vals, vals.reduce((a, b) => a + b, 0));
|
|
1377
|
+
}
|
|
1378
|
+
const xlRow = ws8.addRow(row);
|
|
1379
|
+
xlRow.eachCell((c, colNumber) => {
|
|
1380
|
+
if (typeof c.value === "number") {
|
|
1381
|
+
c.numFmt = numFmt;
|
|
1382
|
+
if (colNumber > 2) {
|
|
1383
|
+
let off = colNumber - 2;
|
|
1384
|
+
for (const t of tracks8) {
|
|
1385
|
+
if (off <= t.allCodes.length) {
|
|
1386
|
+
const cc = t.allCodes[off - 1];
|
|
1387
|
+
if (cc && !t.timelineCodes.has(cc))
|
|
1388
|
+
c.fill = disaggFill;
|
|
1389
|
+
break;
|
|
1390
|
+
}
|
|
1391
|
+
off -= t.allCodes.length + 1;
|
|
1392
|
+
}
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
});
|
|
1396
|
+
}
|
|
1397
|
+
// Summary
|
|
1398
|
+
const totR = ["TOTAL (12m)", ""];
|
|
1399
|
+
for (const t of tracks8) {
|
|
1400
|
+
const sums = t.allCodes.map(cc => {
|
|
1401
|
+
if (t.timelineCodes.has(cc))
|
|
1402
|
+
return Object.values(t.timelinePts[cc] || {}).reduce((a, b) => a + b, 0);
|
|
1403
|
+
let s = 0;
|
|
1404
|
+
for (const date of dates8) {
|
|
1405
|
+
const tlSum = [...t.timelineCodes].reduce((s2, cc2) => s2 + (t.timelinePts[cc2]?.[date] || 0), 0);
|
|
1406
|
+
s += Math.round(Math.max(0, (t.dailyTotals[date] || 0) - tlSum) * (t.snapShares[cc] || 0));
|
|
1407
|
+
}
|
|
1408
|
+
return s;
|
|
1409
|
+
});
|
|
1410
|
+
totR.push(...sums, sums.reduce((a, b) => a + b, 0));
|
|
1411
|
+
}
|
|
1412
|
+
ws8.addRow(totR).eachCell(c => { c.font = { bold: true }; if (typeof c.value === "number")
|
|
1413
|
+
c.numFmt = numFmt; });
|
|
1414
|
+
ws8.getColumn(1).width = 12;
|
|
1415
|
+
ws8.getColumn(2).width = 5;
|
|
1416
|
+
const tc8 = 2 + tracks8.reduce((s, t) => s + t.allCodes.length + 1, 0);
|
|
1417
|
+
for (let i = 3; i <= tc8; i++)
|
|
1418
|
+
ws8.getColumn(i).width = 10;
|
|
1419
|
+
ws8.views = [{ state: "frozen", xSplit: 2, ySplit: 3, topLeftCell: "C4", activeCell: "C4" }];
|
|
1420
|
+
}
|
|
1421
|
+
// ─────────────────────────────────────────────
|
|
1422
|
+
// D8b: Country Snapshots (accumulated across ALL scrapes)
|
|
1423
|
+
// ─────────────────────────────────────────────
|
|
1424
|
+
// Each scrape date = one block of rows. Grows with every new dump.
|
|
1425
|
+
// Layout: Scrape Date | Country | Track1 streams | Track2 streams | ...
|
|
1426
|
+
// This is the append-only log that feeds delta computation.
|
|
1427
|
+
{
|
|
1428
|
+
const wsSn = wb.addWorksheet("Country Snapshots", { properties: { tabColor: { argb: "FF80CBC4" } } });
|
|
1429
|
+
const allDumps = loadAllDumps(aid);
|
|
1430
|
+
// Collect track names (sorted by total streams in latest dump)
|
|
1431
|
+
const trackOrder = [];
|
|
1432
|
+
if (s4a?.perSong) {
|
|
1433
|
+
for (const [tid, d] of Object.entries(s4a.perSong)
|
|
1434
|
+
.sort((a, b) => Number(b[1].total_stream_count || 0) - Number(a[1].total_stream_count || 0))) {
|
|
1435
|
+
trackOrder.push({ tid, name: d.metadata?.name || d.name || tid });
|
|
1436
|
+
}
|
|
1437
|
+
}
|
|
1438
|
+
// Header
|
|
1439
|
+
const h1 = ["Scrape Date", "Country"];
|
|
1440
|
+
for (const t of trackOrder)
|
|
1441
|
+
h1.push(t.name);
|
|
1442
|
+
wsSn.addRow(h1);
|
|
1443
|
+
for (let i = 3; i <= h1.length; i++)
|
|
1444
|
+
wsSn.getRow(1).getCell(i).font = { bold: true, size: 10 };
|
|
1445
|
+
const h2 = ["", ""];
|
|
1446
|
+
for (const t of trackOrder)
|
|
1447
|
+
h2.push("28d Streams");
|
|
1448
|
+
wsSn.addRow(h2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
|
|
1449
|
+
// One block per dump date
|
|
1450
|
+
for (const dump of allDumps) {
|
|
1451
|
+
if (!dump.data.perSong)
|
|
1452
|
+
continue;
|
|
1453
|
+
// Collect all countries from this dump
|
|
1454
|
+
const countriesInDump = new Set();
|
|
1455
|
+
const snapByTrack = {};
|
|
1456
|
+
for (const [tid, d] of Object.entries(dump.data.perSong)) {
|
|
1457
|
+
let raw = d.perSongCountries || [];
|
|
1458
|
+
if (!raw.length && d.geography?.length && d.geography[0]?.localized_country)
|
|
1459
|
+
raw = d.geography;
|
|
1460
|
+
snapByTrack[tid] = {};
|
|
1461
|
+
for (const c of raw) {
|
|
1462
|
+
const code = c.name;
|
|
1463
|
+
countriesInDump.add(code);
|
|
1464
|
+
snapByTrack[tid][code] = Number(c.num) || 0;
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
const countries = [...countriesInDump].sort();
|
|
1468
|
+
for (const code of countries) {
|
|
1469
|
+
const row = [dump.date, code];
|
|
1470
|
+
for (const t of trackOrder) {
|
|
1471
|
+
row.push(snapByTrack[t.tid]?.[code] ?? null);
|
|
1472
|
+
}
|
|
1473
|
+
wsSn.addRow(row).eachCell(c => { if (typeof c.value === "number")
|
|
1474
|
+
c.numFmt = numFmt; });
|
|
1475
|
+
}
|
|
1476
|
+
// Separator between dump dates
|
|
1477
|
+
wsSn.addRow([]);
|
|
1478
|
+
}
|
|
1479
|
+
wsSn.getColumn(1).width = 12;
|
|
1480
|
+
wsSn.getColumn(2).width = 8;
|
|
1481
|
+
for (let i = 3; i <= 2 + trackOrder.length; i++)
|
|
1482
|
+
wsSn.getColumn(i).width = 10;
|
|
1483
|
+
wsSn.views = [{ state: "frozen", xSplit: 2, ySplit: 2, topLeftCell: "C3", activeCell: "C3" }];
|
|
1484
|
+
}
|
|
1485
|
+
// ─────────────────────────────────────────────
|
|
1486
|
+
// D9: Cities Snapshots (28d per scrape, per-song)
|
|
1487
|
+
// ─────────────────────────────────────────────
|
|
1488
|
+
// Same concept as Country Snapshots but for cities.
|
|
1489
|
+
if (s4a?.perSong) {
|
|
1490
|
+
const wsCi = wb.addWorksheet("City Snapshots", { properties: { tabColor: { argb: "FF7986CB" } } });
|
|
1491
|
+
const sortedTracks = Object.entries(s4a.perSong)
|
|
1492
|
+
.sort((a, b) => Number(b[1].total_stream_count || 0) - Number(a[1].total_stream_count || 0));
|
|
1493
|
+
// Collect all unique cities
|
|
1494
|
+
const allCities = new Map();
|
|
1495
|
+
const trackCitySnaps = [];
|
|
1496
|
+
for (const [, d] of sortedTracks) {
|
|
1497
|
+
const nm = d.metadata?.name || d.name || "?";
|
|
1498
|
+
let raw = d.perSongCities || [];
|
|
1499
|
+
if (!raw.length && d.geography?.length && d.geography[0]?.region)
|
|
1500
|
+
raw = d.geography;
|
|
1501
|
+
const map = {};
|
|
1502
|
+
for (const x of raw) {
|
|
1503
|
+
const key = `${x.name}|${x.country || ""}`;
|
|
1504
|
+
allCities.set(key, { country: x.country || "", region: x.region || "" });
|
|
1505
|
+
map[key] = Number(x.num) || 0;
|
|
1506
|
+
}
|
|
1507
|
+
trackCitySnaps.push({ name: nm, data: map });
|
|
1508
|
+
}
|
|
1509
|
+
// Sort cities by total streams across all tracks
|
|
1510
|
+
const cityTotals = {};
|
|
1511
|
+
for (const t of trackCitySnaps)
|
|
1512
|
+
for (const [k, v] of Object.entries(t.data))
|
|
1513
|
+
cityTotals[k] = (cityTotals[k] || 0) + v;
|
|
1514
|
+
const citiesSorted = Object.entries(cityTotals).sort((a, b) => b[1] - a[1]).map(([k]) => k);
|
|
1515
|
+
// Header 1: track names
|
|
1516
|
+
const h1 = ["City", "Country"];
|
|
1517
|
+
for (const t of trackCitySnaps)
|
|
1518
|
+
h1.push(t.name);
|
|
1519
|
+
wsCi.addRow(h1);
|
|
1520
|
+
for (let i = 3; i <= h1.length; i++)
|
|
1521
|
+
wsCi.getRow(1).getCell(i).font = { bold: true, size: 10 };
|
|
1522
|
+
// Header 2
|
|
1523
|
+
const h2 = ["", ""];
|
|
1524
|
+
for (const t of trackCitySnaps)
|
|
1525
|
+
h2.push("Streams");
|
|
1526
|
+
wsCi.addRow(h2).eachCell(c => { Object.assign(c, { style: hdrDark }); });
|
|
1527
|
+
// One row per city
|
|
1528
|
+
for (const key of citiesSorted) {
|
|
1529
|
+
const [cityName] = key.split("|");
|
|
1530
|
+
const meta = allCities.get(key);
|
|
1531
|
+
const row = [cityName, meta.country];
|
|
1532
|
+
for (const t of trackCitySnaps)
|
|
1533
|
+
row.push(t.data[key] || null);
|
|
1534
|
+
wsCi.addRow(row).eachCell(c => { if (typeof c.value === "number")
|
|
1535
|
+
c.numFmt = numFmt; });
|
|
1536
|
+
}
|
|
1537
|
+
wsCi.getColumn(1).width = 20;
|
|
1538
|
+
wsCi.getColumn(2).width = 6;
|
|
1539
|
+
for (let i = 3; i <= 2 + trackCitySnaps.length; i++)
|
|
1540
|
+
wsCi.getColumn(i).width = 10;
|
|
1541
|
+
wsCi.views = [{ state: "frozen", xSplit: 2, ySplit: 2, topLeftCell: "C3", activeCell: "C3" }];
|
|
1542
|
+
}
|
|
1543
|
+
await wb.xlsx.writeFile(xlsxPath);
|
|
1544
|
+
db.close();
|
|
1545
|
+
}
|
|
1546
|
+
// ─── S4A Authentication ───
|
|
1547
|
+
// Directory that holds the persisted Spotify for Artists session files
// (cookies.json and sp_dc.txt are written here by connectS4A).
// NOTE(review): when HOME is unset the "~" fallback is a literal path segment —
// path.join does not expand it — so the directory would resolve relative to the
// current working directory. Consider os.homedir() instead; confirm target platforms.
const S4A_SESSION_DIR = path.join(process.env.HOME || "~", ".artist-os", ".s4a-session");
|
|
1548
|
+
/**
 * Interactive Spotify for Artists (S4A) login.
 *
 * Copies the user's Chrome "Default" profile into a private debug profile
 * (first run only), restarts Chrome on that profile with a remote-debugging
 * port, waits up to five minutes for the S4A home page to appear, then stores
 * the session cookies in S4A_SESSION_DIR (cookies.json and sp_dc.txt).
 *
 * Side effects: terminates running "Google Chrome" processes, spawns a
 * detached Chrome that keeps running after this function returns, and writes
 * files below ~/.artist-os.
 *
 * @param {string} artistId - Artist id used to build the S4A home URL.
 * @returns {Promise<{success: true, name: string} | {success: false, error: string}>}
 *   `name` is the artist id that was passed in.
 * @throws If the profile copy fails or the CDP connection cannot be established.
 */
export async function connectS4A(artistId) {
    // Dynamic import works in ES modules, where a bare `require` is not defined.
    const { execFileSync, spawn } = await import("node:child_process");
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    const chromePath = process.env.CHROME_PATH || findChrome();
    const CDP_PORT = 9222;
    const homeDir = process.env.HOME || "~";
    const CHROME_DEBUG_DIR = path.join(homeDir, ".artist-os", ".chrome-debug");
    const systemChromeProfile = path.join(homeDir, "Library", "Application Support", "Google", "Chrome");
    const defaultDir = path.join(CHROME_DEBUG_DIR, "Default");
    fs.mkdirSync(S4A_SESSION_DIR, { recursive: true });

    // Step 1: Copy Chrome profile (one-time) — has saved passwords, Google account, autofill.
    // execFileSync passes the paths as argv entries, so no shell ever parses them.
    if (!fs.existsSync(defaultDir)) {
        fs.mkdirSync(CHROME_DEBUG_DIR, { recursive: true });
        execFileSync("cp", ["-r", path.join(systemChromeProfile, "Default"), `${CHROME_DEBUG_DIR}/Default`], { stdio: "ignore" });
        try {
            execFileSync("cp", [path.join(systemChromeProfile, "Local State"), `${CHROME_DEBUG_DIR}/`], { stdio: "ignore" });
        }
        catch {
            // "Local State" is optional; the debug profile still works without it.
        }
    }

    // Step 2: Clean crash state in debug profile so Chrome does not show a restore prompt.
    for (const f of ["Last Session", "Last Tabs", "Current Session", "Current Tabs"]) {
        try {
            fs.unlinkSync(path.join(defaultDir, f));
        }
        catch {
            // File not present — nothing to clean.
        }
    }
    try {
        const prefsPath = path.join(defaultDir, "Preferences");
        const p = JSON.parse(fs.readFileSync(prefsPath, "utf-8"));
        if (!p.profile)
            p.profile = {};
        p.profile.exit_type = "Normal";
        p.profile.exited_cleanly = true;
        fs.writeFileSync(prefsPath, JSON.stringify(p));
    }
    catch {
        // Missing or unreadable Preferences: best-effort only.
    }

    // Step 3: Kill Chrome, relaunch with debug profile + debugging port.
    try {
        execFileSync("pkill", ["-f", "Google Chrome"], { stdio: "ignore" });
    }
    catch {
        // pkill exits non-zero when no process matched.
    }
    await pause(2000);
    let launchError = null;
    const chromeProc = spawn(chromePath, [
        `--user-data-dir=${CHROME_DEBUG_DIR}`,
        `--remote-debugging-port=${CDP_PORT}`,
        "--no-first-run",
        `https://artists.spotify.com/c/artist/${encodeURIComponent(artistId)}/home`,
    ], { detached: true, stdio: "ignore" });
    // Without a listener, a failed spawn (e.g. wrong CHROME_PATH) is an unhandled
    // 'error' event that would take down the whole process.
    chromeProc.once("error", (err) => { launchError = err; });
    chromeProc.unref();
    await pause(5000);
    if (launchError) {
        return { success: false, error: `Could not launch Chrome (${chromePath}): ${launchError.message}` };
    }

    // Step 4: Connect via CDP
    const browser = await chromium.connectOverCDP(`http://localhost:${CDP_PORT}`);
    try {
        const context = browser.contexts()[0];
        if (!context) {
            return { success: false, error: "Chrome exposed no browser context over CDP. Run connect_s4a again." };
        }

        // Step 5: Wait for S4A dashboard (max 5 min — user may need to enter SMS code)
        const deadline = Date.now() + 5 * 60 * 1000;
        let loggedIn = false;
        while (Date.now() < deadline) {
            const s4aPage = context.pages().find((p) => {
                const u = p.url();
                return u.includes("artists.spotify.com") && u.includes("/home") &&
                    !u.includes("accounts.spotify.com") && !u.includes("challenge.spotify.com");
            });
            if (s4aPage) {
                loggedIn = true;
                break;
            }
            await pause(2000);
        }
        if (!loggedIn) {
            return { success: false, error: "Login timed out after 5 minutes. Run connect_s4a again." };
        }

        // Step 6: Extract and save cookies. Files are created owner-only because
        // they contain live session credentials.
        let sessionSaved = false;
        try {
            const cookies = await context.cookies("https://artists.spotify.com");
            fs.writeFileSync(path.join(S4A_SESSION_DIR, "cookies.json"), JSON.stringify(cookies, null, 2), { mode: 0o600 });
            const spDc = cookies.find((c) => c.name === "sp_dc");
            if (spDc) {
                fs.writeFileSync(path.join(S4A_SESSION_DIR, "sp_dc.txt"), spDc.value, { mode: 0o600 });
                sessionSaved = true;
            }
        }
        catch {
            // Reported below through sessionSaved.
        }

        // Step 7: Close only S4A tabs, Chrome stays open for the user
        try {
            const s4aPages = context.pages().filter((p) => p.url().includes("artists.spotify.com"));
            for (const p of s4aPages)
                await p.close();
        }
        catch {
            // Tabs may already be gone; not fatal.
        }

        if (!sessionSaved) {
            // Without sp_dc on disk later scrapes cannot authenticate, so do not claim success.
            return { success: false, error: "Logged in, but the sp_dc session cookie could not be saved. Run connect_s4a again." };
        }
        return { success: true, name: artistId };
    }
    finally {
        // Disconnect CDP on every exit path — Chrome itself stays running.
        await browser.close().catch(() => { });
    }
}
|
|
1640
|
+
/**
 * Reports whether a usable S4A session cookie is stored on disk.
 *
 * A session counts as present only when sp_dc.txt can be read and contains a
 * non-blank value; an empty or unreadable file would be rejected when the
 * cookie is actually loaded, so it is reported as "no session" here as well.
 *
 * @returns {boolean} true when a non-empty sp_dc value is stored.
 */
export function hasS4ASession() {
    try {
        const stored = fs.readFileSync(path.join(S4A_SESSION_DIR, "sp_dc.txt"), "utf-8");
        return stored.trim().length > 0;
    }
    catch {
        // Missing or unreadable file means there is no session to use.
        return false;
    }
}
|
|
1643
|
+
/**
 * Loads the stored sp_dc cookie value from S4A_SESSION_DIR/sp_dc.txt.
 *
 * @returns {string|null} The trimmed cookie value, or null when the file is
 *   missing, unreadable, or contains only whitespace.
 */
function getSpDc() {
    try {
        const value = fs.readFileSync(path.join(S4A_SESSION_DIR, "sp_dc.txt"), "utf-8").trim();
        // Normalise a blank file to null so the result is either a usable cookie or null.
        return value || null;
    }
    catch {
        return null;
    }
}
|
|
1651
|
+
// Get S4A bearer token from sp_dc cookie (valid ~1 hour)
|
|
1652
|
+
/**
 * Exchanges an sp_dc cookie for an S4A bearer token.
 *
 * @param {string} spDc - Value of the sp_dc session cookie.
 * @returns {Promise<string>} The access token from the response
 *   (`accessToken`, falling back to `access_token`).
 * @throws {Error} When spDc is missing, the endpoint answers with a non-2xx
 *   status, or the response carries no token.
 */
async function getS4ABearerToken(spDc) {
    if (!spDc) {
        // Fail before hitting the network with a "sp_dc=undefined" cookie.
        throw new Error("S4A token failed: missing sp_dc cookie");
    }
    const resp = await fetch("https://generic.wg.spotify.com/creator-auth-proxy/v1/web/token", {
        headers: {
            "Cookie": `sp_dc=${spDc}`,
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        },
    });
    if (!resp.ok)
        throw new Error(`S4A token failed: ${resp.status}`);
    const data = await resp.json();
    const token = data?.accessToken || data?.access_token;
    if (!token) {
        // Returning undefined here would surface later as "Bearer undefined".
        throw new Error("S4A token failed: response contained no access token");
    }
    return token;
}
|
|
1664
|
+
// S4A API call helper
|
|
1665
|
+
/**
 * Performs an authenticated GET against generic.wg.spotify.com.
 *
 * @param {string} token - Bearer token sent in the Authorization header.
 * @param {string} endpoint - Path below the host; a leading "/" is tolerated.
 * @returns {Promise<any|null>} Parsed JSON body, or null when the response is
 *   not OK or its body is not valid JSON.
 */
async function s4aApi(token, endpoint) {
    // Strip leading slashes so "/foo" and "foo" hit the same URL instead of "//foo".
    const relativePath = String(endpoint).replace(/^\/+/, "");
    const resp = await fetch(`https://generic.wg.spotify.com/${relativePath}`, {
        headers: {
            "Authorization": `Bearer ${token}`,
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Accept": "application/json",
        },
    });
    if (!resp.ok)
        return null;
    try {
        return await resp.json();
    }
    catch {
        // An unparseable body is treated like any other failed call.
        return null;
    }
}
|
|
1677
|
+
// Scrape S4A via headless browser (Spotify blocks direct API calls via TLS fingerprinting)
|
|
1678
|
+
export async function scrapeS4ADirect(artistId) {
|
|
1679
|
+
const spDc = getSpDc();
|
|
1680
|
+
if (!spDc)
|
|
1681
|
+
return null;
|
|
1682
|
+
// Launch headless browser with injected cookies (proven working approach)
|
|
1683
|
+
const chromePath = process.env.CHROME_PATH || findChrome();
|
|
1684
|
+
const browser = await chromium.launch({ executablePath: chromePath, headless: true });
|
|
1685
|
+
const context = await browser.newContext({
|
|
1686
|
+
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
|
1687
|
+
});
|
|
1688
|
+
// Inject all saved cookies
|
|
1689
|
+
try {
|
|
1690
|
+
const savedCookies = JSON.parse(fs.readFileSync(path.join(S4A_SESSION_DIR, "cookies.json"), "utf-8"));
|
|
1691
|
+
const validCookies = savedCookies.filter((c) => c.name && c.value && c.domain).map((c) => ({
|
|
1692
|
+
name: c.name, value: c.value, domain: c.domain, path: c.path || "/",
|
|
1693
|
+
httpOnly: c.httpOnly ?? true, secure: c.secure ?? true, sameSite: "None",
|
|
1694
|
+
}));
|
|
1695
|
+
await context.addCookies(validCookies);
|
|
1696
|
+
}
|
|
1697
|
+
catch {
|
|
1698
|
+
// Fallback: inject just sp_dc
|
|
1699
|
+
await context.addCookies([
|
|
1700
|
+
{ name: "sp_dc", value: spDc, domain: ".spotify.com", path: "/", httpOnly: true, secure: true, sameSite: "None" },
|
|
1701
|
+
]);
|
|
1702
|
+
}
|
|
1703
|
+
const data = {};
|
|
1704
|
+
const aid = artistId;
|
|
1705
|
+
const page = await context.newPage();
|
|
1706
|
+
const captured = {};
|
|
1707
|
+
// Intercept API responses
|
|
1708
|
+
page.on("response", async (resp) => {
|
|
1709
|
+
const url = resp.url();
|
|
1710
|
+
if (!url.includes("spotify.com") || resp.status() !== 200)
|
|
1711
|
+
return;
|
|
1712
|
+
const patterns = ["audience-engagement", "catalog-view", "fanatic-audience", "s4x-insights-api", "song-stats", "entity-comparison", "source-of-stream", "s4x-me", "canvaz-view", "buyerxp-campaign-view", "app-manager", "artist-videos"];
|
|
1713
|
+
if (!patterns.some(p => url.includes(p)))
|
|
1714
|
+
return;
|
|
1715
|
+
try {
|
|
1716
|
+
captured[url] = await resp.json();
|
|
1717
|
+
}
|
|
1718
|
+
catch { }
|
|
1719
|
+
});
|
|
1720
|
+
// RULE: these dates are only for building the REQUEST URL, not for storing.
|
|
1721
|
+
// The actual dates of the data points are taken FROM THE RESPONSE (the timeseries `x` field).
|
|
1722
|
+
// S4A returns data ending YESTERDAY, not today.
|
|
1723
|
+
const requestToDate = new Date().toISOString().slice(0, 10);
|
|
1724
|
+
const requestFromDate12m = new Date(Date.now() - 365 * 86400000).toISOString().slice(0, 10);
|
|
1725
|
+
// Artist-level sections
|
|
1726
|
+
// NOTE: audience/stats, demographic, location — NO custom date params!
|
|
1727
|
+
// S4A API uses from_date/to_date (snake_case) internally. Passing fromDate (camelCase)
|
|
1728
|
+
// in the PAGE URL causes the internal API call to fail with 400.
|
|
1729
|
+
// Let S4A choose its own date range — it defaults to 28d which is what we get.
|
|
1730
|
+
const sections = [
|
|
1731
|
+
`/c/artist/${aid}/home`,
|
|
1732
|
+
`/c/artist/${aid}/audience/stats`,
|
|
1733
|
+
`/c/artist/${aid}/audience/demographic`,
|
|
1734
|
+
`/c/artist/${aid}/audience/location`,
|
|
1735
|
+
`/c/artist/${aid}/audience/segments`,
|
|
1736
|
+
`/c/artist/${aid}/music/songs?time-filter=last12months`,
|
|
1737
|
+
`/c/artist/${aid}/music/playlists?time-filter=last12months`,
|
|
1738
|
+
`/c/artist/${aid}/music/upcoming`,
|
|
1739
|
+
`/c/artist/${aid}/music/videos`,
|
|
1740
|
+
];
|
|
1741
|
+
for (const section of sections) {
|
|
1742
|
+
try {
|
|
1743
|
+
await page.goto(`https://artists.spotify.com${section}`, { waitUntil: "networkidle", timeout: 25000 });
|
|
1744
|
+
await sleep(2000);
|
|
1745
|
+
}
|
|
1746
|
+
catch { }
|
|
1747
|
+
}
|
|
1748
|
+
// Parse captured responses into data object (helper — called multiple times)
|
|
1749
|
+
const parseCaptured = () => {
|
|
1750
|
+
data.perSong = data.perSong || {};
|
|
1751
|
+
for (const [url, resp] of Object.entries(captured)) {
|
|
1752
|
+
if (url.includes("gender-by-age"))
|
|
1753
|
+
data.genderByAge = resp;
|
|
1754
|
+
else if (url.includes("/gender") && !url.includes("gender-by-age"))
|
|
1755
|
+
data.gender = resp;
|
|
1756
|
+
else if (url.includes("top-cities") && !url.includes("recording"))
|
|
1757
|
+
data.topCities = resp;
|
|
1758
|
+
else if (url.includes("/locations") && !url.includes("recording"))
|
|
1759
|
+
data.locations = resp;
|
|
1760
|
+
else if (url.includes("fanatic-audience-segments"))
|
|
1761
|
+
data.segments = resp;
|
|
1762
|
+
else if (url.includes("audience-engagement") && url.includes("stats") && !url.includes("recording"))
|
|
1763
|
+
data.audienceStats = resp;
|
|
1764
|
+
else if (url.includes("catalog-view") && url.includes("songs"))
|
|
1765
|
+
data.catalog = resp;
|
|
1766
|
+
else if (url.includes("playlists/curated")) {
|
|
1767
|
+
if (!data.playlists)
|
|
1768
|
+
data.playlists = {};
|
|
1769
|
+
data.playlists.curated = resp;
|
|
1770
|
+
}
|
|
1771
|
+
else if (url.includes("playlists/listener")) {
|
|
1772
|
+
if (!data.playlists)
|
|
1773
|
+
data.playlists = {};
|
|
1774
|
+
data.playlists.listener = resp;
|
|
1775
|
+
}
|
|
1776
|
+
else if (url.includes("playlists/personalized")) {
|
|
1777
|
+
if (!data.playlists)
|
|
1778
|
+
data.playlists = {};
|
|
1779
|
+
data.playlists.personalized = resp;
|
|
1780
|
+
}
|
|
1781
|
+
else if (url.includes("artist-videos"))
|
|
1782
|
+
data.videos = resp;
|
|
1783
|
+
// Profile fields 14-21
|
|
1784
|
+
else if (url.includes("s4x-me") && url.includes("/me") && !url.includes("recent"))
|
|
1785
|
+
data.accountOwner = resp;
|
|
1786
|
+
else if (url.includes("s4x-me") && url.includes("artists/"))
|
|
1787
|
+
data.permissions = resp;
|
|
1788
|
+
else if (url.includes("canvaz-view") && url.includes("permissions"))
|
|
1789
|
+
data.canvasPermissions = resp;
|
|
1790
|
+
else if (url.includes("buyerxp-campaign-view") && url.includes("eligibility"))
|
|
1791
|
+
data.campaignEligibility = resp;
|
|
1792
|
+
else if (url.includes("app-manager") && url.includes("banner-metadata"))
|
|
1793
|
+
data.teamMembership = resp;
|
|
1794
|
+
// Per-song location data (top-cities and locations per recording)
|
|
1795
|
+
else if (url.includes("top-cities") && url.includes("recording")) {
|
|
1796
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1797
|
+
if (m) {
|
|
1798
|
+
if (!data.perSong[m[1]])
|
|
1799
|
+
data.perSong[m[1]] = {};
|
|
1800
|
+
data.perSong[m[1]].topCities = resp;
|
|
1801
|
+
}
|
|
1802
|
+
}
|
|
1803
|
+
else if (url.includes("/locations") && url.includes("recording")) {
|
|
1804
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1805
|
+
if (m) {
|
|
1806
|
+
if (!data.perSong[m[1]])
|
|
1807
|
+
data.perSong[m[1]] = {};
|
|
1808
|
+
data.perSong[m[1]].topCountries = resp;
|
|
1809
|
+
}
|
|
1810
|
+
}
|
|
1811
|
+
// Per-song demographics (gender, gender-by-age per recording)
|
|
1812
|
+
else if (url.includes("gender-by-age") && url.includes("recording")) {
|
|
1813
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1814
|
+
if (m) {
|
|
1815
|
+
if (!data.perSong[m[1]])
|
|
1816
|
+
data.perSong[m[1]] = {};
|
|
1817
|
+
data.perSong[m[1]].perSongGenderByAge = resp;
|
|
1818
|
+
}
|
|
1819
|
+
}
|
|
1820
|
+
else if (url.includes("/gender") && !url.includes("gender-by-age") && url.includes("recording")) {
|
|
1821
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1822
|
+
if (m) {
|
|
1823
|
+
if (!data.perSong[m[1]])
|
|
1824
|
+
data.perSong[m[1]] = {};
|
|
1825
|
+
data.perSong[m[1]].perSongGender = resp;
|
|
1826
|
+
}
|
|
1827
|
+
}
|
|
1828
|
+
// Per-song source of streams (28d aggregate — editorial/algorithmic/user/other)
|
|
1829
|
+
else if (url.includes("source-of-stream") && url.includes("recording")) {
|
|
1830
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1831
|
+
if (m) {
|
|
1832
|
+
if (!data.perSong[m[1]])
|
|
1833
|
+
data.perSong[m[1]] = {};
|
|
1834
|
+
data.perSong[m[1]].sourceOfStreams = resp;
|
|
1835
|
+
}
|
|
1836
|
+
}
|
|
1837
|
+
// Per-song country comparison timeseries (365d daily per selected country)
|
|
1838
|
+
else if (url.includes("audience-engagement") && url.includes("recording") && url.includes("stats")) {
|
|
1839
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1840
|
+
if (m) {
|
|
1841
|
+
if (!data.perSong[m[1]])
|
|
1842
|
+
data.perSong[m[1]] = {};
|
|
1843
|
+
if (!data.perSong[m[1]].countryTimeline)
|
|
1844
|
+
data.perSong[m[1]].countryTimeline = {};
|
|
1845
|
+
// Store keyed by country code if available in response, or accumulate
|
|
1846
|
+
data.perSong[m[1]].countryTimeline._raw = resp;
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
else if (url.includes("song-stats")) {
|
|
1850
|
+
const m = url.match(/recording[_/]([a-zA-Z0-9]+)/);
|
|
1851
|
+
if (m) {
|
|
1852
|
+
if (!data.perSong[m[1]])
|
|
1853
|
+
data.perSong[m[1]] = {};
|
|
1854
|
+
const existing = data.perSong[m[1]];
|
|
1855
|
+
// Before merge: protect geography data by saving cities vs countries separately
|
|
1856
|
+
if (resp.geography && Array.isArray(resp.geography) && resp.geography.length > 0) {
|
|
1857
|
+
const first = resp.geography[0];
|
|
1858
|
+
if (first.region) {
|
|
1859
|
+
// Has region = cities data (e.g. Prague, CZ, region=10)
|
|
1860
|
+
existing.perSongCities = resp.geography;
|
|
1861
|
+
}
|
|
1862
|
+
else if (first.localized_country) {
|
|
1863
|
+
// Has localized_country = countries data (e.g. CZ, Czech Republic)
|
|
1864
|
+
existing.perSongCountries = resp.geography;
|
|
1865
|
+
}
|
|
1866
|
+
}
|
|
1867
|
+
// Protect countryTimelines from being overwritten
|
|
1868
|
+
if (resp.countryTimelines && Array.isArray(resp.countryTimelines)) {
|
|
1869
|
+
existing.countryTimelines = resp.countryTimelines;
|
|
1870
|
+
}
|
|
1871
|
+
// Prefer longer timeseries — 365d response must not be overwritten by 28d
|
|
1872
|
+
const existingLen = existing.streams?.current_period_timeseries?.length || 0;
|
|
1873
|
+
const newLen = resp.streams?.current_period_timeseries?.length || 0;
|
|
1874
|
+
if (newLen > 0 && existingLen > newLen) {
|
|
1875
|
+
// Keep existing longer timeseries, only merge non-timeseries fields
|
|
1876
|
+
const { streams, listeners, saves, playlist_adds, streams_per_listener, geography, countryTimelines, ...rest } = resp;
|
|
1877
|
+
Object.assign(existing, rest);
|
|
1878
|
+
}
|
|
1879
|
+
else {
|
|
1880
|
+
const { geography, countryTimelines, ...rest } = resp;
|
|
1881
|
+
Object.assign(existing, rest);
|
|
1882
|
+
// Only merge timeseries metrics if new data is longer
|
|
1883
|
+
if (resp.streams)
|
|
1884
|
+
existing.streams = resp.streams;
|
|
1885
|
+
if (resp.listeners)
|
|
1886
|
+
existing.listeners = resp.listeners;
|
|
1887
|
+
if (resp.saves)
|
|
1888
|
+
existing.saves = resp.saves;
|
|
1889
|
+
if (resp.playlist_adds)
|
|
1890
|
+
existing.playlist_adds = resp.playlist_adds;
|
|
1891
|
+
if (resp.streams_per_listener)
|
|
1892
|
+
existing.streams_per_listener = resp.streams_per_listener;
|
|
1893
|
+
}
|
|
1894
|
+
}
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
};
|
|
1898
|
+
// Parse artist-level data BEFORE per-song loop so we have trackIds
|
|
1899
|
+
parseCaptured();
|
|
1900
|
+
// Per-song pages — each track × each metric tab = 12 months of daily data
|
|
1901
|
+
const trackIds = [];
|
|
1902
|
+
if (data.catalog?.songs) {
|
|
1903
|
+
for (const s of data.catalog.songs)
|
|
1904
|
+
trackIds.push(s.id);
|
|
1905
|
+
}
|
|
1906
|
+
if (trackIds.length === 0) {
|
|
1907
|
+
console.error(`[S4A] WARNING: No track IDs from catalog (catalog songs: ${data.catalog?.songs?.length ?? 'none'}). Per-song scrape skipped.`);
|
|
1908
|
+
}
|
|
1909
|
+
else {
|
|
1910
|
+
console.log(`[S4A] Scraping per-song data for ${trackIds.length} tracks...`);
|
|
1911
|
+
}
|
|
1912
|
+
// Per-song tabs:
|
|
1913
|
+
// 1. Overview (stats) — ONE call returns ALL 5 metrics (365 days daily each)
|
|
1914
|
+
// 2. Source of stream — user/personalized/catalog/editorial/network/other
|
|
1915
|
+
// 3. Location — top countries + cities + country comparison timeline
|
|
1916
|
+
// 4. Demographics — gender + gender×age per track
|
|
1917
|
+
// 5. Playlists — top playlists per track
|
|
1918
|
+
// 6. SongDNA — TODO (new feature)
|
|
1919
|
+
// NOTE: URL is "source-of-stream" (singular!), not "source-of-streams"
|
|
1920
|
+
// Per-song: navigate to stats, click "12 months" → S4A picks correct date range.
|
|
1921
|
+
// Don't guess toDate — Spotify updates when it wants. Let S4A decide.
|
|
1922
|
+
// ONE stats call returns ALL 5 metrics × 365d daily + previous_period_agg.
|
|
1923
|
+
// Then source-of-stream and playlists tabs for additional data.
|
|
1924
|
+
for (let i = 0; i < trackIds.length; i++) {
|
|
1925
|
+
const tid = trackIds[i];
|
|
1926
|
+
console.log(`[S4A] Track ${i + 1}/${trackIds.length}: ${tid}`);
|
|
1927
|
+
// Stats — load default, click "12 months"
|
|
1928
|
+
try {
|
|
1929
|
+
await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/stats`, { waitUntil: "networkidle", timeout: 20000 });
|
|
1930
|
+
await sleep(1500);
|
|
1931
|
+
await page.evaluate(`(() => {
|
|
1932
|
+
const b = Array.from(document.querySelectorAll("button")).find(b => b.textContent?.trim() === "12 months");
|
|
1933
|
+
if (b) b.click();
|
|
1934
|
+
})()`);
|
|
1935
|
+
await sleep(3000);
|
|
1936
|
+
}
|
|
1937
|
+
catch { }
|
|
1938
|
+
// Source of streams
|
|
1939
|
+
try {
|
|
1940
|
+
await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/source-of-stream`, { waitUntil: "networkidle", timeout: 20000 });
|
|
1941
|
+
await sleep(1500);
|
|
1942
|
+
}
|
|
1943
|
+
catch { }
|
|
1944
|
+
// Location — top countries (28d, all countries) + top cities (28d, top 50) + country comparison timeline (365d, default top 3)
|
|
1945
|
+
try {
|
|
1946
|
+
await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/location`, { waitUntil: "networkidle", timeout: 20000 });
|
|
1947
|
+
await sleep(1500);
|
|
1948
|
+
// Click "12 months" for country comparison timeline (365d daily data)
|
|
1949
|
+
await page.evaluate(`(() => {
|
|
1950
|
+
const b = Array.from(document.querySelectorAll("button")).find(b => b.textContent?.trim() === "12 months");
|
|
1951
|
+
if (b) b.click();
|
|
1952
|
+
})()`);
|
|
1953
|
+
await sleep(2000);
|
|
1954
|
+
}
|
|
1955
|
+
catch { }
|
|
1956
|
+
// Demographics — per-song gender + gender×age breakdown
|
|
1957
|
+
try {
|
|
1958
|
+
await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/demographic`, { waitUntil: "networkidle", timeout: 20000 });
|
|
1959
|
+
await sleep(1500);
|
|
1960
|
+
}
|
|
1961
|
+
catch { }
|
|
1962
|
+
// Playlists
|
|
1963
|
+
try {
|
|
1964
|
+
await page.goto(`https://artists.spotify.com/c/artist/${aid}/song/${tid}/playlists`, { waitUntil: "networkidle", timeout: 20000 });
|
|
1965
|
+
await sleep(1500);
|
|
1966
|
+
}
|
|
1967
|
+
catch { }
|
|
1968
|
+
}
|
|
1969
|
+
await browser.close();
|
|
1970
|
+
// Final parse — picks up per-song responses
|
|
1971
|
+
parseCaptured();
|
|
1972
|
+
// Save raw dump
|
|
1973
|
+
const dumpDir = path.join(process.env.HOME || "~", ".artist-os", "s4a-dumps", new Date().toISOString().slice(0, 10));
|
|
1974
|
+
fs.mkdirSync(dumpDir, { recursive: true });
|
|
1975
|
+
fs.writeFileSync(path.join(dumpDir, `${aid}_s4a.json`), JSON.stringify(data, null, 2));
|
|
1976
|
+
return Object.keys(data).length > 0 ? data : null;
|
|
1977
|
+
}
|
|
1978
|
+
/**
 * Walk the main Spotify for Artists dashboard sections in a headless browser
 * and dump every interesting JSON API response to disk.
 *
 * The persistent browser profile in S4A_SESSION_DIR is reused, and each
 * captured response is written to `<outputDir>/<key>.json`, where `key` is
 * derived from the response URL. A `_manifest.json` summarising the run is
 * written next to the captured files.
 *
 * @param {string} artistId - Artist id used to build the dashboard URLs.
 * @param {string} outputDir - Directory that receives the JSON files (created if missing).
 * @returns {Promise<{files: number}>} Number of response files written.
 * @throws {Error} If no S4A session exists (checked via hasS4ASession()).
 */
export async function scrapeS4A(artistId, outputDir) {
    if (!hasS4ASession())
        throw new Error("Not connected to S4A. Use connect_s4a first.");
    const chromePath = process.env.CHROME_PATH || findChrome();
    const browser = await chromium.launchPersistentContext(S4A_SESSION_DIR, {
        executablePath: chromePath,
        headless: true,
    });
    const captured = {};
    // URL fragments identifying the API responses worth keeping.
    const interesting = ["audience-engagement", "catalog-view", "s4x-insights", "fanatic",
        "song-stats", "entity-comparison", "canvaz-view", "buyerxp", "app-manager", "s4x-me"];
    try {
        const page = browser.pages()[0] || await browser.newPage();
        // Intercept API responses
        page.on("response", async (resp) => {
            const url = resp.url();
            if (!url.includes("spotify.com") || resp.status() !== 200)
                return;
            if (!interesting.some(k => url.includes(k)))
                return;
            try {
                const data = await resp.json();
                // File-name-safe key: path + query with URL separators flattened, capped at 120 chars.
                const key = url.split("spotify.com/")[1]?.replace(/[/?&=]/g, "_").slice(0, 120) || "unknown";
                captured[key] = { url, data };
            }
            catch {
                // Deliberate best-effort: a matching URL whose body cannot be read as JSON is skipped.
            }
        });
        // Navigate through S4A sections
        const sections = [
            `/c/artist/${artistId}/home`,
            `/c/artist/${artistId}/audience/stats`,
            `/c/artist/${artistId}/audience/demographic`,
            `/c/artist/${artistId}/audience/location`,
            `/c/artist/${artistId}/audience/segments`,
            `/c/artist/${artistId}/music/songs`,
            `/c/artist/${artistId}/music/releases`,
            `/c/artist/${artistId}/music/upcoming`,
            `/c/artist/${artistId}/music/playlists`,
        ];
        for (const section of sections) {
            try {
                await page.goto(`https://artists.spotify.com${section}`, { waitUntil: "networkidle", timeout: 30000 });
                // Extra settle time so responses arriving after network idle are still captured.
                await new Promise(r => setTimeout(r, 2000));
            }
            catch (err) {
                // One failing section must not abort the crawl, but it should be visible.
                console.error(`[S4A] WARNING: failed to load ${section}: ${err?.message ?? err}`);
            }
        }
    }
    finally {
        // Always release the browser, even when page setup or navigation throws.
        await browser.close();
    }
    // Save captured data
    fs.mkdirSync(outputDir, { recursive: true });
    let count = 0;
    for (const [key, entry] of Object.entries(captured)) {
        const filepath = path.join(outputDir, `${key}.json`);
        fs.writeFileSync(filepath, JSON.stringify(entry.data, null, 2));
        count++;
    }
    // Save manifest
    fs.writeFileSync(path.join(outputDir, "_manifest.json"), JSON.stringify({
        artist_id: artistId,
        scraped_at: new Date().toISOString(),
        files: count,
    }, null, 2));
    return { files: count };
}
|
|
2040
|
+
/**
 * Run the complete scrape pipeline for one artist.
 *
 * Steps, in order:
 *   1. Obtain an anonymous token, then fetch the artist overview and all tracks.
 *   2. If an S4A session exists, attempt the S4A scrape (best-effort).
 *   3. If `dbPath` is given, store the overview and tracks.
 *   4. Download images into the data directory.
 *   5. If S4A data was obtained, validate it, print the report, and save the
 *      report into today's dump directory when that directory exists.
 *   6. If `dbPath` is given, generate the workbook.
 *
 * @param {string} artistId - Artist id to scrape.
 * @param {string} [dbPath] - Database path; when omitted nothing is stored and
 *   no workbook is generated, and the data directory defaults to `$HOME/.artist-os`.
 * @returns {Promise<{overview: *, tracks: Array, total_streams: number, images: *, s4a: (Object|null)}>}
 *   Scrape results; `s4a` is null when there is no session or the S4A scrape failed.
 */
export async function fullScrape(artistId, dbPath) {
    const token = await getAnonymousToken(artistId);
    const overview = await getArtistOverview(token, artistId);
    const tracks = await getAllTracks(token, artistId);
    const total_streams = tracks.reduce((sum, t) => sum + t.playcount, 0);
    const dataDir = dbPath ? path.dirname(dbPath) : path.join(process.env.HOME || "~", ".artist-os");
    // S4A scrape — automatic if session exists
    let s4a = null;
    if (hasS4ASession()) {
        try {
            s4a = await scrapeS4ADirect(artistId);
        }
        catch (err) {
            // Best-effort: public data is still returned, but the failure is reported
            // instead of being silently indistinguishable from "no S4A data".
            console.error(`[S4A] WARNING: S4A scrape failed, continuing without it: ${err?.message ?? err}`);
        }
    }
    if (dbPath) {
        storeData(dbPath, overview, tracks);
    }
    const images = await downloadImages(artistId, overview, tracks, dataDir);
    // Validate S4A data before workbook generation
    if (s4a) {
        const report = validateS4AData(s4a);
        printValidationReport(report);
        // Save report alongside dump (same date-based directory layout as the raw dump)
        const dumpDir = path.join(process.env.HOME || "~", ".artist-os", "s4a-dumps", new Date().toISOString().slice(0, 10));
        if (fs.existsSync(dumpDir)) {
            fs.writeFileSync(path.join(dumpDir, `${artistId}_validation.json`), JSON.stringify(report, null, 2));
        }
    }
    if (dbPath) {
        await generateWorkbook(dbPath, dataDir, s4a);
    }
    return { overview, tracks, total_streams, images, s4a };
}
|