@gscdump/analysis 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4944 @@
1
+ import { enumeratePartitions } from "@gscdump/engine/planner";
2
+ import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
3
+ import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
4
+ import { between, date, extractDateRange, gsc, page, query } from "gscdump/query";
5
+ import { buildExtrasQueries, buildTotalsSql, mergeExtras, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized } from "@gscdump/engine/resolver";
6
/** Capability list used for the SQL plan when `opts.sqlRequires` is not supplied. */
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Turns a declarative analyzer definition into a descriptor with an optional
 * `sql` plan and an optional `rows` plan.
 *
 * @param opts Definition object. `buildSql` / `buildRows` enable the matching
 *   plan; `reduceSql` / `reduceRows` override the shared `reduce` for that plan.
 * @returns `{ id, sql, rows }`; a plan is `undefined` when its builder is absent.
 * @throws {Error} When a builder is given without any reducer for it.
 */
function defineAnalyzer(opts) {
  const {
    id,
    reduce,
    reduceSql,
    reduceRows,
    buildSql,
    buildRows,
    sqlRequires = DEFAULT_SQL_REQUIRES,
    rowsRequires = []
  } = opts;
  const sqlReducer = reduceSql ?? reduce;
  const rowsReducer = reduceRows ?? reduce;
  if (buildSql && !sqlReducer) {
    throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
  }
  if (buildRows && !rowsReducer) {
    throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);
  }

  // Reducers receive an array when possible: arrays pass through, a keyed bag
  // with exactly one entry is unwrapped, anything else is handed over as-is.
  const unwrapRows = (rows) => {
    if (Array.isArray(rows)) return rows;
    return pickSingle(rows) ?? rows;
  };
  const runReducer = (reducer, rows, params, ctx) => {
    const outcome = reducer(unwrapRows(rows), params, ctx);
    // Only `results` and `meta` are forwarded; other reducer output is dropped.
    return { results: outcome.results, meta: outcome.meta };
  };

  let sqlPlan;
  if (buildSql && sqlReducer) {
    sqlPlan = {
      id,
      requires: sqlRequires,
      build(params) {
        const spec = buildSql(params);
        const { sql, current, previous, extraFiles, extraQueries, requiresAttachedTables } = spec;
        return {
          kind: "sql",
          sql,
          params: spec.params,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables
        };
      },
      reduce(rows, ctx) {
        return runReducer(sqlReducer, rows, ctx.params, { extras: ctx.extras });
      }
    };
  }

  let rowsPlan;
  if (buildRows && rowsReducer) {
    rowsPlan = {
      id,
      requires: rowsRequires,
      build(params) {
        const queries = {};
        for (const [name, state] of Object.entries(buildRows(params))) {
          queries[name] = { state };
        }
        return { kind: "rows", queries };
      },
      reduce(rows, ctx) {
        return runReducer(rowsReducer, rows, ctx.params, {});
      }
    };
  }

  return { id, sql: sqlPlan, rows: rowsPlan };
}
62
/**
 * Unwraps a keyed result bag that holds exactly one entry.
 *
 * @param rows Candidate bag (an object keyed by query name).
 * @returns The sole value when `rows` is an object with exactly one own
 *   enumerable key; otherwise `undefined`. Nullish and primitive inputs
 *   return `undefined` instead of throwing, so callers can fall back with `??`.
 */
function pickSingle(rows) {
  // Object.keys(null) throws; a missing result set should not crash the reducer wrapper.
  if (rows == null || typeof rows !== "object") return void 0;
  const keys = Object.keys(rows);
  return keys.length === 1 ? rows[keys[0]] : void 0;
}
66
/** Default end of the analysis period: the value of `daysAgo(3)`. */
function defaultEndDate() {
  const offsetDays = 3;
  return daysAgo(offsetDays);
}
69
/** Default start of the analysis period: the value of `daysAgo(31)`. */
function defaultStartDate() {
  const offsetDays = 31;
  return daysAgo(offsetDays);
}
72
/**
 * Resolves the current analysis period from request params.
 *
 * @param params Object that may carry `startDate` / `endDate`.
 * @returns `{ startDate, endDate }`; any falsy bound (missing or empty string)
 *   is replaced by `defaultStartDate()` / `defaultEndDate()`.
 */
function periodOf(params) {
  const startDate = params.startDate ? params.startDate : defaultStartDate();
  const endDate = params.endDate ? params.endDate : defaultEndDate();
  return { startDate, endDate };
}
78
/**
 * Resolves the current and previous periods for a comparison analysis.
 *
 * @param params Must carry truthy `prevStartDate` and `prevEndDate`; the
 *   current period is resolved through `periodOf`.
 * @returns `{ current, previous }`, each a `{ startDate, endDate }` pair.
 * @throws {Error} When either previous-period bound is missing.
 */
function comparisonOf(params) {
  const { prevStartDate, prevEndDate } = params;
  const hasPrevious = Boolean(prevStartDate) && Boolean(prevEndDate);
  if (!hasPrevious) {
    throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
  }
  const current = periodOf(params);
  const previous = { startDate: prevStartDate, endDate: prevEndDate };
  return { current, previous };
}
88
/** Metric values given to synthesized rows when `options.fill` is not provided. */
const DEFAULT_FILL = {
  clicks: 0,
  impressions: 0,
  ctr: 0,
  position: 0
};

/**
 * Produces one entry per calendar day between `startDate` and `endDate`
 * (inclusive, stepped in UTC by `MS_PER_DAY`). Days that have input rows emit
 * those rows unchanged; days without any emit a filler row.
 *
 * @param rows Iterable of row objects carrying a date under `options.dateKey`.
 * @param options `{ startDate, endDate, dateKey?, fill? }`. `dateKey` defaults
 *   to "date" and `fill` to `DEFAULT_FILL`.
 * @returns New array ordered by day. Rows dated outside the range are not emitted.
 * @throws {Error} When either bound does not parse as a date.
 */
function padTimeseries(rows, options) {
  const { startDate, endDate } = options;
  const dateKey = options.dateKey ?? "date";
  const fill = options.fill ?? DEFAULT_FILL;

  // Group rows by their stringified date so multiple rows per day survive.
  const rowsByDay = new Map();
  for (const row of rows) {
    const day = String(row[dateKey]);
    if (!rowsByDay.has(day)) rowsByDay.set(day, []);
    rowsByDay.get(day).push(row);
  }

  const firstMs = Date.parse(`${startDate}T00:00:00Z`);
  const lastMs = Date.parse(`${endDate}T00:00:00Z`);
  if (Number.isNaN(firstMs) || Number.isNaN(lastMs)) {
    throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);
  }

  const padded = [];
  let cursorMs = firstMs;
  while (cursorMs <= lastMs) {
    const day = toIsoDate(new Date(cursorMs));
    const present = rowsByDay.get(day);
    if (present === undefined) padded.push({ ...fill, [dateKey]: day });
    else padded.push(...present);
    cursorMs += MS_PER_DAY;
  }
  return padded;
}
120
/**
 * Coerces a SQL cell to a JS number.
 *
 * @param v Any value: numbers are returned as-is, bigints are converted,
 *   nullish becomes 0, everything else goes through `Number()`.
 * @returns The numeric value, or 0 when coercion is not finite.
 */
function num$5(v) {
  if (v == null) return 0;
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default: {
      const coerced = Number(v);
      return Number.isFinite(coerced) ? coerced : 0;
    }
  }
}
127
/** Stringifies a cell value; `null` and `undefined` become the empty string. */
function str$23(v) {
  if (v === null || v === void 0) return "";
  return String(v);
}
130
// SQL-only analyzer registered under id "bayesian-ctr" (no buildRows is defined).
// The generated query:
//   1. aggregates rows per (query, url) in the date range, keeping pairs whose summed
//      impressions reach `minImpressions` and whose position expression is <= 30;
//   2. assigns each pair to a bucket = ROUND(LEAST(position, 30));
//   3. derives a per-bucket Beta(alpha, beta) prior from the impression-weighted CTR
//      mean and variance, falling back to alpha = 2.0 / beta = 48.0 when the bucket
//      has fewer than 5 entities or the moment fit is not valid;
//   4. updates the prior with each pair's clicks / non-click impressions and reports the
//      posterior mean, sd, a mean +/- 1.96 sd interval clamped to [0, 1], and a
//      classification of the observed CTR against that interval.
// The metric expressions come from METRIC_EXPR (imported; their SQL text is not visible here).
+ const bayesianCtrAnalyzer = defineAnalyzer({
130
+ id: "bayesian-ctr",
131
// Builds the SQL spec. Reads startDate/endDate via periodOf, `minImpressions`
// (default 50) and `limit` (default 300) from params.
+ buildSql(params) {
132
+ const { startDate, endDate } = periodOf(params);
133
+ const minImpressions = params.minImpressions ?? 50;
134
+ const limit = params.limit ?? 300;
135
// Minimum number of entities a bucket needs before its fitted prior is used.
+ const priorMinEntities = 5;
136
+ return {
137
+ sql: `
138
+ WITH entity AS (
139
+ SELECT
140
+ query,
141
+ url,
142
+ ${METRIC_EXPR.clicks} AS clicks,
143
+ ${METRIC_EXPR.impressions} AS impressions,
144
+ ${METRIC_EXPR.ctr} AS observed_ctr,
145
+ ${METRIC_EXPR.position} AS position,
146
+ CAST(ROUND(LEAST(${METRIC_EXPR.position}, 30)) AS INTEGER) AS bucket
147
+ FROM read_parquet({{FILES}}, union_by_name = true)
148
+ WHERE date >= ? AND date <= ?
149
+ AND query IS NOT NULL AND query <> ''
150
+ AND url IS NOT NULL AND url <> ''
151
+ GROUP BY query, url
152
+ HAVING SUM(impressions) >= ?
153
+ AND ${METRIC_EXPR.position} <= 30
154
+ ),
155
+ bucket_mu AS (
156
+ SELECT
157
+ bucket,
158
+ COUNT(*) AS n_entities,
159
+ SUM(observed_ctr * impressions) / NULLIF(SUM(impressions), 0) AS mu,
160
+ SUM(impressions) AS total_impressions
161
+ FROM entity
162
+ GROUP BY bucket
163
+ ),
164
+ bucket_var AS (
165
+ SELECT
166
+ e.bucket,
167
+ GREATEST(
168
+ SUM(e.impressions * POWER(e.observed_ctr - b.mu, 2))
169
+ / NULLIF(SUM(e.impressions), 0),
170
+ 1e-9
171
+ ) AS v
172
+ FROM entity e
173
+ JOIN bucket_mu b USING (bucket)
174
+ GROUP BY e.bucket
175
+ ),
176
+ priors AS (
177
+ SELECT
178
+ m.bucket,
179
+ m.n_entities,
180
+ m.mu,
181
+ v.v,
182
+ CASE
183
+ WHEN m.n_entities >= ${Number(priorMinEntities)}
184
+ AND v.v > 0
185
+ AND m.mu > 0 AND m.mu < 1
186
+ AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
187
+ THEN GREATEST(0.5, m.mu * (m.mu * (1.0 - m.mu) / v.v - 1.0))
188
+ ELSE 2.0
189
+ END AS alpha,
190
+ CASE
191
+ WHEN m.n_entities >= ${Number(priorMinEntities)}
192
+ AND v.v > 0
193
+ AND m.mu > 0 AND m.mu < 1
194
+ AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
195
+ THEN GREATEST(0.5, (1.0 - m.mu) * (m.mu * (1.0 - m.mu) / v.v - 1.0))
196
+ ELSE 48.0
197
+ END AS beta
198
+ FROM bucket_mu m
199
+ JOIN bucket_var v USING (bucket)
200
+ ),
201
+ posterior AS (
202
+ SELECT
203
+ e.query,
204
+ e.url,
205
+ e.clicks,
206
+ e.impressions,
207
+ e.observed_ctr,
208
+ e.position,
209
+ e.bucket,
210
+ p.alpha AS prior_alpha,
211
+ p.beta AS prior_beta,
212
+ p.mu AS bucket_prior_mean,
213
+ p.alpha + e.clicks AS alpha_post,
214
+ p.beta + (e.impressions - e.clicks) AS beta_post
215
+ FROM entity e
216
+ JOIN priors p USING (bucket)
217
+ ),
218
+ scored AS (
219
+ SELECT *,
220
+ alpha_post / (alpha_post + beta_post) AS posterior_mean,
221
+ SQRT((alpha_post * beta_post)
222
+ / (POWER(alpha_post + beta_post, 2) * (alpha_post + beta_post + 1))) AS posterior_sd
223
+ FROM posterior
224
+ )
225
+ SELECT
226
+ query AS keyword,
227
+ url AS page,
228
+ clicks,
229
+ impressions,
230
+ observed_ctr AS observedCtr,
231
+ position,
232
+ bucket,
233
+ prior_alpha AS priorAlpha,
234
+ prior_beta AS priorBeta,
235
+ bucket_prior_mean AS bucketPriorMean,
236
+ posterior_mean AS posteriorMean,
237
+ posterior_sd AS posteriorSd,
238
+ GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) AS ciLow,
239
+ LEAST(1.0, posterior_mean + 1.96 * posterior_sd) AS ciHigh,
240
+ posterior_mean - observed_ctr AS shrinkageDelta,
241
+ (posterior_mean - observed_ctr) * impressions AS expectedClicksDelta,
242
+ ABS(observed_ctr - posterior_mean) / NULLIF(posterior_sd, 0) AS significance,
243
+ CASE
244
+ WHEN observed_ctr > LEAST(1.0, posterior_mean + 1.96 * posterior_sd) THEN 'overperforming'
245
+ WHEN observed_ctr < GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) THEN 'underperforming'
246
+ ELSE 'expected'
247
+ END AS classification
248
+ FROM scored
249
+ ORDER BY significance DESC NULLS LAST
250
+ LIMIT ${Number(limit)}
251
+ `,
252
// Positional bindings, in placeholder order: date lower bound, date upper bound,
// HAVING impressions floor. `limit` is inlined above through Number() rather than bound.
+ params: [
253
+ startDate,
254
+ endDate,
255
+ minImpressions
256
+ ],
257
// Source table and partitions that the caller substitutes for {{FILES}}
// (substitution happens outside this block — presumably in the engine; confirm).
+ current: {
258
+ table: "page_keywords",
259
+ partitions: enumeratePartitions(startDate, endDate)
260
+ }
261
+ };
262
+ },
263
// Normalizes each SQL row to plain strings/numbers and tallies the three
// classifications for `meta`. Non-array input is treated as an empty result.
+ reduceSql(rows, params) {
264
+ const arr = Array.isArray(rows) ? rows : [];
265
// Echoed back in meta; uses the same default as buildSql.
+ const minImpressions = params.minImpressions ?? 50;
266
+ const results = arr.map((r) => ({
267
+ keyword: str$23(r.keyword),
268
+ page: str$23(r.page),
269
+ clicks: num$5(r.clicks),
270
+ impressions: num$5(r.impressions),
271
+ observedCtr: num$5(r.observedCtr),
272
+ position: num$5(r.position),
273
+ bucket: num$5(r.bucket),
274
+ priorAlpha: num$5(r.priorAlpha),
275
+ priorBeta: num$5(r.priorBeta),
276
+ bucketPriorMean: num$5(r.bucketPriorMean),
277
+ posteriorMean: num$5(r.posteriorMean),
278
+ posteriorSd: num$5(r.posteriorSd),
279
+ ciLow: num$5(r.ciLow),
280
+ ciHigh: num$5(r.ciHigh),
281
+ shrinkageDelta: num$5(r.shrinkageDelta),
282
+ expectedClicksDelta: num$5(r.expectedClicksDelta),
283
+ significance: num$5(r.significance),
284
+ classification: str$23(r.classification)
285
+ }));
286
+ const under = results.filter((r) => r.classification === "underperforming").length;
287
+ const over = results.filter((r) => r.classification === "overperforming").length;
288
+ return {
289
+ results,
290
+ meta: {
291
+ total: results.length,
292
+ underperforming: under,
293
+ overperforming: over,
294
// Everything that is neither under- nor over-performing.
+ expected: results.length - under - over,
295
+ minImpressions
296
+ }
297
+ };
298
+ }
299
+ });
301
/**
 * Coerces a cell value to a JS number.
 *
 * Numbers are returned unchanged and bigints are converted. Nullish values and
 * anything that does not coerce to a finite number (e.g. a non-numeric string)
 * become 0, matching the sibling `num$5` / `num$3` helpers, so a single bad
 * cell cannot turn downstream sums into NaN.
 *
 * @param v Value to coerce.
 * @returns A number; 0 for nullish or non-finite coercions.
 */
function num$4(v) {
  if (typeof v === "number") return v;
  if (typeof v === "bigint") return Number(v);
  if (v == null) return 0;
  const n = Number(v);
  return Number.isFinite(n) ? n : 0;
}
307
/**
 * Indexes rows into a Map.
 *
 * @param rows Iterable of rows.
 * @param key Function deriving the Map key from a row.
 * @param value Function deriving the stored value from a row.
 * @param filter Optional predicate; rows for which it returns falsy are skipped.
 * @returns Map of key -> value; a later row with the same key overwrites an earlier one.
 */
function buildPeriodMap(rows, key, value, filter) {
  const index = new Map();
  for (const row of rows) {
    const accepted = !filter || filter(row);
    if (accepted) index.set(key(row), value(row));
  }
  return index;
}
315
/**
 * Builds a non-mutating sorter driven by a metric accessor.
 *
 * @param getValue `(item, metric) => number` accessor used for comparison.
 * @param defaultMetric Metric used when the sorter is called without `sortBy`.
 * @param defaultOrder Order used when called without `sortOrder` ("desc" by default).
 * @returns `(items, sortBy?, sortOrder?) => sortedCopy`. Any order other than
 *   "desc" sorts ascending.
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
  return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
    const direction = sortOrder === "desc" ? -1 : 1;
    const compare = (left, right) => {
      const difference = getValue(left, sortBy) - getValue(right, sortBy);
      return difference * direction;
    };
    const copy = [...items];
    copy.sort(compare);
    return copy;
  };
}
321
/**
 * Builds a non-mutating sorter that reads the metric straight off each item
 * and takes the direction from a per-metric table.
 *
 * @param defaultMetric Property name used when `sortBy` is omitted.
 * @param orderByMetric Map of metric name -> "desc" | other; only "desc" sorts descending.
 * @returns `(items, sortBy?) => sortedCopy`.
 */
function createMetricSorter(defaultMetric, orderByMetric) {
  return (items, sortBy = defaultMetric) => {
    const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
    const copy = [...items];
    copy.sort((left, right) => {
      const difference = left[sortBy] - right[sortBy];
      return difference * direction;
    });
    return copy;
  };
}
327
// Number of rank-propagation steps; the bipartite-pagerank SQL builder emits one
// ranks_<i> CTE per step, and the reducer reports this value in meta.iterations.
+ const BIPARTITE_PAGERANK_ITERATIONS = 25;
328
// Damping factor interpolated into every step's rank formula and reported as meta.damping.
+ const BIPARTITE_PAGERANK_DAMPING = .85;
329
/** Stringifies a cell value; `null` and `undefined` become the empty string. */
function str$22(v) {
  const missing = v === null || v === void 0;
  return missing ? "" : String(v);
}
332
/**
 * Normalizes a list-valued SQL cell that may arrive as an array or as JSON text.
 *
 * @param v Array, JSON string, or anything else.
 * @returns The array itself, the parsed array, or `[]` when `v` is neither an
 *   array nor a non-empty string holding a JSON array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$16(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const decoded = JSON.parse(v);
  if (Array.isArray(decoded)) return decoded;
  return [];
}
340
// SQL-only analyzer registered under id "bipartite-pagerank" (no buildRows is defined).
// It treats (query, url) pairs as edges of a bipartite graph weighted by summed
// impressions, keeps the top 1000 queries and top 500 URLs by impressions, and runs a
// fixed number of unrolled rank-propagation steps in SQL: query ranks are computed from
// the previous step's URL ranks and vice versa, with damping. The final SELECT returns
// the top `limit` queries and top `limit` URLs by rank, each row also carrying the node
// counts and a JSON list of per-step L1 rank changes.
+ const bipartitePagerankAnalyzer = defineAnalyzer({
341
+ id: "bipartite-pagerank",
342
// Builds the SQL spec. Reads startDate/endDate via periodOf, `minImpressions`
// (default 50, per-edge floor) and `limit` (default 50, applied per node kind).
+ buildSql(params) {
343
+ const { startDate, endDate } = periodOf(params);
344
+ const minImpressions = params.minImpressions ?? 50;
345
// Fixed caps on each side of the graph; not configurable through params.
+ const topQueries = 1e3;
346
+ const topUrls = 500;
347
+ const limit = params.limit ?? 50;
348
// Minimum share of a node's edge mass for an edge to count toward bridging / anchoring.
+ const bridgingEdgeThreshold = .05;
349
+ const anchoringEdgeThreshold = .05;
350
+ const iterations = BIPARTITE_PAGERANK_ITERATIONS;
351
+ const d = BIPARTITE_PAGERANK_DAMPING;
352
+ const iterCtes = [];
353
// Unroll the iteration: one CTE per step, each reading only ranks_<i-1>.
+ for (let i = 1; i <= iterations; i++) iterCtes.push(`
354
+ ranks_${i} AS (
355
+ SELECT
356
+ 'q' AS kind,
357
+ e.qid AS id,
358
+ (1.0 - ${d}) / (SELECT n FROM query_count)
359
+ + ${d} * SUM(e.w_u_to_q * r.rank) AS rank
360
+ FROM u_to_q_weights e
361
+ JOIN ranks_${i - 1} r ON r.kind = 'u' AND r.id = e.uid
362
+ GROUP BY e.qid
363
+ UNION ALL
364
+ SELECT
365
+ 'u' AS kind,
366
+ e.uid AS id,
367
+ (1.0 - ${d}) / (SELECT n FROM url_count)
368
+ + ${d} * SUM(e.w_q_to_u * r.rank) AS rank
369
+ FROM q_to_u_weights e
370
+ JOIN ranks_${i - 1} r ON r.kind = 'q' AND r.id = e.qid
371
+ GROUP BY e.uid
372
+ )`);
373
+ const deltaParts = [];
374
// One SELECT per step giving the L1 distance between ranks_<i> and ranks_<i-1>;
// the parts are UNION ALL-ed into the `deltas` CTE below.
+ for (let i = 1; i <= iterations; i++) deltaParts.push(`
375
+ SELECT ${i} AS step,
376
+ (SELECT COALESCE(SUM(ABS(a.rank - b.rank)), 0.0)
377
+ FROM ranks_${i} a
378
+ JOIN ranks_${i - 1} b USING (kind, id)) AS l1`);
379
+ return {
380
+ sql: `
381
+ WITH edges0 AS (
382
+ SELECT
383
+ query AS qid,
384
+ url AS uid,
385
+ CAST(SUM(impressions) AS DOUBLE) AS impressions
386
+ FROM read_parquet({{FILES}}, union_by_name = true)
387
+ WHERE date >= ? AND date <= ?
388
+ AND query IS NOT NULL AND query <> ''
389
+ AND url IS NOT NULL AND url <> ''
390
+ GROUP BY query, url
391
+ HAVING SUM(impressions) >= ?
392
+ ),
393
+ -- Top-N caps per side keep the iteration tractable.
394
+ query_totals AS (
395
+ SELECT qid, SUM(impressions) AS tot
396
+ FROM edges0 GROUP BY qid
397
+ ),
398
+ url_totals AS (
399
+ SELECT uid, SUM(impressions) AS tot
400
+ FROM edges0 GROUP BY uid
401
+ ),
402
+ top_queries AS (
403
+ SELECT qid FROM query_totals
404
+ ORDER BY tot DESC, qid ASC LIMIT ${Number(topQueries)}
405
+ ),
406
+ top_urls AS (
407
+ SELECT uid FROM url_totals
408
+ ORDER BY tot DESC, uid ASC LIMIT ${Number(topUrls)}
409
+ ),
410
+ edges AS (
411
+ SELECT e.qid, e.uid, e.impressions
412
+ FROM edges0 e
413
+ JOIN top_queries tq USING (qid)
414
+ JOIN top_urls tu USING (uid)
415
+ ),
416
+ query_nodes AS (SELECT DISTINCT qid FROM edges),
417
+ url_nodes AS (SELECT DISTINCT uid FROM edges),
418
+ query_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM query_nodes),
419
+ url_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM url_nodes),
420
+ -- Row-stochastic transition weights in each direction. For q->u the
421
+ -- weights out of a query sum to 1; symmetric for u->q.
422
+ q_out AS (SELECT qid, SUM(impressions) AS s FROM edges GROUP BY qid),
423
+ u_out AS (SELECT uid, SUM(impressions) AS s FROM edges GROUP BY uid),
424
+ q_to_u_weights AS (
425
+ SELECT e.qid, e.uid,
426
+ e.impressions / NULLIF(q.s, 0) AS w_q_to_u
427
+ FROM edges e JOIN q_out q USING (qid)
428
+ ),
429
+ u_to_q_weights AS (
430
+ SELECT e.qid, e.uid,
431
+ e.impressions / NULLIF(u.s, 0) AS w_u_to_q
432
+ FROM edges e JOIN u_out u USING (uid)
433
+ ),
434
+ -- Seed: uniform distribution per side. Total mass = 2 (one unit per side).
435
+ ranks_0 AS (
436
+ SELECT 'q' AS kind, q.qid AS id, 1.0 / (SELECT n FROM query_count) AS rank
437
+ FROM query_nodes q
438
+ UNION ALL
439
+ SELECT 'u' AS kind, u.uid AS id, 1.0 / (SELECT n FROM url_count) AS rank
440
+ FROM url_nodes u
441
+ ),
442
+ ${iterCtes.join(",\n")},
443
+ final_ranks AS (SELECT * FROM ranks_${iterations}),
444
+ -- Hub/anchor diagnostics computed from raw edge mass (not rank). A
445
+ -- query "bridges" URLs it sends >= ${bridgingEdgeThreshold} of its mass
446
+ -- to; a URL "anchors" queries that contribute >= ${anchoringEdgeThreshold}
447
+ -- of its incoming mass.
448
+ q_bridging AS (
449
+ SELECT qid, COUNT(*) AS bridging
450
+ FROM q_to_u_weights
451
+ WHERE w_q_to_u >= ${bridgingEdgeThreshold}
452
+ GROUP BY qid
453
+ ),
454
+ u_anchoring AS (
455
+ SELECT uid, COUNT(*) AS anchoring
456
+ FROM u_to_q_weights
457
+ WHERE w_u_to_q >= ${anchoringEdgeThreshold}
458
+ GROUP BY uid
459
+ ),
460
+ q_degree AS (
461
+ SELECT qid, COUNT(*) AS degree, SUM(impressions) AS impressions
462
+ FROM edges GROUP BY qid
463
+ ),
464
+ u_degree AS (
465
+ SELECT uid, COUNT(*) AS degree, SUM(impressions) AS impressions
466
+ FROM edges GROUP BY uid
467
+ ),
468
+ deltas AS (
469
+ ${deltaParts.join("\n UNION ALL\n")}
470
+ ),
471
+ query_rows AS (
472
+ SELECT
473
+ 'query' AS kind, f.id, f.rank,
474
+ COALESCE(b.bridging, 0) AS bridging,
475
+ 0 AS anchoring,
476
+ COALESCE(qd.degree, 0) AS degree,
477
+ COALESCE(qd.impressions, 0) AS impressions
478
+ FROM final_ranks f
479
+ LEFT JOIN q_bridging b ON b.qid = f.id
480
+ LEFT JOIN q_degree qd ON qd.qid = f.id
481
+ WHERE f.kind = 'q'
482
+ ORDER BY f.rank DESC
483
+ LIMIT ${Number(limit)}
484
+ ),
485
+ url_rows AS (
486
+ SELECT
487
+ 'url' AS kind, f.id, f.rank,
488
+ 0 AS bridging,
489
+ COALESCE(a.anchoring, 0) AS anchoring,
490
+ COALESCE(ud.degree, 0) AS degree,
491
+ COALESCE(ud.impressions, 0) AS impressions
492
+ FROM final_ranks f
493
+ LEFT JOIN u_anchoring a ON a.uid = f.id
494
+ LEFT JOIN u_degree ud ON ud.uid = f.id
495
+ WHERE f.kind = 'u'
496
+ ORDER BY f.rank DESC
497
+ LIMIT ${Number(limit)}
498
+ ),
499
+ nodes AS (
500
+ SELECT * FROM query_rows
501
+ UNION ALL
502
+ SELECT * FROM url_rows
503
+ ),
504
+ counts AS (
505
+ SELECT
506
+ (SELECT n FROM query_count) AS q_count,
507
+ (SELECT n FROM url_count) AS u_count
508
+ ),
509
+ deltas_json AS (
510
+ SELECT to_json(list({ 'step': step, 'l1': l1 } ORDER BY step)) AS dj
511
+ FROM deltas
512
+ )
513
+ SELECT
514
+ n.kind,
515
+ n.id,
516
+ n.rank,
517
+ n.bridging,
518
+ n.anchoring,
519
+ n.degree,
520
+ n.impressions,
521
+ c.q_count AS queryCount,
522
+ c.u_count AS urlCount,
523
+ dj.dj AS deltasJson
524
+ FROM nodes n
525
+ CROSS JOIN counts c
526
+ CROSS JOIN deltas_json dj
527
+ ORDER BY n.kind, n.rank DESC
528
+ `,
529
// Positional bindings for the three `?` placeholders in edges0, in order.
+ params: [
530
+ startDate,
531
+ endDate,
532
+ minImpressions
533
+ ],
534
+ current: {
535
+ table: "page_keywords",
536
+ partitions: enumeratePartitions(startDate, endDate)
537
+ }
538
+ };
539
+ },
540
// Normalizes node rows and lifts the per-query constants into meta.
// Non-array input is treated as an empty result.
+ reduceSql(rows) {
541
+ const arr = Array.isArray(rows) ? rows : [];
542
+ const iterations = BIPARTITE_PAGERANK_ITERATIONS;
543
+ const d = BIPARTITE_PAGERANK_DAMPING;
544
+ const results = arr.map((r) => ({
545
+ kind: str$22(r.kind),
546
+ id: str$22(r.id),
547
+ rank: num$4(r.rank),
548
+ bridging: num$4(r.bridging),
549
+ anchoring: num$4(r.anchoring),
550
+ degree: num$4(r.degree),
551
+ impressions: num$4(r.impressions)
552
+ }));
553
// queryCount, urlCount and deltasJson are CROSS JOINed onto every row, so the
// first row is enough; with no rows they fall back to 0 / an empty list.
+ const first = arr[0] ?? {};
554
+ const queryCount = num$4(first.queryCount);
555
+ const urlCount = num$4(first.urlCount);
556
+ const deltas = parseJsonList$16(first.deltasJson).map((e) => ({
557
+ step: num$4(e.step),
558
+ l1: num$4(e.l1)
559
+ }));
560
// L1 change of the last listed step (the SQL orders the list by step).
+ const convergenceDelta = deltas.length > 0 ? deltas[deltas.length - 1].l1 : 0;
561
+ return {
562
+ results,
563
+ meta: {
564
+ total: results.length,
565
+ convergenceDelta,
566
+ iterations,
567
+ damping: d,
568
+ queryCount,
569
+ urlCount,
570
+ deltas
571
+ }
572
+ };
573
+ }
574
+ });
575
/** Row limit applied by the query-state helpers when the caller passes none. */
const DEFAULT_LIMIT$1 = 25e3;

/**
 * Builds the query state for rows keyed by (query, page) within a period.
 *
 * @param period `{ startDate, endDate }` bounds passed to `between`.
 * @param limit Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns Whatever the `gsc` builder's `getState()` yields.
 */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(query, page);
  const scoped = selection.where(between(date, period.startDate, period.endDate));
  const capped = scoped.limit(limit);
  return capped.getState();
}
579
/**
 * Builds the query state for rows keyed by page within a period.
 *
 * @param period `{ startDate, endDate }` bounds passed to `between`.
 * @param limit Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns Whatever the `gsc` builder's `getState()` yields.
 */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(page);
  const scoped = selection.where(between(date, period.startDate, period.endDate));
  const capped = scoped.limit(limit);
  return capped.getState();
}
582
/**
 * Builds the query state for rows keyed by date within a period.
 *
 * @param period `{ startDate, endDate }` bounds passed to `between`.
 * @param limit Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns Whatever the `gsc` builder's `getState()` yields.
 */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(date);
  const scoped = selection.where(between(date, period.startDate, period.endDate));
  const capped = scoped.limit(limit);
  return capped.getState();
}
585
/**
 * Escapes regex metacharacters so `s` can be embedded literally in a pattern.
 *
 * @param s Text to escape.
 * @returns `s` with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash.
 */
function escapeRegexAlt(s) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, (ch) => `\\${ch}`);
}
588
/** Stringifies a cell value; `null` and `undefined` become the empty string. */
function str$21(v) {
  if (v == null) {
    return "";
  }
  return String(v);
}
591
/**
 * Splits keyword rows into brand and non-brand segments by case-insensitive
 * substring match against the configured brand terms.
 *
 * @param keywords Iterable of rows carrying `query`, `clicks` and `impressions`.
 * @param options `{ brandTerms, minImpressions? }`. `brandTerms` is an array of
 *   strings; blank or non-string entries are ignored (an empty term would match
 *   every query). `minImpressions` defaults to 10; rows below it are dropped.
 * @returns `{ brand, nonBrand, summary }`, where `summary` holds click and
 *   impression totals per segment plus `brandShare` (brand clicks / all clicks,
 *   0 when there are no clicks).
 */
function analyzeBrandSegmentation(keywords, options) {
  const { brandTerms, minImpressions = 10 } = options;
  const needles = brandTerms
    .filter((term) => typeof term === "string" && term.trim() !== "")
    .map((term) => term.toLowerCase());
  const brand = [];
  const nonBrand = [];
  let brandClicks = 0;
  let nonBrandClicks = 0;
  let brandImpressions = 0;
  let nonBrandImpressions = 0;
  for (const row of keywords) {
    const impressions = num$4(row.impressions);
    if (impressions < minImpressions) continue;
    // Rows can lack a query; treat that as non-brand instead of throwing.
    // Lowercase once per row rather than once per term.
    const haystack = row.query == null ? "" : String(row.query).toLowerCase();
    if (needles.some((needle) => haystack.includes(needle))) {
      brand.push(row);
      brandClicks += num$4(row.clicks);
      brandImpressions += impressions;
    } else {
      nonBrand.push(row);
      nonBrandClicks += num$4(row.clicks);
      nonBrandImpressions += impressions;
    }
  }
  const totalClicks = brandClicks + nonBrandClicks;
  return {
    brand,
    nonBrand,
    summary: {
      brandClicks,
      nonBrandClicks,
      brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
      brandImpressions,
      nonBrandImpressions
    }
  };
}
616
/**
 * Validates and cleans the `brandTerms` request parameter for the brand analyzer.
 * Blank and non-string entries are dropped: a blank term would produce an empty
 * regex alternative (and an always-true substring test) that marks every query
 * as brand.
 *
 * @param terms Raw `params.brandTerms`.
 * @returns Non-empty array of usable terms.
 * @throws {Error} When no usable term remains.
 */
function brandAnalyzerTerms(terms) {
  const usable = (Array.isArray(terms) ? terms : []).filter(
    (term) => typeof term === "string" && term.trim() !== ""
  );
  if (usable.length === 0) throw new Error("Brand analysis requires brandTerms");
  return usable;
}

/**
 * Brand vs. non-brand analyzer (id "brand").
 *
 * SQL path: aggregates page_keywords per (query, url), tags each row with a
 * `segment` by regex-matching the lowercased query against the escaped brand
 * terms, and summarizes clicks/impressions per segment in `reduceSql`.
 * Rows path: fetches keyword rows and delegates to `analyzeBrandSegmentation`.
 * Both paths report `meta.total` and `meta.summary`.
 */
const brandAnalyzer = defineAnalyzer({
  id: "brand",
  /**
   * @param params Reads `brandTerms` (required), `startDate`/`endDate` via
   *   `periodOf`, `minImpressions` (default 10) and `limit` (default 10000).
   * @returns SQL spec with bindings [startDate, endDate, minImpressions, regex].
   * @throws {Error} When `brandTerms` has no usable entry.
   */
  buildSql(params) {
    const brandTerms = brandAnalyzerTerms(params.brandTerms);
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const limit = params.limit ?? 1e4;
    // Single alternation of literal (escaped) terms, matched against LOWER(query).
    const regex = `(${brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
    return {
      sql: `
        WITH agg AS (
        SELECT
        query,
        url AS page,
        ${METRIC_EXPR.clicks} AS clicks,
        ${METRIC_EXPR.impressions} AS impressions,
        ${METRIC_EXPR.ctr} AS ctr,
        ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY query, url
        HAVING SUM(impressions) >= ?
        )
        SELECT
        query, page, clicks, impressions, ctr, position,
        CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
        FROM agg
        ORDER BY clicks DESC
        LIMIT ${Number(limit)}
        `,
      params: [
        startDate,
        endDate,
        minImpressions,
        regex
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes SQL rows and totals clicks/impressions per segment.
   * Any segment value other than "brand" is counted as non-brand.
   */
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      query: str$21(r.query),
      page: r.page == null ? void 0 : str$21(r.page),
      clicks: num$4(r.clicks),
      impressions: num$4(r.impressions),
      ctr: num$4(r.ctr),
      position: num$4(r.position),
      segment: str$21(r.segment)
    }));
    let brandClicks = 0;
    let nonBrandClicks = 0;
    let brandImpressions = 0;
    let nonBrandImpressions = 0;
    for (const r of normalized) {
      if (r.segment === "brand") {
        brandClicks += r.clicks;
        brandImpressions += r.impressions;
      } else {
        nonBrandClicks += r.clicks;
        nonBrandImpressions += r.impressions;
      }
    }
    const totalClicks = brandClicks + nonBrandClicks;
    return {
      results: normalized,
      meta: {
        total: normalized.length,
        summary: {
          brandClicks,
          nonBrandClicks,
          brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
          brandImpressions,
          nonBrandImpressions
        }
      }
    };
  },
  /** Requests keyword rows for the resolved period, honoring `params.limit`. */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Segments raw keyword rows in JS.
   * @throws {Error} When `brandTerms` has no usable entry.
   */
  reduceRows(rows, params) {
    const brandTerms = brandAnalyzerTerms(params.brandTerms);
    const { brand, nonBrand, summary } = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
      brandTerms,
      minImpressions: params.minImpressions
    });
    const results = [
      ...brand.map((r) => ({ ...r, segment: "brand" })),
      ...nonBrand.map((r) => ({ ...r, segment: "non-brand" }))
    ];
    return {
      results,
      // `total` mirrors the SQL path so consumers can read the same meta shape.
      meta: { total: results.length, summary }
    };
  }
});
715
/**
 * Sorter for rows-path cannibalization results, built with `createSorter` and
 * defaulting to the "clicks" metric. Supported metrics: "clicks",
 * "impressions", "positionSpread" and "pageCount"; any other metric yields
 * `undefined` from the accessor.
 */
const sortRowResults$1 = createSorter((item, metric) => {
  if (metric === "clicks") return item.totalClicks;
  if (metric === "impressions") return item.totalImpressions;
  if (metric === "positionSpread") return item.positionSpread;
  if (metric === "pageCount") return item.pages.length;
  return void 0;
}, "clicks");
723
/** Stringifies a cell value; `null` and `undefined` become the empty string. */
function str$20(v) {
  const absent = v === null || v === void 0;
  if (absent) return "";
  return String(v);
}
726
/**
 * Normalizes a list-valued SQL cell that may arrive as an array or as JSON text.
 *
 * @param v Array, JSON string, or anything else.
 * @returns The array itself, the parsed array, or `[]` when `v` is neither an
 *   array nor a non-empty string holding a JSON array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$15(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
734
/**
 * Finds queries served by several pages whose positions lie close together.
 *
 * @param rows Iterable of rows with `query`, `page`, `clicks`, `impressions`,
 *   `ctr` and `position`.
 * @param options `{ minImpressions = 10, maxPositionSpread = 10, minPages = 2,
 *   sortBy = "clicks", sortOrder = "desc" }`.
 * @returns Sorted list of `{ query, pages, totalClicks, totalImpressions,
 *   positionSpread }`; `pages` is ordered by clicks descending.
 */
function analyzeCannibalization(rows, options = {}) {
  const {
    minImpressions = 10,
    maxPositionSpread = 10,
    minPages = 2,
    sortBy = "clicks",
    sortOrder = "desc"
  } = options;

  // Collect the qualifying pages of each query, in first-seen query order.
  const pagesByQuery = new Map();
  for (const row of rows) {
    if (row.impressions < minImpressions) continue;
    let bucket = pagesByQuery.get(row.query);
    if (!bucket) {
      bucket = [];
      pagesByQuery.set(row.query, bucket);
    }
    const { page, clicks, impressions, ctr, position } = row;
    bucket.push({ page, clicks, impressions, ctr, position });
  }

  const flagged = [];
  pagesByQuery.forEach((pages, query) => {
    if (pages.length < minPages) return;
    pages.sort((left, right) => right.clicks - left.clicks);
    const positions = pages.map((entry) => entry.position);
    const worst = Math.max(...positions);
    const best = Math.min(...positions);
    const positionSpread = worst - best;
    // Pages ranking far apart are not treated as competing for the query.
    if (positionSpread > maxPositionSpread) return;
    let totalClicks = 0;
    let totalImpressions = 0;
    for (const entry of pages) {
      totalClicks += entry.clicks;
      totalImpressions += entry.impressions;
    }
    flagged.push({ query, pages, totalClicks, totalImpressions, positionSpread });
  });
  return sortRowResults$1(flagged, sortBy, sortOrder);
}
766
/**
 * Keyword cannibalization analyzer (id "cannibalization").
 *
 * SQL path: aggregates page_keywords per (query, url), keeps queries that have
 * at least two qualifying URLs and enough total impressions, and emits one
 * event per query with the impression-share HHI, a "stolen clicks" estimate
 * against the top-impression URL, a 0-100 severity score and the ranked
 * competitor list. `reduceSql` also folds the competitor lists into a URL
 * co-occurrence graph (`meta.graph`).
 *
 * Rows path: fetches keyword rows and delegates to `analyzeCannibalization`.
 */
const cannibalizationAnalyzer = defineAnalyzer({
  id: "cannibalization",
  /**
   * @param params Reads `startDate`/`endDate` via `periodOf`, `minImpressions`
   *   (default 50, per-URL floor) and `limit` (default 200).
   * @returns SQL spec with bindings [startDate, endDate, minImpressions,
   *   minCompetitors, minQueryImpressions].
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const minCompetitors = 2;
    // A query must carry at least twice the per-URL impressions floor.
    const minQueryImpressions = minImpressions * 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
        WITH agg AS (
        SELECT
        query,
        url,
        ${METRIC_EXPR.clicks} AS clicks,
        ${METRIC_EXPR.impressions} AS impressions,
        ${METRIC_EXPR.ctr} AS ctr,
        ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        AND query IS NOT NULL AND query <> ''
        AND url IS NOT NULL AND url <> ''
        GROUP BY query, url
        HAVING SUM(impressions) >= ?
        ),
        query_totals AS (
        SELECT
        query,
        SUM(impressions) AS total_impressions,
        SUM(clicks) AS total_clicks,
        COUNT(*) AS competitor_count
        FROM agg
        GROUP BY query
        HAVING COUNT(*) >= ? AND SUM(impressions) >= ?
        ),
        ranked AS (
        SELECT
        a.query,
        a.url,
        a.clicks,
        a.impressions,
        a.ctr,
        a.position,
        a.impressions / NULLIF(t.total_impressions, 0) AS share,
        ROW_NUMBER() OVER (
        PARTITION BY a.query
        ORDER BY a.impressions DESC, a.clicks DESC, a.url ASC
        ) AS rnk
        FROM agg a
        JOIN query_totals t USING (query)
        ),
        leader AS (
        SELECT query, url AS leader_url, ctr AS leader_ctr, position AS leader_position
        FROM ranked WHERE rnk = 1
        ),
        events AS (
        SELECT
        r.query,
        any_value(l.leader_url) AS leader_url,
        any_value(l.leader_ctr) AS leader_ctr,
        any_value(l.leader_position) AS leader_position,
        SUM(POWER(r.share * 100.0, 2)) AS hhi,
        SUM(CASE
        WHEN r.rnk > 1 AND l.leader_ctr > r.ctr
        THEN (l.leader_ctr - r.ctr) * r.impressions
        ELSE 0.0
        END) AS stolen_clicks,
        to_json(list({
        'url': r.url,
        'clicks': r.clicks,
        'impressions': r.impressions,
        'ctr': r.ctr,
        'position': r.position,
        'share': r.share,
        'rank': r.rnk
        } ORDER BY r.rnk)) AS competitors
        FROM ranked r
        JOIN leader l USING (query)
        GROUP BY r.query
        )
        SELECT
        e.query AS keyword,
        t.total_impressions AS totalImpressions,
        t.total_clicks AS totalClicks,
        t.competitor_count AS competitorCount,
        e.leader_url AS leaderUrl,
        e.leader_ctr AS leaderCtr,
        e.leader_position AS leaderPosition,
        e.hhi AS hhi,
        GREATEST(0.0, 1.0 - e.hhi / 10000.0) AS fragmentation,
        e.stolen_clicks AS stolenClicks,
        e.competitors AS competitors,
        CAST(ROUND(LEAST(100.0,
        100.0 * POWER(
        GREATEST(1.0 - e.hhi / 10000.0, 0.0)
        * LEAST(e.stolen_clicks / GREATEST(t.total_clicks + e.stolen_clicks, 1.0), 1.0)
        * LEAST(LOG10(GREATEST(t.total_impressions, 10.0)) / 5.0, 1.0),
        1.0 / 3.0
        )
        )) AS DOUBLE) AS severity
        FROM events e
        JOIN query_totals t USING (query)
        ORDER BY severity DESC, stolenClicks DESC
        LIMIT ${Number(limit)}
        `,
      params: [
        startDate,
        endDate,
        minImpressions,
        minCompetitors,
        minQueryImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes event rows and derives the URL co-occurrence graph.
   *
   * Graph nodes are competitor URLs with summed impressions/clicks and the
   * number of distinct keywords they appear under. Each unordered URL pair that
   * shares a keyword gets one edge whose weight accumulates
   * min(impressions_a, impressions_b) and whose `queries` counts shared keywords.
   */
  reduceSql(rows) {
    const events = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$20(r.keyword),
      totalImpressions: num$4(r.totalImpressions),
      totalClicks: num$4(r.totalClicks),
      competitorCount: num$4(r.competitorCount),
      leaderUrl: str$20(r.leaderUrl),
      leaderCtr: num$4(r.leaderCtr),
      leaderPosition: num$4(r.leaderPosition),
      hhi: num$4(r.hhi),
      fragmentation: num$4(r.fragmentation),
      stolenClicks: num$4(r.stolenClicks),
      severity: num$4(r.severity),
      competitors: parseJsonList$15(r.competitors).map((c) => ({
        url: str$20(c.url),
        clicks: num$4(c.clicks),
        impressions: num$4(c.impressions),
        ctr: num$4(c.ctr),
        position: num$4(c.position),
        share: num$4(c.share),
        rank: num$4(c.rank)
      }))
    }));
    const nodeAgg = new Map();
    const edgeAgg = new Map();
    for (const ev of events) {
      for (const c of ev.competitors) {
        const n = nodeAgg.get(c.url) ?? {
          impressions: 0,
          clicks: 0,
          queries: new Set()
        };
        n.impressions += c.impressions;
        n.clicks += c.clicks;
        n.queries.add(ev.keyword);
        nodeAgg.set(c.url, n);
      }
      for (let i = 0; i < ev.competitors.length; i++) {
        for (let j = i + 1; j < ev.competitors.length; j++) {
          const a = ev.competitors[i];
          const b = ev.competitors[j];
          // Order the pair so (a, b) and (b, a) share one edge.
          const [src, tgt] = a.url < b.url ? [a.url, b.url] : [b.url, a.url];
          // Encode the pair unambiguously: plain concatenation would let
          // distinct pairs collide (e.g. "a"+"bc" and "ab"+"c").
          const key = JSON.stringify([src, tgt]);
          const weight = Math.min(a.impressions, b.impressions);
          const edge = edgeAgg.get(key) ?? {
            source: src,
            target: tgt,
            weight: 0,
            queries: 0
          };
          edge.weight += weight;
          edge.queries += 1;
          edgeAgg.set(key, edge);
        }
      }
    }
    const nodes = [...nodeAgg.entries()].map(([url, n]) => ({
      url,
      impressions: n.impressions,
      clicks: n.clicks,
      queryCount: n.queries.size
    }));
    const edges = [...edgeAgg.values()];
    const avgFragmentation = events.length > 0 ? events.reduce((s, e) => s + e.fragmentation, 0) / events.length : 0;
    const totalStolenClicks = events.reduce((s, e) => s + e.stolenClicks, 0);
    return {
      results: events,
      meta: {
        total: events.length,
        totalStolenClicks,
        avgFragmentation,
        graph: {
          nodes,
          edges
        }
      }
    };
  },
  /** Requests keyword rows for the resolved period, honoring `params.limit`. */
  buildRows(params) {
    return { rows: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Runs the JS cannibalization analysis over raw keyword rows, forwarding
   * `minImpressions`, `maxPositionSpread` and `minPages` from params.
   */
  reduceRows(rows, params) {
    const results = analyzeCannibalization(Array.isArray(rows) ? rows : [], {
      minImpressions: params.minImpressions,
      maxPositionSpread: params.maxPositionSpread,
      minPages: params.minPages
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
975
/**
 * Coerces a loosely-typed cell value to a number.
 *
 * Numbers are returned untouched (including NaN/Infinity), bigints are
 * converted with `Number`, null/undefined become 0, and anything else is
 * converted with `Number` and replaced by 0 when the result is not finite.
 *
 * @param {unknown} v - Raw value.
 * @returns {number} Numeric value.
 */
function num$3(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default: {
      if (v === null || v === undefined) return 0;
      const coerced = Number(v);
      return Number.isFinite(coerced) ? coerced : 0;
    }
  }
}
982
/**
 * Converts a value to a string, mapping null/undefined to the empty string.
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`, or "" when `v` is nullish.
 */
function str$19(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
985
/**
 * Normalises a value that is either already an array or a JSON-encoded array.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {unknown[]} The array itself, the parsed array, or [] when `v` is
 *   neither an array nor a non-empty string that parses to an array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$14(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
993
/**
 * "change-point" analyzer.
 *
 * The SQL aggregates one value per (query, url, date), keeps entities with at
 * least 21 days of data and enough impressions, and scores every candidate
 * split date with the `llr` expression (total vs. left/right variance).
 * Entities whose best score exceeds the threshold are returned together with
 * their full daily series.
 */
const changePointAnalyzer = defineAnalyzer({
  id: "change-point",
  /**
   * Builds the SQL spec.
   *
   * @param {object} params - Reads `startDate`, `endDate`, `threshold`
   *   (default 10), `minImpressions` (default 50), `metric` ("clicks",
   *   "impressions", anything else means "position") and `limit` (default 100).
   * @returns {{sql: string, params: unknown[], current: object}} Query spec
   *   against the "page_keywords" table.
   */
  buildSql(params) {
    const windowEnd = params.endDate ?? defaultEndDate();
    const windowStart = params.startDate ?? daysAgo(93);
    // Minimum observed days per entity and per side of a candidate split.
    const requiredDays = 21;
    const requiredPerSide = 7;
    const llrThreshold = params.threshold ?? 10;
    const impressionFloor = params.minImpressions ?? 50;
    const isVolumeMetric = params.metric === "clicks" || params.metric === "impressions";
    const metric = isVolumeMetric ? params.metric : "position";
    const rowLimit = params.limit ?? 100;
    // `metric` is whitelisted above, so interpolating it into SQL is safe.
    let valueExpr;
    if (metric === "position") valueExpr = METRIC_EXPR.position;
    else valueExpr = `CAST(SUM(${metric}) AS DOUBLE)`;
    const sql = `
      WITH daily AS (
        SELECT
          query,
          url AS page,
          date,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          ${valueExpr} AS value
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
        HAVING SUM(impressions) >= 1
      ),
      entity_stats AS (
        SELECT query, page,
          COUNT(*) AS n_total,
          SUM(impressions) AS total_impressions,
          SUM(value) AS sum_total,
          SUM(value * value) AS sumsq_total
        FROM daily
        GROUP BY query, page
        HAVING COUNT(*) >= ${Number(requiredDays)}
          AND SUM(impressions) >= ?
      ),
      filtered AS (
        SELECT d.*,
          e.n_total, e.sum_total, e.sumsq_total, e.total_impressions
        FROM daily d
        JOIN entity_stats e USING (query, page)
      ),
      cumulated AS (
        SELECT *,
          COUNT(*) OVER w AS n_left,
          SUM(value) OVER w AS sum_left,
          SUM(value * value) OVER w AS sumsq_left
        FROM filtered
        WINDOW w AS (
          PARTITION BY query, page
          ORDER BY date
          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        )
      ),
      llr_scored AS (
        SELECT *,
          (n_total - n_left) AS n_right,
          (sum_total - sum_left) AS sum_right,
          (sumsq_total - sumsq_left) AS sumsq_right,
          GREATEST(
            (sumsq_left / NULLIF(n_left, 0))
              - (sum_left / NULLIF(n_left, 0)) * (sum_left / NULLIF(n_left, 0)),
            1e-9
          ) AS var_left,
          GREATEST(
            ((sumsq_total - sumsq_left) / NULLIF(n_total - n_left, 0))
              - ((sum_total - sum_left) / NULLIF(n_total - n_left, 0))
                * ((sum_total - sum_left) / NULLIF(n_total - n_left, 0)),
            1e-9
          ) AS var_right,
          GREATEST(
            (sumsq_total / NULLIF(n_total, 0))
              - (sum_total / NULLIF(n_total, 0)) * (sum_total / NULLIF(n_total, 0)),
            1e-9
          ) AS var_single
        FROM cumulated
      ),
      llr AS (
        SELECT *,
          CASE
            WHEN n_left >= ${Number(requiredPerSide)} AND (n_total - n_left) >= ${Number(requiredPerSide)}
            THEN n_total * LN(var_single)
              - n_left * LN(var_left)
              - (n_total - n_left) * LN(var_right)
            ELSE NULL
          END AS llr
        FROM llr_scored
      ),
      best AS (
        SELECT query, page, n_total, total_impressions,
          arg_max(date, llr) AS change_date,
          MAX(llr) AS best_llr,
          arg_max(sum_left / NULLIF(n_left, 0), llr) AS left_mean,
          arg_max((sum_total - sum_left) / NULLIF(n_total - n_left, 0), llr) AS right_mean,
          arg_max(sqrt(var_left), llr) AS left_std,
          arg_max(sqrt(var_right), llr) AS right_std
        FROM llr
        WHERE llr IS NOT NULL
        GROUP BY query, page, n_total, total_impressions
        HAVING MAX(llr) > ${Number(llrThreshold)}
      ),
      series AS (
        SELECT query, page,
          to_json(list({
            'date': strftime(date, '%Y-%m-%d'),
            'value': value
          } ORDER BY date)) AS seriesJson
        FROM daily
        GROUP BY query, page
      )
      SELECT
        b.query AS keyword,
        b.page,
        CAST(b.n_total AS DOUBLE) AS totalDays,
        CAST(b.total_impressions AS DOUBLE) AS totalImpressions,
        strftime(b.change_date, '%Y-%m-%d') AS changeDate,
        b.best_llr AS llr,
        b.left_mean AS leftMean,
        b.right_mean AS rightMean,
        (b.right_mean - b.left_mean) AS delta,
        b.left_std AS leftStddev,
        b.right_std AS rightStddev,
        s.seriesJson
      FROM best b
      LEFT JOIN series s USING (query, page)
      ORDER BY b.best_llr DESC
      LIMIT ${Number(rowLimit)}
    `;
    return {
      sql,
      params: [windowStart, windowEnd, impressionFloor],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(windowStart, windowEnd)
      }
    };
  },
  /**
   * Normalises SQL rows and labels each change as improved or worsened.
   *
   * For "position" a negative delta counts as improved; for clicks and
   * impressions a positive delta does. A delta of 0 is labelled "worsened".
   *
   * @param {unknown} rows - SQL result rows (non-arrays are treated as empty).
   * @param {object} params - Reads `threshold` and `metric`.
   * @returns {{results: object[], meta: object}} Normalised rows plus counts.
   */
  reduceSql(rows, params) {
    const threshold = params.threshold ?? 10;
    const isVolumeMetric = params.metric === "clicks" || params.metric === "impressions";
    const metric = isVolumeMetric ? params.metric : "position";
    const results = [];
    let improvedCount = 0;
    for (const row of Array.isArray(rows) ? rows : []) {
      const delta = num$3(row.delta);
      const improved = metric === "position" ? delta < 0 : delta > 0;
      if (improved) improvedCount += 1;
      const series = parseJsonList$14(row.seriesJson).map((point) => ({
        date: str$19(point.date),
        value: num$3(point.value)
      }));
      results.push({
        keyword: str$19(row.keyword),
        page: str$19(row.page),
        totalDays: num$3(row.totalDays),
        totalImpressions: num$3(row.totalImpressions),
        changeDate: str$19(row.changeDate),
        llr: num$3(row.llr),
        leftMean: num$3(row.leftMean),
        rightMean: num$3(row.rightMean),
        delta,
        leftStddev: num$3(row.leftStddev),
        rightStddev: num$3(row.rightStddev),
        direction: improved ? "improved" : "worsened",
        series
      });
    }
    return {
      results,
      meta: {
        total: results.length,
        metric,
        threshold,
        improved: improvedCount,
        // Every row is exactly one of the two directions.
        worsened: results.length - improvedCount
      }
    };
  }
});
1175
// Leading phrases that mark a keyword as belonging to an "intent" cluster.
const INTENT_PREFIXES = [
  "how to",
  "what is",
  "what are",
  "why is",
  "why do",
  "where to",
  "when to",
  "best",
  "top",
  "vs",
  "versus",
  "compare",
  "review",
  "buy",
  "cheap",
  "free",
  "near me"
];
// Regex source (as a string, for embedding in SQL): one of the prefixes at the
// start of the text, followed by whitespace or end of string.
const INTENT_PREFIXES_REGEX = `^(${INTENT_PREFIXES.join("|")})(\\s|$)`;
// Splits on runs of whitespace.
const WHITESPACE_RE = /\s+/;
1196
/**
 * Stringifies a value; null and undefined become "".
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`.
 */
function str$18(v) {
  return String(v ?? "");
}
1199
/**
 * Returns `v` when it is an array, the parsed value when `v` is a non-empty
 * JSON string that decodes to an array, and [] otherwise.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {unknown[]} Resulting list.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$13(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const value = JSON.parse(v);
  if (Array.isArray(value)) return value;
  return [];
}
1207
/**
 * Finds the intent prefix a keyword starts with.
 *
 * A prefix only matches when it is followed by whitespace or the end of the
 * keyword, mirroring the `(\s|$)` boundary in INTENT_PREFIXES_REGEX used by
 * the SQL path. A bare `startsWith(prefix)` would classify "bestiary" as
 * "best" and "topology" as "top".
 *
 * @param {string} keyword - Search query text.
 * @returns {string|null} The first matching entry of INTENT_PREFIXES, or null.
 */
function extractIntentPrefix(keyword) {
  const lower = keyword.toLowerCase();
  for (const prefix of INTENT_PREFIXES) {
    if (!lower.startsWith(prefix)) continue;
    const rest = lower.slice(prefix.length);
    // Require a word boundary after the prefix.
    if (rest === "" || /^\s/.test(rest)) return prefix;
  }
  return null;
}
1212
/**
 * Returns the first `wordCount` lowercase words of a keyword, joined by single
 * spaces, but only when the keyword has more than `wordCount` words.
 *
 * @param {string} keyword - Search query text.
 * @param {number} [wordCount=2] - Number of leading words to keep.
 * @returns {string|null} The word prefix, or null when the keyword is too short.
 */
function extractWordPrefix(keyword, wordCount = 2) {
  const tokens = keyword
    .toLowerCase()
    .split(WHITESPACE_RE)
    .filter((token) => Boolean(token));
  const hasEnoughWords = !(tokens.length < wordCount + 1);
  return hasEnoughWords ? tokens.slice(0, wordCount).join(" ") : null;
}
1217
/**
 * Groups keywords into clusters by intent prefix and/or two-word prefix.
 *
 * Keywords below `minImpressions` are ignored. With `clusterBy` "intent" or
 * "both", keywords are first grouped by `extractIntentPrefix`; with "prefix"
 * or "both", keywords that matched no intent prefix are grouped by
 * `extractWordPrefix`. Groups smaller than `minClusterSize` are dropped.
 *
 * `unclustered` lists every filtered keyword that is not part of a returned
 * cluster. This includes keywords whose intent group was dropped for being
 * too small, which would otherwise vanish from both outputs.
 *
 * @param {object[]} keywords - Rows with `query`, `clicks`, `impressions`, `position`.
 * @param {object} [options]
 * @param {number} [options.minClusterSize=2] - Minimum keywords per cluster.
 * @param {number} [options.minImpressions=10] - Minimum impressions per keyword.
 * @param {"intent"|"prefix"|"both"} [options.clusterBy="both"] - Grouping mode.
 * @returns {{clusters: object[], unclustered: object[]}} Clusters sorted by
 *   total clicks (descending) and the leftover keywords.
 */
function analyzeClustering(keywords, options = {}) {
  const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
  const filtered = keywords.filter((k) => num$4(k.impressions) >= minImpressions);
  const byIntent = clusterBy === "intent" || clusterBy === "both";
  const byPrefix = clusterBy === "prefix" || clusterBy === "both";
  const clusterMap = /* @__PURE__ */ new Map();
  // Queries that matched an intent prefix; they never fall through to prefix
  // grouping (same precedence as COALESCE(intent_prefix, word_prefix) in SQL).
  const intentMatched = /* @__PURE__ */ new Set();
  if (byIntent) {
    for (const kw of filtered) {
      const intent = extractIntentPrefix(kw.query);
      if (!intent) continue;
      const existing = clusterMap.get(intent);
      if (existing) existing.keywords.push(kw);
      else clusterMap.set(intent, { type: "intent", keywords: [kw] });
      intentMatched.add(kw.query);
    }
  }
  if (byPrefix) {
    const prefixMap = /* @__PURE__ */ new Map();
    for (const kw of filtered) {
      if (intentMatched.has(kw.query)) continue;
      const prefix = extractWordPrefix(kw.query);
      if (!prefix) continue;
      const existing = prefixMap.get(prefix);
      if (existing) existing.push(kw);
      else prefixMap.set(prefix, [kw]);
    }
    for (const [prefix, kws] of prefixMap) {
      if (kws.length >= minClusterSize) clusterMap.set(prefix, { type: "prefix", keywords: kws });
    }
  }
  const clusters = [];
  // Queries that actually ended up in a returned cluster.
  const emittedQueries = /* @__PURE__ */ new Set();
  for (const [name, data] of clusterMap) {
    if (data.keywords.length < minClusterSize) continue;
    const totalClicks = data.keywords.reduce((sum, k) => sum + num$4(k.clicks), 0);
    const totalImpressions = data.keywords.reduce((sum, k) => sum + num$4(k.impressions), 0);
    const avgPosition = data.keywords.reduce((sum, k) => sum + num$4(k.position), 0) / data.keywords.length;
    clusters.push({
      clusterName: name,
      clusterType: data.type,
      keywords: data.keywords,
      totalClicks,
      totalImpressions,
      avgPosition,
      keywordCount: data.keywords.length
    });
    for (const kw of data.keywords) emittedQueries.add(kw.query);
  }
  clusters.sort((a, b) => b.totalClicks - a.totalClicks);
  return {
    clusters,
    unclustered: filtered.filter((kw) => !emittedQueries.has(kw.query))
  };
}
1275
/**
 * "clustering" analyzer: groups keywords by intent prefix and/or two-word
 * prefix, either in SQL (`buildSql`/`reduceSql`) or in memory
 * (`buildRows`/`reduceRows` via `analyzeClustering`).
 */
const clusteringAnalyzer = defineAnalyzer({
  id: "clustering",
  /**
   * Builds the SQL spec against the "keywords" table.
   *
   * @param {object} params - Reads the period (via `periodOf`),
   *   `minImpressions` (default 10), `minClusterSize` (default 2) and
   *   `clusterBy` ("intent", "prefix" or "both"; default "both").
   * @returns {{sql: string, params: unknown[], current: object}} Query spec.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const minClusterSize = params.minClusterSize ?? 2;
    const clusterBy = params.clusterBy ?? "both";
    const doIntent = clusterBy === "intent" || clusterBy === "both";
    const doPrefix = clusterBy === "prefix" || clusterBy === "both";
    // Disabled modes become a typed NULL so the COALESCE below still works.
    const intentExpr = doIntent ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')` : `CAST(NULL AS VARCHAR)`;
    const prefixExpr = doPrefix ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
          THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
          ELSE CAST(NULL AS VARCHAR) END` : `CAST(NULL AS VARCHAR)`;
    return {
      sql: `
      WITH agg AS (
        SELECT
          query,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.ctr} AS ctr,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY query
        HAVING SUM(impressions) >= ?
      ),
      classified AS (
        SELECT
          query, clicks, impressions, ctr, position,
          ${intentExpr} AS intent_prefix,
          ${prefixExpr} AS word_prefix
        FROM agg
      ),
      keyed AS (
        SELECT
          query, clicks, impressions, ctr, position,
          COALESCE(intent_prefix, word_prefix) AS cluster_name,
          CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
        FROM classified
        WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
      )
      SELECT
        cluster_name AS clusterName,
        any_value(cluster_type) AS clusterType,
        CAST(COUNT(*) AS DOUBLE) AS keywordCount,
        ${METRIC_EXPR.clicks} AS totalClicks,
        ${METRIC_EXPR.impressions} AS totalImpressions,
        AVG(position) AS avgPosition,
        to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
      FROM keyed
      GROUP BY cluster_name
      HAVING COUNT(*) >= ?
      ORDER BY totalClicks DESC
    `,
      params: [
        startDate,
        endDate,
        minImpressions,
        minClusterSize
      ],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalises SQL rows into cluster objects.
   *
   * @param {unknown} rows - SQL result rows (non-arrays are treated as empty).
   * @returns {{results: object[], meta: {total: number, totalClusters: number}}}
   */
  reduceSql(rows) {
    const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
      clusterName: str$18(r.clusterName),
      clusterType: str$18(r.clusterType),
      keywordCount: num$4(r.keywordCount),
      totalClicks: num$4(r.totalClicks),
      totalImpressions: num$4(r.totalImpressions),
      avgPosition: num$4(r.avgPosition),
      keywords: parseJsonList$13(r.keywords).map((k) => ({
        query: str$18(k.query),
        clicks: num$4(k.clicks),
        impressions: num$4(k.impressions),
        ctr: num$4(k.ctr),
        position: num$4(k.position)
      }))
    }));
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalClusters: clusters.length
      }
    };
  },
  /**
   * Describes the row query used by the in-memory path.
   *
   * @param {object} params - Reads the period and `limit`.
   * @returns {{keywords: unknown}} Keyword query state.
   */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Clusters keyword rows in memory.
   *
   * @param {unknown} rows - Keyword rows (non-arrays are treated as empty).
   * @param {object} params - Reads `clusterBy`, `minClusterSize`, `minImpressions`.
   * @returns {{results: object[], meta: {total: number, totalClusters: number}}}
   *   Same meta shape as `reduceSql`, so callers can read `meta.total` on
   *   either path.
   */
  reduceRows(rows, params) {
    const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
      clusterBy: params.clusterBy,
      minClusterSize: params.minClusterSize,
      minImpressions: params.minImpressions
    });
    return {
      results: result.clusters,
      meta: {
        total: result.clusters.length,
        totalClusters: result.clusters.length
      }
    };
  }
});
1381
/**
 * String coercion that maps null/undefined to "".
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`.
 */
function str$17(v) {
  if (v == null) {
    return "";
  }
  return String(v);
}
1384
/**
 * Accepts an array or a JSON string and always yields an array.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {unknown[]} `v` itself, the decoded array, or [] for everything else.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$12(v) {
  if (Array.isArray(v)) return v;
  let list = [];
  if (typeof v === "string" && v !== "") {
    const decoded = JSON.parse(v);
    if (Array.isArray(decoded)) list = decoded;
  }
  return list;
}
1392
/**
 * Computes the Gini coefficient of a list of values using the rank-weighted
 * sum over the ascending-sorted values: sum((2i - n - 1) * x_i) / (n * sum(x)).
 *
 * @param {number[]} values - Values to measure; not mutated.
 * @returns {number} The coefficient, or 0 for an empty list or a zero total.
 */
function calculateGini(values) {
  const count = values.length;
  if (count === 0) return 0;
  const ascending = values.slice().sort((x, y) => x - y);
  let total = 0;
  for (const value of ascending) total += value;
  if (total === 0) return 0;
  let weighted = 0;
  ascending.forEach((value, index) => {
    // 1-based rank of the value in ascending order.
    const rank = index + 1;
    weighted += (2 * rank - count - 1) * value;
  });
  return weighted / (count * total);
}
1402
/**
 * Sums the squares of the shares expressed as percentages
 * (Herfindahl-Hirschman index on a 0-10000 scale).
 *
 * @param {number[]} shares - Fractions (e.g. 0.25 for 25%).
 * @returns {number} Sum of (share * 100)^2, 0 for an empty list.
 */
function calculateHHI(shares) {
  let index = 0;
  for (const share of shares) {
    index += (share * 100) ** 2;
  }
  return index;
}
1405
/**
 * Measures how concentrated clicks are across a set of items.
 *
 * @param {{key: unknown, clicks: number}[]} items - Items to analyse; not mutated.
 * @param {object} [options]
 * @param {number} [options.topN=10] - How many top items to report.
 * @returns {object} Gini coefficient, HHI, share held by the top N items, the
 *   top N items with their shares, item/click totals, and a risk level
 *   ("high" when HHI > 2500, "medium" when HHI > 1500, otherwise "low").
 */
function analyzeConcentration(items, options = {}) {
  const limit = options.topN === undefined ? 10 : options.topN;
  if (items.length === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low"
    };
  }
  const ranked = items.slice().sort((x, y) => y.clicks - x.clicks);
  let totalClicks = 0;
  for (const entry of ranked) totalClicks += entry.clicks;
  const hasClicks = totalClicks > 0;
  const giniCoefficient = calculateGini(ranked.map((entry) => entry.clicks));
  // Shares are undefined without clicks, so HHI is computed over an empty list.
  const hhi = calculateHHI(hasClicks ? ranked.map((entry) => entry.clicks / totalClicks) : []);
  const topNItems = [];
  let topNClicks = 0;
  for (const entry of ranked.slice(0, limit)) {
    topNItems.push({
      key: entry.key,
      clicks: entry.clicks,
      share: hasClicks ? entry.clicks / totalClicks : 0
    });
    topNClicks += entry.clicks;
  }
  const topNConcentration = hasClicks ? topNClicks / totalClicks : 0;
  const riskLevel = hhi > 2500 ? "high" : hhi > 1500 ? "medium" : "low";
  return {
    giniCoefficient,
    hhi,
    topNConcentration,
    topNItems,
    totalItems: items.length,
    totalClicks,
    riskLevel
  };
}
1442
/**
 * Runs the concentration analysis over page rows, keyed by `page`.
 *
 * @param {object[]} pages - Rows with `page` and `clicks`.
 * @param {object} [options] - Passed through to `analyzeConcentration`.
 * @returns {object} Result of `analyzeConcentration`.
 */
function analyzePageConcentration(pages, options) {
  const items = pages.map((row) => {
    return { key: row.page, clicks: num$4(row.clicks) };
  });
  return analyzeConcentration(items, options);
}
1448
/**
 * Runs the concentration analysis over keyword rows, keyed by `query`.
 *
 * @param {object[]} keywords - Rows with `query` and `clicks`.
 * @param {object} [options] - Passed through to `analyzeConcentration`.
 * @returns {object} Result of `analyzeConcentration`.
 */
function analyzeKeywordConcentration(keywords, options) {
  const items = [];
  for (const row of keywords) {
    items.push({ key: row.query, clicks: num$4(row.clicks) });
  }
  return analyzeConcentration(items, options);
}
1454
/**
 * "concentration" analyzer: Gini coefficient, HHI and top-N click share for
 * pages or keywords, computed either in SQL or in memory. Both paths return a
 * single result object and the same meta shape (`total`, `dimension`).
 */
const concentrationAnalyzer = defineAnalyzer({
  id: "concentration",
  /**
   * Builds the SQL spec.
   *
   * @param {object} params - Reads the period (via `periodOf`), `dimension`
   *   ("keywords" selects the keywords table; anything else means pages) and
   *   `topN` (default 10).
   * @returns {{sql: string, params: unknown[], current: object}} Query spec.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const dim = params.dimension || "pages";
    const topN = params.topN ?? 10;
    // Table and key column are chosen from fixed literals, never from input.
    const table = dim === "keywords" ? "keywords" : "pages";
    const keyCol = dim === "keywords" ? "query" : "url";
    return {
      sql: `
      WITH items AS (
        SELECT
          ${keyCol} AS key,
          ${METRIC_EXPR.clicks} AS clicks
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY ${keyCol}
        HAVING SUM(clicks) > 0
      ),
      totals AS (
        SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
      ),
      ranked AS (
        SELECT
          i.key, i.clicks,
          i.clicks / NULLIF(t.total_clicks, 0) AS share,
          ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
          ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
          t.total_clicks AS tclicks,
          t.total_items AS titems
        FROM items i, totals t
      ),
      gini_num AS (
        SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
      ),
      hhi_calc AS (
        SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
      ),
      top_list AS (
        SELECT
          list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
          SUM(clicks) AS top_clicks
        FROM ranked WHERE rnk_desc <= ?
      )
      SELECT
        COALESCE(
          (SELECT weighted_sum FROM gini_num)
            / NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
          0.0
        ) AS giniCoefficient,
        COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
        COALESCE(
          CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
            / NULLIF((SELECT total_clicks FROM totals), 0),
          0.0
        ) AS topNConcentration,
        COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
        COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
        COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
        CASE
          WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
          WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
          ELSE 'low'
        END AS riskLevel
    `,
      params: [
        startDate,
        endDate,
        topN
      ],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalises the single SQL result row.
   *
   * @param {unknown} rows - SQL result rows; only the first row is used.
   * @param {object} params - Reads `dimension`.
   * @returns {{results: object[], meta: {total: number, dimension: string}}}
   */
  reduceSql(rows, params) {
    const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
    const topRaw = parseJsonList$12(r.topNItems);
    return {
      results: [{
        giniCoefficient: num$4(r.giniCoefficient),
        hhi: num$4(r.hhi),
        topNConcentration: num$4(r.topNConcentration),
        topNItems: topRaw.map((t) => ({
          key: str$17(t.key),
          clicks: num$4(t.clicks),
          share: num$4(t.share)
        })),
        totalItems: num$4(r.totalItems),
        totalClicks: num$4(r.totalClicks),
        riskLevel: str$17(r.riskLevel)
      }],
      meta: {
        total: 1,
        dimension: params.dimension || "pages"
      }
    };
  },
  /**
   * Describes the row query for the in-memory path.
   *
   * @param {object} params - Reads the period, `dimension` and `limit`.
   * @returns {object} `{ pages }` when the dimension is "pages", otherwise `{ keywords }`.
   */
  buildRows(params) {
    const dim = params.dimension || "pages";
    const period = periodOf(params);
    const out = {};
    if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
    else out.keywords = keywordsQueryState(period, params.limit);
    return out;
  },
  /**
   * Computes concentration metrics in memory.
   *
   * @param {unknown} rows - Either the row array itself or an object holding
   *   it under the dimension name; a missing value is treated as no rows.
   * @param {object} params - Reads `dimension` and `topN`.
   * @returns {{results: object[], meta: {total: number, dimension: string}}}
   *   Same meta shape as `reduceSql`.
   */
  reduceRows(rows, params) {
    const dim = params.dimension || "pages";
    // Optional chaining keeps a null/undefined `rows` from throwing.
    const arr = Array.isArray(rows) ? rows : rows?.[dim] ?? [];
    const options = { topN: params.topN };
    const result = dim === "pages" ? analyzePageConcentration(arr, options) : analyzeKeywordConcentration(arr, options);
    return {
      results: [result],
      meta: {
        total: 1,
        dimension: dim
      }
    };
  }
});
1570
/**
 * Converts a raw cell value to a number.
 *
 * null/undefined give 0, numbers pass through unchanged, bigints are converted
 * with `Number`, and any other value is converted with `Number` and replaced
 * by 0 if the result is not finite.
 *
 * @param {unknown} v - Raw value.
 * @returns {number} Numeric value.
 */
function num$2(v) {
  if (v === null || v === undefined) return 0;
  const kind = typeof v;
  if (kind === "number") return v;
  const converted = Number(v);
  if (kind === "bigint") return converted;
  return Number.isFinite(converted) ? converted : 0;
}
1577
/**
 * Returns "" for null/undefined and `String(v)` for everything else.
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`.
 */
function str$16(v) {
  const isMissing = v === null || v === undefined;
  return isMissing ? "" : String(v);
}
1580
/**
 * Resolves the look-back window shared by both halves of the
 * "content-velocity" analyzer.
 *
 * `params.days` defaults to 90 and is clamped to [7, 365]. A value that does
 * not convert to a number also falls back to 90, because a NaN day count
 * would otherwise produce an Invalid Date as the window start.
 *
 * @param {object} params - Reads `days` and the period end (via `periodOf`).
 * @returns {{days: number, startDate: string, endDate: string}} Window bounds.
 */
function contentVelocityWindow(params) {
  const requested = Number(params.days ?? 90);
  const days = Math.min(Math.max(Number.isNaN(requested) ? 90 : requested, 7), 365);
  const { endDate } = periodOf(params);
  const start = new Date(endDate);
  start.setUTCDate(start.getUTCDate() - days);
  return { days, startDate: toIsoDate(start), endDate };
}
/**
 * "content-velocity" analyzer: per ISO week, how many distinct keywords had
 * impressions and how many of them were first seen (within the window) in
 * that week.
 */
const contentVelocityAnalyzer = defineAnalyzer({
  id: "content-velocity",
  /**
   * Builds the SQL spec against the "keywords" table.
   *
   * @param {object} params - Reads `days` and the period end.
   * @returns {{sql: string, params: unknown[], current: object}} Query spec.
   */
  buildSql(params) {
    const { startDate, endDate } = contentVelocityWindow(params);
    return {
      sql: `
      WITH src AS (
        SELECT query, date
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ? AND impressions > 0
      ),
      first_seen AS (
        SELECT query, MIN(date) AS first_date FROM src GROUP BY query
      ),
      per_week AS (
        SELECT
          strftime(date, '%G-W%V') AS week,
          MIN(date) AS week_start,
          CAST(COUNT(DISTINCT query) AS DOUBLE) AS totalKeywords
        FROM src
        GROUP BY week
      ),
      new_per_week AS (
        SELECT
          strftime(first_date, '%G-W%V') AS week,
          CAST(COUNT(*) AS DOUBLE) AS newKeywords
        FROM first_seen
        GROUP BY week
      )
      SELECT
        pw.week AS week,
        COALESCE(npw.newKeywords, 0) AS newKeywords,
        pw.totalKeywords AS totalKeywords
      FROM per_week pw
      LEFT JOIN new_per_week npw ON pw.week = npw.week
      ORDER BY pw.week ASC
    `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalises the weekly rows and derives a trend label.
   *
   * The trend compares the average number of new keywords in the second half
   * of the weeks with the first half; the difference must exceed
   * max(1, 15% of the overall weekly average) to count as a change.
   *
   * @param {unknown} rows - SQL result rows (non-arrays are treated as empty).
   * @param {object} params - Reads `days` and the period end.
   * @returns {{results: object[], meta: object}} Weekly rows plus summary.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { days, startDate, endDate } = contentVelocityWindow(params);
    const weekly = arr.map((r) => ({
      week: str$16(r.week),
      newKeywords: num$2(r.newKeywords),
      totalKeywords: num$2(r.totalKeywords)
    }));
    const sumNew = (list) => list.reduce((s, w) => s + w.newKeywords, 0);
    const total = sumNew(weekly);
    const avg = weekly.length > 0 ? total / weekly.length : 0;
    const mid = Math.floor(weekly.length / 2);
    const firstAvg = mid > 0 ? sumNew(weekly.slice(0, mid)) / mid : 0;
    const secondCount = weekly.length - mid;
    const secondAvg = secondCount > 0 ? sumNew(weekly.slice(mid)) / secondCount : 0;
    const diff = secondAvg - firstAvg;
    const threshold = Math.max(1, avg * .15);
    return {
      results: weekly,
      meta: {
        summary: {
          totalNewKeywords: total,
          avgPerWeek: avg,
          trend: diff > threshold ? "accelerating" : diff < -threshold ? "decelerating" : "stable"
        },
        days,
        startDate,
        endDate
      }
    };
  }
});
1661
/**
 * Numeric coercion for SQL cell values.
 *
 * Numbers are returned as-is, bigints are converted with `Number`, nullish
 * values become 0, and other values are converted with `Number`, falling back
 * to 0 when the result is not finite.
 *
 * @param {unknown} v - Raw value.
 * @returns {number} Numeric value.
 */
function num$1(v) {
  if (typeof v === "number") return v;
  if (typeof v === "bigint") return Number(v);
  const parsed = v == null ? 0 : Number(v);
  return Number.isFinite(parsed) ? parsed : 0;
}
1668
/**
 * Converts a value to a string, treating null/undefined as "".
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`.
 */
function str$15(v) {
  return String(v ?? "");
}
1671
/**
 * Interprets a loosely-typed flag: only `true`, `1` and "true" count as true.
 *
 * @param {unknown} v - Raw value.
 * @returns {boolean} Whether `v` is one of the accepted truthy encodings.
 */
function bool$2(v) {
  if (v === true) return true;
  if (v === 1) return true;
  return v === "true";
}
1674
/**
 * Coerces an array-or-JSON-string value to an array.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {unknown[]} `v` when it is an array, the parsed array when `v` is a
 *   non-empty JSON string encoding one, otherwise [].
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$11(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string") return [];
  if (v.length === 0) return [];
  const parsed = JSON.parse(v);
  return Array.isArray(parsed) ? parsed : [];
}
1682
/**
 * "ctr-anomaly" analyzer: flags (query, page) pairs whose daily CTR deviates
 * from its trailing 28-row rolling mean by at least `threshold` standard
 * deviations while the position stays within 1.5 of its rolling mean, and
 * keeps pairs with at least 2 downward breach days.
 */
const ctrAnomalyAnalyzer = defineAnalyzer({
  id: "ctr-anomaly",
  /**
   * Builds the SQL spec against the "page_keywords" table.
   *
   * @param {object} params - Reads `startDate`, `endDate`, `minImpressions`
   *   (per-day floor, default 5), `threshold` (z-score, default 2) and
   *   `limit` (default 200).
   * @returns {{sql: string, params: unknown[], current: object}} Query spec.
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    const minDailyImpressions = params.minImpressions ?? 5;
    const minRollingN = 14;
    // Caller-supplied, so it is coerced to a number before being interpolated
    // into the SQL text; a non-numeric value becomes NaN instead of raw SQL.
    const zThreshold = Number(params.threshold ?? 2);
    const maxPositionDelta = 1.5;
    const minBreachDays = 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
      WITH daily AS (
        SELECT
          query,
          url AS page,
          date,
          ${METRIC_EXPR.clicks} AS day_clicks,
          ${METRIC_EXPR.impressions} AS day_impressions,
          ${METRIC_EXPR.ctr} AS day_ctr,
          ${METRIC_EXPR.position} AS day_position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
        HAVING SUM(impressions) >= ?
      ),
      rolled AS (
        SELECT *,
          AVG(day_ctr) OVER w AS rolling_ctr,
          STDDEV_POP(day_ctr) OVER w AS rolling_stddev,
          AVG(day_position) OVER w AS rolling_position,
          COUNT(*) OVER w AS rolling_n
        FROM daily
        WINDOW w AS (
          PARTITION BY query, page
          ORDER BY date
          ROWS BETWEEN 28 PRECEDING AND 1 PRECEDING
        )
      ),
      flagged AS (
        SELECT *,
          CASE
            WHEN rolling_n >= ${Number(minRollingN)} AND rolling_stddev > 0
            THEN (day_ctr - rolling_ctr) / rolling_stddev
            ELSE 0.0
          END AS z_score,
          CASE
            WHEN rolling_position IS NULL THEN 0.0
            ELSE ABS(day_position - rolling_position)
          END AS position_delta
        FROM rolled
      ),
      breaches AS (
        SELECT *,
          CASE
            WHEN ABS(z_score) >= ${Number(zThreshold)}
              AND position_delta <= ${Number(maxPositionDelta)}
              AND rolling_n >= ${Number(minRollingN)}
            THEN true ELSE false
          END AS is_breach
        FROM flagged
      ),
      per_entity AS (
        SELECT
          query, page,
          COUNT(*) FILTER (WHERE is_breach AND z_score < 0) AS breach_days_down,
          COUNT(*) FILTER (WHERE is_breach AND z_score > 0) AS breach_days_up,
          SUM(CASE
            WHEN is_breach AND z_score < 0
            THEN (rolling_ctr - day_ctr) * day_impressions
            ELSE 0.0
          END) AS clicks_lost,
          SUM(CASE
            WHEN is_breach AND z_score < 0
            THEN ABS(z_score) * day_impressions
            ELSE 0.0
          END) AS severity_raw,
          MAX(CASE WHEN is_breach THEN ABS(z_score) ELSE 0.0 END) AS max_z,
          AVG(rolling_ctr) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_ctr,
          AVG(rolling_position) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_position,
          SUM(day_impressions) AS total_impressions,
          SUM(day_clicks) AS total_clicks
        FROM breaches
        GROUP BY query, page
        HAVING COUNT(*) FILTER (WHERE is_breach AND z_score < 0) >= ${Number(minBreachDays)}
      ),
      series AS (
        SELECT query, page,
          to_json(list({
            'date': strftime(date, '%Y-%m-%d'),
            'ctr': day_ctr,
            'position': day_position,
            'impressions': day_impressions,
            'rollingCtr': rolling_ctr,
            'rollingStddev': rolling_stddev,
            'z': z_score,
            'breach': is_breach AND z_score < 0
          } ORDER BY date)) AS seriesJson
        FROM breaches
        GROUP BY query, page
      )
      SELECT
        e.query AS keyword,
        e.page,
        CAST(e.breach_days_down AS DOUBLE) AS breachDaysDown,
        CAST(e.breach_days_up AS DOUBLE) AS breachDaysUp,
        CAST(ROUND(e.clicks_lost) AS DOUBLE) AS clicksLost,
        e.severity_raw AS severityRaw,
        e.max_z AS maxZ,
        e.baseline_ctr AS baselineCtr,
        e.baseline_position AS baselinePosition,
        e.total_impressions AS totalImpressions,
        e.total_clicks AS totalClicks,
        s.seriesJson
      FROM per_entity e
      LEFT JOIN series s USING (query, page)
      ORDER BY clicksLost DESC
      LIMIT ${Number(limit)}
    `,
      params: [
        startDate,
        endDate,
        minDailyImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalises SQL rows into anomaly objects and totals the losses.
   *
   * @param {unknown} rows - SQL result rows (non-arrays are treated as empty).
   * @param {object} params - Reads `threshold` (echoed back in meta).
   * @returns {{results: object[], meta: object}} Anomalies plus
   *   `totalClicksLost`, `totalBreachDays`, `zThreshold` and `minRollingN`.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minRollingN = 14;
    const zThreshold = params.threshold ?? 2;
    const anomalies = arr.map((r) => ({
      keyword: str$15(r.keyword),
      page: str$15(r.page),
      breachDaysDown: num$1(r.breachDaysDown),
      breachDaysUp: num$1(r.breachDaysUp),
      clicksLost: num$1(r.clicksLost),
      severity: num$1(r.severityRaw),
      maxZ: num$1(r.maxZ),
      baselineCtr: num$1(r.baselineCtr),
      baselinePosition: num$1(r.baselinePosition),
      totalImpressions: num$1(r.totalImpressions),
      totalClicks: num$1(r.totalClicks),
      series: parseJsonList$11(r.seriesJson).map((s) => ({
        date: str$15(s.date),
        ctr: num$1(s.ctr),
        position: num$1(s.position),
        impressions: num$1(s.impressions),
        // Rolling stats are absent for the first row of a series; keep null.
        rollingCtr: s.rollingCtr == null ? null : num$1(s.rollingCtr),
        rollingStddev: s.rollingStddev == null ? null : num$1(s.rollingStddev),
        z: num$1(s.z),
        breach: bool$2(s.breach)
      }))
    }));
    const totalClicksLost = anomalies.reduce((s, a) => s + a.clicksLost, 0);
    const totalBreachDays = anomalies.reduce((s, a) => s + a.breachDaysDown, 0);
    return {
      results: anomalies,
      meta: {
        total: anomalies.length,
        totalClicksLost,
        totalBreachDays,
        zThreshold,
        minRollingN
      }
    };
  }
});
1856
/**
 * Coerces a raw value to a number.
 *
 * Numbers are returned unchanged, bigints are converted with `Number`,
 * null/undefined become 0, and every other value is converted with `Number`
 * and replaced by 0 when the result is not finite.
 *
 * @param {unknown} v - Raw value.
 * @returns {number} Numeric value.
 */
function num(v) {
  const type = typeof v;
  if (type === "number") return v;
  if (type === "bigint") return Number(v);
  if (v === undefined || v === null) return 0;
  const candidate = Number(v);
  if (!Number.isFinite(candidate)) return 0;
  return candidate;
}
1863
/**
 * Stringifies a value, with null/undefined mapped to the empty string.
 *
 * @param {unknown} v - Raw value.
 * @returns {string} String form of `v`.
 */
function str$14(v) {
  if (v !== null && v !== undefined) return String(v);
  return "";
}
1866
/**
 * Returns a list from a value that may be an array or a JSON-encoded array.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {unknown[]} The array, the decoded array, or [] in all other cases.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$10(v) {
  if (Array.isArray(v)) return v;
  const parseable = typeof v === "string" && v.length > 0;
  const decoded = parseable ? JSON.parse(v) : null;
  return Array.isArray(decoded) ? decoded : [];
}
1874
/**
 * "ctr-curve" analyzer.
 *
 * The SQL reads the `keywords` table and returns a single row with two JSON
 * columns:
 *   - `curve_json`: one entry per position bucket ('1', '2', '3', '4-5',
 *     '6-10', '11-20', '20+'), bucketed on `sum_position / impressions + 1`.
 *   - `outliers_json`: the 50 queries (with >= 20 summed impressions) whose
 *     CTR differs most, in absolute terms, from the average CTR of their
 *     band (top3 / page1 / page2 / deep).
 *
 * NOTE(review): the `medianPosition` column is computed with AVG(avg_pos),
 * i.e. it is a mean despite its name.
 */
const ctrCurveAnalyzer = defineAnalyzer({
  id: "ctr-curve",
  /** Build the SQL spec; the date window comes from `periodOf(params)`. */
  buildSql(params) {
    const period = periodOf(params);
    const sql = `
      WITH src AS (
        SELECT
          query,
          clicks,
          impressions,
          sum_position,
          (sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ? AND impressions > 0
      ),
      curve AS (
        SELECT
          CASE
            WHEN avg_pos <= 1.5 THEN '1'
            WHEN avg_pos <= 2.5 THEN '2'
            WHEN avg_pos <= 3.5 THEN '3'
            WHEN avg_pos <= 5.5 THEN '4-5'
            WHEN avg_pos <= 10.5 THEN '6-10'
            WHEN avg_pos <= 20.5 THEN '11-20'
            ELSE '20+'
          END AS bucket,
          AVG(CAST(clicks AS DOUBLE) / NULLIF(impressions, 0)) AS avgCtr,
          AVG(avg_pos) AS medianPosition,
          CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
          ${METRIC_EXPR.clicks} AS totalClicks,
          ${METRIC_EXPR.impressions} AS totalImpressions
        FROM src
        GROUP BY bucket
      ),
      ks AS (
        SELECT
          query,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.ctr} AS ctr,
          ${METRIC_EXPR.position} AS position,
          CASE
            WHEN ${METRIC_EXPR.position} <= 3.5 THEN 'top3'
            WHEN ${METRIC_EXPR.position} <= 10.5 THEN 'page1'
            WHEN ${METRIC_EXPR.position} <= 20.5 THEN 'page2'
            ELSE 'deep'
          END AS band
        FROM src
        GROUP BY query
        HAVING SUM(impressions) >= 20
      ),
      band_avg AS (
        SELECT band, AVG(ctr) AS band_avg_ctr FROM ks GROUP BY band
      ),
      outliers AS (
        SELECT
          ks.query, ks.clicks, ks.impressions, ks.ctr, ks.position,
          ba.band_avg_ctr AS expectedCtr,
          ks.ctr - ba.band_avg_ctr AS ctrDiff
        FROM ks JOIN band_avg ba ON ks.band = ba.band
        ORDER BY ABS(ks.ctr - ba.band_avg_ctr) DESC
        LIMIT 50
      )
      SELECT
        (SELECT to_json(list({
          'bucket': bucket,
          'avgCtr': avgCtr,
          'medianPosition': medianPosition,
          'keywordCount': keywordCount,
          'totalClicks': totalClicks,
          'totalImpressions': totalImpressions
        })) FROM curve) AS curve_json,
        (SELECT to_json(list({
          'query': query,
          'clicks': clicks,
          'impressions': impressions,
          'ctr': ctr,
          'position': position,
          'expectedCtr': expectedCtr,
          'ctrDiff': ctrDiff
        })) FROM outliers) AS outliers_json
    `;
    return {
      sql,
      params: [period.startDate, period.endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(period.startDate, period.endDate)
      }
    };
  },
  /**
   * Decode the single result row: the curve becomes `results`, the outliers
   * are split by the sign of `ctrDiff` (at most 25 each) into `meta`.
   */
  reduceSql(rows, params) {
    const list = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const only = list[0] ?? {};

    const toCurvePoint = (entry) => ({
      bucket: str$14(entry.bucket),
      avgCtr: num(entry.avgCtr),
      medianPosition: num(entry.medianPosition),
      keywordCount: num(entry.keywordCount),
      totalClicks: num(entry.totalClicks),
      totalImpressions: num(entry.totalImpressions)
    });
    const toOutlier = (entry) => ({
      query: str$14(entry.query),
      clicks: num(entry.clicks),
      impressions: num(entry.impressions),
      ctr: num(entry.ctr),
      position: num(entry.position),
      expectedCtr: num(entry.expectedCtr),
      ctrDiff: num(entry.ctrDiff)
    });

    const curve = parseJsonList$10(only.curve_json).map(toCurvePoint);
    const outliers = parseJsonList$10(only.outliers_json).map(toOutlier);
    const above = outliers.filter((item) => item.ctrDiff > 0);
    const below = outliers.filter((item) => item.ctrDiff < 0);

    return {
      results: curve,
      meta: {
        overperforming: above.slice(0, 25),
        underperforming: below.slice(0, 25),
        startDate,
        endDate
      }
    };
  }
});
1996
/**
 * Convert a value to a string; nullish values become "".
 *
 * @param {unknown} v - raw value.
 * @returns {string} string form of `v`.
 */
function str$13(v) {
  const isMissing = v === null || v === void 0;
  return isMissing ? "" : String(v);
}
1999
/**
 * Return `v` as an array: arrays pass through, non-empty strings are
 * JSON-decoded (throwing on invalid JSON) and kept only if they decode to
 * an array, everything else yields `[]`.
 *
 * @param {unknown} v - array, JSON text, or other.
 * @returns {Array} resulting list.
 */
function parseJsonList$9(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const value = JSON.parse(v);
  if (Array.isArray(value)) return value;
  return [];
}
2007
/**
 * "dark-traffic" analyzer.
 *
 * Compares page-level click totals (`pages` table, `{{FILES}}`) with
 * keyword-level totals (`{{FILES_KEYWORDS}}`) and per-page attributed clicks
 * (`{{FILES_PAGE_KEYWORDS}}`). The SQL returns one row with three JSON
 * columns: overall page totals, overall keyword totals, and the top 50 pages
 * by clicks that are not attributed to any keyword row.
 */
const darkTrafficAnalyzer = defineAnalyzer({
  id: "dark-traffic",
  /** Build the SQL spec; all four date filters use the same window. */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const window = [startDate, endDate];
    const sql = `
      WITH page_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      kw_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      per_page AS (
        SELECT url, SUM(clicks) AS page_clicks
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
        HAVING SUM(clicks) > 0
      ),
      per_page_kw AS (
        SELECT url, SUM(clicks) AS attributed_clicks, COUNT(DISTINCT query) AS kw_count
        FROM read_parquet({{FILES_PAGE_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
      ),
      page_rows AS (
        SELECT
          p.url AS url,
          CAST(p.page_clicks AS DOUBLE) AS totalClicks,
          CAST(COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS attributedClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS darkClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE)
            / NULLIF(p.page_clicks, 0) AS darkPercent,
          CAST(COALESCE(k.kw_count, 0) AS DOUBLE) AS keywordCount
        FROM per_page p
        LEFT JOIN per_page_kw k ON p.url = k.url
        WHERE p.page_clicks - COALESCE(k.attributed_clicks, 0) > 0
        ORDER BY darkClicks DESC
        LIMIT 50
      )
      SELECT
        (SELECT to_json({
          'totalClicks': CAST(total_clicks AS DOUBLE),
          'totalImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM page_totals) AS page_totals_json,
        (SELECT to_json({
          'attributedClicks': CAST(total_clicks AS DOUBLE),
          'attributedImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM kw_totals) AS kw_totals_json,
        (SELECT to_json(list({
          'url': url,
          'totalClicks': totalClicks,
          'attributedClicks': attributedClicks,
          'darkClicks': darkClicks,
          'darkPercent': darkPercent,
          'keywordCount': keywordCount
        })) FROM page_rows) AS pages_json
    `;
    return {
      sql,
      // One (start, end) pair per date-filtered CTE, in SQL order.
      params: [...window, ...window, ...window, ...window],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      },
      extraFiles: {
        KEYWORDS: {
          table: "keywords",
          partitions: enumeratePartitions(startDate, endDate)
        },
        PAGE_KEYWORDS: {
          table: "page_keywords",
          partitions: enumeratePartitions(startDate, endDate)
        }
      }
    };
  },
  /**
   * Decode the single result row into per-page results plus a summary.
   * The summary's dark clicks are clamped at 0 and its dark percentage is 0
   * when there are no page clicks.
   */
  reduceSql(rows, params) {
    const list = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const only = list[0] ?? {};
    // JSON columns may arrive as text or already decoded.
    const decode = (cell) => typeof cell === "string" ? JSON.parse(cell) : cell ?? {};
    const pageTotals = decode(only.page_totals_json);
    const kwTotals = decode(only.kw_totals_json);

    const totalClicks = num$4(pageTotals.totalClicks);
    const totalImpressions = num$4(pageTotals.totalImpressions);
    const attributedClicks = num$4(kwTotals.attributedClicks);
    const attributedImpressions = num$4(kwTotals.attributedImpressions);
    const darkClicks = Math.max(0, totalClicks - attributedClicks);
    let darkPercent = 0;
    if (totalClicks > 0) darkPercent = darkClicks / totalClicks;

    const pages = parseJsonList$9(only.pages_json).map((entry) => ({
      url: str$13(entry.url),
      totalClicks: num$4(entry.totalClicks),
      attributedClicks: num$4(entry.attributedClicks),
      darkClicks: num$4(entry.darkClicks),
      darkPercent: num$4(entry.darkPercent),
      keywordCount: num$4(entry.keywordCount)
    }));

    return {
      results: pages,
      meta: {
        summary: {
          totalClicks,
          attributedClicks,
          darkClicks,
          darkPercent,
          totalImpressions,
          attributedImpressions
        },
        startDate,
        endDate
      }
    };
  }
});
2131
/**
 * Assert that `input` looks like a BuilderState, i.e. a non-null object
 * carrying an array-valued `dimensions` property.
 *
 * @param {unknown} input - candidate state (the caller's `params.q`).
 * @param {string} tool - tool name used as the error-message prefix.
 * @returns {object} `input`, unchanged.
 * @throws {Error} when the shape check fails.
 */
function requireBuilderState(input, tool) {
  const looksLikeState = Boolean(input) && typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (looksLikeState) return input;
  throw new Error(`${tool}: params.q is required (BuilderState)`);
}
2135
/**
 * Like a required BuilderState check, but a nullish `input` is accepted and
 * reported as `null`.
 *
 * @param {unknown} input - candidate state, or null/undefined.
 * @param {string} tool - tool name used as the error-message prefix.
 * @param {string} key - parameter name mentioned in the error message.
 * @returns {object|null} `input` when valid, `null` when it was nullish.
 * @throws {Error} when `input` is present but not an object with an
 *   array-valued `dimensions` property.
 */
function optionalBuilderState(input, tool, key) {
  if (input === null || input === void 0) return null;
  const valid = typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (!valid) throw new Error(`${tool}: params.${key} must be a BuilderState`);
  return input;
}
2140
/** Column names that `coerceNumericCols` converts with `Number()`. */
const NUMERIC_METRIC_COLS = [
  // current-period metrics
  "clicks", "impressions", "ctr", "position",
  // previous-period metrics
  "prevClicks", "prevImpressions", "prevCtr", "prevPosition",
  // counts
  "variantCount", "totalCount"
];
2152
/**
 * Return a shallow copy of `row` in which every column listed in
 * `NUMERIC_METRIC_COLS` that is present and non-nullish has been passed
 * through `Number()`. The input row is not modified.
 *
 * @param {object} row - result row.
 * @returns {object} the coerced copy.
 */
function coerceNumericCols(row) {
  const copy = { ...row };
  NUMERIC_METRIC_COLS.forEach((name) => {
    if (!(name in copy)) return;
    const raw = copy[name];
    if (raw === null || raw === void 0) return;
    copy[name] = Number(raw);
  });
  return copy;
}
2157
/**
 * Turn raw data-query rows plus the resolved extra queries into the
 * `{ results, meta }` payload.
 *
 * - With `opts.hasPrev`, rows are only numerically coerced and the total
 *   count comes from `extras.count[0].total` (falling back to the row count).
 * - Otherwise the total count is read from the first row's `totalCount`
 *   column, and the `total*` / `sum_position` helper columns are dropped
 *   from every row before coercion.
 *
 * Totals come from `extras.totals[0]`; `extras.canonicalExtras`, when
 * present, is handed to `mergeExtras` together with the cleaned rows.
 *
 * @param {Array<object>} rows - main query rows.
 * @param {object} [extras] - resolved extra-query results keyed by name.
 * @param {{hasPrev: boolean}} opts - whether a comparison query was used.
 * @returns {{results: *, meta: {totalCount: number, totals: object}}}
 */
function shapeDataQuery(rows, extras, opts) {
  // Drops the per-row helper columns, then coerces what is left.
  const stripHelperCols = (raw) => {
    const { totalCount: _tc, totalClicks: _tclk, totalImpressions: _timp, totalCtr: _tctr, totalPosition: _tpos, sum_position: _sp, ...kept } = raw;
    return coerceNumericCols(kept);
  };

  let cleaned;
  let totalCount;
  if (!opts.hasPrev) {
    totalCount = Number(rows[0]?.totalCount ?? 0);
    cleaned = rows.map(stripHelperCols);
  } else {
    cleaned = rows.map(coerceNumericCols);
    const countRow = extras?.count?.[0];
    totalCount = Number(countRow?.total ?? cleaned.length);
  }

  const totalsRow = extras?.totals?.[0] ?? {};
  const totals = {
    clicks: Number(totalsRow.clicks ?? 0),
    impressions: Number(totalsRow.impressions ?? 0),
    ctr: Number(totalsRow.ctr ?? 0),
    position: Number(totalsRow.position ?? 0)
  };

  const extrasResults = extras?.canonicalExtras
    ? [{ key: "canonicalExtras", results: extras.canonicalExtras }]
    : [];

  return {
    results: mergeExtras(cleaned, extrasResults),
    meta: { totalCount, totals }
  };
}
2191
/**
 * Build the execution plan for the "data-query" tool.
 *
 * `params.q` must be a BuilderState without a `date` dimension; `params.qc`
 * may hold a comparison BuilderState. The plan always carries a "totals"
 * extra query plus whatever `buildExtrasQueries` returns; when a comparison
 * state is given, the main SQL comes from `resolveComparisonSQL` and a
 * "count" extra query is added, otherwise the main SQL comes from
 * `resolveToSQLOptimized`.
 *
 * @param {object} params - tool parameters (`q`, optional `qc`, optional
 *   `comparisonFilter`).
 * @param {object} options - resolver options; `options.adapter.inferTable`
 *   is used to pick the table key.
 * @returns {{tableKey: *, sql: string, params: Array, extraQueries: Array, shape: Function}}
 * @throws {Error} when `q`/`qc` are invalid or `q` includes the `date`
 *   dimension.
 */
function buildDataQueryPlan(params, options) {
  const state = requireBuilderState(params.q, "data-query");
  if (state.dimensions.includes("date")) {
    throw new Error("data-query: date dimension not supported; use data-detail");
  }
  const prev = optionalBuilderState(params.qc, "data-query", "qc");

  const totals = buildTotalsSql(state, options);
  const extras = buildExtrasQueries(state, options);
  const extraQueries = [{
    name: "totals",
    sql: totals.sql,
    params: totals.params
  }];
  extraQueries.push(...extras.map((extra) => ({
    name: extra.key,
    sql: extra.sql,
    params: extra.params
  })));

  const tableKey = options.adapter.inferTable(state.dimensions);
  const hasPrev = Boolean(prev);

  let main;
  if (hasPrev) {
    main = resolveComparisonSQL(state, prev, options, params.comparisonFilter);
    extraQueries.push({
      name: "count",
      sql: main.countSql,
      params: main.countParams
    });
  } else {
    main = resolveToSQLOptimized(state, options);
  }

  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev })
  };
}
2231
/**
 * Build the execution plan for the "data-detail" tool.
 *
 * `params.q` must be a BuilderState that includes the `date` dimension. The
 * plan runs the main SQL from `resolveToSQL`, a "totals" extra query, and,
 * when `params.qc` is supplied, a "prevTotals" extra query built from that
 * comparison state.
 *
 * The returned `shape` function coerces numeric columns, passes the rows
 * through `padTimeseries` when the state's filter yields both a start and an
 * end date, and reports totals (and previous totals, if resolved) in `meta`.
 *
 * @param {object} params - tool parameters (`q`, optional `qc`).
 * @param {object} options - resolver options; `options.adapter.inferTable`
 *   is used to pick the table key.
 * @returns {{tableKey: *, sql: string, params: Array, extraQueries: Array, shape: Function}}
 * @throws {Error} when `q`/`qc` are invalid or `q` lacks the `date` dimension.
 */
function buildDataDetailPlan(params, options) {
  const state = requireBuilderState(params.q, "data-detail");
  if (!state.dimensions.includes("date")) {
    throw new Error("data-detail: `date` dimension is required");
  }

  const main = resolveToSQL(state, options);
  const totals = buildTotalsSql(state, options);
  const prev = optionalBuilderState(params.qc, "data-detail", "qc");

  const extraQueries = [{
    name: "totals",
    sql: totals.sql,
    params: totals.params
  }];
  if (prev) {
    const previousTotals = buildTotalsSql(prev, options);
    extraQueries.push({
      name: "prevTotals",
      sql: previousTotals.sql,
      params: previousTotals.params
    });
  }

  const tableKey = options.adapter.inferTable(state.dimensions);
  const range = extractDateRange(state.filter);
  const rangeStart = range.startDate;
  const rangeEnd = range.endDate;

  // Converts a totals row into plain numbers, defaulting missing cells to 0.
  const toTotals = (row) => ({
    clicks: Number(row.clicks ?? 0),
    impressions: Number(row.impressions ?? 0),
    ctr: Number(row.ctr ?? 0),
    position: Number(row.position ?? 0)
  });

  const shape = (rows, _params, extras) => {
    const coerced = rows.map(coerceNumericCols);
    let daily = coerced;
    if (rangeStart && rangeEnd) {
      daily = padTimeseries(coerced, {
        startDate: rangeStart,
        endDate: rangeEnd
      });
    }
    const meta = { totals: toTotals(extras?.totals?.[0] ?? {}) };
    if (extras?.prevTotals) {
      meta.previousTotals = toTotals(extras.prevTotals[0] ?? {});
    }
    return { results: daily, meta };
  };

  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape
  };
}
2286
/**
 * "data-detail" analyzer: a thin wrapper around `buildDataDetailPlan` using
 * `pgResolverAdapter`. The SQL spec lists no partitions and sets
 * `requiresAttachedTables`.
 */
const dataDetailAnalyzer = defineAnalyzer({
  id: "data-detail",
  /** Translate the plan into the analyzer SQL spec. */
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  /** Rebuild the plan and run its `shape` over the rows and resolved extras. */
  reduceSql(rows, params, ctx) {
    const list = Array.isArray(rows) ? rows : [];
    const plan = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(list, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
2310
/**
 * "data-query" analyzer: a thin wrapper around `buildDataQueryPlan` using
 * `pgResolverAdapter`. The SQL spec lists no partitions and sets
 * `requiresAttachedTables`.
 */
const dataQueryAnalyzer = defineAnalyzer({
  id: "data-query",
  /** Translate the plan into the analyzer SQL spec. */
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  /** Rebuild the plan and run its `shape` over the rows and resolved extras. */
  reduceSql(rows, params, ctx) {
    const list = Array.isArray(rows) ? rows : [];
    const plan = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(list, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
2334
/**
 * Sorter for decay results, created by `createMetricSorter` with
 * "lostClicks" as its first argument and a per-metric direction map.
 * Called as `sortResults$1(results, sortBy)` by `analyzeDecay`.
 */
const sortResults$1 = createMetricSorter(
  "lostClicks",
  { lostClicks: "desc", declinePercent: "desc", currentClicks: "asc" }
);
2339
/**
 * String coercion that treats `null` and `undefined` as "".
 *
 * @param {unknown} v - raw value.
 * @returns {string} string form of `v`.
 */
function str$12(v) {
  if (v === void 0 || v === null) {
    return "";
  }
  return String(v);
}
2342
/**
 * Coerce a JSON-list cell into an array. Arrays are returned unchanged;
 * non-empty strings are parsed with `JSON.parse` (invalid JSON throws) and
 * kept only when the parsed value is an array; all other inputs give `[]`.
 *
 * @param {unknown} v - array, JSON text, or other.
 * @returns {Array} resulting list.
 */
function parseJsonList$8(v) {
  if (Array.isArray(v)) return v;
  if (typeof v === "string" && v.length > 0) {
    const candidate = JSON.parse(v);
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}
2350
/**
 * Row-based decay analysis: find pages whose clicks dropped between the
 * previous and the current period.
 *
 * Only previous-period pages with at least `minPreviousClicks` clicks are
 * considered. A page is reported when it lost clicks and the relative
 * decline is at least `threshold`. Pages absent from the current period are
 * treated as having 0 clicks and position 0.
 *
 * @param {{current: Array<object>, previous: Array<object>}} input - rows
 *   per period; each row is read for `page`, `clicks` and `position`.
 * @param {object} [options]
 * @param {number} [options.minPreviousClicks=50]
 * @param {number} [options.threshold=0.2] - minimum decline as a fraction.
 * @param {string} [options.sortBy="lostClicks"] - passed to the sorter.
 * @returns {Array<object>} sorted decay entries.
 */
function analyzeDecay(input, options = {}) {
  const {
    minPreviousClicks = 50,
    threshold = .2,
    sortBy = "lostClicks"
  } = options;

  const keyOf = (row) => row.page;
  const valueOf = (row) => ({
    clicks: num$4(row.clicks),
    position: num$4(row.position)
  });
  const currentMap = buildPeriodMap(input.current, (r) => keyOf(r), (r) => valueOf(r));
  const previousMap = buildPeriodMap(
    input.previous,
    (r) => keyOf(r),
    (r) => valueOf(r),
    (r) => num$4(r.clicks) >= minPreviousClicks
  );

  const decayed = [];
  for (const [page, before] of previousMap) {
    const after = currentMap.get(page) || { clicks: 0, position: 0 };
    const lostClicks = before.clicks - after.clicks;
    let declinePercent = 0;
    if (before.clicks > 0) declinePercent = lostClicks / before.clicks;
    const qualifies = declinePercent >= threshold && lostClicks > 0;
    if (!qualifies) continue;
    decayed.push({
      page,
      currentClicks: after.clicks,
      previousClicks: before.clicks,
      lostClicks,
      declinePercent,
      currentPosition: after.position,
      previousPosition: before.position,
      positionDrop: after.position - before.position
    });
  }
  return sortResults$1(decayed, sortBy);
}
2381
/**
 * "decay" analyzer: pages whose clicks declined from the previous period to
 * the current one.
 *
 * SQL path: aggregates both periods per URL from the `pages` table, keeps
 * previous-period pages with at least `minPreviousClicks` clicks, attaches a
 * weekly click/impression series covering both periods, and returns rows
 * whose decline is at least `threshold`, ordered by lost clicks.
 *
 * Rows path: fetches page rows for both periods and delegates to
 * `analyzeDecay`.
 */
const decayAnalyzer = defineAnalyzer({
  id: "decay",
  /** Build the SQL spec; defaults: minPreviousClicks 50, threshold 0.2, limit 2000. */
  buildSql(params) {
    const { current: currentPeriod, previous: previousPeriod } = comparisonOf(params);
    const minPreviousClicks = params.minPreviousClicks ?? 50;
    const threshold = params.threshold ?? .2;
    const limit = params.limit ?? 2e3;
    const sql = `
      WITH cur AS (
        SELECT
          url,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
      ),
      prev AS (
        SELECT
          url,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES_PREV}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
        HAVING SUM(clicks) >= ?
      ),
      weekly AS (
        SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions
        FROM (
          SELECT url, date, clicks, impressions
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          UNION ALL
          SELECT url, date, clicks, impressions
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
        )
        GROUP BY url, week
      ),
      series_by_url AS (
        SELECT url, to_json(list({
          'week': strftime(week, '%Y-%m-%d'),
          'clicks': clicks,
          'impressions': impressions
        } ORDER BY week)) AS seriesJson
        FROM weekly GROUP BY url
      ),
      joined AS (
        SELECT
          p.url AS page,
          COALESCE(c.clicks, 0.0) AS currentClicks,
          p.clicks AS previousClicks,
          (p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
          (p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
          COALESCE(c.position, 0.0) AS currentPosition,
          p.position AS previousPosition,
          (COALESCE(c.position, 0.0) - p.position) AS positionDrop,
          s.seriesJson
        FROM prev p
        LEFT JOIN cur c ON p.url = c.url
        LEFT JOIN series_by_url s ON p.url = s.url
      )
      SELECT *
      FROM joined
      WHERE declinePercent >= ? AND lostClicks > 0
      ORDER BY lostClicks DESC
      LIMIT ${Number(limit)}
    `;
    // Placeholder order: cur, prev (+ HAVING), weekly (cur then prev), final WHERE.
    const bound = [
      currentPeriod.startDate,
      currentPeriod.endDate,
      previousPeriod.startDate,
      previousPeriod.endDate,
      minPreviousClicks,
      currentPeriod.startDate,
      currentPeriod.endDate,
      previousPeriod.startDate,
      previousPeriod.endDate,
      threshold
    ];
    return {
      sql,
      params: bound,
      current: {
        table: "pages",
        partitions: enumeratePartitions(currentPeriod.startDate, currentPeriod.endDate)
      },
      previous: {
        table: "pages",
        partitions: enumeratePartitions(previousPeriod.startDate, previousPeriod.endDate)
      }
    };
  },
  /** Normalise SQL rows (and their weekly series JSON) into typed results. */
  reduceSql(rows) {
    const list = Array.isArray(rows) ? rows : [];
    const toSeriesPoint = (point) => ({
      week: str$12(point.week),
      clicks: num$4(point.clicks),
      impressions: num$4(point.impressions)
    });
    const results = list.map((row) => ({
      page: str$12(row.page),
      currentClicks: num$4(row.currentClicks),
      previousClicks: num$4(row.previousClicks),
      lostClicks: num$4(row.lostClicks),
      declinePercent: num$4(row.declinePercent),
      currentPosition: num$4(row.currentPosition),
      previousPosition: num$4(row.previousPosition),
      positionDrop: num$4(row.positionDrop),
      series: parseJsonList$8(row.seriesJson).map(toSeriesPoint)
    }));
    return {
      results,
      meta: { total: list.length }
    };
  },
  /** Request page rows for both periods, each limited by `params.limit`. */
  buildRows(params) {
    const periods = comparisonOf(params);
    return {
      current: pagesQueryState(periods.current, params.limit),
      previous: pagesQueryState(periods.previous, params.limit)
    };
  },
  /** Run `analyzeDecay` over the per-period row map. */
  reduceRows(rows, params) {
    const isRowMap = rows && !Array.isArray(rows);
    const byPeriod = isRowMap ? rows : { current: [], previous: [] };
    const results = analyzeDecay(
      {
        current: byPeriod.current ?? [],
        previous: byPeriod.previous ?? []
      },
      {
        minPreviousClicks: params.minPreviousClicks,
        threshold: params.threshold
      }
    );
    return {
      results,
      meta: { total: results.length }
    };
  }
});
2521
/**
 * Stringify `v`, returning "" for `null` / `undefined`.
 *
 * @param {unknown} v - raw value.
 * @returns {string} string form of `v`.
 */
function str$11(v) {
  return v === null || v === void 0 ? "" : String(v);
}
2524
/**
 * "device-gap" analyzer: per-day desktop vs. mobile metrics and the gaps
 * between them.
 *
 * The SQL aggregates the `devices` table by (date, device). The reducer
 * pairs DESKTOP and MOBILE rows per date (other devices are ignored, a
 * missing side counts as all zeros) and computes
 *   ctrGap = desktop.ctr - mobile.ctr
 *   positionGap = mobile.position - desktop.position
 * plus averages and a trend that compares the mean gap of the first seven
 * days with that of the last seven days.
 */
const deviceGapAnalyzer = defineAnalyzer({
  id: "device-gap",
  /** Build the SQL spec for the window returned by `periodOf(params)`. */
  buildSql(params) {
    const period = periodOf(params);
    const sql = `
      SELECT
        date,
        device,
        ${METRIC_EXPR.clicks} AS clicks,
        ${METRIC_EXPR.impressions} AS impressions,
        ${METRIC_EXPR.ctr} AS ctr,
        ${METRIC_EXPR.position} AS position
      FROM read_parquet({{FILES}}, union_by_name = true)
      WHERE date >= ? AND date <= ?
      GROUP BY date, device
      ORDER BY date ASC
    `;
    return {
      sql,
      params: [period.startDate, period.endDate],
      current: {
        table: "devices",
        partitions: enumeratePartitions(period.startDate, period.endDate)
      }
    };
  },
  /** Pair device rows per date and derive gaps, averages and trends. */
  reduceSql(rows, params) {
    const list = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);

    const sidesByDate = new Map();
    for (const raw of list) {
      const day = str$11(raw.date);
      const device = str$11(raw.device).toUpperCase();
      const metrics = {
        clicks: num$4(raw.clicks),
        impressions: num$4(raw.impressions),
        ctr: num$4(raw.ctr),
        position: num$4(raw.position)
      };
      // Every date gets an entry, even when the device is neither side.
      const sides = sidesByDate.get(day) ?? {};
      if (device === "DESKTOP") sides.desktop = metrics;
      else if (device === "MOBILE") sides.mobile = metrics;
      sidesByDate.set(day, sides);
    }

    // Stand-in for a side with no row on a given date (shared instance).
    const zero = { clicks: 0, impressions: 0, ctr: 0, position: 0 };
    const daily = Array.from(sidesByDate.entries())
      .sort((left, right) => left[0].localeCompare(right[0]))
      .map(([date, sides]) => {
        const desktop = sides.desktop ?? zero;
        const mobile = sides.mobile ?? zero;
        return {
          date,
          desktop,
          mobile,
          gaps: {
            ctrGap: desktop.ctr - mobile.ctr,
            positionGap: mobile.position - desktop.position
          }
        };
      });

    // Mean gaps over daily[from, to); zeros for an empty window.
    const meanGaps = (from, to) => {
      const window = daily.slice(from, to);
      if (window.length === 0) return { ctr: 0, pos: 0 };
      let ctrTotal = 0;
      let posTotal = 0;
      for (const day of window) {
        ctrTotal += day.gaps.ctrGap;
        posTotal += day.gaps.positionGap;
      }
      return {
        ctr: ctrTotal / window.length,
        pos: posTotal / window.length
      };
    };
    const first = meanGaps(0, 7);
    const last = meanGaps(Math.max(0, daily.length - 7), daily.length);

    // A shrinking absolute gap is "improving"; changes under 0.005 are "stable".
    const classify = (firstVal, lastVal) => {
      const delta = Math.abs(lastVal) - Math.abs(firstVal);
      if (Math.abs(delta) < .005) return "stable";
      if (delta < 0) return "improving";
      return "worsening";
    };

    let ctrGapSum = 0;
    for (const day of daily) ctrGapSum += day.gaps.ctrGap;
    let positionGapSum = 0;
    for (const day of daily) positionGapSum += day.gaps.positionGap;
    const divisor = Math.max(1, daily.length);

    return {
      results: daily,
      meta: {
        summary: {
          avgCtrGap: ctrGapSum / divisor,
          avgPositionGap: positionGapSum / divisor,
          ctrGapTrend: classify(first.ctr, last.ctr),
          positionGapTrend: classify(first.pos, last.pos)
        },
        startDate,
        endDate
      }
    };
  }
});
2632
/**
 * Return `String(v)`, or "" when `v` is `null` or `undefined`.
 *
 * @param {unknown} v - raw value.
 * @returns {string} string form of `v`.
 */
function str$10(v) {
  const missing = v === void 0 || v === null;
  if (missing) return "";
  return String(v);
}
2635
/**
 * Resolve a JSON-list cell to an array: arrays pass through, non-empty
 * strings are `JSON.parse`d (throwing on invalid JSON) and used only if the
 * result is an array, anything else becomes `[]`.
 *
 * @param {unknown} v - array, JSON text, or other.
 * @returns {Array} resulting list.
 */
function parseJsonList$7(v) {
  if (Array.isArray(v)) return v;
  const parsed = typeof v === "string" && v.length > 0 ? JSON.parse(v) : null;
  return Array.isArray(parsed) ? parsed : [];
}
2643
/**
 * Words excluded from tokenisation by the intent-atlas analyzer; they are
 * interpolated into its SQL as a quoted `NOT IN (...)` list.
 */
const INTENT_ATLAS_STOP_WORDS = [
  // articles
  "the", "a", "an",
  // forms of "to be"
  "is", "are", "was", "were", "be", "been",
  // prepositions and conjunctions
  "of", "to", "in", "for", "on", "and", "or", "with", "at", "by",
  "from", "into", "about", "as", "so", "than", "then",
  // demonstratives
  "that", "this",
  // pronouns and possessives
  "my", "your", "our", "their", "his", "her", "its", "me", "you",
  // question words
  "what", "how", "why", "when", "where", "who", "which",
  // auxiliaries
  "do", "does"
];
2691
/**
 * "intent-atlas" analyzer: groups queries into clusters keyed by their two
 * highest-weighted tokens.
 *
 * SQL outline:
 *   1. `queries`: per-query metrics, keeping queries with at least
 *      `minImpressions` (default 20) summed impressions.
 *   2. `tokens`: whitespace-split, lower-cased tokens of length >= 3 that
 *      are not stop words.
 *   3. `token_weights`: tokens with at least 50 summed impressions.
 *   4. `cluster_keys`: for each query, its top two tokens by weight joined
 *      alphabetically with ' + '; queries with fewer than two are dropped.
 *   5. Final select: clusters with at least `minClusterSize` (default 3)
 *      queries, ordered by impressions, limited to `limit` (default 200).
 */
const intentAtlasAnalyzer = defineAnalyzer({
  id: "intent-atlas",
  /** Build the SQL spec; the window defaults to daysAgo(90) .. defaultEndDate(). */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(90);
    const minQueryImpressions = params.minImpressions ?? 20;
    const minClusterSize = params.minClusterSize ?? 3;
    const minTokenImpressions = 50;
    const limit = params.limit ?? 200;
    // Stop words are module constants, so inlining them as literals is safe.
    const stopList = INTENT_ATLAS_STOP_WORDS.map((word) => `'${word}'`).join(", ");
    const sql = `
      WITH queries AS (
        SELECT
          query,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
        GROUP BY query
        HAVING SUM(impressions) >= ?
      ),
      tokens AS (
        SELECT q.query, q.impressions, q.clicks, q.position,
          LOWER(t.token) AS token
        FROM queries q,
          unnest(regexp_split_to_array(LOWER(q.query), '\\s+')) AS t(token)
        WHERE LENGTH(t.token) >= 3
          AND LOWER(t.token) NOT IN (${stopList})
      ),
      token_weights AS (
        SELECT token,
          SUM(impressions) AS token_impressions,
          COUNT(DISTINCT query) AS query_count
        FROM tokens
        GROUP BY token
        HAVING SUM(impressions) >= ${Number(minTokenImpressions)}
      ),
      ranked_tokens AS (
        SELECT t.query, t.token, tw.token_impressions,
          ROW_NUMBER() OVER (
            PARTITION BY t.query
            ORDER BY tw.token_impressions DESC, t.token ASC
          ) AS rnk
        FROM tokens t
        JOIN token_weights tw USING (token)
      ),
      cluster_keys AS (
        SELECT query,
          array_to_string(list(token ORDER BY token), ' + ') AS cluster_key
        FROM ranked_tokens
        WHERE rnk <= 2
        GROUP BY query
        HAVING COUNT(*) >= 2
      ),
      clustered AS (
        SELECT q.query, q.impressions, q.clicks, q.position, ck.cluster_key
        FROM queries q
        JOIN cluster_keys ck USING (query)
      )
      SELECT
        cluster_key AS clusterKey,
        COUNT(*) AS keywordCount,
        SUM(impressions) AS totalImpressions,
        SUM(clicks) AS totalClicks,
        SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr,
        AVG(position) AS avgPosition,
        to_json(list({
          'query': query,
          'impressions': impressions,
          'clicks': clicks,
          'position': position
        } ORDER BY impressions DESC)) AS keywords
      FROM clustered
      GROUP BY cluster_key
      HAVING COUNT(*) >= ${Number(minClusterSize)}
      ORDER BY totalImpressions DESC
      LIMIT ${Number(limit)}
    `;
    return {
      sql,
      params: [startDate, endDate, minQueryImpressions],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise cluster rows (keeping at most 25 keywords per cluster) and sum
   * impressions and keyword counts across the returned clusters.
   */
  reduceSql(rows) {
    const list = Array.isArray(rows) ? rows : [];
    const toKeyword = (item) => ({
      query: str$10(item.query),
      impressions: num$4(item.impressions),
      clicks: num$4(item.clicks),
      position: num$4(item.position)
    });
    const clusters = list.map((row) => ({
      clusterKey: str$10(row.clusterKey),
      keywordCount: num$4(row.keywordCount),
      totalImpressions: num$4(row.totalImpressions),
      totalClicks: num$4(row.totalClicks),
      ctr: num$4(row.ctr),
      avgPosition: num$4(row.avgPosition),
      keywords: parseJsonList$7(row.keywords).slice(0, 25).map(toKeyword)
    }));

    let totalImpressions = 0;
    for (const cluster of clusters) totalImpressions += cluster.totalImpressions;
    let totalKeywords = 0;
    for (const cluster of clusters) totalKeywords += cluster.keywordCount;

    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalImpressions,
        totalKeywords
      }
    };
  }
});
2810
/**
 * Null-safe string conversion: nullish input yields "".
 *
 * @param {unknown} v - raw value.
 * @returns {string} string form of `v`.
 */
function str$9(v) {
  if (v === null) return "";
  if (v === void 0) return "";
  return String(v);
}
2813
/**
 * Read a JSON-list cell as an array. An array input is returned as-is; a
 * non-empty string is decoded with `JSON.parse` (invalid JSON throws) and
 * used only when it decodes to an array; any other input yields `[]`.
 *
 * @param {unknown} v - array, JSON text, or other.
 * @returns {Array} resulting list.
 */
function parseJsonList$6(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string") return [];
  if (v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
2821
/**
 * "keyword-breadth" analyzer: how many distinct queries each page receives
 * impressions for.
 *
 * The SQL reads the `page_keywords` table and returns a single row with four
 * JSON columns:
 *   - `distribution_json`: page counts per keyword-count bucket
 *     ('1', '2-5', '6-15', '16-50', '50+'), ordered by the bucket's smallest
 *     keyword count.
 *   - `fragile_json`: up to 20 pages with <= 2 keywords and >= 5 clicks.
 *   - `authority_json`: up to 20 pages with >= 20 keywords.
 *   - `stats_json`: overall page count, average keywords per page and the
 *     fragile / authority page counts.
 */
const keywordBreadthAnalyzer = defineAnalyzer({
  id: "keyword-breadth",
  /** Build the SQL spec for the window returned by `periodOf(params)`. */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      // The ORDER BY for the distribution belongs inside list(...): ordering
      // is a clause of the aggregate, not of the scalar to_json() around it.
      sql: `
        WITH per_page AS (
          SELECT
            url,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
          GROUP BY url
        ),
        bucketed AS (
          SELECT
            CASE
              WHEN keywordCount = 1 THEN '1'
              WHEN keywordCount BETWEEN 2 AND 5 THEN '2-5'
              WHEN keywordCount BETWEEN 6 AND 15 THEN '6-15'
              WHEN keywordCount BETWEEN 16 AND 50 THEN '16-50'
              ELSE '50+'
            END AS bucket,
            MIN(keywordCount) AS sort_key,
            CAST(COUNT(*) AS DOUBLE) AS pageCount
          FROM per_page
          GROUP BY bucket
        ),
        fragile AS (
          SELECT url, keywordCount, clicks, impressions
          FROM per_page
          WHERE keywordCount <= 2 AND clicks >= 5
          ORDER BY clicks DESC
          LIMIT 20
        ),
        authority AS (
          SELECT url, keywordCount, clicks, impressions
          FROM per_page
          WHERE keywordCount >= 20
          ORDER BY keywordCount DESC
          LIMIT 20
        ),
        stats AS (
          SELECT
            CAST(COUNT(*) AS DOUBLE) AS totalPages,
            CAST(AVG(keywordCount) AS DOUBLE) AS avgKeywordsPerPage,
            CAST(SUM(CASE WHEN keywordCount <= 2 THEN 1 ELSE 0 END) AS DOUBLE) AS fragileCount,
            CAST(SUM(CASE WHEN keywordCount >= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS authorityCount
          FROM per_page
        )
        SELECT
          (SELECT to_json(list({ 'bucket': bucket, 'pageCount': pageCount, 'sortKey': sort_key }
            ORDER BY sort_key ASC)) FROM bucketed) AS distribution_json,
          (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions })) FROM fragile) AS fragile_json,
          (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions })) FROM authority) AS authority_json,
          (SELECT to_json({
            'totalPages': totalPages,
            'avgKeywordsPerPage': avgKeywordsPerPage,
            'fragileCount': fragileCount,
            'authorityCount': authorityCount
          }) FROM stats) AS stats_json
      `,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decode the single result row. The distribution becomes `results`
   * (re-sorted by `sortKey` as a safeguard); fragile / authority pages and
   * the summary go into `meta`.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const row = arr[0] ?? {};

    const toPage = (r) => ({
      url: str$9(r.url),
      keywordCount: num$4(r.keywordCount),
      clicks: num$4(r.clicks),
      impressions: num$4(r.impressions)
    });

    // Sort a copy: parseJsonList returns the row's own array when the
    // driver has already decoded the column, and sort() works in place.
    const distribution = [...parseJsonList$6(row.distribution_json)]
      .sort((a, b) => num$4(a.sortKey) - num$4(b.sortKey))
      .map((r) => ({
        bucket: str$9(r.bucket),
        pageCount: num$4(r.pageCount)
      }));
    const fragile = parseJsonList$6(row.fragile_json).map(toPage);
    const authority = parseJsonList$6(row.authority_json).map(toPage);

    // Fall back to {} as well when the JSON text decodes to null.
    const rawStats = row.stats_json;
    const stats = (typeof rawStats === "string" ? JSON.parse(rawStats) : rawStats) ?? {};

    return {
      results: distribution,
      meta: {
        fragilePages: fragile,
        authorityPages: authority,
        summary: {
          totalPages: num$4(stats.totalPages),
          avgKeywordsPerPage: num$4(stats.avgKeywordsPerPage),
          fragileCount: num$4(stats.fragileCount),
          authorityCount: num$4(stats.authorityCount)
        },
        startDate,
        endDate
      }
    };
  }
});
2930
/**
 * Converts a value to a string, treating `null` and `undefined` as empty.
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$8(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
2933
/**
 * Normalizes a JSON-encoded list column into an array.
 * @param {unknown} v - An array (returned as-is) or a JSON string.
 * @returns {Array} The decoded array, or `[]` when the input is empty,
 *   not a string, or decodes to something that is not an array.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$5(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
2941
/**
 * Normalizes rank/impression scatter points and thins long series.
 *
 * Series of 80 points or fewer are returned in full. Longer series keep the
 * first 10 points, then keep a later point only when its rank has reached a
 * threshold that starts at 1.15 and is multiplied by 1.15 after every kept
 * point.
 *
 * @param {Array<object>} points - Raw points with `rank`, `impressions`,
 *   `clicks` and `query` fields.
 * @returns {Array<{rank: number, impressions: number, clicks: number, query: string}>}
 */
function downsampleLogRank(points) {
	const normalized = points.map((point) => {
		return {
			rank: num$4(point.rank),
			impressions: num$4(point.impressions),
			clicks: num$4(point.clicks),
			query: str$8(point.query)
		};
	});
	if (normalized.length <= 80) return normalized;
	const kept = normalized.slice(0, 10);
	let threshold = 1.15;
	for (let i = 10; i < normalized.length; i++) {
		const candidate = normalized[i];
		if (!(candidate.rank >= threshold)) continue;
		kept.push(candidate);
		threshold *= 1.15;
	}
	return kept;
}
2959
/**
 * "long-tail" analyzer.
 *
 * For every page, ranks its queries by impressions, fits a line through
 * (ln rank, ln impressions) and labels the page by the fitted slope:
 * `flat-tail` (> -0.6), `balanced` (> -1.2) or `head-heavy` (everything else,
 * including a NULL slope). Pages need at least 10 qualifying queries.
 *
 * Params read: `minImpressions` (per page/query floor, default 5) and
 * `limit` (max pages, default 100), plus whatever `periodOf` reads.
 */
const longTailAnalyzer = defineAnalyzer({
	id: "long-tail",
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minQueries = 10;
		// The query takes LN(impressions), and DuckDB raises on the logarithm of
		// zero. Keep the per-query floor at 1 or above so a caller passing
		// `minImpressions: 0` cannot make the whole query fail.
		// NOTE(review): assumes METRIC_EXPR.impressions is the same aggregate the
		// HAVING clause filters on - confirm against sql-fragments.
		const requestedMin = Number(params.minImpressions ?? 5);
		const minQueryImpressions = Number.isFinite(requestedMin) ? Math.max(1, requestedMin) : 5;
		const limit = params.limit ?? 100;
		return {
			sql: `
				WITH page_queries AS (
					SELECT
						url AS page,
						query,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.clicks} AS clicks
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
						AND url IS NOT NULL AND url <> ''
					GROUP BY url, query
					HAVING SUM(impressions) >= ?
				),
				ranked AS (
					SELECT
						page, query, impressions, clicks,
						ROW_NUMBER() OVER (PARTITION BY page ORDER BY impressions DESC, query ASC) AS rnk
					FROM page_queries
				),
				log_space AS (
					SELECT *,
						LN(rnk) AS log_rank,
						LN(impressions) AS log_impr
					FROM ranked
				),
				fit AS (
					SELECT
						page,
						COUNT(*) AS query_count,
						SUM(impressions) AS total_impressions,
						SUM(clicks) AS total_clicks,
						REGR_SLOPE(log_impr, log_rank) AS slope,
						REGR_INTERCEPT(log_impr, log_rank) AS intercept,
						REGR_R2(log_impr, log_rank) AS r2,
						MAX(impressions) AS head_impressions,
						MAX(CASE WHEN rnk = 1 THEN impressions END) / NULLIF(SUM(impressions), 0) AS head_share
					FROM log_space
					GROUP BY page
					HAVING COUNT(*) >= ${Number(minQueries)}
				),
				scatter AS (
					SELECT
						l.page,
						to_json(list({
							'rank': l.rnk,
							'impressions': l.impressions,
							'clicks': l.clicks,
							'query': l.query
						} ORDER BY l.rnk)) AS pointsJson
					FROM log_space l
					JOIN fit f USING (page)
					GROUP BY l.page
				)
				SELECT
					f.page,
					f.query_count AS queryCount,
					f.total_impressions AS totalImpressions,
					f.total_clicks AS totalClicks,
					f.slope AS slope,
					f.intercept AS intercept,
					f.r2 AS r2,
					f.head_impressions AS headImpressions,
					f.head_share AS headShare,
					s.pointsJson AS pointsJson,
					CASE
						WHEN f.slope > -0.6 THEN 'flat-tail'
						WHEN f.slope > -1.2 THEN 'balanced'
						ELSE 'head-heavy'
					END AS fingerprint
				FROM fit f
				LEFT JOIN scatter s USING (page)
				ORDER BY f.total_impressions DESC
				LIMIT ${Number(limit)}
			`,
			params: [
				startDate,
				endDate,
				minQueryImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	reduceSql(rows) {
		const results = (Array.isArray(rows) ? rows : []).map((r) => ({
			page: str$8(r.page),
			queryCount: num$4(r.queryCount),
			totalImpressions: num$4(r.totalImpressions),
			totalClicks: num$4(r.totalClicks),
			slope: num$4(r.slope),
			intercept: num$4(r.intercept),
			r2: num$4(r.r2),
			headImpressions: num$4(r.headImpressions),
			headShare: num$4(r.headShare),
			fingerprint: str$8(r.fingerprint),
			points: downsampleLogRank(parseJsonList$5(r.pointsJson))
		}));
		const counts = {
			"flat-tail": 0,
			"balanced": 0,
			"head-heavy": 0
		};
		// Only tally the known labels. An unexpected or missing fingerprint would
		// otherwise add a NaN-valued key to the summary.
		for (const r of results) {
			if (Object.prototype.hasOwnProperty.call(counts, r.fingerprint)) counts[r.fingerprint]++;
		}
		return {
			results,
			meta: {
				total: results.length,
				fingerprints: counts,
				avgSlope: results.length > 0 ? results.reduce((s, r) => s + r.slope, 0) / results.length : 0
			}
		};
	}
});
3083
/**
 * Percentage change from `previous` to `current`.
 * @param {number} current - Value in the recent period.
 * @param {number} previous - Value in the baseline period.
 * @returns {number} `(current - previous) / previous * 100`. With a zero
 *   baseline the result is 100 when `current` is positive, otherwise 0.
 */
function percentDifference(current, previous) {
	if (previous !== 0) return (current - previous) / previous * 100;
	return current > 0 ? 100 : 0;
}
3087
/**
 * Stringifies a value, mapping `null`/`undefined` to the empty string.
 * @param {unknown} v - Value to stringify.
 * @returns {string}
 */
function str$7(v) {
	return String(v ?? "");
}
3090
/**
 * Reads a list that may arrive as an array or as a JSON string.
 * @param {unknown} v - Array or JSON text.
 * @returns {Array} The array itself, the decoded array, or `[]` for anything
 *   else (including JSON that decodes to a non-array).
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$4(v) {
	if (Array.isArray(v)) return v;
	const isJsonText = typeof v === "string" && v.length > 0;
	if (!isJsonText) return [];
	const value = JSON.parse(v);
	if (Array.isArray(value)) return value;
	return [];
}
3098
/**
 * Splits current-period keyword rows into rising, declining and stable
 * groups by comparing clicks against a baseline period.
 *
 * @param {object} input
 * @param {Array<object>} input.current - Rows for the recent period.
 * @param {Array<object>} input.previous - Rows for the baseline period.
 * @param {number} [input.normalizationFactor=1] - Divisor applied to baseline
 *   clicks and impressions before comparison.
 * @param {object} [options]
 * @param {number} [options.changeThreshold=0.2] - Minimum absolute click
 *   change (as a fraction) for a row to count as rising/declining.
 * @param {number} [options.minImpressions=50] - Current rows below this are
 *   dropped.
 * @param {string} [options.sortBy="clicksChange"] - Ordering for the rising
 *   and declining lists; unknown keys fall back to click change.
 * @returns {{rising: Array<object>, declining: Array<object>, stable: Array<object>}}
 */
function analyzeMovers(input, options = {}) {
	const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
	const scale = input.normalizationFactor ?? 1;
	const baselineByQuery = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
		clicks: num$4(r.clicks) / scale,
		impressions: num$4(r.impressions) / scale,
		position: num$4(r.position),
		page: r.page ?? null
	}));
	// First page seen per query wins; current rows take precedence over previous.
	const pageByQuery = new Map();
	for (const source of [input.current, input.previous]) {
		for (const row of source) {
			if (row.page && !pageByQuery.has(row.query)) pageByQuery.set(row.query, row.page);
		}
	}
	const emptyBaseline = {
		clicks: 0,
		impressions: 0,
		position: 0,
		page: null
	};
	const rising = [];
	const declining = [];
	const stable = [];
	for (const row of input.current) {
		const impressions = num$4(row.impressions);
		const clicks = num$4(row.clicks);
		const position = num$4(row.position);
		if (impressions < minImpressions) continue;
		const baseline = baselineByQuery.get(row.query) || emptyBaseline;
		const clicksChangePercent = percentDifference(clicks, baseline.clicks);
		const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
		const roundedBaselineClicks = Math.round(baseline.clicks);
		const entry = {
			keyword: row.query,
			page: pageByQuery.get(row.query) ?? null,
			recentClicks: clicks,
			recentImpressions: impressions,
			recentPosition: position,
			baselineClicks: roundedBaselineClicks,
			baselineImpressions: Math.round(baseline.impressions),
			baselinePosition: baseline.position,
			clicksChange: clicks - roundedBaselineClicks,
			clicksChangePercent,
			impressionsChangePercent,
			positionChange: position - baseline.position
		};
		const significant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
		let bucket = stable;
		if (significant && clicksChangePercent > 0) bucket = rising;
		else if (significant && clicksChangePercent < 0) bucket = declining;
		bucket.push(entry);
	}
	const byRecentClicks = (a, b) => b.recentClicks - a.recentClicks;
	const byClicksChange = (a, b) => Math.abs(b.clicksChangePercent) - Math.abs(a.clicksChangePercent);
	const comparators = new Map([
		["clicks", byRecentClicks],
		["impressions", (a, b) => b.recentImpressions - a.recentImpressions],
		["clicksChange", byClicksChange],
		["impressionsChange", (a, b) => Math.abs(b.impressionsChangePercent) - Math.abs(a.impressionsChangePercent)],
		["positionChange", (a, b) => Math.abs(b.positionChange) - Math.abs(a.positionChange)]
	]);
	const compare = comparators.get(sortBy) ?? byClicksChange;
	rising.sort(compare);
	declining.sort(compare);
	stable.sort(byRecentClicks);
	return {
		rising,
		declining,
		stable
	};
}
3164
/**
 * "movers" analyzer: keyword/page pairs whose clicks rose or fell between a
 * current and a previous period.
 *
 * Params read: `minImpressions` (default 50), `changeThreshold` (default 0.2,
 * a fraction) and `limit` (SQL path default 2000), plus whatever
 * `comparisonOf` reads.
 *
 * Both reducers return only rising and declining rows in `results`, and both
 * report `total`, `rising`, `declining` and `stable` counts in `meta`.
 */
const moversAnalyzer = defineAnalyzer({
	id: "movers",
	buildSql(params) {
		const { current: cur, previous: prev } = comparisonOf(params);
		const minImpressions = params.minImpressions ?? 50;
		const changeThreshold = params.changeThreshold ?? .2;
		const limit = params.limit ?? 2e3;
		return {
			sql: `
				WITH cur AS (
					SELECT
						query, url,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
				),
				prev AS (
					SELECT
						query, url,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES_PREV}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
				),
				weekly AS (
					SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions
					FROM (
						SELECT query, url, date, clicks, impressions
						FROM read_parquet({{FILES}}, union_by_name = true)
						WHERE date >= ? AND date <= ?
						UNION ALL
						SELECT query, url, date, clicks, impressions
						FROM read_parquet({{FILES_PREV}}, union_by_name = true)
						WHERE date >= ? AND date <= ?
					)
					GROUP BY query, url, week
				),
				series_by_entity AS (
					SELECT query, url, to_json(list({
						'week': strftime(week, '%Y-%m-%d'),
						'clicks': clicks,
						'impressions': impressions
					} ORDER BY week)) AS seriesJson
					FROM weekly GROUP BY query, url
				),
				joined AS (
					SELECT
						c.query AS keyword,
						c.url AS page,
						c.clicks AS recentClicks,
						c.impressions AS recentImpressions,
						c.position AS recentPosition,
						COALESCE(p.clicks, 0.0) AS baselineClicks,
						COALESCE(p.impressions, 0.0) AS baselineImpressions,
						COALESCE(p.position, 0.0) AS baselinePosition,
						(c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
						CASE
							WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
							ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
						END AS clicksChangePercent,
						CASE
							WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
							ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
						END AS impressionsChangePercent,
						(c.position - COALESCE(p.position, 0.0)) AS positionChange,
						s.seriesJson
					FROM cur c
					LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
					LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
					WHERE c.impressions >= ?
				)
				SELECT *,
					CASE
						WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
						WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
						ELSE 'stable'
					END AS direction
				FROM joined
				ORDER BY ABS(clicksChangePercent) DESC
				LIMIT ${Number(limit)}
			`,
			// Placeholders are positional: cur range, prev range, then the two
			// ranges again for the weekly series, then the impression floor and the
			// threshold once per CASE branch.
			params: [
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				minImpressions,
				changeThreshold,
				changeThreshold
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	reduceSql(rows) {
		const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
			keyword: str$7(r.keyword),
			page: r.page == null ? null : str$7(r.page),
			recentClicks: num$4(r.recentClicks),
			recentImpressions: num$4(r.recentImpressions),
			recentPosition: num$4(r.recentPosition),
			baselineClicks: Math.round(num$4(r.baselineClicks)),
			baselineImpressions: Math.round(num$4(r.baselineImpressions)),
			baselinePosition: num$4(r.baselinePosition),
			clicksChange: num$4(r.clicksChange),
			clicksChangePercent: num$4(r.clicksChangePercent),
			impressionsChangePercent: num$4(r.impressionsChangePercent),
			positionChange: num$4(r.positionChange),
			direction: str$7(r.direction),
			series: parseJsonList$4(r.seriesJson).map((s) => ({
				week: str$7(s.week),
				clicks: num$4(s.clicks),
				impressions: num$4(s.impressions)
			}))
		}));
		const rising = normalized.filter((r) => r.direction === "rising");
		const declining = normalized.filter((r) => r.direction === "declining");
		const stable = normalized.filter((r) => r.direction === "stable");
		const combined = [...rising, ...declining];
		return {
			results: combined,
			meta: {
				total: combined.length,
				rising: rising.length,
				declining: declining.length,
				stable: stable.length
			}
		};
	},
	buildRows(params) {
		const { current, previous } = comparisonOf(params);
		return {
			current: keywordsQueryState(current, params.limit),
			previous: keywordsQueryState(previous, params.limit)
		};
	},
	reduceRows(rows, params) {
		// This reducer needs a { current, previous } map; anything else (an
		// array, null) is treated as two empty periods.
		const map = rows && !Array.isArray(rows) ? rows : {
			current: [],
			previous: []
		};
		const result = analyzeMovers({
			current: map.current ?? [],
			previous: map.previous ?? []
		}, {
			changeThreshold: params.changeThreshold,
			minImpressions: params.minImpressions
		});
		const combined = [...result.rising.map((r) => ({
			...r,
			direction: "rising"
		})), ...result.declining.map((r) => ({
			...r,
			direction: "declining"
		}))];
		return {
			results: combined,
			// Same meta shape as reduceSql, so callers can read `total` and
			// `stable` regardless of which execution path produced the result.
			meta: {
				total: combined.length,
				rising: result.rising.length,
				declining: result.declining.length,
				stable: result.stable.length
			}
		};
	}
});
3344
/** Page size used when the caller does not supply a limit. */
const DEFAULT_LIMIT = 1e3;
/** Largest page size any caller can request. */
const MAX_LIMIT = 5e4;
/**
 * Sanitizes a requested page size.
 *
 * The value is floored to a whole number because it is interpolated into a
 * SQL `LIMIT` clause by `paginateClause`; this also matches `clampOffset`,
 * which floors its input.
 *
 * @param {unknown} limit - Requested limit; `null`/`undefined` means "use the fallback".
 * @param {number} [fallback=DEFAULT_LIMIT] - Returned as-is when `limit` is
 *   missing, not a finite number, or less than 1 after flooring.
 * @returns {number} A whole number between 1 and `MAX_LIMIT`, or `fallback`.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
	const n = Math.floor(Number(limit ?? fallback));
	if (!Number.isFinite(n) || n <= 0) return fallback;
	return Math.min(n, MAX_LIMIT);
}
3351
/**
 * Sanitizes a requested row offset.
 * @param {unknown} offset - Requested offset; nullish means 0.
 * @returns {number} The offset floored to a whole number, or 0 when the input
 *   is negative or not a finite number.
 */
function clampOffset(offset) {
	const parsed = Number(offset ?? 0);
	const usable = Number.isFinite(parsed) && parsed >= 0;
	return usable ? Math.floor(parsed) : 0;
}
3356
/**
 * Builds the SQL pagination suffix for a query.
 * @param {{limit?: unknown, offset?: unknown}} input - Requested page window.
 * @returns {string} `LIMIT <n>` and, only when the sanitized offset is
 *   positive, ` OFFSET <m>` appended to it.
 */
function paginateClause(input) {
	const pageSize = clampLimit(input.limit);
	const skip = clampOffset(input.offset);
	if (skip > 0) return `LIMIT ${pageSize} OFFSET ${skip}`;
	return `LIMIT ${pageSize}`;
}
3361
/**
 * Applies limit/offset pagination to an array that is already in memory.
 * @param {Array} rows - Full result set.
 * @param {{limit?: unknown, offset?: unknown}} input - Requested page window;
 *   the limit falls back to `rows.length` when it is missing or invalid.
 * @returns {Array} A new array holding the requested slice.
 */
function paginateInMemory(rows, input) {
	const start = clampOffset(input.offset);
	const pageSize = clampLimit(input.limit, rows.length);
	const end = start + pageSize;
	return rows.slice(start, end);
}
3366
/**
 * Validates a requested sort against an allow-list.
 * @param {{sortBy?: string, sortDir?: string}} input - Requested sort.
 * @param {string[]} allowed - Column names that may be sorted on.
 * @param {{sortBy: string, sortDir: string}} defaults - Used for any part of
 *   the request that is missing or not permitted.
 * @returns {{sortBy: string, sortDir: string}} `sortDir` is only taken from
 *   the request when it is exactly `"asc"` or `"desc"`.
 */
function resolveSort(input, allowed, defaults) {
	const requestedBy = input.sortBy;
	const requestedDir = input.sortDir;
	const byIsAllowed = Boolean(requestedBy) && allowed.includes(requestedBy);
	const dirIsValid = requestedDir === "asc" || requestedDir === "desc";
	return {
		sortBy: byIsAllowed ? requestedBy : defaults.sortBy,
		sortDir: dirIsValid ? requestedDir : defaults.sortDir
	};
}
3372
/** Benchmark click-through rate for each whole search position from 1 to 10. */
const EXPECTED_CTR_BY_POSITION = {
	1: 0.3,
	2: 0.15,
	3: 0.1,
	4: 0.07,
	5: 0.05,
	6: 0.04,
	7: 0.03,
	8: 0.025,
	9: 0.02,
	10: 0.015
};
/**
 * Looks up the benchmark CTR for a (possibly fractional) position.
 * @param {number} position - Average search position.
 * @returns {number} The table entry for the position clamped to 1..10 and
 *   rounded to the nearest whole number; 0.01 when no entry matches.
 */
function getExpectedCtr(position) {
	const clamped = Math.max(1, Math.min(position, 10));
	const slot = Math.round(clamped);
	return EXPECTED_CTR_BY_POSITION[slot] || 0.01;
}
3387
/**
 * Scores a position by how close it is to position 11.
 * @param {number} position - Average search position.
 * @returns {number} 0.2 for positions up to 3, 0.1 for positions past 50,
 *   otherwise `1 - |position - 11| / 15` floored at 0.
 */
function calculatePositionScore(position) {
	if (position <= 3) return 0.2;
	if (position > 50) return 0.1;
	const proximity = 1 - Math.abs(position - 11) / 15;
	return Math.max(0, proximity);
}
3393
/**
 * Scores impression volume on a log scale.
 * @param {number} impressions - Impression count.
 * @returns {number} 0 for non-positive input, otherwise
 *   `log10(impressions) / 5` capped at 1.
 */
function calculateImpressionScore(impressions) {
	if (impressions <= 0) return 0;
	const scaled = Math.log10(impressions) / 5;
	return Math.min(scaled, 1);
}
3397
/**
 * Scores how far the actual CTR falls short of the benchmark for a position.
 * @param {number} actualCtr - Observed click-through rate.
 * @param {number} position - Average search position.
 * @returns {number} 0 when the actual CTR meets or beats the benchmark,
 *   otherwise the shortfall as a fraction of the benchmark, capped at 1.
 */
function calculateCtrGapScore(actualCtr, position) {
	const benchmark = getExpectedCtr(position);
	return actualCtr >= benchmark ? 0 : Math.min((benchmark - actualCtr) / benchmark, 1);
}
3403
/** Sorter for opportunity rows; the object maps each sortable metric to its direction. */
const sortResults = createMetricSorter("opportunityScore", {
	opportunityScore: "desc",
	potentialClicks: "desc",
	impressions: "desc",
	position: "asc"
});
/**
 * "opportunity" analyzer: scores keyword/page pairs by combining a position
 * score, an impression score and a CTR-gap score into a geometric mean
 * (equal weights), reported as a 0-100 `opportunityScore`. `potentialClicks`
 * is impressions times the benchmark CTR for the position capped at 3.
 *
 * Params read: `minImpressions` (default 100), `limit` (SQL path default
 * 1000) and `offset`, plus whatever `periodOf` reads.
 */
const opportunityAnalyzer = defineAnalyzer({
	id: "opportunity",
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 100;
		const w1 = 1;
		const w2 = 1;
		const w3 = 1;
		const totalW = w1 + w2 + w3;
		const limit = params.limit ?? 1e3;
		return {
			sql: `
				WITH agg AS (
					SELECT
						query AS keyword,
						url AS page,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.ctr} AS ctr,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
					HAVING SUM(impressions) >= ?
				),
				scored AS (
					SELECT
						keyword, page, clicks, impressions, ctr, position,
						CASE
							WHEN position <= 3 THEN 0.2
							WHEN position > 50 THEN 0.1
							ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
						END AS positionScore,
						CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
						CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
							WHEN 1 THEN 0.30
							WHEN 2 THEN 0.15
							WHEN 3 THEN 0.10
							WHEN 4 THEN 0.07
							WHEN 5 THEN 0.05
							WHEN 6 THEN 0.04
							WHEN 7 THEN 0.03
							WHEN 8 THEN 0.025
							WHEN 9 THEN 0.02
							WHEN 10 THEN 0.015
							ELSE 0.01
						END AS expectedCtr
					FROM agg
				),
				gapped AS (
					SELECT
						*,
						CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
					FROM scored
				)
				SELECT
					keyword, page, clicks, impressions, ctr, position,
					CAST(ROUND(POWER(
						POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
						1.0 / ${totalW}
					) * 100) AS DOUBLE) AS opportunityScore,
					CAST(ROUND(impressions * (
						CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
							WHEN 1 THEN 0.30
							WHEN 2 THEN 0.15
							WHEN 3 THEN 0.10
							ELSE 0.10
						END
					)) AS DOUBLE) AS potentialClicks,
					positionScore, impressionScore, ctrGapScore
				FROM gapped
				-- opportunityScore is a rounded integer, so ties are common. The extra
				-- keys make the order total, which LIMIT/OFFSET paging needs in order
				-- to return each row on exactly one page.
				ORDER BY opportunityScore DESC, impressions DESC, keyword ASC, page ASC
				${paginateClause({
					limit,
					offset: params.offset
				})}
			`,
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		return {
			results: arr.map((r) => ({
				keyword: r.keyword == null ? "" : String(r.keyword),
				page: r.page == null ? null : String(r.page),
				clicks: num$4(r.clicks),
				impressions: num$4(r.impressions),
				ctr: num$4(r.ctr),
				position: num$4(r.position),
				opportunityScore: num$4(r.opportunityScore),
				potentialClicks: num$4(r.potentialClicks),
				factors: {
					positionScore: num$4(r.positionScore),
					impressionScore: num$4(r.impressionScore),
					ctrGapScore: num$4(r.ctrGapScore)
				}
			})),
			meta: { total: arr.length }
		};
	},
	buildRows(params) {
		return { keywords: keywordsQueryState(periodOf(params), params.limit) };
	},
	reduceRows(rows, params) {
		const keywords = Array.isArray(rows) ? rows : [];
		const minImpressions = params.minImpressions ?? 100;
		// Equal weights, mirroring w1/w2/w3 in buildSql.
		const positionWeight = 1;
		const impressionsWeight = 1;
		const ctrGapWeight = 1;
		const sortBy = "opportunityScore";
		const results = [];
		for (const row of keywords) {
			const impressions = num$4(row.impressions);
			const position = num$4(row.position);
			const ctr = num$4(row.ctr);
			const clicks = num$4(row.clicks);
			if (impressions < minImpressions) continue;
			const positionScore = calculatePositionScore(position);
			const impressionScore = calculateImpressionScore(impressions);
			const ctrGapScore = calculateCtrGapScore(ctr, position);
			const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
			const opportunityScore = Math.round(geometricMean * 100);
			const targetCtr = getExpectedCtr(Math.min(3, position));
			const potentialClicks = Math.round(impressions * targetCtr);
			results.push({
				keyword: row.query,
				page: row.page ?? null,
				clicks,
				impressions,
				ctr,
				position,
				opportunityScore,
				potentialClicks,
				factors: {
					positionScore,
					impressionScore,
					ctrGapScore
				}
			});
		}
		const sorted = sortResults(results, sortBy);
		const paged = paginateInMemory(sorted, {
			limit: params.limit,
			offset: params.offset
		});
		return {
			results: paged,
			meta: {
				total: sorted.length,
				returned: paged.length
			}
		};
	}
});
3572
/**
 * Returns `String(v)`, or the empty string when `v` is `null`/`undefined`.
 * @param {unknown} v - Value to stringify.
 * @returns {string}
 */
function str$6(v) {
	return v != null ? String(v) : "";
}
3575
/**
 * "position-distribution" analyzer: for each date, counts how many rows of
 * the `keywords` table fall into the position bands 1-3, 4-10, 11-20 and 20+,
 * where a row's position is `sum_position / impressions + 1`. Rows with no
 * impressions are excluded.
 */
const positionDistributionAnalyzer = defineAnalyzer({
	id: "position-distribution",
	buildSql(params) {
		const period = periodOf(params);
		const sql = `
			WITH pos AS (
				SELECT
					date,
					(sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
				FROM read_parquet({{FILES}}, union_by_name = true)
				WHERE date >= ? AND date <= ? AND impressions > 0
			)
			SELECT
				date,
				CAST(SUM(CASE WHEN avg_pos <= 3 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_1_3,
				CAST(SUM(CASE WHEN avg_pos > 3 AND avg_pos <= 10 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_4_10,
				CAST(SUM(CASE WHEN avg_pos > 10 AND avg_pos <= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_11_20,
				CAST(SUM(CASE WHEN avg_pos > 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_20_plus,
				CAST(COUNT(*) AS DOUBLE) AS total
			FROM pos
			GROUP BY date
			ORDER BY date ASC
		`;
		return {
			sql,
			params: [period.startDate, period.endDate],
			current: {
				table: "keywords",
				partitions: enumeratePartitions(period.startDate, period.endDate)
			}
		};
	},
	reduceSql(rows, params) {
		const list = Array.isArray(rows) ? rows : [];
		const { startDate, endDate } = periodOf(params);
		// One output entry per date row, with every count coerced to a number.
		const results = [];
		for (const r of list) {
			results.push({
				date: str$6(r.date),
				pos_1_3: num$4(r.pos_1_3),
				pos_4_10: num$4(r.pos_4_10),
				pos_11_20: num$4(r.pos_11_20),
				pos_20_plus: num$4(r.pos_20_plus),
				total: num$4(r.total)
			});
		}
		return {
			results,
			meta: {
				total: list.length,
				startDate,
				endDate
			}
		};
	}
});
3626
/**
 * String coercion that maps missing values to `""`.
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` for `null`/`undefined`, otherwise `String(v)`.
 */
function str$5(v) {
	if (v === undefined || v === null) {
		return "";
	}
	return String(v);
}
3629
/**
 * "position-volatility" analyzer: per page and day, combines the spread of
 * query positions (population stddev) with the day-over-day shift in the
 * impression-weighted average position, then returns the daily series for
 * the pages with the highest average volatility.
 *
 * Params read: `topN` (pages returned, default 30), `minImpressions` (per
 * page/day floor, default 10) and `minWeeksWithData` (minimum number of
 * qualifying days per page, default 7), plus whatever `periodOf` reads.
 */
const positionVolatilityAnalyzer = defineAnalyzer({
	id: "position-volatility",
	buildSql(params) {
		const period = periodOf(params);
		const pageCap = params.topN ?? 30;
		const dayImpressionFloor = params.minImpressions ?? 10;
		const requiredDays = params.minWeeksWithData ?? 7;
		const sql = `
			WITH query_day AS (
				SELECT
					url AS page,
					query,
					date,
					${METRIC_EXPR.impressions} AS q_impressions,
					${METRIC_EXPR.position} AS q_position
				FROM read_parquet({{FILES}}, union_by_name = true)
				WHERE date >= ? AND date <= ?
					AND query IS NOT NULL AND query <> ''
					AND url IS NOT NULL AND url <> ''
				GROUP BY url, query, date
				HAVING SUM(impressions) >= 1
			),
			daily AS (
				SELECT
					page, date,
					COUNT(*) AS query_count,
					SUM(q_impressions) AS day_impressions,
					SUM(q_position * q_impressions) / NULLIF(SUM(q_impressions), 0) AS avg_position,
					COALESCE(STDDEV_POP(q_position), 0.0) AS pos_stddev,
					MIN(q_position) AS best_position,
					MAX(q_position) AS worst_position
				FROM query_day
				GROUP BY page, date
				HAVING SUM(q_impressions) >= ?
			),
			with_shift AS (
				SELECT *,
					LAG(avg_position) OVER (PARTITION BY page ORDER BY date) AS prev_position,
					COALESCE(
						ABS(avg_position - LAG(avg_position) OVER (PARTITION BY page ORDER BY date)),
						0.0
					) AS dod_shift
				FROM daily
			),
			scored AS (
				SELECT *,
					pos_stddev + dod_shift AS volatility
				FROM with_shift
			),
			top_pages AS (
				SELECT page,
					SUM(day_impressions) AS total_impressions,
					AVG(volatility) AS avg_volatility,
					MAX(volatility) AS peak_volatility,
					COUNT(*) AS days_with_data
				FROM scored
				GROUP BY page
				HAVING COUNT(*) >= ?
				ORDER BY avg_volatility DESC
				LIMIT ${Number(pageCap)}
			)
			SELECT
				s.page,
				strftime(s.date, '%Y-%m-%d') AS date,
				s.query_count AS queryCount,
				s.day_impressions AS dayImpressions,
				s.avg_position AS avgPosition,
				s.pos_stddev AS posStddev,
				s.best_position AS bestPosition,
				s.worst_position AS worstPosition,
				s.dod_shift AS dodShift,
				s.volatility AS volatility,
				t.avg_volatility AS pageAvgVolatility,
				t.peak_volatility AS pagePeakVolatility,
				t.total_impressions AS pageTotalImpressions
			FROM scored s
			JOIN top_pages t USING (page)
			ORDER BY t.avg_volatility DESC, s.date ASC
		`;
		return {
			sql,
			params: [
				period.startDate,
				period.endDate,
				dayImpressionFloor,
				requiredDays
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(period.startDate, period.endDate)
			}
		};
	},
	reduceSql(rows) {
		const list = Array.isArray(rows) ? rows : [];
		const pagesByUrl = new Map();
		const seenDates = new Set();
		for (const r of list) {
			const page = str$5(r.page);
			const date = str$5(r.date);
			seenDates.add(date);
			// Page-level aggregates repeat on every row; take them from the first
			// row seen for the page.
			let entry = pagesByUrl.get(page);
			if (entry == null) {
				entry = {
					page,
					avgVolatility: num$4(r.pageAvgVolatility),
					peakVolatility: num$4(r.pagePeakVolatility),
					totalImpressions: num$4(r.pageTotalImpressions),
					days: []
				};
				pagesByUrl.set(page, entry);
			}
			entry.days.push({
				date,
				queryCount: num$4(r.queryCount),
				dayImpressions: num$4(r.dayImpressions),
				avgPosition: num$4(r.avgPosition),
				posStddev: num$4(r.posStddev),
				bestPosition: num$4(r.bestPosition),
				worstPosition: num$4(r.worstPosition),
				dodShift: num$4(r.dodShift),
				volatility: num$4(r.volatility)
			});
		}
		const pages = Array.from(pagesByUrl.values());
		pages.sort((a, b) => b.avgVolatility - a.avgVolatility);
		const dates = Array.from(seenDates).sort();
		let maxVolatility = 0;
		for (const p of pages) maxVolatility = Math.max(maxVolatility, p.peakVolatility);
		return {
			results: pages,
			meta: {
				total: pages.length,
				dates,
				maxVolatility
			}
		};
	}
});
3762
/**
 * Stringifies `v`, yielding `""` for `null` and `undefined`.
 * @param {unknown} v - Value to stringify.
 * @returns {string}
 */
function str$4(v) {
	const missing = v === null || v === undefined;
	return missing ? "" : String(v);
}
3765
/**
 * Accepts a list as either an array or a JSON string and returns an array.
 * @param {unknown} v - Array or JSON text.
 * @returns {Array} The input array, the decoded array, or `[]` for any other
 *   input or for JSON that is not an array.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$3(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string") return [];
	if (v.length === 0) return [];
	const result = JSON.parse(v);
	return Array.isArray(result) ? result : [];
}
3773
+ const queryMigrationAnalyzer = defineAnalyzer({
3774
+ id: "query-migration",
3775
+ buildSql(params) {
3776
+ const cur = periodOf(params);
3777
+ let prevStart = params.prevStartDate;
3778
+ let prevEnd = params.prevEndDate;
3779
+ if (prevStart == null || prevEnd == null) {
3780
+ const curStartMs = new Date(cur.startDate).getTime();
3781
+ const span = new Date(cur.endDate).getTime() - curStartMs;
3782
+ prevEnd = toIsoDate(new Date(curStartMs - MS_PER_DAY));
3783
+ prevStart = toIsoDate(new Date(curStartMs - MS_PER_DAY - span));
3784
+ }
3785
+ const minImpressions = params.minImpressions ?? 20;
3786
+ const limit = params.limit ?? 200;
3787
+ const maxLevenshtein = 2;
3788
+ return {
3789
+ sql: `
3790
+ WITH cur AS (
3791
+ SELECT query, url AS page,
3792
+ ${METRIC_EXPR.impressions} AS impressions,
3793
+ ${METRIC_EXPR.clicks} AS clicks,
3794
+ ${METRIC_EXPR.position} AS position
3795
+ FROM read_parquet({{FILES}}, union_by_name = true)
3796
+ WHERE date >= ? AND date <= ?
3797
+ AND query IS NOT NULL AND query <> ''
3798
+ AND url IS NOT NULL AND url <> ''
3799
+ GROUP BY query, url
3800
+ HAVING SUM(impressions) >= ?
3801
+ ),
3802
+ prev AS (
3803
+ SELECT query, url AS page,
3804
+ ${METRIC_EXPR.impressions} AS impressions,
3805
+ ${METRIC_EXPR.clicks} AS clicks,
3806
+ ${METRIC_EXPR.position} AS position
3807
+ FROM read_parquet({{FILES_PREV}}, union_by_name = true)
3808
+ WHERE date >= ? AND date <= ?
3809
+ AND query IS NOT NULL AND query <> ''
3810
+ AND url IS NOT NULL AND url <> ''
3811
+ GROUP BY query, url
3812
+ HAVING SUM(impressions) >= ?
3813
+ ),
3814
+ lost AS (
3815
+ SELECT p.page AS source_page, p.query AS source_query, p.impressions AS source_impressions
3816
+ FROM prev p
3817
+ LEFT JOIN cur c ON p.page = c.page AND p.query = c.query
3818
+ WHERE c.query IS NULL
3819
+ ),
3820
+ gained AS (
3821
+ SELECT c.page AS target_page, c.query AS target_query, c.impressions AS target_impressions
3822
+ FROM cur c
3823
+ LEFT JOIN prev p ON p.page = c.page AND p.query = c.query
3824
+ WHERE p.query IS NULL
3825
+ ),
3826
+ matched AS (
3827
+ SELECT
3828
+ l.source_page, l.source_query, l.source_impressions,
3829
+ g.target_page, g.target_query, g.target_impressions,
3830
+ CASE
3831
+ WHEN l.source_query = g.target_query THEN 'exact'
3832
+ ELSE 'fuzzy'
3833
+ END AS match_type,
3834
+ LEAST(l.source_impressions, g.target_impressions) AS absorbed_impressions
3835
+ FROM lost l
3836
+ JOIN gained g
3837
+ ON l.source_page <> g.target_page
3838
+ AND ABS(LENGTH(l.source_query) - LENGTH(g.target_query)) <= ${maxLevenshtein}
3839
+ AND (
3840
+ l.source_query = g.target_query
3841
+ OR levenshtein(l.source_query, g.target_query) <= ${maxLevenshtein}
3842
+ )
3843
+ ),
3844
+ edges AS (
3845
+ SELECT
3846
+ source_page, target_page,
3847
+ SUM(absorbed_impressions) AS weight,
3848
+ COUNT(*) AS query_count,
3849
+ SUM(CASE WHEN match_type = 'exact' THEN 1 ELSE 0 END) AS exact_count,
3850
+ to_json(list({
3851
+ 'sourceQuery': source_query,
3852
+ 'targetQuery': target_query,
3853
+ 'absorbed': absorbed_impressions,
3854
+ 'matchType': match_type
3855
+ } ORDER BY absorbed_impressions DESC)) AS examplesJson
3856
+ FROM matched
3857
+ GROUP BY source_page, target_page
3858
+ )
3859
+ SELECT *
3860
+ FROM edges
3861
+ ORDER BY weight DESC
3862
+ LIMIT ${Number(limit)}
3863
+ `,
3864
+ params: [
3865
+ cur.startDate,
3866
+ cur.endDate,
3867
+ minImpressions,
3868
+ prevStart,
3869
+ prevEnd,
3870
+ minImpressions
3871
+ ],
3872
+ current: {
3873
+ table: "page_keywords",
3874
+ partitions: enumeratePartitions(cur.startDate, cur.endDate)
3875
+ },
3876
+ previous: {
3877
+ table: "page_keywords",
3878
+ partitions: enumeratePartitions(prevStart, prevEnd)
3879
+ }
3880
+ };
3881
+ },
3882
+ reduceSql(rows, params) {
3883
+ const arr = Array.isArray(rows) ? rows : [];
3884
+ const cur = periodOf(params);
3885
+ let prevStart = params.prevStartDate;
3886
+ let prevEnd = params.prevEndDate;
3887
+ if (prevStart == null || prevEnd == null) {
3888
+ const curStartMs = new Date(cur.startDate).getTime();
3889
+ const span = new Date(cur.endDate).getTime() - curStartMs;
3890
+ prevEnd = toIsoDate(new Date(curStartMs - MS_PER_DAY));
3891
+ prevStart = toIsoDate(new Date(curStartMs - MS_PER_DAY - span));
3892
+ }
3893
+ const edges = arr.map((r) => ({
3894
+ sourcePage: str$4(r.source_page),
3895
+ targetPage: str$4(r.target_page),
3896
+ weight: num$4(r.weight),
3897
+ queryCount: num$4(r.query_count),
3898
+ exactCount: num$4(r.exact_count),
3899
+ fuzzyCount: num$4(r.query_count) - num$4(r.exact_count),
3900
+ examples: parseJsonList$3(r.examplesJson).slice(0, 8).map((e) => ({
3901
+ sourceQuery: str$4(e.sourceQuery),
3902
+ targetQuery: str$4(e.targetQuery),
3903
+ absorbed: num$4(e.absorbed),
3904
+ matchType: str$4(e.matchType)
3905
+ }))
3906
+ }));
3907
+ const nodeAgg = /* @__PURE__ */ new Map();
3908
+ for (const e of edges) {
3909
+ const src = nodeAgg.get(e.sourcePage) ?? {
3910
+ url: e.sourcePage,
3911
+ outgoing: 0,
3912
+ incoming: 0
3913
+ };
3914
+ src.outgoing += e.weight;
3915
+ nodeAgg.set(e.sourcePage, src);
3916
+ const tgt = nodeAgg.get(e.targetPage) ?? {
3917
+ url: e.targetPage,
3918
+ outgoing: 0,
3919
+ incoming: 0
3920
+ };
3921
+ tgt.incoming += e.weight;
3922
+ nodeAgg.set(e.targetPage, tgt);
3923
+ }
3924
+ const nodes = [...nodeAgg.values()];
3925
+ const totalAbsorbed = edges.reduce((s, e) => s + e.weight, 0);
3926
+ return {
3927
+ results: edges,
3928
+ meta: {
3929
+ total: edges.length,
3930
+ totalAbsorbed,
3931
+ period: {
3932
+ current: cur,
3933
+ previous: {
3934
+ startDate: prevStart,
3935
+ endDate: prevEnd
3936
+ }
3937
+ },
3938
+ nodes
3939
+ }
3940
+ };
3941
+ }
3942
+ });
3943
/**
 * Coerces a value to a string, mapping `null`/`undefined` to the empty string.
 *
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$3(v) {
  if (v === null || v === void 0) return "";
  return String(v);
}
3946
/**
 * Interprets a loosely-typed flag as a boolean.
 *
 * Only the exact values `true`, `1` and `"true"` count as true; every other
 * value (including `"1"` and `"TRUE"`) is false.
 *
 * @param {unknown} v - Raw flag value.
 * @returns {boolean} Whether `v` is one of the accepted truthy encodings.
 */
function bool$1(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
3949
/**
 * Computes the coefficient of variation (population standard deviation divided
 * by the mean) of a numeric series, capped at 1.
 *
 * @param {number[]} values - Series to measure.
 * @returns {number} `0` for an empty series or a zero mean, otherwise
 *   `min(stdDev / mean, 1)`.
 */
function calculateCV(values) {
  const count = values.length;
  if (count === 0) return 0;
  const add = (acc, x) => acc + x;
  const mean = values.reduce(add, 0) / count;
  // A zero mean would make the ratio undefined; report "no variation" instead.
  if (mean === 0) return 0;
  const squaredDeviations = values.map((x) => (x - mean) ** 2);
  const stdDev = Math.sqrt(squaredDeviations.reduce(add, 0) / count);
  return Math.min(stdDev / mean, 1);
}
3956
/**
 * Aggregates daily rows into calendar months and flags months that deviate
 * strongly from the monthly average.
 *
 * A month is a peak when its total exceeds 1.5x the monthly average and a
 * trough when it falls below 0.5x. Both flags require a positive average, so a
 * series whose average is 0 has no peaks or troughs. This matches the
 * `s.avg_val > 0` guard used by the SQL form of this analysis.
 *
 * @param {Array<{date: string, clicks: number, impressions: number}>} dates -
 *   Daily rows; `date` must start with `YYYY-MM`.
 * @param {{metric?: string}} [options] - `metric: "clicks"` (default) sums
 *   `row.clicks`; any other value sums `row.impressions`.
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[],
 *   troughMonths: string[], monthlyBreakdown: Array<{month: string, value: number,
 *   vsAverage: number, isPeak: boolean, isTrough: boolean}>, insufficientData: boolean}}
 *   `peakMonths`/`troughMonths` hold distinct two-digit month numbers;
 *   `insufficientData` is true when fewer than 12 distinct months are present.
 */
function analyzeSeasonality(dates, options = {}) {
  const { metric = "clicks" } = options;
  if (dates.length === 0) {
    return {
      hasSeasonality: false,
      strength: 0,
      peakMonths: [],
      troughMonths: [],
      monthlyBreakdown: [],
      insufficientData: true
    };
  }

  // Sum the chosen metric per "YYYY-MM" bucket.
  const monthlyMap = /* @__PURE__ */ new Map();
  for (const row of dates) {
    const month = row.date.substring(0, 7);
    const value = metric === "clicks" ? row.clicks : row.impressions;
    monthlyMap.set(month, (monthlyMap.get(month) || 0) + value);
  }

  const months = Array.from(monthlyMap.keys()).sort();
  const values = months.map((m) => monthlyMap.get(m) || 0);
  const insufficientData = months.length < 12;
  const totalValue = values.reduce((a, b) => a + b, 0);
  const avgValue = values.length > 0 ? totalValue / values.length : 0;
  // Without a positive baseline the ratio is meaningless: vsAverage is forced to
  // 0 below, which would otherwise mark every month as a trough.
  const hasBaseline = avgValue > 0;

  const monthlyBreakdown = months.map((month, i) => {
    const value = values[i] ?? 0;
    const vsAverage = hasBaseline ? value / avgValue : 0;
    return {
      month,
      value,
      vsAverage,
      isPeak: hasBaseline && vsAverage > 1.5,
      isTrough: hasBaseline && vsAverage < .5
    };
  });

  // Collapse flagged "YYYY-MM" entries to distinct "MM" month numbers.
  const monthNumbers = (flag) => [
    ...new Set(monthlyBreakdown.filter((m) => m[flag]).map((m) => m.month.substring(5, 7)))
  ];
  const peakMonths = monthNumbers("isPeak");
  const troughMonths = monthNumbers("isTrough");
  const strength = calculateCV(values);

  return {
    hasSeasonality: peakMonths.length > 0 || troughMonths.length > 0 || strength > .3,
    strength,
    peakMonths,
    troughMonths,
    monthlyBreakdown,
    insufficientData
  };
}
4000
/**
 * "seasonality" analyzer: buckets traffic by calendar month and flags months
 * that sit well above (peak, > 1.5x) or below (trough, < 0.5x) the monthly
 * average.
 *
 * Two execution paths are provided:
 * - SQL (`buildSql` / `reduceSql`): the monthly aggregation and ratios are
 *   computed in the query over the `pages` partitions.
 * - Rows (`buildRows` / `reduceRows`): daily rows are fetched and aggregated in
 *   memory by `analyzeSeasonality`.
 *
 * Both paths return the monthly breakdown as `results` and the same set of
 * `meta` keys (`total`, `hasSeasonality`, `strength`, `peakMonths`,
 * `troughMonths`, `insufficientData`).
 */
const seasonalityAnalyzer = defineAnalyzer({
  id: "seasonality",
  /**
   * Builds the monthly aggregation query.
   *
   * @param {object} params - Analyzer params; `metric: "impressions"` sums
   *   impressions, any other value sums clicks. The period comes from
   *   `periodOf(params)`.
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
        WITH monthly AS (
          SELECT
            strftime(date, '%Y-%m') AS month,
            CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY month
        ),
        stats AS (
          SELECT
            AVG(value) AS avg_val,
            COALESCE(STDDEV_POP(value), 0.0) AS std_val,
            CAST(COUNT(*) AS DOUBLE) AS month_count
          FROM monthly
        )
        SELECT
          m.month AS month,
          m.value AS value,
          CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
          (s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
          (s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
          CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
          s.month_count AS monthCount
        FROM monthly m, stats s
        ORDER BY m.month
      `,
      params: [startDate, endDate],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Shapes the SQL rows into the monthly breakdown plus summary metadata.
   *
   * @param {unknown} rows - Query result; anything that is not an array is
   *   treated as empty.
   * @returns {{results: object[], meta: object}} Breakdown and summary.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const breakdown = arr.map((r) => ({
      month: str$3(r.month),
      value: num$4(r.value),
      vsAverage: num$4(r.vsAverage),
      isPeak: bool$1(r.isPeak),
      isTrough: bool$1(r.isTrough)
    }));
    // `strength` and `monthCount` are series-level values repeated on every
    // row (cross join with `stats`), so the first row is representative.
    const first = arr[0];
    const strength = first ? num$4(first.strength) : 0;
    const monthCount = first ? num$4(first.monthCount) : 0;
    const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
    const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
    const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
    const insufficientData = monthCount < 12;
    return {
      results: breakdown,
      meta: {
        total: breakdown.length,
        hasSeasonality,
        strength,
        peakMonths,
        troughMonths,
        insufficientData
      }
    };
  },
  /**
   * Describes the row query (daily totals for the period) for the rows path.
   *
   * @param {object} params - Analyzer params; `limit` is forwarded as-is.
   * @returns {{dates: object}} Named query state.
   */
  buildRows(params) {
    return { dates: datesQueryState(periodOf(params), params.limit) };
  },
  /**
   * Aggregates daily rows in memory and reports the same metadata keys as the
   * SQL path.
   *
   * @param {unknown} rows - Daily rows; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Analyzer params (`metric`).
   * @returns {{results: object[], meta: object}} Breakdown and summary.
   */
  reduceRows(rows, params) {
    // Use the same fallback as buildSql: anything other than "impressions"
    // means clicks, so both paths agree on the metric for unset or unknown values.
    const metric = params.metric === "impressions" ? "impressions" : "clicks";
    const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric });
    return {
      results: result.monthlyBreakdown,
      meta: {
        total: result.monthlyBreakdown.length,
        hasSeasonality: result.hasSeasonality,
        strength: result.strength,
        peakMonths: result.peakMonths,
        troughMonths: result.troughMonths,
        insufficientData: result.insufficientData
      }
    };
  }
});
4078
/**
 * Stringifies a value, treating `null` and `undefined` as the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} String form of `v`, or `""` when `v` is nullish.
 */
function str$2(v) {
  return String(v ?? "");
}
4081
/**
 * Decodes a flag that may arrive as a boolean, a number or a string.
 *
 * @param {unknown} v - Raw flag value.
 * @returns {boolean} `true` only for `true`, `1` or `"true"`.
 */
function bool(v) {
  const acceptedEncodings = [true, 1, "true"];
  return acceptedEncodings.includes(v);
}
4084
/**
 * Normalizes a list column that may arrive as an array or as JSON text.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} `v` itself when it is an array; the parsed value when `v`
 *   is a non-empty JSON string that decodes to an array; otherwise `[]`.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v === "") return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
4092
/**
 * "stl-decompose" analyzer: splits each (query, page) daily series into a
 * trend, a day-of-week seasonal component and a residual, all computed in SQL
 * over the `page_keywords` partitions. Only an SQL path is defined.
 *
 * Query outline (see the CTEs below):
 * - `daily`: per (query, url, date) totals; `observed` is the chosen metric.
 * - `entity_stats` / `filtered`: keep series with at least `minDays` daily rows
 *   and at least `minImpressions` total impressions.
 * - `trended`: 7-row centered moving average; NULL where the window holds
 *   fewer than 7 rows (series edges).
 * - `detrended` -> `seasonal_raw` -> `seasonal_centered`: per day-of-week mean
 *   of the detrended values, re-centered around the series mean.
 * - `residualed` / `scored`: residual = observed - trend - seasonal; a day is
 *   an anomaly when |residual| exceeds twice the series' residual std-dev.
 * - `per_entity` / `series`: per-series variances, regression slope and the
 *   JSON-encoded daily series.
 */
const stlDecomposeAnalyzer = defineAnalyzer({
  id: "stl-decompose",
  /**
   * Builds the decomposition query.
   *
   * @param {object} params - Optional `startDate`, `endDate`, `minImpressions`
   *   (default 100), `metric` ("clicks", anything else means "impressions"),
   *   and `limit` (default 100).
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    // Default window start is daysAgo(93).
    const startDate = params.startDate ?? daysAgo(93);
    const minImpressions = params.minImpressions ?? 100;
    // Minimum number of daily rows a series needs to be decomposed; not configurable.
    const minDays = 21;
    const metric = params.metric === "clicks" ? "clicks" : "impressions";
    const limit = params.limit ?? 100;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url AS page,
            date,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            CAST(SUM(${metric}) AS DOUBLE) AS observed
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
        ),
        entity_stats AS (
          SELECT query, page,
            COUNT(*) AS days,
            SUM(impressions) AS total_impressions
          FROM daily
          GROUP BY query, page
          HAVING COUNT(*) >= ${Number(minDays)}
            AND SUM(impressions) >= ?
        ),
        filtered AS (
          SELECT d.*
          FROM daily d
          JOIN entity_stats e USING (query, page)
        ),
        trended AS (
          SELECT *,
            CASE
              WHEN COUNT(*) OVER w = 7
              THEN AVG(observed) OVER w
              ELSE NULL
            END AS trend
          FROM filtered
          WINDOW w AS (
            PARTITION BY query, page
            ORDER BY date
            ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
          )
        ),
        detrended AS (
          SELECT *,
            observed - trend AS detrended,
            dayofweek(date) AS dow
          FROM trended
        ),
        seasonal_raw AS (
          SELECT *,
            AVG(detrended) OVER (PARTITION BY query, page, dow) AS seasonal_dow
          FROM detrended
        ),
        seasonal_centered AS (
          SELECT *,
            seasonal_dow - AVG(seasonal_dow) OVER (PARTITION BY query, page) AS seasonal
          FROM seasonal_raw
        ),
        residualed AS (
          SELECT *,
            CASE
              WHEN trend IS NULL OR seasonal IS NULL THEN NULL
              ELSE observed - trend - seasonal
            END AS residual
          FROM seasonal_centered
        ),
        scored AS (
          SELECT *,
            STDDEV_POP(residual) OVER (PARTITION BY query, page) AS resid_std,
            CASE
              WHEN residual IS NOT NULL
                AND STDDEV_POP(residual) OVER (PARTITION BY query, page) > 0
                AND ABS(residual) > 2.0 * STDDEV_POP(residual) OVER (PARTITION BY query, page)
              THEN true ELSE false
            END AS anomaly
          FROM residualed
        ),
        per_entity AS (
          SELECT query, page,
            COUNT(*) AS days,
            SUM(impressions) AS total_impressions,
            VAR_POP(detrended) AS var_detrended,
            VAR_POP(seasonal) AS var_seasonal,
            VAR_POP(residual) AS var_residual,
            COUNT(*) FILTER (WHERE anomaly) AS residual_anomalies,
            REGR_SLOPE(observed, epoch(date) / 86400.0) AS trend_slope
          FROM scored
          GROUP BY query, page
        ),
        series AS (
          SELECT query, page,
            to_json(list({
              'date': strftime(date, '%Y-%m-%d'),
              'observed': observed,
              'trend': trend,
              'seasonal': seasonal,
              'residual': residual,
              'anomaly': anomaly
            } ORDER BY date)) AS seriesJson
          FROM scored
          GROUP BY query, page
        )
        SELECT
          e.query AS keyword,
          e.page,
          CAST(e.total_impressions AS DOUBLE) AS totalImpressions,
          CAST(e.days AS DOUBLE) AS days,
          CASE
            WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
            ELSE LEAST(e.var_seasonal / NULLIF(e.var_detrended, 0), 1.0)
          END AS seasonalStrength,
          CASE
            WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
            ELSE GREATEST(0.0, 1.0 - e.var_residual / NULLIF(e.var_detrended, 0))
          END AS trendStrength,
          CAST(e.residual_anomalies AS DOUBLE) AS residualAnomalies,
          COALESCE(e.trend_slope, 0.0) AS trendSlope,
          s.seriesJson
        FROM per_entity e
        LEFT JOIN series s USING (query, page)
        ORDER BY seasonalStrength DESC, ABS(COALESCE(e.trend_slope, 0.0)) DESC
        LIMIT ${Number(limit)}
      `,
      // Positional bindings: date range for `daily`, then the impressions floor
      // in `entity_stats`.
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Converts SQL rows into typed results and summary metadata.
   *
   * @param {unknown} rows - Query result; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Analyzer params; only `metric` is read, to echo
   *   the effective metric in `meta`.
   * @returns {{results: object[], meta: {total: number, metric: string,
   *   avgSeasonalStrength: number}}} Per-series results and summary.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const metric = params.metric === "clicks" ? "clicks" : "impressions";
    const results = arr.map((r) => ({
      keyword: str$2(r.keyword),
      page: str$2(r.page),
      totalImpressions: num$4(r.totalImpressions),
      days: num$4(r.days),
      seasonalStrength: num$4(r.seasonalStrength),
      trendStrength: num$4(r.trendStrength),
      residualAnomalies: num$4(r.residualAnomalies),
      trendSlope: num$4(r.trendSlope),
      series: parseJsonList$2(r.seriesJson).map((s) => ({
        date: str$2(s.date),
        observed: num$4(s.observed),
        // trend/seasonal/residual stay null where the SQL left them NULL.
        trend: s.trend == null ? null : num$4(s.trend),
        seasonal: s.seasonal == null ? null : num$4(s.seasonal),
        residual: s.residual == null ? null : num$4(s.residual),
        anomaly: bool(s.anomaly)
      }))
    }));
    return {
      results,
      meta: {
        total: results.length,
        metric,
        avgSeasonalStrength: results.length > 0 ? results.reduce((a, r) => a + r.seasonalStrength, 0) / results.length : 0
      }
    };
  }
});
4268
/** Row cap passed to the rows-path query when the caller gives no `limit`. */
const DEFAULT_ROW_LIMIT$1 = 25e3;

/**
 * "striking-distance" analyzer: keyword/page pairs whose position lies in
 * [minPosition, maxPosition], with enough impressions and a CTR no higher than
 * `maxCtr`, ranked by potential clicks (15% of impressions).
 *
 * One `reduce` serves both the SQL and the rows path; all thresholds are
 * applied in memory.
 */
const strikingDistanceAnalyzer = defineAnalyzer({
  id: "striking-distance",
  /**
   * Filters, ranks and paginates candidate rows.
   *
   * @param {unknown} rows - Aggregated rows; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Optional `minPosition` (4), `maxPosition` (20),
   *   `minImpressions` (100), `maxCtr` (0.05), `limit` (1000), `offset`.
   * @returns {{results: object[], meta: {total: number, returned: number}}}
   *   The requested page and the counts before and after pagination.
   */
  reduce(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const thresholds = {
      minPosition: params.minPosition ?? 4,
      maxPosition: params.maxPosition ?? 20,
      minImpressions: params.minImpressions ?? 100,
      maxCtr: params.maxCtr ?? .05
    };
    const pageSize = params.limit ?? 1e3;

    const candidates = [];
    for (const row of input) {
      const position = num$4(row.position);
      const impressions = num$4(row.impressions);
      const ctr = num$4(row.ctr);
      const clicks = num$4(row.clicks);
      // Written as rejections so a NaN metric is never rejected by a comparison.
      const rejected = position < thresholds.minPosition
        || position > thresholds.maxPosition
        || impressions < thresholds.minImpressions
        || ctr > thresholds.maxCtr;
      if (rejected) continue;
      candidates.push({
        keyword: String(row.query ?? ""),
        page: row.page == null ? null : String(row.page),
        clicks,
        impressions,
        ctr,
        position,
        potentialClicks: Math.round(impressions * .15)
      });
    }

    // Highest potential first.
    candidates.sort((left, right) => right.potentialClicks - left.potentialClicks);
    const pageOfResults = paginateInMemory(candidates, {
      limit: pageSize,
      offset: params.offset
    });
    return {
      results: pageOfResults,
      meta: {
        total: candidates.length,
        returned: pageOfResults.length
      }
    };
  },
  /**
   * Builds the per (query, url) aggregation over the `page_keywords` partitions.
   *
   * @param {object} params - Analyzer params; the period comes from `periodOf`.
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const period = periodOf(params);
    const dateRange = [period.startDate, period.endDate];
    return {
      sql: `
        SELECT
          query,
          url AS page,
          CAST(SUM(clicks) AS DOUBLE) AS clicks,
          CAST(SUM(impressions) AS DOUBLE) AS impressions,
          CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0) AS ctr,
          SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY query, url
      `,
      params: dateRange,
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(period.startDate, period.endDate)
      }
    };
  },
  /**
   * Describes the keyword row query for the rows path.
   *
   * @param {object} params - Analyzer params; `limit` falls back to
   *   `DEFAULT_ROW_LIMIT$1`.
   * @returns {{keywords: object}} Named query state.
   */
  buildRows(params) {
    const rowLimit = params.limit ?? DEFAULT_ROW_LIMIT$1;
    return { keywords: keywordsQueryState(periodOf(params), rowLimit) };
  }
});
4336
/**
 * Returns the string form of a value, or `""` for `null`/`undefined`.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} Converted value.
 */
function str$1(v) {
  if (v == null) {
    return "";
  }
  return String(v);
}
4339
/**
 * Accepts a list either as an array or as JSON text and always returns an array.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} The array itself, the decoded JSON array, or `[]` when `v`
 *   is empty, not a string, or decodes to a non-array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$1(v) {
  if (Array.isArray(v)) return v;
  const isText = typeof v === "string";
  if (!isText || v.length === 0) return [];
  const parsed = JSON.parse(v);
  if (!Array.isArray(parsed)) return [];
  return parsed;
}
4347
/**
 * "survival" analyzer: measures how long (query, url) pairs stay in the top 10
 * and builds a Kaplan-Meier style survival curve per URL cohort, all in SQL
 * over the `page_keywords` partitions. Only an SQL path is defined.
 *
 * Query outline (see the CTEs below):
 * - `daily`: per (query, url, date) rows with at least `minImpressions`
 *   impressions that day.
 * - `classified` / `transitions` / `run_ids`: a row is "in top 10" when
 *   `day_position <= 10`; a new run starts when the previous row for the pair
 *   is missing or was not in the top 10.
 * - `episodes_raw` / `episodes`: one episode per run, with tenure in days
 *   (inclusive), a `censored` flag when the run ends within 2 days of the
 *   latest date in `daily`, and a cohort derived from the first URL path
 *   segment ('home' for the root path).
 * - `episodes_all`: every episode is also counted under the '__all__' cohort.
 * - `events` / `km` / `km_surv`: per-tenure event counts, at-risk counts and
 *   the cumulative survival product (computed as EXP(SUM(LN(...))) with each
 *   factor floored at 1e-9).
 *
 * NOTE(review): `cohort_totals` and `window_bounds.window_start` are defined
 * but not referenced by any later CTE or by the final SELECT.
 */
const survivalAnalyzer = defineAnalyzer({
  id: "survival",
  /**
   * Builds the survival query.
   *
   * @param {object} params - Optional `startDate` (default `daysAgo(183)`),
   *   `endDate` (default `defaultEndDate()`), `minImpressions` (default 5).
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(183);
    const minImpressions = params.minImpressions ?? 5;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url,
            date,
            ${METRIC_EXPR.clicks} AS day_clicks,
            ${METRIC_EXPR.impressions} AS day_impressions,
            ${METRIC_EXPR.position} AS day_position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
          HAVING SUM(impressions) >= ?
        ),
        classified AS (
          SELECT *,
            (day_position <= 10) AS in_top10
          FROM daily
        ),
        transitions AS (
          SELECT *,
            CASE
              WHEN in_top10 AND (LAG(in_top10) OVER w IS NULL OR NOT LAG(in_top10) OVER w)
              THEN 1 ELSE 0
            END AS is_entry
          FROM classified
          WINDOW w AS (PARTITION BY query, url ORDER BY date)
        ),
        run_ids AS (
          SELECT *,
            SUM(is_entry) OVER (PARTITION BY query, url ORDER BY date) AS run_id
          FROM transitions
          WHERE in_top10
        ),
        window_bounds AS (
          SELECT MIN(date) AS window_start, MAX(date) AS window_end FROM daily
        ),
        episodes_raw AS (
          SELECT
            query, url, run_id,
            MIN(date) AS entry_date,
            MAX(date) AS exit_date,
            DATEDIFF('day', MIN(date), MAX(date)) + 1 AS tenure
          FROM run_ids
          GROUP BY query, url, run_id
        ),
        episodes AS (
          SELECT
            e.query, e.url, e.run_id, e.entry_date, e.exit_date, e.tenure,
            (e.exit_date >= wb.window_end - INTERVAL 2 DAY) AS censored,
            CASE
              WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?(/[^/?#]*)', 1) = '/' OR e.url = '/'
              THEN 'home'
              WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1) = ''
              THEN 'home'
              ELSE regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1)
            END AS cohort
          FROM episodes_raw e
          CROSS JOIN window_bounds wb
        ),
        episodes_all AS (
          SELECT query, url, tenure, censored, cohort FROM episodes
          UNION ALL
          SELECT query, url, tenure, censored, '__all__' AS cohort FROM episodes
        ),
        cohort_totals AS (
          SELECT cohort, COUNT(*) AS n_total
          FROM episodes_all
          GROUP BY cohort
        ),
        events AS (
          SELECT
            cohort,
            tenure,
            COUNT(*) FILTER (WHERE NOT censored) AS d_t,
            COUNT(*) AS n_ending_at_t
          FROM episodes_all
          GROUP BY cohort, tenure
        ),
        km AS (
          SELECT
            e.cohort,
            e.tenure,
            e.d_t,
            e.n_ending_at_t,
            SUM(e.n_ending_at_t) OVER (PARTITION BY e.cohort ORDER BY e.tenure DESC
              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS at_risk
          FROM events e
        ),
        km_surv AS (
          SELECT
            cohort, tenure, d_t, at_risk,
            EXP(SUM(LN(GREATEST(1.0 - CAST(d_t AS DOUBLE) / NULLIF(at_risk, 0), 1e-9)))
              OVER (PARTITION BY cohort ORDER BY tenure
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) AS survival
          FROM km
        ),
        curve_agg AS (
          SELECT
            cohort,
            to_json(list({
              'tenure': tenure,
              'survival': survival,
              'atRisk': at_risk,
              'events': d_t
            } ORDER BY tenure)) AS curveJson
          FROM km_surv
          GROUP BY cohort
        ),
        cohort_stats AS (
          SELECT
            ea.cohort,
            COUNT(*) AS episode_count,
            AVG(CASE WHEN ea.censored THEN 1.0 ELSE 0.0 END) AS censoring_rate
          FROM episodes_all ea
          GROUP BY ea.cohort
        )
        SELECT
          cs.cohort,
          cs.episode_count AS episodeCount,
          cs.censoring_rate AS censoringRate,
          ca.curveJson
        FROM cohort_stats cs
        LEFT JOIN curve_agg ca USING (cohort)
        ORDER BY cs.cohort
      `,
      // Positional bindings: date range, then the per-day impressions floor.
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Converts SQL rows into per-cohort curves and derives the median tenure.
   *
   * @param {unknown} rows - Query result; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Analyzer params; `startDate`/`endDate` (same
   *   defaults as `buildSql`) are used only to compute `meta.windowDays`.
   * @returns {{results: object[], meta: {totalEpisodes: number,
   *   cohortCount: number, windowDays: number}}} Cohort results and summary.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    // NOTE(review): the defaults are re-evaluated here rather than reused from
    // buildSql, so they match only if both calls resolve to the same dates.
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(183);
    // Inclusive day count of the analysis window.
    const windowDays = Math.round((new Date(endDate).getTime() - new Date(startDate).getTime()) / MS_PER_DAY) + 1;
    const results = arr.map((r) => {
      const curve = parseJsonList$1(r.curveJson).map((p) => ({
        tenure: num$4(p.tenure),
        survival: num$4(p.survival),
        atRisk: num$4(p.atRisk),
        events: num$4(p.events)
      }));
      // Median tenure: first point where survival drops to 0.5 or below,
      // linearly interpolated against the preceding point.
      let medianTenure = 0;
      for (let i = 0; i < curve.length; i++) {
        const cur = curve[i];
        if (cur.survival <= .5) {
          if (i === 0) medianTenure = cur.tenure;
          else {
            const prev = curve[i - 1];
            const span = prev.survival - cur.survival;
            const frac = span > 0 ? (prev.survival - .5) / span : 0;
            medianTenure = prev.tenure + frac * (cur.tenure - prev.tenure);
          }
          break;
        }
      }
      // Survival never reached 0.5: fall back to the longest observed tenure.
      const last = curve[curve.length - 1];
      if (medianTenure === 0 && last && last.survival > .5) medianTenure = last.tenure;
      return {
        cohort: str$1(r.cohort),
        episodeCount: num$4(r.episodeCount),
        censoringRate: num$4(r.censoringRate),
        medianTenure,
        curve
      };
    });
    return {
      results,
      meta: {
        // '__all__' is the synthetic cohort that contains every episode.
        totalEpisodes: results.find((r) => r.cohort === "__all__")?.episodeCount ?? 0,
        cohortCount: results.filter((r) => r.cohort !== "__all__").length,
        windowDays
      }
    };
  }
});
4539
/**
 * Converts a value to a string; `null` and `undefined` become `""`.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} Converted value.
 */
function str(v) {
  return v != null ? String(v) : "";
}
4542
/**
 * Returns a list from a value that is either already an array or JSON text.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} `v` when it is an array, the decoded array when `v` is a
 *   non-empty JSON string holding one, and `[]` in every other case.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList(v) {
  const isNonEmptyText = typeof v === "string" && v.length > 0;
  const candidate = isNonEmptyText ? JSON.parse(v) : v;
  return Array.isArray(candidate) ? candidate : [];
}
4550
/**
 * "trends" analyzer: classifies pages or keywords by how their weekly clicks
 * evolve over the window, all computed in SQL. Only an SQL path is defined.
 *
 * Query outline (see the CTEs below):
 * - `bucketed`: weekly totals per entity (`query` for keywords, `url` for pages).
 * - `with_meta`: 0-based week index per entity, number of weeks with data, and
 *   a flag marking rows in the first half of the entity's weeks.
 * - `agg`: totals, regression slope of clicks over the week index, first- and
 *   second-half click sums, average position and the JSON weekly series;
 *   filtered by minimum impressions and minimum weeks with data.
 * - `classified`: growthRatio = second-half / first-half clicks (10.0 when the
 *   first half is 0 and the second is not, 1.0 when both are 0).
 * - Final SELECT: labels each entity accelerating / growing / cratering /
 *   declining / steady and orders by label priority, then by distance of the
 *   ratio from 1, then by clicks.
 */
const trendsAnalyzer = defineAnalyzer({
  id: "trends",
  /**
   * Builds the trend query.
   *
   * @param {object} params - Optional `weeks` (default 28), `endDate`,
   *   `startDate`, `minImpressions` (default 100), `minWeeksWithData`
   *   (default `max(2, floor(weeks / 4))`), `limit` (default 500),
   *   `dimension` ("keywords"; anything else means "pages").
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const weeks = params.weeks ?? 28;
    // `||` (not `??`): empty-string dates also fall back to the defaults.
    const endDate = params.endDate || defaultEndDate();
    // Default start makes the window exactly `weeks * 7` days, inclusive of endDate.
    const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
    const minImpressions = params.minImpressions ?? 100;
    const minWeeksWithData = params.minWeeksWithData ?? Math.max(2, Math.floor(weeks / 4));
    const limit = params.limit ?? 500;
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    // The table name is the same string as the normalized dimension.
    const table = dim === "keywords" ? "keywords" : "pages";
    return {
      sql: `
        WITH bucketed AS (
          SELECT
            ${dim === "keywords" ? "query" : "url"} AS entity,
            date_trunc('week', CAST(date AS DATE)) AS week,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            SUM(sum_position) AS sum_position_sum
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY entity, week
        ),
        with_meta AS (
          SELECT
            entity, week, clicks, impressions, sum_position_sum,
            ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1 AS week_idx,
            COUNT(*) OVER (PARTITION BY entity) AS n_weeks,
            (ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1)
              < (COUNT(*) OVER (PARTITION BY entity) / 2) AS is_first_half
          FROM bucketed
        ),
        agg AS (
          SELECT
            entity,
            SUM(clicks) AS totalClicks,
            SUM(impressions) AS totalImpressions,
            any_value(n_weeks) AS weeksWithData,
            COALESCE(regr_slope(clicks, CAST(week_idx AS DOUBLE)), 0.0) AS slope,
            SUM(CASE WHEN is_first_half THEN clicks ELSE 0 END) AS firstHalfClicks,
            SUM(CASE WHEN NOT is_first_half THEN clicks ELSE 0 END) AS secondHalfClicks,
            SUM(sum_position_sum) / NULLIF(SUM(impressions), 0) + 1 AS avgPosition,
            to_json(list({
              'week': strftime(week, '%Y-%m-%d'),
              'clicks': clicks,
              'impressions': impressions
            } ORDER BY week)) AS seriesJson
          FROM with_meta
          GROUP BY entity
          HAVING SUM(impressions) >= ? AND any_value(n_weeks) >= ?
        ),
        classified AS (
          SELECT
            *,
            CASE
              WHEN firstHalfClicks = 0 AND secondHalfClicks > 0 THEN 10.0
              WHEN firstHalfClicks = 0 THEN 1.0
              ELSE secondHalfClicks / firstHalfClicks
            END AS growthRatio
          FROM agg
        )
        SELECT
          entity,
          totalClicks,
          totalImpressions,
          weeksWithData,
          slope,
          growthRatio,
          avgPosition,
          CASE
            WHEN growthRatio >= 1.5 AND slope > 0 THEN 'accelerating'
            WHEN growthRatio >= 1.1 AND slope >= 0 THEN 'growing'
            WHEN growthRatio < 0.5 THEN 'cratering'
            WHEN growthRatio < 0.9 AND slope < 0 THEN 'declining'
            ELSE 'steady'
          END AS trend,
          seriesJson
        FROM classified
        ORDER BY
          CASE
            WHEN growthRatio >= 1.5 AND slope > 0 THEN 0
            WHEN growthRatio < 0.5 THEN 1
            WHEN growthRatio >= 1.1 AND slope >= 0 THEN 2
            WHEN growthRatio < 0.9 AND slope < 0 THEN 3
            ELSE 4
          END,
          ABS(growthRatio - 1) DESC,
          totalClicks DESC
        LIMIT ${Number(limit)}
      `,
      // Positional bindings: date range, then the two HAVING thresholds in `agg`.
      params: [
        startDate,
        endDate,
        minImpressions,
        minWeeksWithData
      ],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Converts SQL rows into typed results and counts entities per trend label.
   *
   * @param {unknown} rows - Query result; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Analyzer params; `weeks`, `endDate`, `startDate`
   *   and `dimension` are resolved with the same defaults as `buildSql` and
   *   echoed in `meta`.
   * @returns {{results: object[], meta: object}} Results keyed by `query`
   *   (keywords) or `page` (pages), plus summary metadata.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const weeks = params.weeks ?? 28;
    const endDate = params.endDate || defaultEndDate();
    const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    const results = arr.map((r) => {
      const series = parseJsonList(r.seriesJson).map((s) => ({
        week: str(s.week),
        clicks: num$4(s.clicks),
        impressions: num$4(s.impressions)
      }));
      return {
        // The entity column is exposed under a dimension-specific key.
        [dim === "keywords" ? "query" : "page"]: str(r.entity),
        totalClicks: num$4(r.totalClicks),
        totalImpressions: num$4(r.totalImpressions),
        weeksWithData: num$4(r.weeksWithData),
        slope: num$4(r.slope),
        growthRatio: num$4(r.growthRatio),
        avgPosition: num$4(r.avgPosition),
        trend: str(r.trend),
        series
      };
    });
    // Pre-seeded so every known label appears in the output even at 0.
    const counts = {
      accelerating: 0,
      growing: 0,
      steady: 0,
      declining: 0,
      cratering: 0
    };
    // A label outside the seeded set gets its own key, starting from 0.
    for (const r of results) counts[r.trend] = (counts[r.trend] ?? 0) + 1;
    return {
      results,
      meta: {
        total: results.length,
        dimension: dim,
        weeks: Number(weeks),
        startDate,
        endDate,
        counts
      }
    };
  }
});
4698
/** Row cap passed to the rows-path query when the caller gives no `limit`. */
const DEFAULT_ROW_LIMIT = 25e3;

/** Sorter for rows-path results, keyed on `impressions`. */
const sortRowResults = createSorter((item) => item.impressions, "impressions");

/**
 * "zero-click" analyzer: high-impression queries that rank well
 * (position <= `maxPosition`) yet have a low click-through rate.
 *
 * Defaults: `minImpressions` 1000, `maxCtr` 0.03, `maxPosition` 10.
 *
 * NOTE(review): the two paths are not identical. The SQL path keeps
 * `ctr < maxCtr`, returns one row per (query, page) and adds `missedClicks`.
 * The rows path keeps `ctr <= maxCtr`, keeps only the best-positioned row per
 * query and has no `missedClicks`.
 */
const zeroClickAnalyzer = defineAnalyzer({
  id: "zero-click",
  /**
   * Builds the aggregation query over the `page_keywords` partitions.
   *
   * @param {object} params - Optional `minImpressions`, `maxCtr`,
   *   `maxPosition`, `limit` (default 1000), `offset`; the period comes from
   *   `periodOf(params)`.
   * @returns {object} SQL spec (`sql`, positional `params`, `current` partitions).
   */
  buildSql(params) {
    const period = periodOf(params);
    const impressionsFloor = params.minImpressions ?? 1e3;
    const ctrCeiling = params.maxCtr ?? .03;
    const positionCeiling = params.maxPosition ?? 10;
    const pageSize = params.limit ?? 1e3;
    const pagination = paginateClause({
      limit: pageSize,
      offset: params.offset
    });
    return {
      sql: `
        WITH agg AS (
          SELECT
            query,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        )
        SELECT
          query, page, clicks, impressions, ctr, position,
          CAST(GREATEST(0, ROUND(impressions * (
            CASE
              WHEN position <= 1 THEN 0.30
              WHEN position <= 3 THEN 0.15
              WHEN position <= 5 THEN 0.08
              ELSE 0.04
            END
          )) - clicks) AS DOUBLE) AS missedClicks
        FROM agg
        WHERE position <= ? AND ctr < ?
        ORDER BY impressions DESC
        ${pagination}
      `,
      // Positional bindings, in placeholder order.
      params: [
        period.startDate,
        period.endDate,
        impressionsFloor,
        positionCeiling,
        ctrCeiling
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(period.startDate, period.endDate)
      }
    };
  },
  /**
   * Converts SQL rows into typed results and echoes the effective thresholds.
   *
   * @param {unknown} rows - Query result; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Analyzer params (thresholds only).
   * @returns {{results: object[], meta: object}} Typed rows and the thresholds used.
   */
  reduceSql(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const toResult = (r) => ({
      query: r.query == null ? "" : String(r.query),
      page: r.page == null ? "" : String(r.page),
      clicks: num$4(r.clicks),
      impressions: num$4(r.impressions),
      ctr: num$4(r.ctr),
      position: num$4(r.position),
      missedClicks: num$4(r.missedClicks)
    });
    return {
      results: input.map(toResult),
      meta: {
        total: input.length,
        minImpressions: params.minImpressions ?? 1e3,
        maxCtr: params.maxCtr ?? .03,
        maxPosition: params.maxPosition ?? 10
      }
    };
  },
  /**
   * Describes the (query, page) row query for the rows path.
   *
   * @param {object} params - Analyzer params; `limit` falls back to
   *   `DEFAULT_ROW_LIMIT`.
   * @returns {{rows: object}} Named query state.
   */
  buildRows(params) {
    const { startDate, endDate } = periodOf(params);
    const rowLimit = params.limit ?? DEFAULT_ROW_LIMIT;
    const state = gsc
      .select(query, page)
      .where(between(date, startDate, endDate))
      .limit(rowLimit)
      .getState();
    return { rows: state };
  },
  /**
   * Applies the thresholds in memory, keeps the best-positioned row per query,
   * sorts by impressions (descending) and paginates.
   *
   * @param {unknown} rows - Fetched rows; anything that is not an array is
   *   treated as empty.
   * @param {object} params - Thresholds plus `limit` / `offset` for pagination.
   * @returns {{results: object[], meta: {total: number, returned: number}}}
   *   The requested page and the counts before and after pagination.
   */
  reduceRows(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const impressionsFloor = params.minImpressions ?? 1e3;
    const ctrCeiling = params.maxCtr ?? .03;
    const positionCeiling = params.maxPosition ?? 10;

    const bestByQuery = /* @__PURE__ */ new Map();
    for (const row of input) {
      const excluded = row.impressions < impressionsFloor
        || row.position > positionCeiling
        || row.ctr > ctrCeiling;
      if (excluded) continue;
      // Keep the incumbent unless this row ranks strictly better.
      const incumbent = bestByQuery.get(row.query);
      if (incumbent && !(row.position < incumbent.position)) continue;
      bestByQuery.set(row.query, {
        query: row.query,
        page: row.page,
        clicks: row.clicks,
        impressions: row.impressions,
        ctr: row.ctr,
        position: row.position
      });
    }

    const ranked = sortRowResults(Array.from(bestByQuery.values()), "impressions", "desc");
    const pageOfResults = paginateInMemory(ranked, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: pageOfResults,
      meta: {
        total: ranked.length,
        returned: pageOfResults.length
      }
    };
  }
});
4816
/**
 * Error thrown when an analyzer needs capabilities the data source lacks.
 *
 * Instances expose `tool` (the analyzer id or type that was requested) and
 * `missing` (the list of capability names that were not available).
 */
var AnalyzerCapabilityError = class extends Error {
  /**
   * @param {string} tool - Analyzer identifier used in the message.
   * @param {string[]} missing - Capability names the source does not provide.
   */
  constructor(tool, missing) {
    const message = `analyzer "${tool}" requires capabilities [${missing.join(", ")}] not provided by source`;
    super(message);
    // Assigned in this order so the own enumerable keys stay tool, missing, name.
    Object.assign(this, {
      tool,
      missing,
      name: "AnalyzerCapabilityError"
    });
  }
};
4824
/**
 * Derives the set of capability names a source offers.
 *
 * `executeSql` is detected from the presence of a truthy `source.executeSql`;
 * every other capability comes from a truthy flag on `source.capabilities`.
 *
 * @param {object} source - Data source exposing `capabilities` and, optionally, `executeSql`.
 * @returns {Set<string>} Capability names, in the order they are checked.
 */
function sourceCapabilities(source) {
  // [flag on source.capabilities, capability name added to the set]
  const flagToCapability = [
    ["fileSets", "partitionedParquet"],
    ["regex", "regex"],
    ["windowTotals", "windowTotals"],
    ["comparisonJoin", "comparisonJoin"],
    ["attachedTables", "attachedTables"]
  ];
  const available = new Set();
  if (source.executeSql) available.add("executeSql");
  for (const [flag, capability] of flagToCapability) {
    if (source.capabilities[flag]) available.add(capability);
  }
  return available;
}
4834
/**
 * Verifies that every capability an analyzer requires is available.
 *
 * @param {{id: string, requires: string[]}} analyzer - Analyzer variant to check.
 * @param {Set<string>} caps - Capabilities offered by the source.
 * @throws {AnalyzerCapabilityError} When at least one required capability is absent;
 *   the error lists all absent capabilities, in the order they appear in `requires`.
 */
function assertSatisfies(analyzer, caps) {
  const isUnavailable = (capability) => !caps.has(capability);
  const missing = analyzer.requires.filter(isUnavailable);
  if (missing.length > 0) {
    throw new AnalyzerCapabilityError(analyzer.id, missing);
  }
}
4838
/**
 * Resolves the analyzer named by `params.type` for a source, checks the
 * source's capabilities against it, builds its plan and executes that plan.
 *
 * The SQL variant is preferred when the source offers either the
 * `executeSql` or the `attachedTables` capability.
 * NOTE(review): an unresolved analyzer is always reported as a missing
 * `executeSql` capability, which also covers ids the registry does not know -
 * confirm that this is the intended error for unknown analyzer types.
 *
 * @param {object} source - Data source (see `sourceCapabilities` for the fields read).
 * @param {object} params - Analyzer params; `type` selects the analyzer.
 * @param {object} registry - Registry exposing `resolveAnalyzer(id, sourceSupportsSql)`.
 * @returns {Promise<{results: unknown, meta: object}>}
 * @throws {AnalyzerCapabilityError} When no analyzer resolves or a required capability is missing.
 */
async function runAnalyzerFromSource(source, params, registry) {
  const caps = sourceCapabilities(source);
  const preferSql = caps.has("executeSql") || caps.has("attachedTables");
  const analyzer = registry.resolveAnalyzer(params.type, preferSql);
  if (!analyzer) {
    throw new AnalyzerCapabilityError(params.type, ["executeSql"]);
  }
  assertSatisfies(analyzer, caps);
  const plan = analyzer.build(params);
  // Plans of kind "rows" go through queryRows; every other kind is run as SQL.
  const execute = plan.kind === "rows" ? runRowsPlanAgainstSource : runSqlPlanAgainstSource;
  return execute(source, analyzer, plan, params);
}
4847
/**
 * Executes a rows plan: runs every named query through `source.queryRows`
 * concurrently, hands the keyed row sets to the analyzer's reducer, and
 * tags the resulting metadata with the analyzer type.
 *
 * @param {object} source - Data source exposing `queryRows(state)`.
 * @param {object} analyzer - Analyzer variant exposing `reduce(rowMap, ctx)`.
 * @param {{queries: Object<string, {state: unknown}>}} plan - Named query states.
 * @param {object} params - Analyzer params; `type` is copied into `meta.tool`.
 * @returns {Promise<{results: unknown, meta: object}>} Reducer output; keys in the
 *   reducer's `meta` override `tool` because they are spread last.
 */
async function runRowsPlanAgainstSource(source, analyzer, plan, params) {
  const fetchNamedRows = async ([name, spec]) => {
    const fetched = await source.queryRows(spec.state);
    return [name, fetched];
  };
  const namedRows = await Promise.all(Object.entries(plan.queries).map((entry) => fetchNamedRows(entry)));
  const { results: reducedResults, meta: reducedMeta } = analyzer.reduce(Object.fromEntries(namedRows), { params });
  return {
    results: reducedResults,
    meta: {
      tool: params.type,
      ...reducedMeta
    }
  };
}
4860
/**
 * Collects the file sets of a SQL plan under the keys used when executing it.
 *
 * `FILES` always holds `plan.current`; `FILES_PREV` is added when
 * `plan.previous` is truthy; each entry of `plan.extraFiles` is added as
 * `FILES_<key>`.
 *
 * @param {{current: unknown, previous?: unknown, extraFiles?: Object<string, unknown>}} plan
 * @returns {Object<string, unknown>} Map from placeholder key to file set.
 */
function fileSetsFor(plan) {
  const { current, previous, extraFiles } = plan;
  const sets = { FILES: current };
  if (previous) {
    sets.FILES_PREV = previous;
  }
  const extraEntries = extraFiles ? Object.entries(extraFiles) : [];
  for (const [suffix, files] of extraEntries) {
    sets[`FILES_${suffix}`] = files;
  }
  return sets;
}
4866
/**
 * Executes a SQL plan: runs the main statement, then each extra query one
 * after another, reduces the rows with the analyzer and tags the metadata.
 *
 * File sets are only passed to `executeSql` when the source declares the
 * `fileSets` capability. `meta.source` is set to "local" when the source
 * declares `localSource`; keys in the reducer's `meta` are spread last and
 * therefore win over `tool` and `source`.
 *
 * @param {object} source - Data source exposing `executeSql(sql, params, options)` and `capabilities`.
 * @param {object} analyzer - Analyzer variant exposing `id` and `reduce(rows, ctx)`.
 * @param {object} plan - SQL plan (`sql`, `params`, optional `extraQueries`, `requiresAttachedTables`, file sets).
 * @param {object} params - Analyzer params; `type` is copied into `meta.tool`.
 * @returns {Promise<{results: unknown, meta: object}>}
 * @throws {AnalyzerCapabilityError} When the source cannot execute SQL, or the plan
 *   needs attached tables the source does not provide.
 */
async function runSqlPlanAgainstSource(source, analyzer, plan, params) {
  if (!source.executeSql) {
    throw new AnalyzerCapabilityError(analyzer.id, ["executeSql"]);
  }
  if (plan.requiresAttachedTables && !source.capabilities.attachedTables) {
    throw new AnalyzerCapabilityError(analyzer.id, ["attachedTables"]);
  }
  const fileSets = source.capabilities.fileSets ? fileSetsFor(plan) : undefined;
  // A fresh options object is built for every call, as each call site did before.
  const executionOptions = () => (fileSets ? { fileSets } : undefined);
  const mainRows = await source.executeSql(plan.sql, plan.params, executionOptions());
  const extras = {};
  // Extra queries are awaited one at a time, in plan order.
  for (const extraQuery of plan.extraQueries || []) {
    const extraRows = await source.executeSql(extraQuery.sql, extraQuery.params, executionOptions());
    extras[extraQuery.name] = extraRows;
  }
  const { results: reducedResults, meta: reducedMeta } = analyzer.reduce(mainRows, {
    params,
    extras
  });
  const originMeta = source.capabilities.localSource ? { source: "local" } : {};
  return {
    results: reducedResults,
    meta: {
      tool: params.type,
      ...originMeta,
      ...reducedMeta
    }
  };
}
4890
/**
 * Creates a registry that groups analyzer variants by id.
 *
 * Each id maps to an entry holding an optional `rows` variant and an optional
 * `sql` variant. When the same id is registered twice for one variant, the
 * later analyzer replaces the earlier one.
 *
 * @param {{rows?: object[], sql?: object[]}} [init] - Analyzer variants to register.
 * @returns {object} Registry with:
 *   - `listAnalyzerIds()`: all registered ids, sorted;
 *   - `getAnalyzerVariants(id)`: the `{rows, sql}` entry, or undefined;
 *   - `resolveAnalyzer(id, sourceSupportsSql)`: the SQL variant (falling back to
 *     rows) when SQL is supported, otherwise the rows variant;
 *   - `listAnalyzersFor(sourceSupportsSql)`: resolved analyzers in id order;
 *   - `listAnalyzerIdsFor(source)`: ids that resolve for a source, where SQL
 *     support means `source.executeSql` is a function.
 */
function createAnalyzerRegistry(init = {}) {
  const variantsById = new Map();
  const register = (variant, analyzers) => {
    for (const analyzer of analyzers) {
      let slot = variantsById.get(analyzer.id);
      if (slot == null) {
        slot = {};
        variantsById.set(analyzer.id, slot);
      }
      slot[variant] = analyzer;
    }
  };
  register("rows", init.rows ?? []);
  register("sql", init.sql ?? []);

  const listAnalyzerIds = () => Array.from(variantsById.keys()).sort();
  const getAnalyzerVariants = (id) => variantsById.get(id);
  const resolveAnalyzer = (id, sourceSupportsSql) => {
    const variants = variantsById.get(id);
    if (!variants) return undefined;
    return sourceSupportsSql ? variants.sql ?? variants.rows : variants.rows;
  };
  const listAnalyzersFor = (sourceSupportsSql) => {
    const resolved = listAnalyzerIds().map((id) => resolveAnalyzer(id, sourceSupportsSql));
    // Ids with no usable variant resolve to undefined and are dropped.
    return resolved.filter((analyzer) => Boolean(analyzer));
  };
  const listAnalyzerIdsFor = (source) => {
    const sourceSupportsSql = typeof source.executeSql === "function";
    return listAnalyzerIds().filter((id) => Boolean(resolveAnalyzer(id, sourceSupportsSql)));
  };
  return {
    listAnalyzerIds,
    getAnalyzerVariants,
    resolveAnalyzer,
    listAnalyzersFor,
    listAnalyzerIdsFor
  };
}
4932
/**
 * Rows variants (the `.rows` member) of the analyzers listed below, in
 * declaration order.
 */
const ROW_ANALYZERS = [
  strikingDistanceAnalyzer,
  opportunityAnalyzer,
  brandAnalyzer,
  concentrationAnalyzer,
  clusteringAnalyzer,
  seasonalityAnalyzer,
  moversAnalyzer,
  decayAnalyzer,
  cannibalizationAnalyzer,
  zeroClickAnalyzer
].map((analyzer) => analyzer.rows);
4944
+ export { AnalyzerCapabilityError, ROW_ANALYZERS, bayesianCtrAnalyzer, bipartitePagerankAnalyzer, brandAnalyzer, cannibalizationAnalyzer, changePointAnalyzer, clampLimit, clampOffset, clusteringAnalyzer, concentrationAnalyzer, contentVelocityAnalyzer, createAnalyzerRegistry, ctrAnomalyAnalyzer, ctrCurveAnalyzer, darkTrafficAnalyzer, dataDetailAnalyzer, dataQueryAnalyzer, datesQueryState, decayAnalyzer, defineAnalyzer, deviceGapAnalyzer, intentAtlasAnalyzer, keywordBreadthAnalyzer, keywordsQueryState, longTailAnalyzer, moversAnalyzer, opportunityAnalyzer, pagesQueryState, paginateClause, paginateInMemory, positionDistributionAnalyzer, positionVolatilityAnalyzer, queryMigrationAnalyzer, resolveSort, runAnalyzerFromSource, seasonalityAnalyzer, stlDecomposeAnalyzer, strikingDistanceAnalyzer, survivalAnalyzer, trendsAnalyzer, zeroClickAnalyzer };