@gscdump/analysis 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1957 @@
1
+ import { SQL_ANALYZERS } from "@gscdump/engine-duckdb-node";
2
+ import { enumeratePartitions } from "@gscdump/engine/planner";
3
+ import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
4
+ import { between, date, gsc, page, query } from "gscdump/query";
5
+ import { daysAgo } from "gscdump";
6
/**
 * Builds an analyzer registry keyed by analyzer id. Each id can carry a
 * row-based variant (`rows`), a SQL-based variant (`sql`), or both.
 *
 * @param {{rows?: Array<{id: string}>, sql?: Array<{id: string}>}} [init]
 *   Variants to register; `rows` entries are registered before `sql` entries.
 * @returns {object} Lookup helpers: `listAnalyzerIds`, `getAnalyzerVariants`,
 *   `resolveAnalyzer`, `listAnalyzersFor`, `listAnalyzerIdsFor`.
 */
function createAnalyzerRegistry(init = {}) {
  const variantsById = new Map();

  // Later registrations for the same id and kind replace earlier ones.
  const register = (kind, analyzers) => {
    for (const analyzer of analyzers ?? []) {
      const slot = variantsById.get(analyzer.id) ?? {};
      slot[kind] = analyzer;
      variantsById.set(analyzer.id, slot);
    }
  };
  register("rows", init.rows);
  register("sql", init.sql);

  /** All registered ids, sorted with the default string ordering. */
  function listAnalyzerIds() {
    return Array.from(variantsById.keys()).sort();
  }

  /** The raw `{ rows?, sql? }` entry for an id, or undefined. */
  function getAnalyzerVariants(id) {
    return variantsById.get(id);
  }

  /** Prefers the SQL variant when the source supports SQL, else the rows variant. */
  function resolveAnalyzer(id, sourceSupportsSql) {
    const slot = variantsById.get(id);
    if (slot === undefined) {
      return undefined;
    }
    return sourceSupportsSql ? slot.sql ?? slot.rows : slot.rows;
  }

  /** Every analyzer that resolves for the given capability, in id order. */
  function listAnalyzersFor(sourceSupportsSql) {
    return listAnalyzerIds()
      .map((id) => resolveAnalyzer(id, sourceSupportsSql))
      .filter(Boolean);
  }

  /** Ids that resolve for a source; SQL support is detected via `executeSql`. */
  function listAnalyzerIdsFor(source) {
    const sourceSupportsSql = typeof source.executeSql === "function";
    return listAnalyzerIds().filter((id) => Boolean(resolveAnalyzer(id, sourceSupportsSql)));
  }

  return {
    listAnalyzerIds,
    getAnalyzerVariants,
    resolveAnalyzer,
    listAnalyzersFor,
    listAnalyzerIdsFor,
  };
}
48
/** Row cap used by the query-state builders when the caller passes no limit. */
const DEFAULT_LIMIT$1 = 25000;

/**
 * Query state selecting the `query` and `page` dimensions for a date range.
 *
 * @param {{startDate: string, endDate: string}} period Date range to filter on.
 * @param {number} [limit] Maximum rows; defaults to DEFAULT_LIMIT$1.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
  const inPeriod = between(date, period.startDate, period.endDate);
  const builder = gsc.select(query, page).where(inPeriod).limit(limit);
  return builder.getState();
}
52
/**
 * Query state selecting only the `page` dimension for a date range.
 *
 * @param {{startDate: string, endDate: string}} period Date range to filter on.
 * @param {number} [limit] Maximum rows; defaults to DEFAULT_LIMIT$1.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const inPeriod = between(date, period.startDate, period.endDate);
  const builder = gsc.select(page).where(inPeriod).limit(limit);
  return builder.getState();
}
55
/**
 * Query state selecting only the `date` dimension for a date range.
 *
 * @param {{startDate: string, endDate: string}} period Date range to filter on.
 * @param {number} [limit] Maximum rows; defaults to DEFAULT_LIMIT$1.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const inPeriod = between(date, period.startDate, period.endDate);
  const builder = gsc.select(date).where(inPeriod).limit(limit);
  return builder.getState();
}
58
/** Capabilities required by the SQL variant unless `sqlRequires` overrides them. */
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Turns one analyzer description into its SQL and/or rows variants.
 *
 * A variant is produced only when both its builder (`buildSql` / `buildRows`)
 * and a reducer (`reduceSql` / `reduceRows`, falling back to `reduce`) exist.
 *
 * @param {object} opts Analyzer description (`id`, builders, reducers and the
 *   optional `sqlRequires` / `rowsRequires` capability lists).
 * @returns {{id: *, sql: object|undefined, rows: object|undefined}}
 * @throws {Error} When a builder is supplied without any usable reducer.
 */
function defineAnalyzer(opts) {
  const {
    id,
    reduce,
    reduceSql,
    reduceRows,
    buildSql,
    buildRows,
    sqlRequires = DEFAULT_SQL_REQUIRES,
    rowsRequires = [],
  } = opts;

  const sqlReducer = reduceSql ?? reduce;
  const rowsReducer = reduceRows ?? reduce;
  if (buildSql && !sqlReducer) {
    throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
  }
  if (buildRows && !rowsReducer) {
    throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);
  }

  // Reducers get an array when possible: either the rows themselves or the
  // only value of a single-key result object; anything else passes through.
  const unwrapRows = (rows) => {
    if (Array.isArray(rows)) {
      return rows;
    }
    return pickSingle(rows) ?? rows;
  };

  let sqlVariant;
  if (buildSql && sqlReducer) {
    sqlVariant = {
      id,
      requires: sqlRequires,
      build(params) {
        const {
          sql,
          params: sqlParams,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables,
        } = buildSql(params);
        return {
          kind: "sql",
          sql,
          params: sqlParams,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables,
        };
      },
      reduce(rows, ctx) {
        const { params, extras } = ctx;
        const { results, meta } = sqlReducer(unwrapRows(rows), params, { extras });
        return { results, meta };
      },
    };
  }

  let rowsVariant;
  if (buildRows && rowsReducer) {
    rowsVariant = {
      id,
      requires: rowsRequires,
      build(params) {
        const wrapped = Object.entries(buildRows(params)).map(([name, state]) => [name, { state }]);
        return {
          kind: "rows",
          queries: Object.fromEntries(wrapped),
        };
      },
      reduce(rows, ctx) {
        const { params } = ctx;
        const { results, meta } = rowsReducer(unwrapRows(rows), params, {});
        return { results, meta };
      },
    };
  }

  return {
    id,
    sql: sqlVariant,
    rows: rowsVariant,
  };
}
114
/**
 * Returns the value of the only own enumerable key of `rows`, or undefined
 * when the object has zero keys or more than one.
 *
 * @param {object} rows Keyed result object.
 * @returns {*} The single value, or undefined.
 */
function pickSingle(rows) {
  const names = Object.keys(rows);
  if (names.length !== 1) {
    return undefined;
  }
  const [onlyName] = names;
  return rows[onlyName];
}
118
/**
 * Default end of the analysis window: `daysAgo(3)`.
 *
 * @returns {*} Whatever `daysAgo` returns for 3 days back.
 */
function defaultEndDate() {
  const endOffsetDays = 3;
  return daysAgo(endOffsetDays);
}
121
/**
 * Default start of the analysis window: `daysAgo(31)`.
 *
 * @returns {*} Whatever `daysAgo` returns for 31 days back.
 */
function defaultStartDate() {
  const startOffsetDays = 31;
  return daysAgo(startOffsetDays);
}
124
/**
 * Resolves the analysis period from params, substituting the default start
 * and end dates for any falsy value (including the empty string).
 *
 * @param {{startDate?: string, endDate?: string}} params
 * @returns {{startDate: *, endDate: *}}
 */
function periodOf(params) {
  const startDate = params.startDate ? params.startDate : defaultStartDate();
  const endDate = params.endDate ? params.endDate : defaultEndDate();
  return { startDate, endDate };
}
130
/**
 * Resolves the current and previous periods for a comparison analysis.
 *
 * @param {object} params Must carry truthy `prevStartDate` and `prevEndDate`;
 *   `type` is only used in the error message.
 * @returns {{current: object, previous: {startDate: *, endDate: *}}}
 * @throws {Error} When either previous-period bound is missing.
 */
function comparisonOf(params) {
  const { prevStartDate, prevEndDate } = params;
  const hasPreviousPeriod = Boolean(prevStartDate) && Boolean(prevEndDate);
  if (!hasPreviousPeriod) {
    throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
  }
  const current = periodOf(params);
  const previous = { startDate: prevStartDate, endDate: prevEndDate };
  return { current, previous };
}
140
/**
 * Coerces a metric value to a JavaScript number.
 *
 * Numbers pass through, bigints are converted, null/undefined become 0, and
 * everything else goes through `Number()` (which may yield NaN).
 *
 * @param {*} v Raw value.
 * @returns {number}
 */
function num(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default:
      return v == null ? 0 : Number(v);
  }
}
146
/**
 * Indexes rows into a Map, optionally skipping rows rejected by `filter`.
 * When two rows share a key, the later row wins.
 *
 * @param {Iterable<*>} rows Rows to index.
 * @param {(row: *) => *} key Derives the map key.
 * @param {(row: *) => *} value Derives the stored value.
 * @param {(row: *) => boolean} [filter] Keep-predicate; all rows kept if omitted.
 * @returns {Map<*, *>}
 */
function buildPeriodMap(rows, key, value, filter) {
  const periodMap = new Map();
  for (const row of rows) {
    const accepted = !filter || filter(row);
    if (accepted) {
      periodMap.set(key(row), value(row));
    }
  }
  return periodMap;
}
154
/**
 * Creates a non-mutating sorter whose numeric sort value is read through
 * `getValue(item, metric)`.
 *
 * @param {(item: *, metric: string) => number} getValue Metric accessor.
 * @param {string} defaultMetric Metric used when the caller passes none.
 * @param {"asc"|"desc"} [defaultOrder="desc"] Order used when none is passed.
 * @returns {(items: Iterable<*>, sortBy?: string, sortOrder?: string) => Array<*>}
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
  return function sortItems(items, sortBy = defaultMetric, sortOrder = defaultOrder) {
    // Only the exact string "desc" flips the order; anything else is ascending.
    const direction = sortOrder === "desc" ? -1 : 1;
    const compare = (left, right) => (getValue(left, sortBy) - getValue(right, sortBy)) * direction;
    return Array.from(items).sort(compare);
  };
}
160
/**
 * Creates a non-mutating sorter that reads the metric directly off each item
 * and takes the sort direction from a per-metric table.
 *
 * @param {string} defaultMetric Metric used when the caller passes none.
 * @param {Object<string, "asc"|"desc">} orderByMetric Direction per metric;
 *   metrics not mapped to "desc" sort ascending.
 * @returns {(items: Iterable<*>, sortBy?: string) => Array<*>}
 */
function createMetricSorter(defaultMetric, orderByMetric) {
  return function sortByMetric(items, sortBy = defaultMetric) {
    const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
    const compare = (left, right) => (left[sortBy] - right[sortBy]) * direction;
    return Array.from(items).sort(compare);
  };
}
166
/**
 * Backslash-escapes regular-expression metacharacters so the text can be
 * embedded literally inside a pattern.
 *
 * @param {string} s Literal text.
 * @returns {string} Text with each of `. * + ? ^ $ { } ( ) | [ ] \` escaped.
 */
function escapeRegexAlt(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
169
/**
 * Stringifies a value, mapping null and undefined to the empty string.
 *
 * @param {*} v Raw value.
 * @returns {string}
 */
function str$6(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
172
/**
 * Splits keyword rows into brand and non-brand segments by case-insensitive
 * substring match against the brand terms, and totals clicks/impressions.
 *
 * Blank brand terms are ignored: an empty string is a substring of every
 * query, so keeping one would classify every row as brand. A row whose
 * `query` is null/undefined is treated as an empty query instead of throwing.
 *
 * @param {Iterable<object>} keywords Rows with `query`, `clicks`, `impressions`.
 * @param {{brandTerms: string[], minImpressions?: number}} options
 *   `minImpressions` defaults to 10; rows below it are dropped entirely.
 * @returns {{brand: object[], nonBrand: object[], summary: object}}
 */
function analyzeBrandSegmentation(keywords, options) {
  const { brandTerms, minImpressions = 10 } = options;
  const needles = brandTerms
    .map((term) => String(term ?? "").toLowerCase())
    .filter((term) => term.trim().length > 0);

  const brand = [];
  const nonBrand = [];
  for (const row of keywords) {
    if (num(row.impressions) < minImpressions) continue;
    const haystack = String(row.query ?? "").toLowerCase();
    const isBrand = needles.some((term) => haystack.includes(term));
    (isBrand ? brand : nonBrand).push(row);
  }

  const sumOf = (rows, field) => rows.reduce((sum, row) => sum + num(row[field]), 0);
  const brandClicks = sumOf(brand, "clicks");
  const nonBrandClicks = sumOf(nonBrand, "clicks");
  const totalClicks = brandClicks + nonBrandClicks;

  return {
    brand,
    nonBrand,
    summary: {
      brandClicks,
      nonBrandClicks,
      // Share is 0 rather than NaN when there are no clicks at all.
      brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
      brandImpressions: sumOf(brand, "impressions"),
      nonBrandImpressions: sumOf(nonBrand, "impressions"),
    },
  };
}
197
/**
 * Keeps only usable brand terms: non-blank strings. A blank term would add an
 * empty regex alternative (and an empty substring), matching every query.
 *
 * @param {*} terms Raw `params.brandTerms`.
 * @returns {string[]} Usable terms; empty when none qualify.
 */
function usableBrandTerms(terms) {
  if (!Array.isArray(terms)) return [];
  return terms.filter((term) => typeof term === "string" && term.trim() !== "");
}

/**
 * Brand vs non-brand analyzer.
 *
 * SQL variant: aggregates `page_keywords` per (query, url), then tags each row
 * via a case-insensitive regex built from the escaped brand terms.
 * Rows variant: fetches keyword rows and segments them in JavaScript.
 * Both variants throw "Brand analysis requires brandTerms" when no usable
 * (non-blank) brand term is supplied.
 */
const brandAnalyzer = defineAnalyzer({
  id: "brand",
  buildSql(params) {
    const brandTerms = usableBrandTerms(params.brandTerms);
    if (brandTerms.length === 0) throw new Error("Brand analysis requires brandTerms");
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const limit = params.limit ?? 1e4;
    // Terms are regex-escaped so they match literally inside the alternation.
    const regex = `(${brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
)
SELECT
query, page, clicks, impressions, ctr, position,
CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
FROM agg
ORDER BY clicks DESC
LIMIT ${Number(limit)}
`,
      params: [
        startDate,
        endDate,
        minImpressions,
        regex,
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      query: str$6(r.query),
      page: r.page == null ? void 0 : str$6(r.page),
      clicks: num(r.clicks),
      impressions: num(r.impressions),
      ctr: num(r.ctr),
      position: num(r.position),
      segment: str$6(r.segment),
    }));
    let brandClicks = 0;
    let nonBrandClicks = 0;
    let brandImpressions = 0;
    let nonBrandImpressions = 0;
    for (const r of normalized) {
      if (r.segment === "brand") {
        brandClicks += r.clicks;
        brandImpressions += r.impressions;
      } else {
        nonBrandClicks += r.clicks;
        nonBrandImpressions += r.impressions;
      }
    }
    const totalClicks = brandClicks + nonBrandClicks;
    return {
      results: normalized,
      meta: {
        total: normalized.length,
        summary: {
          brandClicks,
          nonBrandClicks,
          brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
          brandImpressions,
          nonBrandImpressions,
        },
      },
    };
  },
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  reduceRows(rows, params) {
    const brandTerms = usableBrandTerms(params.brandTerms);
    if (brandTerms.length === 0) throw new Error("Brand analysis requires brandTerms");
    const result = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
      brandTerms,
      minImpressions: params.minImpressions,
    });
    return {
      results: [
        ...result.brand.map((r) => ({ ...r, segment: "brand" })),
        ...result.nonBrand.map((r) => ({ ...r, segment: "non-brand" })),
      ],
      meta: { summary: result.summary },
    };
  },
});
296
/**
 * Sorter for row-based cannibalization results (descending by default).
 *
 * Supported metrics: "clicks", "impressions", "positionSpread", "pageCount".
 * Any other metric falls back to total clicks; without that fallback the
 * accessor would return undefined and the comparator would produce NaN,
 * leaving the order unspecified.
 */
const sortRowResults$1 = createSorter((item, metric) => {
  switch (metric) {
    case "impressions":
      return item.totalImpressions;
    case "positionSpread":
      return item.positionSpread;
    case "pageCount":
      return item.pages.length;
    case "clicks":
    default:
      return item.totalClicks;
  }
}, "clicks");
304
/**
 * Stringifies a value; null and undefined become "".
 *
 * @param {*} v Raw value.
 * @returns {string}
 */
function str$5(v) {
  return String(v ?? "");
}
307
/**
 * Normalizes a list column that may arrive as an array or as JSON text.
 *
 * @param {*} v Array, JSON string, or anything else.
 * @returns {Array<*>} The array itself, the parsed array, or [] when the
 *   value is empty, not a string, or parses to a non-array.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$4(v) {
  if (Array.isArray(v)) {
    return v;
  }
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) {
    return [];
  }
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
315
/**
 * Finds queries served by several pages whose positions lie close together.
 *
 * Metric fields are coerced with `num()` before any comparison or sum, as the
 * sibling analyzers do, so bigint or string metrics cannot throw on mixed
 * arithmetic or silently concatenate.
 *
 * @param {Iterable<object>} rows Rows with `query`, `page`, `clicks`,
 *   `impressions`, `ctr`, `position`.
 * @param {object} [options]
 * @param {number} [options.minImpressions=10] Rows below this are ignored.
 * @param {number} [options.maxPositionSpread=10] Queries whose max-min
 *   position exceeds this are skipped.
 * @param {number} [options.minPages=2] Minimum pages per query.
 * @param {string} [options.sortBy="clicks"] Metric for the final sort.
 * @param {"asc"|"desc"} [options.sortOrder="desc"] Direction of the sort.
 * @returns {Array<{query: *, pages: object[], totalClicks: number,
 *   totalImpressions: number, positionSpread: number}>}
 */
function analyzeCannibalization(rows, options = {}) {
  const {
    minImpressions = 10,
    maxPositionSpread = 10,
    minPages = 2,
    sortBy = "clicks",
    sortOrder = "desc",
  } = options;

  const pagesByQuery = new Map();
  for (const row of rows) {
    const impressions = num(row.impressions);
    if (impressions < minImpressions) continue;
    const entry = {
      page: row.page,
      clicks: num(row.clicks),
      impressions,
      ctr: num(row.ctr),
      position: num(row.position),
    };
    const pages = pagesByQuery.get(row.query);
    if (pages) pages.push(entry);
    else pagesByQuery.set(row.query, [entry]);
  }

  const results = [];
  for (const [query, pages] of pagesByQuery) {
    if (pages.length < minPages) continue;
    pages.sort((a, b) => b.clicks - a.clicks);

    let best = Infinity;
    let worst = -Infinity;
    let totalClicks = 0;
    let totalImpressions = 0;
    for (const p of pages) {
      best = Math.min(best, p.position);
      worst = Math.max(worst, p.position);
      totalClicks += p.clicks;
      totalImpressions += p.impressions;
    }
    const positionSpread = worst - best;
    if (positionSpread > maxPositionSpread) continue;

    results.push({ query, pages, totalClicks, totalImpressions, positionSpread });
  }
  return sortRowResults$1(results, sortBy, sortOrder);
}
347
/**
 * Keyword cannibalization analyzer.
 *
 * SQL variant: aggregates `page_keywords` per (query, url), keeps queries with
 * at least two competing URLs, ranks the URLs per query, and derives an HHI,
 * a fragmentation score, a "stolen clicks" estimate and a 0-100 severity.
 * Its reducer also builds a URL graph: nodes are competing URLs, edges connect
 * URLs that compete on the same query.
 * Rows variant: delegates to `analyzeCannibalization`.
 */
const cannibalizationAnalyzer = defineAnalyzer({
  id: "cannibalization",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const minCompetitors = 2;
    // A query must reach twice the per-URL impression floor overall.
    const minQueryImpressions = minImpressions * 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
query_totals AS (
SELECT
query,
SUM(impressions) AS total_impressions,
SUM(clicks) AS total_clicks,
COUNT(*) AS competitor_count
FROM agg
GROUP BY query
HAVING COUNT(*) >= ? AND SUM(impressions) >= ?
),
ranked AS (
SELECT
a.query,
a.url,
a.clicks,
a.impressions,
a.ctr,
a.position,
a.impressions / NULLIF(t.total_impressions, 0) AS share,
ROW_NUMBER() OVER (
PARTITION BY a.query
ORDER BY a.impressions DESC, a.clicks DESC, a.url ASC
) AS rnk
FROM agg a
JOIN query_totals t USING (query)
),
leader AS (
SELECT query, url AS leader_url, ctr AS leader_ctr, position AS leader_position
FROM ranked WHERE rnk = 1
),
events AS (
SELECT
r.query,
any_value(l.leader_url) AS leader_url,
any_value(l.leader_ctr) AS leader_ctr,
any_value(l.leader_position) AS leader_position,
SUM(POWER(r.share * 100.0, 2)) AS hhi,
SUM(CASE
WHEN r.rnk > 1 AND l.leader_ctr > r.ctr
THEN (l.leader_ctr - r.ctr) * r.impressions
ELSE 0.0
END) AS stolen_clicks,
to_json(list({
'url': r.url,
'clicks': r.clicks,
'impressions': r.impressions,
'ctr': r.ctr,
'position': r.position,
'share': r.share,
'rank': r.rnk
} ORDER BY r.rnk)) AS competitors
FROM ranked r
JOIN leader l USING (query)
GROUP BY r.query
)
SELECT
e.query AS keyword,
t.total_impressions AS totalImpressions,
t.total_clicks AS totalClicks,
t.competitor_count AS competitorCount,
e.leader_url AS leaderUrl,
e.leader_ctr AS leaderCtr,
e.leader_position AS leaderPosition,
e.hhi AS hhi,
GREATEST(0.0, 1.0 - e.hhi / 10000.0) AS fragmentation,
e.stolen_clicks AS stolenClicks,
e.competitors AS competitors,
CAST(ROUND(LEAST(100.0,
100.0 * POWER(
GREATEST(1.0 - e.hhi / 10000.0, 0.0)
* LEAST(e.stolen_clicks / GREATEST(t.total_clicks + e.stolen_clicks, 1.0), 1.0)
* LEAST(LOG10(GREATEST(t.total_impressions, 10.0)) / 5.0, 1.0),
1.0 / 3.0
)
)) AS DOUBLE) AS severity
FROM events e
JOIN query_totals t USING (query)
ORDER BY severity DESC, stolenClicks DESC
LIMIT ${Number(limit)}
`,
      params: [
        startDate,
        endDate,
        minImpressions,
        minCompetitors,
        minQueryImpressions,
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows) {
    const events = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$5(r.keyword),
      totalImpressions: num(r.totalImpressions),
      totalClicks: num(r.totalClicks),
      competitorCount: num(r.competitorCount),
      leaderUrl: str$5(r.leaderUrl),
      leaderCtr: num(r.leaderCtr),
      leaderPosition: num(r.leaderPosition),
      hhi: num(r.hhi),
      fragmentation: num(r.fragmentation),
      stolenClicks: num(r.stolenClicks),
      severity: num(r.severity),
      competitors: parseJsonList$4(r.competitors).map((c) => ({
        url: str$5(c.url),
        clicks: num(c.clicks),
        impressions: num(c.impressions),
        ctr: num(c.ctr),
        position: num(c.position),
        share: num(c.share),
        rank: num(c.rank),
      })),
    }));

    const nodeAgg = new Map();
    const edgeAgg = new Map();
    for (const ev of events) {
      for (const c of ev.competitors) {
        const n = nodeAgg.get(c.url) ?? {
          impressions: 0,
          clicks: 0,
          queries: new Set(),
        };
        n.impressions += c.impressions;
        n.clicks += c.clicks;
        n.queries.add(ev.keyword);
        nodeAgg.set(c.url, n);
      }
      for (let i = 0; i < ev.competitors.length; i++) {
        for (let j = i + 1; j < ev.competitors.length; j++) {
          const a = ev.competitors[i];
          const b = ev.competitors[j];
          // Edges are undirected: order the endpoints so (a, b) == (b, a).
          const [src, tgt] = a.url < b.url ? [a.url, b.url] : [b.url, a.url];
          // Serialize the pair unambiguously. Plain concatenation lets
          // different pairs collide ("a"+"bc" === "ab"+"c") and merge edges.
          const key = JSON.stringify([src, tgt]);
          const edge = edgeAgg.get(key) ?? {
            source: src,
            target: tgt,
            weight: 0,
            queries: 0,
          };
          // Edge weight grows by the smaller of the two impression counts.
          edge.weight += Math.min(a.impressions, b.impressions);
          edge.queries += 1;
          edgeAgg.set(key, edge);
        }
      }
    }

    const nodes = [...nodeAgg.entries()].map(([url, n]) => ({
      url,
      impressions: n.impressions,
      clicks: n.clicks,
      queryCount: n.queries.size,
    }));
    const edges = [...edgeAgg.values()];
    const avgFragmentation = events.length > 0
      ? events.reduce((s, e) => s + e.fragmentation, 0) / events.length
      : 0;
    const totalStolenClicks = events.reduce((s, e) => s + e.stolenClicks, 0);
    return {
      results: events,
      meta: {
        total: events.length,
        totalStolenClicks,
        avgFragmentation,
        graph: {
          nodes,
          edges,
        },
      },
    };
  },
  buildRows(params) {
    return { rows: keywordsQueryState(periodOf(params), params.limit) };
  },
  reduceRows(rows, params) {
    const results = analyzeCannibalization(Array.isArray(rows) ? rows : [], {
      minImpressions: params.minImpressions,
      maxPositionSpread: params.maxPositionSpread,
      minPages: params.minPages,
    });
    return {
      results,
      meta: { total: results.length },
    };
  },
});
556
/**
 * Regex source used by the SQL clustering path: an intent prefix at the start
 * of the query, followed by whitespace or the end of the string.
 */
const INTENT_PREFIXES_REGEX = "^(how to|what is|what are|why is|why do|where to|when to|best|top|vs|versus|compare|review|buy|cheap|free|near me)(\\s|$)";

/** The same intent prefixes, in the same order, for the JavaScript path. */
const INTENT_PREFIXES = [
  "how to",
  "what is",
  "what are",
  "why is",
  "why do",
  "where to",
  "when to",
  "best",
  "top",
  "vs",
  "versus",
  "compare",
  "review",
  "buy",
  "cheap",
  "free",
  "near me",
];

/** Splits on runs of whitespace. */
const WHITESPACE_RE = /\s+/;

/**
 * Stringifies a value; null and undefined become "".
 *
 * @param {*} v Raw value.
 * @returns {string}
 */
function str$4(v) {
  return v == null ? "" : String(v);
}

/**
 * Normalizes a list column that may arrive as an array or as JSON text.
 *
 * @param {*} v Array, JSON string, or anything else.
 * @returns {Array<*>} The array, the parsed array, or [] otherwise.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$3(v) {
  if (Array.isArray(v)) return v;
  if (typeof v === "string" && v.length > 0) {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  }
  return [];
}

/**
 * Returns the intent prefix a keyword starts with, or null.
 *
 * The prefix must be followed by whitespace or the end of the keyword, the
 * same boundary INTENT_PREFIXES_REGEX enforces. A bare `startsWith(prefix)`
 * would classify "bestiary" as "best" and "versus x" as "vs".
 *
 * @param {string} keyword Search query.
 * @returns {string|null} The matching lower-case prefix, or null.
 */
function extractIntentPrefix(keyword) {
  const lower = keyword.toLowerCase();
  for (const prefix of INTENT_PREFIXES) {
    if (!lower.startsWith(prefix)) continue;
    // charAt returns "" past the end, i.e. the keyword is exactly the prefix.
    const next = lower.charAt(prefix.length);
    if (next === "" || /\s/.test(next)) return prefix;
  }
  return null;
}
593
/**
 * Returns the first `wordCount` lower-cased words of a keyword joined by a
 * single space, or null unless the keyword has at least one more word than
 * that.
 *
 * @param {string} keyword Search query.
 * @param {number} [wordCount=2] Number of leading words to keep.
 * @returns {string|null}
 */
function extractWordPrefix(keyword, wordCount = 2) {
  const tokens = keyword
    .toLowerCase()
    .split(WHITESPACE_RE)
    .filter((token) => token.length > 0);
  const hasExtraWord = tokens.length >= wordCount + 1;
  return hasExtraWord ? tokens.slice(0, wordCount).join(" ") : null;
}
598
/**
 * Groups keywords into clusters by intent prefix and/or leading two words.
 *
 * Intent grouping runs first; keywords that have an intent prefix are never
 * considered for word-prefix grouping. Groups smaller than `minClusterSize`
 * are not emitted as clusters.
 *
 * `unclustered` contains every eligible keyword that is not part of an
 * emitted cluster. This includes keywords whose intent group was too small;
 * marking them as clustered at grouping time would drop them from both
 * outputs.
 *
 * @param {Array<object>} keywords Rows with `query`, `clicks`, `impressions`,
 *   `position`.
 * @param {object} [options]
 * @param {number} [options.minClusterSize=2] Minimum keywords per cluster.
 * @param {number} [options.minImpressions=10] Rows below this are ignored.
 * @param {"intent"|"prefix"|"both"} [options.clusterBy="both"] Strategies.
 * @returns {{clusters: object[], unclustered: object[]}} Clusters sorted by
 *   total clicks, descending.
 */
function analyzeClustering(keywords, options = {}) {
  const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
  const useIntent = clusterBy === "intent" || clusterBy === "both";
  const usePrefix = clusterBy === "prefix" || clusterBy === "both";
  const eligible = keywords.filter((k) => num(k.impressions) >= minImpressions);

  const groups = new Map();
  // Queries claimed by the intent pass, whatever the final group size.
  const intentQueries = new Set();
  if (useIntent) {
    for (const kw of eligible) {
      const intent = extractIntentPrefix(kw.query);
      if (!intent) continue;
      const group = groups.get(intent);
      if (group) group.keywords.push(kw);
      else groups.set(intent, { type: "intent", keywords: [kw] });
      intentQueries.add(kw.query);
    }
  }

  if (usePrefix) {
    const byPrefix = new Map();
    for (const kw of eligible) {
      if (intentQueries.has(kw.query)) continue;
      const prefix = extractWordPrefix(kw.query);
      if (!prefix) continue;
      const members = byPrefix.get(prefix);
      if (members) members.push(kw);
      else byPrefix.set(prefix, [kw]);
    }
    for (const [prefix, members] of byPrefix) {
      if (members.length >= minClusterSize) {
        groups.set(prefix, { type: "prefix", keywords: members });
      }
    }
  }

  const clusters = [];
  const clusteredQueries = new Set();
  for (const [name, group] of groups) {
    const members = group.keywords;
    if (members.length < minClusterSize) continue;
    const totalClicks = members.reduce((sum, k) => sum + num(k.clicks), 0);
    const totalImpressions = members.reduce((sum, k) => sum + num(k.impressions), 0);
    const avgPosition = members.reduce((sum, k) => sum + num(k.position), 0) / members.length;
    clusters.push({
      clusterName: name,
      clusterType: group.type,
      keywords: members,
      totalClicks,
      totalImpressions,
      avgPosition,
      keywordCount: members.length,
    });
    for (const kw of members) clusteredQueries.add(kw.query);
  }
  clusters.sort((a, b) => b.totalClicks - a.totalClicks);

  return {
    clusters,
    unclustered: eligible.filter((kw) => !clusteredQueries.has(kw.query)),
  };
}
656
/**
 * Keyword clustering analyzer.
 *
 * SQL variant: aggregates the `keywords` table per query, classifies each
 * query by intent prefix and/or its first two words, and groups by the
 * resulting cluster name (intent wins over word prefix).
 * Rows variant: delegates to `analyzeClustering`.
 */
const clusteringAnalyzer = defineAnalyzer({
  id: "clustering",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const minClusterSize = params.minClusterSize ?? 2;
    const clusterBy = params.clusterBy ?? "both";

    const nullVarchar = "CAST(NULL AS VARCHAR)";
    const intentEnabled = ["intent", "both"].includes(clusterBy);
    const prefixEnabled = ["prefix", "both"].includes(clusterBy);

    // A disabled strategy contributes a typed NULL so COALESCE skips it.
    const intentExpr = intentEnabled
      ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')`
      : nullVarchar;
    // Word prefix = first two words, only for queries of three or more words.
    const prefixExpr = prefixEnabled
      ? [
        "CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3",
        "THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')",
        "ELSE CAST(NULL AS VARCHAR) END",
      ].join("\n")
      : nullVarchar;

    const sql = `
WITH agg AS (
SELECT
query,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query
HAVING SUM(impressions) >= ?
),
classified AS (
SELECT
query, clicks, impressions, ctr, position,
${intentExpr} AS intent_prefix,
${prefixExpr} AS word_prefix
FROM agg
),
keyed AS (
SELECT
query, clicks, impressions, ctr, position,
COALESCE(intent_prefix, word_prefix) AS cluster_name,
CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
FROM classified
WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
)
SELECT
cluster_name AS clusterName,
any_value(cluster_type) AS clusterType,
CAST(COUNT(*) AS DOUBLE) AS keywordCount,
${METRIC_EXPR.clicks} AS totalClicks,
${METRIC_EXPR.impressions} AS totalImpressions,
AVG(position) AS avgPosition,
to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
FROM keyed
GROUP BY cluster_name
HAVING COUNT(*) >= ?
ORDER BY totalClicks DESC
`;

    return {
      sql,
      params: [startDate, endDate, minImpressions, minClusterSize],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows) {
    const toKeyword = (k) => ({
      query: str$4(k.query),
      clicks: num(k.clicks),
      impressions: num(k.impressions),
      ctr: num(k.ctr),
      position: num(k.position),
    });
    const toCluster = (r) => ({
      clusterName: str$4(r.clusterName),
      clusterType: str$4(r.clusterType),
      keywordCount: num(r.keywordCount),
      totalClicks: num(r.totalClicks),
      totalImpressions: num(r.totalImpressions),
      avgPosition: num(r.avgPosition),
      keywords: parseJsonList$3(r.keywords).map((k) => toKeyword(k)),
    });

    const source = Array.isArray(rows) ? rows : [];
    const clusters = source.map((r) => toCluster(r));
    const count = clusters.length;
    return {
      results: clusters,
      meta: { total: count, totalClusters: count },
    };
  },
  buildRows(params) {
    const period = periodOf(params);
    return { keywords: keywordsQueryState(period, params.limit) };
  },
  reduceRows(rows, params) {
    const { clusterBy, minClusterSize, minImpressions } = params;
    const input = Array.isArray(rows) ? rows : [];
    const { clusters } = analyzeClustering(input, { clusterBy, minClusterSize, minImpressions });
    return {
      results: clusters,
      meta: { totalClusters: clusters.length },
    };
  },
});
762
/**
 * Stringifies a value, mapping null and undefined to the empty string.
 *
 * @param {*} v Raw value.
 * @returns {string}
 */
function str$3(v) {
  const isMissing = v === null || v === undefined;
  return isMissing ? "" : String(v);
}
765
/**
 * Normalizes a list column that may arrive as an array or as JSON text.
 *
 * @param {*} v Array, JSON string, or anything else.
 * @returns {Array<*>} The array, the parsed array, or [] otherwise.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) {
    return v;
  }
  if (typeof v !== "string" || v.length === 0) {
    return [];
  }
  const decoded = JSON.parse(v);
  if (Array.isArray(decoded)) {
    return decoded;
  }
  return [];
}
773
/**
 * Gini coefficient of a list of values, computed from the ascending ranks:
 * sum((2i - n - 1) * x_i) / (n * sum(x)), with i starting at 1.
 *
 * @param {number[]} values Values to measure; the input is not mutated.
 * @returns {number} 0 for an empty list or a zero total.
 */
function calculateGini(values) {
  const n = values.length;
  if (n === 0) {
    return 0;
  }
  const ascending = Array.from(values).sort((x, y) => x - y);

  let total = 0;
  for (const value of ascending) {
    total += value;
  }
  if (total === 0) {
    return 0;
  }

  let weightedSum = 0;
  ascending.forEach((value, i) => {
    weightedSum += (2 * (i + 1) - n - 1) * value;
  });
  return weightedSum / (n * total);
}
783
/**
 * Herfindahl-Hirschman index: the sum of squared percentage shares.
 *
 * @param {number[]} shares Fractional shares (0-1).
 * @returns {number} 0 for an empty list, 10000 for a single share of 1.
 */
function calculateHHI(shares) {
  let index = 0;
  for (const share of shares) {
    index += (share * 100) ** 2;
  }
  return index;
}
786
/**
 * Measures how concentrated clicks are across items.
 *
 * @param {Array<{key: *, clicks: number}>} items Items with click counts.
 * @param {{topN?: number}} [options] `topN` (default 10) sets the size of the
 *   top list and of the top-N concentration.
 * @returns {{giniCoefficient: number, hhi: number, topNConcentration: number,
 *   topNItems: object[], totalItems: number, totalClicks: number,
 *   riskLevel: "low"|"medium"|"high"}} Risk is "high" above an HHI of 2500,
 *   "medium" above 1500, otherwise "low".
 */
function analyzeConcentration(items, options = {}) {
  const { topN = 10 } = options;
  const totalItems = items.length;
  if (totalItems === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low",
    };
  }

  const byClicksDesc = Array.from(items).sort((left, right) => right.clicks - left.clicks);
  let totalClicks = 0;
  for (const item of byClicksDesc) {
    totalClicks += item.clicks;
  }
  const hasClicks = totalClicks > 0;

  const giniCoefficient = calculateGini(byClicksDesc.map((item) => item.clicks));
  // With no clicks there are no shares, which makes the HHI 0.
  const hhi = calculateHHI(hasClicks ? byClicksDesc.map((item) => item.clicks / totalClicks) : []);

  const topNItems = byClicksDesc.slice(0, topN).map(({ key, clicks }) => ({
    key,
    clicks,
    share: hasClicks ? clicks / totalClicks : 0,
  }));
  let topNClicks = 0;
  for (const item of topNItems) {
    topNClicks += item.clicks;
  }
  const topNConcentration = hasClicks ? topNClicks / totalClicks : 0;

  let riskLevel;
  if (hhi > 2500) {
    riskLevel = "high";
  } else if (hhi > 1500) {
    riskLevel = "medium";
  } else {
    riskLevel = "low";
  }

  return {
    giniCoefficient,
    hhi,
    topNConcentration,
    topNItems,
    totalItems,
    totalClicks,
    riskLevel,
  };
}
823
/**
 * Click concentration across pages: keys each row by `page` and coerces
 * `clicks` to a number before delegating to `analyzeConcentration`.
 *
 * @param {Array<{page: *, clicks: *}>} pages Page rows.
 * @param {{topN?: number}} [options] Passed through unchanged.
 * @returns {object} The `analyzeConcentration` result.
 */
function analyzePageConcentration(pages, options) {
  const items = pages.map((row) => {
    const clicks = num(row.clicks);
    return { key: row.page, clicks };
  });
  return analyzeConcentration(items, options);
}
829
/**
 * Click concentration across keywords: keys each row by `query` and coerces
 * `clicks` to a number before delegating to `analyzeConcentration`.
 *
 * @param {Array<{query: *, clicks: *}>} keywords Keyword rows.
 * @param {{topN?: number}} [options] Passed through unchanged.
 * @returns {object} The `analyzeConcentration` result.
 */
function analyzeKeywordConcentration(keywords, options) {
  const items = keywords.map((row) => {
    const clicks = num(row.clicks);
    return { key: row.query, clicks };
  });
  return analyzeConcentration(items, options);
}
835
/**
 * Traffic concentration analyzer over pages (default) or keywords.
 *
 * SQL variant: computes Gini, HHI, top-N concentration and a risk level in a
 * single row; only items with more than zero clicks are counted.
 * Rows variant: fetches page or keyword rows and delegates to the matching
 * JavaScript concentration helper.
 */
const concentrationAnalyzer = defineAnalyzer({
  id: "concentration",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const byKeyword = (params.dimension || "pages") === "keywords";
    const topN = params.topN ?? 10;
    // Anything other than "keywords" is analyzed per page.
    const table = byKeyword ? "keywords" : "pages";
    const keyCol = byKeyword ? "query" : "url";

    const sql = `
WITH items AS (
SELECT
${keyCol} AS key,
${METRIC_EXPR.clicks} AS clicks
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY ${keyCol}
HAVING SUM(clicks) > 0
),
totals AS (
SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
),
ranked AS (
SELECT
i.key, i.clicks,
i.clicks / NULLIF(t.total_clicks, 0) AS share,
ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
t.total_clicks AS tclicks,
t.total_items AS titems
FROM items i, totals t
),
gini_num AS (
SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
),
hhi_calc AS (
SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
),
top_list AS (
SELECT
list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
SUM(clicks) AS top_clicks
FROM ranked WHERE rnk_desc <= ?
)
SELECT
COALESCE(
(SELECT weighted_sum FROM gini_num)
/ NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
0.0
) AS giniCoefficient,
COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
COALESCE(
CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
/ NULLIF((SELECT total_clicks FROM totals), 0),
0.0
) AS topNConcentration,
COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
CASE
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
ELSE 'low'
END AS riskLevel
`;

    return {
      sql,
      params: [startDate, endDate, topN],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows, params) {
    // The query returns one summary row; a missing row yields zeroed fields.
    const list = Array.isArray(rows) ? rows : [];
    const summary = list[0] ?? {};
    const topNItems = parseJsonList$2(summary.topNItems).map((entry) => ({
      key: str$3(entry.key),
      clicks: num(entry.clicks),
      share: num(entry.share),
    }));
    const result = {
      giniCoefficient: num(summary.giniCoefficient),
      hhi: num(summary.hhi),
      topNConcentration: num(summary.topNConcentration),
      topNItems,
      totalItems: num(summary.totalItems),
      totalClicks: num(summary.totalClicks),
      riskLevel: str$3(summary.riskLevel),
    };
    return {
      results: [result],
      meta: {
        total: 1,
        dimension: params.dimension || "pages",
      },
    };
  },
  buildRows(params) {
    const dim = params.dimension || "pages";
    const period = periodOf(params);
    return dim === "pages"
      ? { pages: pagesQueryState(period, params.limit) }
      : { keywords: keywordsQueryState(period, params.limit) };
  },
  reduceRows(rows, params) {
    const dim = params.dimension || "pages";
    // Accepts either a bare row array or an object keyed by dimension.
    const input = Array.isArray(rows) ? rows : rows[dim] ?? [];
    const analyze = dim === "pages" ? analyzePageConcentration : analyzeKeywordConcentration;
    return {
      results: [analyze(input, { topN: params.topN })],
      meta: { dimension: dim },
    };
  },
});
951
// Sorter applied to decay results by analyzeDecay. It is built by
// createMetricSorter (defined elsewhere in this file).
// NOTE(review): presumably the first argument is the default sort key and the
// map gives the sort direction for each supported key - confirm against
// createMetricSorter.
+ const sortResults$1 = createMetricSorter("lostClicks", {
952
+ lostClicks: "desc",
953
+ declinePercent: "desc",
954
+ currentClicks: "asc"
955
+ });
956
/**
 * Converts a value to a string, mapping `null` and `undefined` to "".
 * @param {*} v - Any value.
 * @returns {string} "" for nullish input, otherwise `String(v)`.
 */
function str$2(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
959
/**
 * Leniently coerces a value into an array.
 *
 * Arrays are returned as-is. Non-empty strings are parsed as JSON and kept
 * only when the parsed value is an array. Every other input yields [].
 *
 * @param {*} v - An array, a JSON string, or anything else.
 * @returns {Array} The list, or [] when no list can be derived.
 */
function parseJsonList$1(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  try {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // Deliberate: a malformed JSON cell is treated like any other unusable
    // input (-> []) so that a single bad row cannot abort the whole reduce.
    return [];
  }
}
967
/**
 * Finds pages whose clicks dropped between the previous and current period.
 *
 * Only pages from `input.previous` with at least `minPreviousClicks` clicks
 * are considered. A page is reported when it lost clicks and the relative
 * loss is at least `threshold`. Pages absent from `input.current` are treated
 * as having 0 clicks and position 0.
 *
 * @param {{current: Array, previous: Array}} input - Page rows per period.
 * @param {{minPreviousClicks?: number, threshold?: number, sortBy?: string}} [options]
 * @returns {Array} Decay records, ordered by `sortResults$1` using `sortBy`.
 */
function analyzeDecay(input, options = {}) {
  const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
  const pageOf = (row) => row.page;
  const metricsOf = (row) => ({
    clicks: num(row.clicks),
    position: num(row.position)
  });
  const currentMap = buildPeriodMap(input.current, pageOf, metricsOf);
  const previousMap = buildPeriodMap(input.previous, pageOf, metricsOf, (row) => num(row.clicks) >= minPreviousClicks);
  const decayed = [];
  for (const [page, before] of previousMap) {
    const after = currentMap.get(page) || {
      clicks: 0,
      position: 0
    };
    const lostClicks = before.clicks - after.clicks;
    // Negated comparisons keep NaN values out, exactly like the positive form.
    if (!(lostClicks > 0)) continue;
    const declinePercent = before.clicks > 0 ? lostClicks / before.clicks : 0;
    if (!(declinePercent >= threshold)) continue;
    decayed.push({
      page,
      currentClicks: after.clicks,
      previousClicks: before.clicks,
      lostClicks,
      declinePercent,
      currentPosition: after.position,
      previousPosition: before.position,
      positionDrop: after.position - before.position
    });
  }
  return sortResults$1(decayed, sortBy);
}
998
/**
 * "decay" analyzer: pages that lost clicks between the previous and the
 * current comparison period (periods come from `comparisonOf(params)`).
 *
 * - buildSql / reduceSql: SQL variant over the "pages" table, including a
 *   weekly clicks/impressions series per page.
 * - buildRows / reduceRows: row variant that delegates to `analyzeDecay`.
 */
const decayAnalyzer = defineAnalyzer({
  id: "decay",
  /**
   * Builds the SQL text, positional parameters and partition descriptors.
   * NOTE(review): `{{FILES}}` / `{{FILES_PREV}}` are presumably replaced by the
   * engine with the parquet files of `current` / `previous` - confirm.
   */
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minPreviousClicks = params.minPreviousClicks ?? 50;
    const threshold = params.threshold ?? .2;
    // LIMIT is interpolated into the SQL text, so it must be a plain
    // non-negative integer. Invalid values (NaN, negative) fall back to the
    // default instead of producing an invalid statement.
    const requestedLimit = Number(params.limit ?? 2e3);
    const limit = Number.isFinite(requestedLimit) && requestedLimit >= 0 ? Math.floor(requestedLimit) : 2e3;
    return {
      sql: `
        WITH cur AS (
          SELECT
            url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
        ),
        prev AS (
          SELECT
            url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
          HAVING SUM(clicks) >= ?
        ),
        weekly AS (
          SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM (
            SELECT url, date, clicks, impressions
            FROM read_parquet({{FILES}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
            UNION ALL
            SELECT url, date, clicks, impressions
            FROM read_parquet({{FILES_PREV}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
          )
          GROUP BY url, week
        ),
        series_by_url AS (
          SELECT url, to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
          FROM weekly GROUP BY url
        ),
        joined AS (
          SELECT
            p.url AS page,
            COALESCE(c.clicks, 0.0) AS currentClicks,
            p.clicks AS previousClicks,
            (p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
            (p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
            COALESCE(c.position, 0.0) AS currentPosition,
            p.position AS previousPosition,
            (COALESCE(c.position, 0.0) - p.position) AS positionDrop,
            s.seriesJson
          FROM prev p
          LEFT JOIN cur c ON p.url = c.url
          LEFT JOIN series_by_url s ON p.url = s.url
        )
        SELECT *
        FROM joined
        WHERE declinePercent >= ? AND lostClicks > 0
        ORDER BY lostClicks DESC
        LIMIT ${limit}
      `,
      // Order must follow the `?` placeholders: cur, prev (+ HAVING),
      // weekly (cur then prev), final WHERE.
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minPreviousClicks,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        threshold
      ],
      current: {
        table: "pages",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "pages",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },
  /** Normalizes SQL result rows; `seriesJson` is decoded into `series`. */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    return {
      results: arr.map((r) => ({
        page: str$2(r.page),
        currentClicks: num(r.currentClicks),
        previousClicks: num(r.previousClicks),
        lostClicks: num(r.lostClicks),
        declinePercent: num(r.declinePercent),
        currentPosition: num(r.currentPosition),
        previousPosition: num(r.previousPosition),
        positionDrop: num(r.positionDrop),
        series: parseJsonList$1(r.seriesJson).map((s) => ({
          week: str$2(s.week),
          clicks: num(s.clicks),
          impressions: num(s.impressions)
        }))
      })),
      meta: { total: arr.length }
    };
  },
  /** Query states for the current and previous page rows. */
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: pagesQueryState(current, params.limit),
      previous: pagesQueryState(previous, params.limit)
    };
  },
  /** Runs `analyzeDecay` over a `{ current, previous }` row map. */
  reduceRows(rows, params) {
    // A bare array (or nothing) carries no period split; treat it as empty.
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const results = analyzeDecay({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      minPreviousClicks: params.minPreviousClicks,
      threshold: params.threshold
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
1138
/**
 * Percentage change from `previous` to `current`.
 *
 * With a zero baseline the change is reported as 100 when `current` is
 * positive and 0 otherwise.
 *
 * @param {number} current
 * @param {number} previous
 * @returns {number} Change in percent (e.g. 50 for +50%).
 */
function percentDifference(current, previous) {
  if (previous !== 0) {
    const delta = current - previous;
    return delta / previous * 100;
  }
  return current > 0 ? 100 : 0;
}
1142
/**
 * Stringifies a value; `null` and `undefined` become the empty string.
 * @param {*} v - Any value.
 * @returns {string}
 */
function str$1(v) {
  return String(v ?? "");
}
1145
/**
 * Leniently coerces a value into an array.
 *
 * Arrays pass through unchanged. A non-empty string is parsed as JSON and
 * used only if it decodes to an array. Anything else results in [].
 *
 * @param {*} v - An array, a JSON string, or anything else.
 * @returns {Array} The list, or [] when no list can be derived.
 */
function parseJsonList(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  let parsed;
  try {
    parsed = JSON.parse(v);
  } catch {
    // Deliberate: malformed JSON is handled like any other unusable input,
    // so one corrupt cell does not make the whole reduce step throw.
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
}
1153
/**
 * Classifies keywords as rising, declining or stable by comparing the current
 * period against a baseline built from the previous period.
 *
 * Baseline clicks and impressions are divided by `input.normalizationFactor`
 * (default 1). Current rows with fewer than `minImpressions` impressions are
 * skipped. A keyword is rising/declining when its click change, as a
 * fraction, is at least `changeThreshold` in that direction.
 *
 * @param {{current: Array, previous: Array, normalizationFactor?: number}} input
 * @param {{changeThreshold?: number, minImpressions?: number, sortBy?: string}} [options]
 * @returns {{rising: Array, declining: Array, stable: Array}}
 */
function analyzeMovers(input, options = {}) {
  const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
  const normFactor = input.normalizationFactor ?? 1;
  const baselineMap = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
    clicks: num(r.clicks) / normFactor,
    impressions: num(r.impressions) / normFactor,
    position: num(r.position),
    page: r.page ?? null
  }));
  // First page seen for each query; current rows take precedence.
  const pageMap = /* @__PURE__ */ new Map();
  for (const periodRows of [input.current, input.previous]) {
    for (const row of periodRows) {
      if (!pageMap.has(row.query) && row.page) pageMap.set(row.query, row.page);
    }
  }
  const emptyBaseline = () => ({
    clicks: 0,
    impressions: 0,
    position: 0,
    page: null
  });
  const rising = [];
  const declining = [];
  const stable = [];
  for (const row of input.current) {
    const impressions = num(row.impressions);
    const clicks = num(row.clicks);
    const position = num(row.position);
    if (impressions < minImpressions) continue;
    const baseline = baselineMap.get(row.query) || emptyBaseline();
    const clicksChangePercent = percentDifference(clicks, baseline.clicks);
    const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
    const roundedBaselineClicks = Math.round(baseline.clicks);
    const data = {
      keyword: row.query,
      page: pageMap.get(row.query) ?? null,
      recentClicks: clicks,
      recentImpressions: impressions,
      recentPosition: position,
      baselineClicks: roundedBaselineClicks,
      baselineImpressions: Math.round(baseline.impressions),
      baselinePosition: baseline.position,
      clicksChange: clicks - roundedBaselineClicks,
      clicksChangePercent,
      impressionsChangePercent,
      positionChange: position - baseline.position
    };
    const significant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
    let bucket = stable;
    if (significant && clicksChangePercent > 0) bucket = rising;
    else if (significant && clicksChangePercent < 0) bucket = declining;
    bucket.push(data);
  }
  const byValueDesc = (field) => (a, b) => b[field] - a[field];
  const byMagnitudeDesc = (field) => (a, b) => Math.abs(b[field]) - Math.abs(a[field]);
  const comparators = /* @__PURE__ */ new Map([
    ["clicks", byValueDesc("recentClicks")],
    ["impressions", byValueDesc("recentImpressions")],
    ["clicksChange", byMagnitudeDesc("clicksChangePercent")],
    ["impressionsChange", byMagnitudeDesc("impressionsChangePercent")],
    ["positionChange", byMagnitudeDesc("positionChange")]
  ]);
  // Unknown sort keys fall back to click-change magnitude.
  const sortFn = comparators.get(sortBy) ?? comparators.get("clicksChange");
  rising.sort(sortFn);
  declining.sort(sortFn);
  stable.sort(byValueDesc("recentClicks"));
  return {
    rising,
    declining,
    stable
  };
}
1219
/**
 * "movers" analyzer: keywords whose clicks changed between the previous and
 * the current comparison period (periods come from `comparisonOf(params)`).
 *
 * - buildSql / reduceSql: SQL variant over "page_keywords", keyed by
 *   (query, url), with a weekly series per entity.
 * - buildRows / reduceRows: row variant that delegates to `analyzeMovers`.
 *
 * Both variants return rising followed by declining entries in `results` and
 * report the bucket sizes in `meta`.
 */
const moversAnalyzer = defineAnalyzer({
  id: "movers",
  /**
   * Builds the SQL text, positional parameters and partition descriptors.
   * NOTE(review): `{{FILES}}` / `{{FILES_PREV}}` are presumably replaced by the
   * engine with the parquet files of `current` / `previous` - confirm.
   */
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const changeThreshold = params.changeThreshold ?? .2;
    // LIMIT is interpolated into the SQL text, so it must be a plain
    // non-negative integer. Invalid values (NaN, negative) fall back to the
    // default instead of producing an invalid statement.
    const requestedLimit = Number(params.limit ?? 2e3);
    const limit = Number.isFinite(requestedLimit) && requestedLimit >= 0 ? Math.floor(requestedLimit) : 2e3;
    return {
      sql: `
        WITH cur AS (
          SELECT
            query, url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
        ),
        prev AS (
          SELECT
            query, url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
        ),
        weekly AS (
          SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM (
            SELECT query, url, date, clicks, impressions
            FROM read_parquet({{FILES}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
            UNION ALL
            SELECT query, url, date, clicks, impressions
            FROM read_parquet({{FILES_PREV}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
          )
          GROUP BY query, url, week
        ),
        series_by_entity AS (
          SELECT query, url, to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
          FROM weekly GROUP BY query, url
        ),
        joined AS (
          SELECT
            c.query AS keyword,
            c.url AS page,
            c.clicks AS recentClicks,
            c.impressions AS recentImpressions,
            c.position AS recentPosition,
            COALESCE(p.clicks, 0.0) AS baselineClicks,
            COALESCE(p.impressions, 0.0) AS baselineImpressions,
            COALESCE(p.position, 0.0) AS baselinePosition,
            (c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
            CASE
              WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
              ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
            END AS clicksChangePercent,
            CASE
              WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
              ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
            END AS impressionsChangePercent,
            (c.position - COALESCE(p.position, 0.0)) AS positionChange,
            s.seriesJson
          FROM cur c
          LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
          LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
          WHERE c.impressions >= ?
        )
        SELECT *,
          CASE
            WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
            WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
            ELSE 'stable'
          END AS direction
        FROM joined
        ORDER BY ABS(clicksChangePercent) DESC
        LIMIT ${limit}
      `,
      // Order must follow the `?` placeholders: cur, prev, weekly (cur then
      // prev), the impressions filter, then the two direction thresholds.
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minImpressions,
        changeThreshold,
        changeThreshold
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "page_keywords",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },
  /**
   * Normalizes SQL rows and groups them by the `direction` column. Stable
   * rows are counted in `meta` but left out of `results`.
   */
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$1(r.keyword),
      page: r.page == null ? null : str$1(r.page),
      recentClicks: num(r.recentClicks),
      recentImpressions: num(r.recentImpressions),
      recentPosition: num(r.recentPosition),
      baselineClicks: Math.round(num(r.baselineClicks)),
      baselineImpressions: Math.round(num(r.baselineImpressions)),
      baselinePosition: num(r.baselinePosition),
      clicksChange: num(r.clicksChange),
      clicksChangePercent: num(r.clicksChangePercent),
      impressionsChangePercent: num(r.impressionsChangePercent),
      positionChange: num(r.positionChange),
      direction: str$1(r.direction),
      series: parseJsonList(r.seriesJson).map((s) => ({
        week: str$1(s.week),
        clicks: num(s.clicks),
        impressions: num(s.impressions)
      }))
    }));
    const rising = normalized.filter((r) => r.direction === "rising");
    const declining = normalized.filter((r) => r.direction === "declining");
    const stable = normalized.filter((r) => r.direction === "stable");
    const combined = [...rising, ...declining];
    return {
      results: combined,
      meta: {
        total: combined.length,
        rising: rising.length,
        declining: declining.length,
        stable: stable.length
      }
    };
  },
  /** Query states for the current and previous keyword rows. */
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: keywordsQueryState(current, params.limit),
      previous: keywordsQueryState(previous, params.limit)
    };
  },
  /** Runs `analyzeMovers` over a `{ current, previous }` row map. */
  reduceRows(rows, params) {
    // A bare array (or nothing) carries no period split; treat it as empty.
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const result = analyzeMovers({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      changeThreshold: params.changeThreshold,
      minImpressions: params.minImpressions
    });
    const combined = [...result.rising.map((r) => ({
      ...r,
      direction: "rising"
    })), ...result.declining.map((r) => ({
      ...r,
      direction: "declining"
    }))];
    return {
      results: combined,
      // Same meta keys as reduceSql so callers can read either variant.
      meta: {
        total: combined.length,
        rising: result.rising.length,
        declining: result.declining.length,
        stable: result.stable.length
      }
    };
  }
});
1399
/** Row limit used when the caller supplies none. */
const DEFAULT_LIMIT = 1e3;
/** Hard upper bound for any requested row limit. */
const MAX_LIMIT = 5e4;
/**
 * Normalizes a requested row limit.
 *
 * Nullish input uses `fallback`. Values that are not finite or are below 1
 * return `fallback` unchanged. Valid values are floored to a whole number
 * (the result is interpolated into SQL `LIMIT` clauses, which need an
 * integer) and capped at `MAX_LIMIT`.
 *
 * @param {*} limit - Requested limit (number or numeric string).
 * @param {number} [fallback=DEFAULT_LIMIT] - Value used for missing/invalid input.
 * @returns {number} Whole-number limit, or `fallback`.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
  const n = Number(limit ?? fallback);
  if (!Number.isFinite(n) || n < 1) return fallback;
  return Math.min(Math.floor(n), MAX_LIMIT);
}
1406
/**
 * Normalizes a requested row offset to a non-negative whole number.
 * @param {*} offset - Requested offset; nullish counts as 0.
 * @returns {number} Floored offset, or 0 for negative/non-finite input.
 */
function clampOffset(offset) {
  const requested = Number(offset ?? 0);
  const usable = Number.isFinite(requested) && !(requested < 0);
  return usable ? Math.floor(requested) : 0;
}
1411
/**
 * Renders a SQL pagination clause from `input.limit` / `input.offset`.
 * Both values go through `clampLimit` / `clampOffset` first, and `OFFSET` is
 * emitted only when the clamped offset is positive.
 *
 * @param {{limit?: *, offset?: *}} input
 * @returns {string} "LIMIT n" or "LIMIT n OFFSET m".
 */
function paginateClause(input) {
  const rowCount = clampLimit(input.limit);
  const skip = clampOffset(input.offset);
  const limitPart = `LIMIT ${rowCount}`;
  if (skip > 0) return `${limitPart} OFFSET ${skip}`;
  return limitPart;
}
1416
/**
 * Returns one page of an in-memory row array.
 * A missing limit defaults to the full length of `rows`.
 *
 * @param {Array} rows - Rows to slice; not modified.
 * @param {{limit?: *, offset?: *}} input
 * @returns {Array} New array holding the requested page.
 */
function paginateInMemory(rows, input) {
  const pageSize = clampLimit(input.limit, rows.length);
  const start = clampOffset(input.offset);
  const end = start + pageSize;
  return rows.slice(start, end);
}
1421
/** Expected click-through rate for each whole search position 1-10. */
const EXPECTED_CTR_BY_POSITION = {
  1: 0.3,
  2: 0.15,
  3: 0.1,
  4: 0.07,
  5: 0.05,
  6: 0.04,
  7: 0.03,
  8: 0.025,
  9: 0.02,
  10: 0.015
};
/**
 * Looks up the expected CTR for a (possibly fractional) position.
 * The position is clamped to [1, 10] and rounded before the lookup; if the
 * lookup yields nothing (e.g. NaN input) the result is 0.01.
 *
 * @param {number} position - Average search position.
 * @returns {number} Expected CTR as a fraction.
 */
function getExpectedCtr(position) {
  const clamped = Math.max(1, Math.min(position, 10));
  const rank = Math.round(clamped);
  return EXPECTED_CTR_BY_POSITION[rank] || 0.01;
}
1436
/**
 * Scores a position by its closeness to position 11.
 * Positions up to 3 score a flat 0.2 and positions beyond 50 a flat 0.1.
 * In between, the score falls linearly from 1 at position 11 and is floored
 * at 0 once the distance exceeds 15.
 *
 * @param {number} position - Average search position.
 * @returns {number} Score in [0, 1].
 */
function calculatePositionScore(position) {
  return position <= 3
    ? 0.2
    : position > 50
      ? 0.1
      : Math.max(0, 1 - Math.abs(position - 11) / 15);
}
1442
/**
 * Scores impression volume on a log scale: log10(impressions) / 5, capped
 * at 1. Zero or negative impressions score 0.
 *
 * @param {number} impressions
 * @returns {number} Score, at most 1.
 */
function calculateImpressionScore(impressions) {
  return impressions <= 0 ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
1446
/**
 * Scores how far the actual CTR falls short of the expected CTR for the
 * position, as a fraction of the expected CTR (capped at 1). A CTR at or
 * above the expectation scores 0.
 *
 * @param {number} actualCtr - Observed CTR as a fraction.
 * @param {number} position - Average search position.
 * @returns {number} Score, at most 1.
 */
function calculateCtrGapScore(actualCtr, position) {
  const expectedCtr = getExpectedCtr(position);
  const shortfall = expectedCtr - actualCtr;
  return actualCtr >= expectedCtr ? 0 : Math.min(shortfall / expectedCtr, 1);
}
1452
// Sorter applied to opportunity results in opportunityAnalyzer.reduceRows.
// It is built by createMetricSorter (defined elsewhere in this file).
// NOTE(review): presumably the first argument is the default sort key and the
// map gives the sort direction for each supported key - confirm against
// createMetricSorter.
+ const sortResults = createMetricSorter("opportunityScore", {
1453
+ opportunityScore: "desc",
1454
+ potentialClicks: "desc",
1455
+ impressions: "desc",
1456
+ position: "asc"
1457
+ });
1458
/**
 * "opportunity" analyzer: scores keyword/page pairs by combining a position
 * score, an impression score and a CTR-gap score into a geometric mean
 * (0-100), and estimates the clicks reachable at a top-3 position.
 *
 * - buildSql / reduceSql: SQL variant over "page_keywords"; the scoring is
 *   expressed in SQL with the same constants as the JS helpers.
 * - buildRows / reduceRows: row variant using calculatePositionScore,
 *   calculateImpressionScore, calculateCtrGapScore and getExpectedCtr.
 */
const opportunityAnalyzer = defineAnalyzer({
  id: "opportunity",
  /**
   * Builds the SQL text, positional parameters and partition descriptor.
   * NOTE(review): `{{FILES}}` is presumably replaced by the engine with the
   * parquet files of `current` - confirm.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 100;
    // Factor weights are fixed at 1 each (plain geometric mean).
    const w1 = 1;
    const w2 = 1;
    const w3 = 1;
    const totalW = w1 + w2 + w3;
    const limit = params.limit ?? 1e3;
    return {
      sql: `
        WITH agg AS (
          SELECT
            query AS keyword,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        ),
        scored AS (
          SELECT
            keyword, page, clicks, impressions, ctr, position,
            CASE
              WHEN position <= 3 THEN 0.2
              WHEN position > 50 THEN 0.1
              ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
            END AS positionScore,
            CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
            CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
              WHEN 1 THEN 0.30
              WHEN 2 THEN 0.15
              WHEN 3 THEN 0.10
              WHEN 4 THEN 0.07
              WHEN 5 THEN 0.05
              WHEN 6 THEN 0.04
              WHEN 7 THEN 0.03
              WHEN 8 THEN 0.025
              WHEN 9 THEN 0.02
              WHEN 10 THEN 0.015
              ELSE 0.01
            END AS expectedCtr
          FROM agg
        ),
        gapped AS (
          SELECT
            *,
            CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
          FROM scored
        )
        SELECT
          keyword, page, clicks, impressions, ctr, position,
          CAST(ROUND(POWER(
            POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
            1.0 / ${totalW}
          ) * 100) AS DOUBLE) AS opportunityScore,
          CAST(ROUND(impressions * (
            CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
              WHEN 1 THEN 0.30
              WHEN 2 THEN 0.15
              WHEN 3 THEN 0.10
              ELSE 0.10
            END
          )) AS DOUBLE) AS potentialClicks,
          positionScore, impressionScore, ctrGapScore
        FROM gapped
        ORDER BY opportunityScore DESC
        ${paginateClause({
          limit,
          offset: params.offset
        })}
      `,
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /** Normalizes SQL rows; the three factor columns are nested under `factors`. */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    return {
      results: arr.map((r) => ({
        keyword: r.keyword == null ? "" : String(r.keyword),
        page: r.page == null ? null : String(r.page),
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        opportunityScore: num(r.opportunityScore),
        potentialClicks: num(r.potentialClicks),
        factors: {
          positionScore: num(r.positionScore),
          impressionScore: num(r.impressionScore),
          ctrGapScore: num(r.ctrGapScore)
        }
      })),
      meta: { total: arr.length }
    };
  },
  /** Query state for the keyword rows of the period. */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Scores keyword rows in memory, sorts by opportunity score and paginates.
   * `meta.total` counts all scored rows; `meta.returned` counts the page.
   */
  reduceRows(rows, params) {
    // Accept either a bare row array or the keyed `{ keywords }` shape that
    // buildRows declares, so a keyed payload is not silently discarded.
    // NOTE(review): confirm which shape the runner actually passes.
    const keyed = rows && !Array.isArray(rows) ? rows.keywords : void 0;
    const keywords = Array.isArray(rows) ? rows : Array.isArray(keyed) ? keyed : [];
    const minImpressions = params.minImpressions ?? 100;
    const positionWeight = 1;
    const impressionsWeight = 1;
    const ctrGapWeight = 1;
    const sortBy = "opportunityScore";
    const results = [];
    for (const row of keywords) {
      const impressions = num(row.impressions);
      const position = num(row.position);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);
      if (impressions < minImpressions) continue;
      const positionScore = calculatePositionScore(position);
      const impressionScore = calculateImpressionScore(impressions);
      const ctrGapScore = calculateCtrGapScore(ctr, position);
      const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
      const opportunityScore = Math.round(geometricMean * 100);
      // Potential is estimated at the expected CTR of a top-3 position.
      const targetCtr = getExpectedCtr(Math.min(3, position));
      const potentialClicks = Math.round(impressions * targetCtr);
      results.push({
        keyword: row.query,
        page: row.page ?? null,
        clicks,
        impressions,
        ctr,
        position,
        opportunityScore,
        potentialClicks,
        factors: {
          positionScore,
          impressionScore,
          ctrGapScore
        }
      });
    }
    const sorted = sortResults(results, sortBy);
    const paged = paginateInMemory(sorted, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: sorted.length,
        returned: paged.length
      }
    };
  }
});
1621
/**
 * Returns `String(v)`, or "" when `v` is `null` or `undefined`.
 * @param {*} v - Any value.
 * @returns {string}
 */
function str(v) {
  const missing = v === null || v === undefined;
  return missing ? "" : String(v);
}
1624
/**
 * Interprets a value as a boolean flag.
 * Only `true`, the number 1 and the string "true" count as true.
 *
 * @param {*} v - Any value.
 * @returns {boolean}
 */
function bool(v) {
  const truthyForms = [true, 1, "true"];
  return truthyForms.includes(v);
}
1627
/**
 * Coefficient of variation (population standard deviation / mean) of a list
 * of numbers, capped at 1. Returns 0 for an empty list or a zero mean.
 *
 * @param {number[]} values
 * @returns {number} Value of at most 1.
 */
function calculateCV(values) {
  const count = values.length;
  if (count === 0) return 0;
  let total = 0;
  for (const v of values) total = total + v;
  const mean = total / count;
  if (mean === 0) return 0;
  let squaredDeviation = 0;
  for (const v of values) squaredDeviation = squaredDeviation + (v - mean) ** 2;
  const stdDev = Math.sqrt(squaredDeviation / count);
  return Math.min(stdDev / mean, 1);
}
1634
/**
 * Aggregates daily rows into monthly totals and flags seasonal patterns.
 *
 * Rows are grouped by the first 7 characters of `row.date` (YYYY-MM). A month
 * is a peak when its total exceeds 1.5x the monthly average and a trough when
 * it is below 0.5x. `strength` is `calculateCV` of the monthly totals.
 * `insufficientData` is true with fewer than 12 distinct months.
 *
 * @param {Array<{date: string, clicks: number, impressions: number}>} dates
 * @param {{metric?: string}} [options] - "clicks" (default) selects clicks;
 *   any other value selects impressions.
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[],
 *   troughMonths: string[], monthlyBreakdown: Array, insufficientData: boolean}}
 */
function analyzeSeasonality(dates, options = {}) {
  const { metric = "clicks" } = options;
  if (dates.length === 0) {
    return {
      hasSeasonality: false,
      strength: 0,
      peakMonths: [],
      troughMonths: [],
      monthlyBreakdown: [],
      insufficientData: true
    };
  }
  const useClicks = metric === "clicks";
  const totalsByMonth = /* @__PURE__ */ new Map();
  for (const row of dates) {
    const monthKey = row.date.substring(0, 7);
    const amount = useClicks ? row.clicks : row.impressions;
    const runningTotal = totalsByMonth.get(monthKey) || 0;
    totalsByMonth.set(monthKey, runningTotal + amount);
  }
  const months = [...totalsByMonth.keys()].sort();
  const values = months.map((m) => totalsByMonth.get(m) || 0);
  let totalValue = 0;
  for (const v of values) totalValue = totalValue + v;
  const avgValue = values.length > 0 ? totalValue / values.length : 0;
  const monthlyBreakdown = months.map((month, i) => {
    const value = values[i] ?? 0;
    const vsAverage = avgValue > 0 ? value / avgValue : 0;
    return {
      month,
      value,
      vsAverage,
      isPeak: vsAverage > 1.5,
      isTrough: vsAverage < .5
    };
  });
  // Distinct two-digit month numbers (MM) of the entries carrying `flag`.
  const monthNumbersWhere = (flag) => {
    const flagged = monthlyBreakdown.filter((m) => m[flag]);
    return [...new Set(flagged.map((m) => m.month.substring(5, 7)))];
  };
  const peakMonths = monthNumbersWhere("isPeak");
  const troughMonths = monthNumbersWhere("isTrough");
  const strength = calculateCV(values);
  const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
  return {
    hasSeasonality,
    strength,
    peakMonths,
    troughMonths,
    monthlyBreakdown,
    insufficientData: months.length < 12
  };
}
1678
/**
 * "seasonality" analyzer: monthly totals of clicks or impressions with
 * peak/trough flags and an overall strength value.
 *
 * - buildSql / reduceSql: SQL variant over the "pages" table.
 * - buildRows / reduceRows: row variant that delegates to
 *   `analyzeSeasonality`.
 *
 * Both variants return the monthly breakdown as `results` and the same
 * summary keys in `meta`.
 */
const seasonalityAnalyzer = defineAnalyzer({
  id: "seasonality",
  /**
   * Builds the SQL text, positional parameters and partition descriptor.
   * NOTE(review): `{{FILES}}` is presumably replaced by the engine with the
   * parquet files of `current` - confirm.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      // The metric column is chosen from a fixed two-value whitelist, so the
      // interpolation cannot inject arbitrary SQL.
      sql: `
        WITH monthly AS (
          SELECT
            strftime(date, '%Y-%m') AS month,
            CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY month
        ),
        stats AS (
          SELECT
            AVG(value) AS avg_val,
            COALESCE(STDDEV_POP(value), 0.0) AS std_val,
            CAST(COUNT(*) AS DOUBLE) AS month_count
          FROM monthly
        )
        SELECT
          m.month AS month,
          m.value AS value,
          CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
          (s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
          (s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
          CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
          s.month_count AS monthCount
        FROM monthly m, stats s
        ORDER BY m.month
      `,
      params: [startDate, endDate],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes SQL rows into the monthly breakdown. `strength` and
   * `monthCount` repeat on every row, so they are read from the first one.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const breakdown = arr.map((r) => ({
      month: str(r.month),
      value: num(r.value),
      vsAverage: num(r.vsAverage),
      isPeak: bool(r.isPeak),
      isTrough: bool(r.isTrough)
    }));
    const first = arr[0];
    const strength = first ? num(first.strength) : 0;
    const monthCount = first ? num(first.monthCount) : 0;
    const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
    const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
    const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
    const insufficientData = monthCount < 12;
    return {
      results: breakdown,
      meta: {
        total: breakdown.length,
        hasSeasonality,
        strength,
        peakMonths,
        troughMonths,
        insufficientData
      }
    };
  },
  /** Query state for the daily rows of the period. */
  buildRows(params) {
    return { dates: datesQueryState(periodOf(params), params.limit) };
  },
  /** Runs `analyzeSeasonality` over the daily rows. */
  reduceRows(rows, params) {
    const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
    return {
      results: result.monthlyBreakdown,
      // Same meta keys as reduceSql; analyzeSeasonality already computes them.
      meta: {
        total: result.monthlyBreakdown.length,
        hasSeasonality: result.hasSeasonality,
        strength: result.strength,
        peakMonths: result.peakMonths,
        troughMonths: result.troughMonths,
        insufficientData: result.insufficientData
      }
    };
  }
});
1756
/** Row limit for the row-based fetch when `params.limit` is not given. */
const DEFAULT_ROW_LIMIT$1 = 25e3;
/**
 * "striking-distance" analyzer: keyword/page pairs inside a position window
 * (default 4-20) with enough impressions (default >= 100) and a low CTR
 * (default <= 0.05), ordered by potential clicks (15% of impressions).
 *
 * A single `reduce` filters the rows. `buildSql` aggregates per (query, url)
 * over "page_keywords", and `buildRows` requests keyword rows.
 */
const strikingDistanceAnalyzer = defineAnalyzer({
  id: "striking-distance",
  /**
   * Filters, ranks and paginates keyword rows.
   * `meta.total` counts all matches; `meta.returned` counts the page.
   */
  reduce(rows, params) {
    const candidates = Array.isArray(rows) ? rows : [];
    const minPosition = params.minPosition ?? 4;
    const maxPosition = params.maxPosition ?? 20;
    const minImpressions = params.minImpressions ?? 100;
    const maxCtr = params.maxCtr ?? .05;
    const limit = params.limit ?? 1e3;
    const matches = [];
    for (const row of candidates) {
      const position = num(row.position);
      const impressions = num(row.impressions);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);
      const outsideWindow = position < minPosition || position > maxPosition;
      if (outsideWindow || impressions < minImpressions || ctr > maxCtr) continue;
      const page = row.page == null ? null : String(row.page);
      matches.push({
        keyword: String(row.query ?? ""),
        page,
        clicks,
        impressions,
        ctr,
        position,
        potentialClicks: Math.round(impressions * .15)
      });
    }
    matches.sort((left, right) => right.potentialClicks - left.potentialClicks);
    const paged = paginateInMemory(matches, {
      limit,
      offset: params.offset
    });
    const meta = {
      total: matches.length,
      returned: paged.length
    };
    return {
      results: paged,
      meta
    };
  },
  /**
   * Builds the aggregation SQL, its parameters and the partition descriptor.
   * NOTE(review): `{{FILES}}` is presumably replaced by the engine with the
   * parquet files of `current` - confirm.
   */
  buildSql(params) {
    const period = periodOf(params);
    const { startDate, endDate } = period;
    const sql = `
      SELECT
        query,
        url AS page,
        CAST(SUM(clicks) AS DOUBLE) AS clicks,
        CAST(SUM(impressions) AS DOUBLE) AS impressions,
        CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0) AS ctr,
        SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS position
      FROM read_parquet({{FILES}}, union_by_name = true)
      WHERE date >= ? AND date <= ?
      GROUP BY query, url
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /** Query state for the keyword rows of the period. */
  buildRows(params) {
    const rowLimit = params.limit ?? DEFAULT_ROW_LIMIT$1;
    return { keywords: keywordsQueryState(periodOf(params), rowLimit) };
  }
});
1824
// Fallback row limit used by zeroClickAnalyzer.buildRows when params.limit is
// not set.
+ const DEFAULT_ROW_LIMIT = 25e3;
1825
// Sorter built by createSorter (defined elsewhere in this file) from each
// item's `impressions` value.
// NOTE(review): presumably the second argument names the default sort key -
// confirm against createSorter.
+ const sortRowResults = createSorter((item) => item.impressions, "impressions");
1826
/**
 * Resolves the zero-click filter thresholds, substituting the analyzer
 * defaults for any value the caller left unset (null/undefined).
 *
 * @param {object} params - Analyzer parameters.
 * @returns {{ minImpressions: number, maxCtr: number, maxPosition: number }}
 */
function zeroClickThresholds(params) {
  return {
    minImpressions: params.minImpressions ?? 1e3,
    maxCtr: params.maxCtr ?? .03,
    maxPosition: params.maxPosition ?? 10
  };
}

/**
 * JavaScript counterpart of the `missedClicks` expression in the SQL variant:
 * impressions times a position-tiered CTR, rounded, minus the actual clicks,
 * floored at zero.
 *
 * @param {number} impressions - Impressions for the query/page pair.
 * @param {number} position - Average position for the pair.
 * @param {number} clicks - Clicks actually recorded for the pair.
 * @returns {number} Estimated clicks the pair did not receive (never negative).
 */
function estimateMissedClicks(impressions, position, clicks) {
  // Same tiers as the CASE expression in buildSql; keep the two in sync.
  let expectedCtr = 0.04;
  if (position <= 1) expectedCtr = 0.30;
  else if (position <= 3) expectedCtr = 0.15;
  else if (position <= 5) expectedCtr = 0.08;
  return Math.max(0, Math.round(impressions * expectedCtr) - clicks);
}

/**
 * "zero-click" analyzer: finds query/page pairs with many impressions and a
 * good position but a low CTR, and estimates the clicks they are missing.
 *
 * Params read by this block: `minImpressions` (default 1000), `maxCtr`
 * (default 0.03), `maxPosition` (default 10), `limit`, `offset`, plus whatever
 * `periodOf` reads to derive `startDate` / `endDate`.
 */
const zeroClickAnalyzer = defineAnalyzer({
  id: "zero-click",
  /**
   * Builds the SQL plan over the `page_keywords` table.
   * Rows are aggregated per (query, url), filtered by the thresholds and
   * ordered by impressions descending. Defaults to 1000 rows when no limit
   * is given.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const { minImpressions, maxCtr, maxPosition } = zeroClickThresholds(params);
    const limit = params.limit ?? 1e3;
    return {
      sql: `
        WITH agg AS (
          SELECT
            query,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        )
        SELECT
          query, page, clicks, impressions, ctr, position,
          CAST(GREATEST(0, ROUND(impressions * (
            CASE
              WHEN position <= 1 THEN 0.30
              WHEN position <= 3 THEN 0.15
              WHEN position <= 5 THEN 0.08
              ELSE 0.04
            END
          )) - clicks) AS DOUBLE) AS missedClicks
        FROM agg
        WHERE position <= ? AND ctr < ?
        ORDER BY impressions DESC
        ${paginateClause({
          limit,
          offset: params.offset
        })}
      `,
      // Order must match the `?` placeholders above.
      params: [
        startDate,
        endDate,
        minImpressions,
        maxPosition,
        maxCtr
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes the SQL result rows and echoes the effective thresholds in
   * `meta`. `meta.total` is the number of rows received, i.e. after the SQL
   * pagination has been applied.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { minImpressions, maxCtr, maxPosition } = zeroClickThresholds(params);
    return {
      results: arr.map((r) => ({
        query: r.query == null ? "" : String(r.query),
        page: r.page == null ? "" : String(r.page),
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        missedClicks: num(r.missedClicks)
      })),
      meta: {
        total: arr.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  },
  /**
   * Builds the row-based query state: query and page dimensions over the
   * analysis period, capped at `params.limit` or DEFAULT_ROW_LIMIT.
   */
  buildRows(params) {
    const period = periodOf(params);
    const limit = params.limit ?? DEFAULT_ROW_LIMIT;
    return { rows: gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState() };
  },
  /**
   * Filters pre-fetched rows in memory, keeps the best-positioned page per
   * query, sorts by impressions descending and paginates.
   * Each result carries `missedClicks`, computed with the same tiers as the
   * SQL variant so both variants return the same result shape.
   */
  reduceRows(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { minImpressions, maxCtr, maxPosition } = zeroClickThresholds(params);
    const queryMap = /* @__PURE__ */ new Map();
    for (const row of arr) {
      if (row.impressions < minImpressions) continue;
      if (row.position > maxPosition) continue;
      // NOTE(review): rows with ctr exactly equal to maxCtr are kept here, while
      // the SQL variant filters with `ctr < ?` (exclusive) — confirm which
      // boundary is intended before aligning them.
      if (row.ctr > maxCtr) continue;
      const existing = queryMap.get(row.query);
      // One entry per query: the page with the lowest (best) position wins.
      if (!existing || row.position < existing.position) queryMap.set(row.query, {
        query: row.query,
        page: row.page,
        clicks: row.clicks,
        impressions: row.impressions,
        ctr: row.ctr,
        position: row.position,
        missedClicks: estimateMissedClicks(row.impressions, row.position, row.clicks)
      });
    }
    const results = sortRowResults(Array.from(queryMap.values()), "impressions", "desc");
    const paged = paginateInMemory(results, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: results.length,
        returned: paged.length
      }
    };
  }
});
1942
// Default registry: the row-based variant of every analyzer listed below,
// paired with the SQL variants supplied by SQL_ANALYZERS.
const defaultAnalyzerRegistry = createAnalyzerRegistry({
  rows: [
    strikingDistanceAnalyzer,
    opportunityAnalyzer,
    brandAnalyzer,
    concentrationAnalyzer,
    clusteringAnalyzer,
    seasonalityAnalyzer,
    moversAnalyzer,
    decayAnalyzer,
    cannibalizationAnalyzer,
    zeroClickAnalyzer
  ].map((analyzer) => analyzer.rows),
  sql: SQL_ANALYZERS
});
1957
+ export { defaultAnalyzerRegistry };