@gscdump/analysis 0.7.1 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -108
- package/dist/analyzer/index.d.mts +38 -179
- package/dist/analyzer/index.mjs +269 -502
- package/dist/default-registry.d.mts +2 -86
- package/dist/default-registry.mjs +2986 -204
- package/dist/index.d.mts +75 -234
- package/dist/index.mjs +3275 -800
- package/dist/query/index.d.mts +1 -1
- package/dist/query/index.mjs +1 -33
- package/dist/rollups.d.mts +163 -0
- package/dist/rollups.mjs +346 -0
- package/dist/source/index.d.mts +31 -219
- package/dist/source/index.mjs +27 -388
- package/package.json +23 -17
- package/dist/period/index.d.mts +0 -57
- package/dist/period/index.mjs +0 -150
package/dist/index.mjs
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
import { AnalyzerCapabilityError, createAnalyzerRegistry, createAnalyzerRegistry as createAnalyzerRegistry$1, defineAnalyzer, defineAnalyzer as defineAnalyzer$1, runAnalyzerFromSource, runAnalyzerFromSource as runAnalyzerFromSource$1 } from "@gscdump/engine/analyzer";
|
|
2
|
+
import { num, num as num$1 } from "@gscdump/engine/analysis-types";
|
|
3
|
+
import { comparisonOf, comparisonOf as comparisonOf$1, defaultEndDate, padTimeseries, padTimeseries as padTimeseries$1, periodOf, periodOf as periodOf$1, resolveWindow, windowToComparisonPeriod, windowToPeriod } from "@gscdump/engine/period";
|
|
1
4
|
import { enumeratePartitions } from "@gscdump/engine/planner";
|
|
2
5
|
import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
|
|
3
|
-
import { between, date,
|
|
6
|
+
import { between, date, extractDateRange, gsc, page, query } from "gscdump/query";
|
|
7
|
+
import { ENGINE_QUERY_CAPABILITIES, createAttachedTableSource, createEngineQuerySource, queryComparisonRows, queryRows, queryRows as queryRows$1, rewriteForTableSource, runAnalyzerWithEngine, typedQuery, typedQuery as typedQuery$1 } from "@gscdump/engine/source";
|
|
4
8
|
import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
8
|
-
import { createEngine as createSqliteQuerySource } from "@gscdump/engine-sqlite";
|
|
9
|
+
import { buildExtrasQueries, buildTotalsSql, isSqlQuerySource, mergeExtras, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized } from "@gscdump/engine/resolver";
|
|
10
|
+
import { canProxyToGsc } from "@gscdump/engine-gsc-api";
|
|
9
11
|
function clamp01(value) {
|
|
10
12
|
if (value < 0) return 0;
|
|
11
13
|
if (value > 1) return 1;
|
|
@@ -20,82 +22,8 @@ function percentDifference(current, previous) {
|
|
|
20
22
|
if (previous === 0) return current > 0 ? 100 : 0;
|
|
21
23
|
return (current - previous) / previous * 100;
|
|
22
24
|
}
|
|
23
|
-
var AnalyzerCapabilityError = class extends Error {
|
|
24
|
-
constructor(tool, missing) {
|
|
25
|
-
super(`analyzer "${tool}" requires capabilities [${missing.join(", ")}] not provided by source`);
|
|
26
|
-
this.tool = tool;
|
|
27
|
-
this.missing = missing;
|
|
28
|
-
this.name = "AnalyzerCapabilityError";
|
|
29
|
-
}
|
|
30
|
-
};
|
|
31
|
-
function sourceCapabilities(source) {
|
|
32
|
-
const caps = /* @__PURE__ */ new Set();
|
|
33
|
-
if (source.executeSql) caps.add("executeSql");
|
|
34
|
-
if (source.capabilities.fileSets) caps.add("partitionedParquet");
|
|
35
|
-
if (source.capabilities.regex) caps.add("regex");
|
|
36
|
-
if (source.capabilities.windowTotals) caps.add("windowTotals");
|
|
37
|
-
if (source.capabilities.comparisonJoin) caps.add("comparisonJoin");
|
|
38
|
-
if (source.capabilities.attachedTables) caps.add("attachedTables");
|
|
39
|
-
return caps;
|
|
40
|
-
}
|
|
41
|
-
function assertSatisfies(analyzer, caps) {
|
|
42
|
-
const missing = analyzer.requires.filter((c) => !caps.has(c));
|
|
43
|
-
if (missing.length > 0) throw new AnalyzerCapabilityError(analyzer.id, missing);
|
|
44
|
-
}
|
|
45
|
-
async function runAnalyzerFromSource(source, params, registry) {
|
|
46
|
-
const caps = sourceCapabilities(source);
|
|
47
|
-
const analyzer = registry.resolveAnalyzer(params.type, caps.has("executeSql") || caps.has("attachedTables"));
|
|
48
|
-
if (!analyzer) throw new AnalyzerCapabilityError(params.type, ["executeSql"]);
|
|
49
|
-
assertSatisfies(analyzer, caps);
|
|
50
|
-
const plan = analyzer.build(params);
|
|
51
|
-
if (plan.kind === "rows") return runRowsPlanAgainstSource(source, analyzer, plan, params);
|
|
52
|
-
return runSqlPlanAgainstSource(source, analyzer, plan, params);
|
|
53
|
-
}
|
|
54
|
-
async function runRowsPlanAgainstSource(source, analyzer, plan, params) {
|
|
55
|
-
const entries = Object.entries(plan.queries);
|
|
56
|
-
const resolved = await Promise.all(entries.map(async ([k, q]) => [k, await source.queryRows(q.state)]));
|
|
57
|
-
const rowMap = Object.fromEntries(resolved);
|
|
58
|
-
const { results, meta } = analyzer.reduce(rowMap, { params });
|
|
59
|
-
return {
|
|
60
|
-
results,
|
|
61
|
-
meta: {
|
|
62
|
-
tool: params.type,
|
|
63
|
-
...meta
|
|
64
|
-
}
|
|
65
|
-
};
|
|
66
|
-
}
|
|
67
|
-
function fileSetsFor(plan) {
|
|
68
|
-
const fileSets = { FILES: plan.current };
|
|
69
|
-
if (plan.previous) fileSets.FILES_PREV = plan.previous;
|
|
70
|
-
if (plan.extraFiles) for (const [key, fs] of Object.entries(plan.extraFiles)) fileSets[`FILES_${key}`] = fs;
|
|
71
|
-
return fileSets;
|
|
72
|
-
}
|
|
73
|
-
async function runSqlPlanAgainstSource(source, analyzer, plan, params) {
|
|
74
|
-
if (!source.executeSql) throw new AnalyzerCapabilityError(analyzer.id, ["executeSql"]);
|
|
75
|
-
if (plan.requiresAttachedTables && !source.capabilities.attachedTables) throw new AnalyzerCapabilityError(analyzer.id, ["attachedTables"]);
|
|
76
|
-
const fileSets = source.capabilities.fileSets ? fileSetsFor(plan) : void 0;
|
|
77
|
-
const rows = await source.executeSql(plan.sql, plan.params, fileSets ? { fileSets } : void 0);
|
|
78
|
-
const extras = {};
|
|
79
|
-
if (plan.extraQueries) for (const q of plan.extraQueries) {
|
|
80
|
-
const extraRows = await source.executeSql(q.sql, q.params, fileSets ? { fileSets } : void 0);
|
|
81
|
-
extras[q.name] = extraRows;
|
|
82
|
-
}
|
|
83
|
-
const { results, meta } = analyzer.reduce(rows, {
|
|
84
|
-
params,
|
|
85
|
-
extras
|
|
86
|
-
});
|
|
87
|
-
const sourceMeta = source.capabilities.localSource ? { source: "local" } : {};
|
|
88
|
-
return {
|
|
89
|
-
results,
|
|
90
|
-
meta: {
|
|
91
|
-
tool: params.type,
|
|
92
|
-
...sourceMeta,
|
|
93
|
-
...meta
|
|
94
|
-
}
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
25
|
async function analyzeFromSource(source, params, registry) {
|
|
98
|
-
return runAnalyzerFromSource(source, params, registry);
|
|
26
|
+
return runAnalyzerFromSource$1(source, params, registry);
|
|
99
27
|
}
|
|
100
28
|
const DEFAULT_SOURCES = [
|
|
101
29
|
"striking-distance",
|
|
@@ -360,48 +288,6 @@ async function analyzeActionPriority(analyzer, options = {}) {
|
|
|
360
288
|
async function analyzeActionPriorityFromSource(source, registry, options = {}) {
|
|
361
289
|
return analyzeActionPriority({ analyze: (params) => analyzeFromSource(source, params, registry) }, options);
|
|
362
290
|
}
|
|
363
|
-
function createAnalyzerRegistry(init = {}) {
|
|
364
|
-
const byId = /* @__PURE__ */ new Map();
|
|
365
|
-
for (const a of init.rows ?? []) {
|
|
366
|
-
const entry = byId.get(a.id) ?? {};
|
|
367
|
-
entry.rows = a;
|
|
368
|
-
byId.set(a.id, entry);
|
|
369
|
-
}
|
|
370
|
-
for (const a of init.sql ?? []) {
|
|
371
|
-
const entry = byId.get(a.id) ?? {};
|
|
372
|
-
entry.sql = a;
|
|
373
|
-
byId.set(a.id, entry);
|
|
374
|
-
}
|
|
375
|
-
const listAnalyzerIds = () => [...byId.keys()].sort();
|
|
376
|
-
const getAnalyzerVariants = (id) => byId.get(id);
|
|
377
|
-
const resolveAnalyzer = (id, sourceSupportsSql) => {
|
|
378
|
-
const variants = byId.get(id);
|
|
379
|
-
if (!variants) return void 0;
|
|
380
|
-
if (sourceSupportsSql) return variants.sql ?? variants.rows;
|
|
381
|
-
return variants.rows;
|
|
382
|
-
};
|
|
383
|
-
const listAnalyzersFor = (sourceSupportsSql) => {
|
|
384
|
-
const out = [];
|
|
385
|
-
for (const id of listAnalyzerIds()) {
|
|
386
|
-
const a = resolveAnalyzer(id, sourceSupportsSql);
|
|
387
|
-
if (a) out.push(a);
|
|
388
|
-
}
|
|
389
|
-
return out;
|
|
390
|
-
};
|
|
391
|
-
const listAnalyzerIdsFor = (source) => {
|
|
392
|
-
const sourceSupportsSql = typeof source.executeSql === "function";
|
|
393
|
-
const out = [];
|
|
394
|
-
for (const id of listAnalyzerIds()) if (resolveAnalyzer(id, sourceSupportsSql)) out.push(id);
|
|
395
|
-
return out;
|
|
396
|
-
};
|
|
397
|
-
return {
|
|
398
|
-
listAnalyzerIds,
|
|
399
|
-
getAnalyzerVariants,
|
|
400
|
-
resolveAnalyzer,
|
|
401
|
-
listAnalyzersFor,
|
|
402
|
-
listAnalyzerIdsFor
|
|
403
|
-
};
|
|
404
|
-
}
|
|
405
291
|
const DEFAULT_LIMIT$1 = 25e3;
|
|
406
292
|
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
407
293
|
return gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
|
|
@@ -412,244 +298,10 @@ function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
|
412
298
|
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
413
299
|
return gsc.select(date).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
|
|
414
300
|
}
|
|
415
|
-
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];
|
|
416
|
-
function defineAnalyzer(opts) {
|
|
417
|
-
const { id, reduce, reduceSql, reduceRows, buildSql, buildRows, sqlRequires = DEFAULT_SQL_REQUIRES, rowsRequires = [] } = opts;
|
|
418
|
-
const sqlReducer = reduceSql ?? reduce;
|
|
419
|
-
const rowsReducer = reduceRows ?? reduce;
|
|
420
|
-
if (buildSql && !sqlReducer) throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
|
|
421
|
-
if (buildRows && !rowsReducer) throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);
|
|
422
|
-
const wrap = (fn) => (rows, params, ctx) => {
|
|
423
|
-
return fn(Array.isArray(rows) ? rows : pickSingle(rows) ?? rows, params, ctx);
|
|
424
|
-
};
|
|
425
|
-
return {
|
|
426
|
-
id,
|
|
427
|
-
sql: buildSql && sqlReducer ? {
|
|
428
|
-
id,
|
|
429
|
-
requires: sqlRequires,
|
|
430
|
-
build(params) {
|
|
431
|
-
const spec = buildSql(params);
|
|
432
|
-
return {
|
|
433
|
-
kind: "sql",
|
|
434
|
-
sql: spec.sql,
|
|
435
|
-
params: spec.params,
|
|
436
|
-
current: spec.current,
|
|
437
|
-
previous: spec.previous,
|
|
438
|
-
extraFiles: spec.extraFiles,
|
|
439
|
-
extraQueries: spec.extraQueries,
|
|
440
|
-
requiresAttachedTables: spec.requiresAttachedTables
|
|
441
|
-
};
|
|
442
|
-
},
|
|
443
|
-
reduce(rows, ctx) {
|
|
444
|
-
const { results, meta } = wrap(sqlReducer)(rows, ctx.params, { extras: ctx.extras });
|
|
445
|
-
return {
|
|
446
|
-
results,
|
|
447
|
-
meta
|
|
448
|
-
};
|
|
449
|
-
}
|
|
450
|
-
} : void 0,
|
|
451
|
-
rows: buildRows && rowsReducer ? {
|
|
452
|
-
id,
|
|
453
|
-
requires: rowsRequires,
|
|
454
|
-
build(params) {
|
|
455
|
-
const queries = buildRows(params);
|
|
456
|
-
return {
|
|
457
|
-
kind: "rows",
|
|
458
|
-
queries: Object.fromEntries(Object.entries(queries).map(([k, state]) => [k, { state }]))
|
|
459
|
-
};
|
|
460
|
-
},
|
|
461
|
-
reduce(rows, ctx) {
|
|
462
|
-
const { results, meta } = wrap(rowsReducer)(rows, ctx.params, {});
|
|
463
|
-
return {
|
|
464
|
-
results,
|
|
465
|
-
meta
|
|
466
|
-
};
|
|
467
|
-
}
|
|
468
|
-
} : void 0
|
|
469
|
-
};
|
|
470
|
-
}
|
|
471
|
-
function pickSingle(rows) {
|
|
472
|
-
const keys = Object.keys(rows);
|
|
473
|
-
return keys.length === 1 ? rows[keys[0]] : void 0;
|
|
474
|
-
}
|
|
475
|
-
function defaultEndDate() {
|
|
476
|
-
return daysAgo(3);
|
|
477
|
-
}
|
|
478
|
-
function defaultStartDate() {
|
|
479
|
-
return daysAgo(31);
|
|
480
|
-
}
|
|
481
|
-
function periodOf(params) {
|
|
482
|
-
return {
|
|
483
|
-
startDate: params.startDate || defaultStartDate(),
|
|
484
|
-
endDate: params.endDate || defaultEndDate()
|
|
485
|
-
};
|
|
486
|
-
}
|
|
487
|
-
function comparisonOf(params) {
|
|
488
|
-
if (!params.prevStartDate || !params.prevEndDate) throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
|
|
489
|
-
return {
|
|
490
|
-
current: periodOf(params),
|
|
491
|
-
previous: {
|
|
492
|
-
startDate: params.prevStartDate,
|
|
493
|
-
endDate: params.prevEndDate
|
|
494
|
-
}
|
|
495
|
-
};
|
|
496
|
-
}
|
|
497
|
-
function parseIso(s) {
|
|
498
|
-
return /* @__PURE__ */ new Date(`${s}T00:00:00Z`);
|
|
499
|
-
}
|
|
500
|
-
function addDays(d, n) {
|
|
501
|
-
return new Date(d.getTime() + n * MS_PER_DAY);
|
|
502
|
-
}
|
|
503
|
-
function daysBetween(start, end) {
|
|
504
|
-
return Math.round((parseIso(end).getTime() - parseIso(start).getTime()) / MS_PER_DAY) + 1;
|
|
505
|
-
}
|
|
506
|
-
function resolveWindow(opts) {
|
|
507
|
-
const anchor = opts.anchor ? parseIso(opts.anchor) : /* @__PURE__ */ new Date();
|
|
508
|
-
const anchorIso = toIsoDate(anchor);
|
|
509
|
-
let start;
|
|
510
|
-
let end;
|
|
511
|
-
switch (opts.preset) {
|
|
512
|
-
case "last-7d":
|
|
513
|
-
end = anchorIso;
|
|
514
|
-
start = toIsoDate(addDays(anchor, -6));
|
|
515
|
-
break;
|
|
516
|
-
case "last-28d":
|
|
517
|
-
end = anchorIso;
|
|
518
|
-
start = toIsoDate(addDays(anchor, -27));
|
|
519
|
-
break;
|
|
520
|
-
case "last-30d":
|
|
521
|
-
end = anchorIso;
|
|
522
|
-
start = toIsoDate(addDays(anchor, -29));
|
|
523
|
-
break;
|
|
524
|
-
case "last-90d":
|
|
525
|
-
end = anchorIso;
|
|
526
|
-
start = toIsoDate(addDays(anchor, -89));
|
|
527
|
-
break;
|
|
528
|
-
case "last-180d":
|
|
529
|
-
end = anchorIso;
|
|
530
|
-
start = toIsoDate(addDays(anchor, -179));
|
|
531
|
-
break;
|
|
532
|
-
case "last-365d":
|
|
533
|
-
end = anchorIso;
|
|
534
|
-
start = toIsoDate(addDays(anchor, -364));
|
|
535
|
-
break;
|
|
536
|
-
case "mtd":
|
|
537
|
-
end = anchorIso;
|
|
538
|
-
start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), anchor.getUTCMonth(), 1)));
|
|
539
|
-
break;
|
|
540
|
-
case "ytd":
|
|
541
|
-
end = anchorIso;
|
|
542
|
-
start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), 0, 1)));
|
|
543
|
-
break;
|
|
544
|
-
case "custom":
|
|
545
|
-
if (!opts.start || !opts.end) throw new Error("resolveWindow: preset=custom requires start and end");
|
|
546
|
-
start = opts.start;
|
|
547
|
-
end = opts.end;
|
|
548
|
-
break;
|
|
549
|
-
}
|
|
550
|
-
const days = daysBetween(start, end);
|
|
551
|
-
const result = {
|
|
552
|
-
start,
|
|
553
|
-
end,
|
|
554
|
-
days
|
|
555
|
-
};
|
|
556
|
-
const mode = opts.comparison ?? "none";
|
|
557
|
-
if (mode === "prev-period") {
|
|
558
|
-
const prevEnd = toIsoDate(addDays(parseIso(start), -1));
|
|
559
|
-
result.comparison = {
|
|
560
|
-
start: toIsoDate(addDays(parseIso(prevEnd), -(days - 1))),
|
|
561
|
-
end: prevEnd
|
|
562
|
-
};
|
|
563
|
-
} else if (mode === "yoy") {
|
|
564
|
-
const prevEnd = toIsoDate(addDays(parseIso(end), -365));
|
|
565
|
-
result.comparison = {
|
|
566
|
-
start: toIsoDate(addDays(parseIso(start), -365)),
|
|
567
|
-
end: prevEnd
|
|
568
|
-
};
|
|
569
|
-
}
|
|
570
|
-
return result;
|
|
571
|
-
}
|
|
572
|
-
function windowToPeriod(w) {
|
|
573
|
-
return {
|
|
574
|
-
startDate: w.start,
|
|
575
|
-
endDate: w.end
|
|
576
|
-
};
|
|
577
|
-
}
|
|
578
|
-
function windowToComparisonPeriod(w) {
|
|
579
|
-
if (!w.comparison) return void 0;
|
|
580
|
-
return {
|
|
581
|
-
current: {
|
|
582
|
-
startDate: w.start,
|
|
583
|
-
endDate: w.end
|
|
584
|
-
},
|
|
585
|
-
previous: {
|
|
586
|
-
startDate: w.comparison.start,
|
|
587
|
-
endDate: w.comparison.end
|
|
588
|
-
}
|
|
589
|
-
};
|
|
590
|
-
}
|
|
591
|
-
const DEFAULT_FILL = {
|
|
592
|
-
clicks: 0,
|
|
593
|
-
impressions: 0,
|
|
594
|
-
ctr: 0,
|
|
595
|
-
position: 0
|
|
596
|
-
};
|
|
597
|
-
function padTimeseries(rows, options) {
|
|
598
|
-
const { startDate, endDate } = options;
|
|
599
|
-
const dateKey = options.dateKey ?? "date";
|
|
600
|
-
const fill = options.fill ?? DEFAULT_FILL;
|
|
601
|
-
const byDate = /* @__PURE__ */ new Map();
|
|
602
|
-
for (const row of rows) {
|
|
603
|
-
const d = String(row[dateKey]);
|
|
604
|
-
const bucket = byDate.get(d);
|
|
605
|
-
if (bucket) bucket.push(row);
|
|
606
|
-
else byDate.set(d, [row]);
|
|
607
|
-
}
|
|
608
|
-
const result = [];
|
|
609
|
-
const start = /* @__PURE__ */ new Date(`${startDate}T00:00:00Z`);
|
|
610
|
-
const end = /* @__PURE__ */ new Date(`${endDate}T00:00:00Z`);
|
|
611
|
-
if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);
|
|
612
|
-
for (let cursorMs = start.getTime(), endMs = end.getTime(); cursorMs <= endMs; cursorMs += MS_PER_DAY) {
|
|
613
|
-
const dateStr = toIsoDate(new Date(cursorMs));
|
|
614
|
-
const existing = byDate.get(dateStr);
|
|
615
|
-
if (existing) result.push(...existing);
|
|
616
|
-
else result.push({
|
|
617
|
-
...fill,
|
|
618
|
-
[dateKey]: dateStr
|
|
619
|
-
});
|
|
620
|
-
}
|
|
621
|
-
return result;
|
|
622
|
-
}
|
|
623
|
-
function num(v) {
|
|
624
|
-
if (typeof v === "number") return v;
|
|
625
|
-
if (typeof v === "bigint") return Number(v);
|
|
626
|
-
if (v == null) return 0;
|
|
627
|
-
return Number(v);
|
|
628
|
-
}
|
|
629
|
-
function buildPeriodMap(rows, key, value, filter) {
|
|
630
|
-
const out = /* @__PURE__ */ new Map();
|
|
631
|
-
for (const row of rows) {
|
|
632
|
-
if (filter && !filter(row)) continue;
|
|
633
|
-
out.set(key(row), value(row));
|
|
634
|
-
}
|
|
635
|
-
return out;
|
|
636
|
-
}
|
|
637
|
-
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
|
|
638
|
-
return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
|
|
639
|
-
const mult = sortOrder === "desc" ? -1 : 1;
|
|
640
|
-
return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
|
|
641
|
-
};
|
|
642
|
-
}
|
|
643
|
-
function createMetricSorter(defaultMetric, orderByMetric) {
|
|
644
|
-
return (items, sortBy = defaultMetric) => {
|
|
645
|
-
const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
|
|
646
|
-
return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
|
|
647
|
-
};
|
|
648
|
-
}
|
|
649
301
|
function escapeRegexAlt(s) {
|
|
650
302
|
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
651
303
|
}
|
|
652
|
-
function str$
|
|
304
|
+
function str$23(v) {
|
|
653
305
|
return v == null ? "" : String(v);
|
|
654
306
|
}
|
|
655
307
|
function analyzeBrandSegmentation(keywords, options) {
|
|
@@ -658,12 +310,12 @@ function analyzeBrandSegmentation(keywords, options) {
|
|
|
658
310
|
const brand = [];
|
|
659
311
|
const nonBrand = [];
|
|
660
312
|
for (const row of keywords) {
|
|
661
|
-
if (num(row.impressions) < minImpressions) continue;
|
|
313
|
+
if (num$1(row.impressions) < minImpressions) continue;
|
|
662
314
|
if (lowerBrandTerms.some((term) => row.query.toLowerCase().includes(term))) brand.push(row);
|
|
663
315
|
else nonBrand.push(row);
|
|
664
316
|
}
|
|
665
|
-
const brandClicks = brand.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
666
|
-
const nonBrandClicks = nonBrand.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
317
|
+
const brandClicks = brand.reduce((sum, k) => sum + num$1(k.clicks), 0);
|
|
318
|
+
const nonBrandClicks = nonBrand.reduce((sum, k) => sum + num$1(k.clicks), 0);
|
|
667
319
|
const totalClicks = brandClicks + nonBrandClicks;
|
|
668
320
|
return {
|
|
669
321
|
brand,
|
|
@@ -672,16 +324,16 @@ function analyzeBrandSegmentation(keywords, options) {
|
|
|
672
324
|
brandClicks,
|
|
673
325
|
nonBrandClicks,
|
|
674
326
|
brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
|
|
675
|
-
brandImpressions: brand.reduce((sum, k) => sum + num(k.impressions), 0),
|
|
676
|
-
nonBrandImpressions: nonBrand.reduce((sum, k) => sum + num(k.impressions), 0)
|
|
327
|
+
brandImpressions: brand.reduce((sum, k) => sum + num$1(k.impressions), 0),
|
|
328
|
+
nonBrandImpressions: nonBrand.reduce((sum, k) => sum + num$1(k.impressions), 0)
|
|
677
329
|
}
|
|
678
330
|
};
|
|
679
331
|
}
|
|
680
|
-
const brandAnalyzer = defineAnalyzer({
|
|
332
|
+
const brandAnalyzer = defineAnalyzer$1({
|
|
681
333
|
id: "brand",
|
|
682
334
|
buildSql(params) {
|
|
683
335
|
if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
|
|
684
|
-
const { startDate, endDate } = periodOf(params);
|
|
336
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
685
337
|
const minImpressions = params.minImpressions ?? 10;
|
|
686
338
|
const limit = params.limit ?? 1e4;
|
|
687
339
|
const regex = `(${params.brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
|
|
@@ -721,13 +373,13 @@ const brandAnalyzer = defineAnalyzer({
|
|
|
721
373
|
},
|
|
722
374
|
reduceSql(rows) {
|
|
723
375
|
const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
724
|
-
query: str$
|
|
725
|
-
page: r.page == null ? void 0 : str$
|
|
726
|
-
clicks: num(r.clicks),
|
|
727
|
-
impressions: num(r.impressions),
|
|
728
|
-
ctr: num(r.ctr),
|
|
729
|
-
position: num(r.position),
|
|
730
|
-
segment: str$
|
|
376
|
+
query: str$23(r.query),
|
|
377
|
+
page: r.page == null ? void 0 : str$23(r.page),
|
|
378
|
+
clicks: num$1(r.clicks),
|
|
379
|
+
impressions: num$1(r.impressions),
|
|
380
|
+
ctr: num$1(r.ctr),
|
|
381
|
+
position: num$1(r.position),
|
|
382
|
+
segment: str$23(r.segment)
|
|
731
383
|
}));
|
|
732
384
|
let brandClicks = 0;
|
|
733
385
|
let nonBrandClicks = 0;
|
|
@@ -756,7 +408,7 @@ const brandAnalyzer = defineAnalyzer({
|
|
|
756
408
|
};
|
|
757
409
|
},
|
|
758
410
|
buildRows(params) {
|
|
759
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit) };
|
|
411
|
+
return { keywords: keywordsQueryState(periodOf$1(params), params.limit) };
|
|
760
412
|
},
|
|
761
413
|
reduceRows(rows, params) {
|
|
762
414
|
if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
|
|
@@ -776,6 +428,26 @@ const brandAnalyzer = defineAnalyzer({
|
|
|
776
428
|
};
|
|
777
429
|
}
|
|
778
430
|
});
|
|
431
|
+
function buildPeriodMap(rows, key, value, filter) {
|
|
432
|
+
const out = /* @__PURE__ */ new Map();
|
|
433
|
+
for (const row of rows) {
|
|
434
|
+
if (filter && !filter(row)) continue;
|
|
435
|
+
out.set(key(row), value(row));
|
|
436
|
+
}
|
|
437
|
+
return out;
|
|
438
|
+
}
|
|
439
|
+
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
|
|
440
|
+
return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
|
|
441
|
+
const mult = sortOrder === "desc" ? -1 : 1;
|
|
442
|
+
return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
|
|
443
|
+
};
|
|
444
|
+
}
|
|
445
|
+
function createMetricSorter(defaultMetric, orderByMetric) {
|
|
446
|
+
return (items, sortBy = defaultMetric) => {
|
|
447
|
+
const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
|
|
448
|
+
return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
|
|
449
|
+
};
|
|
450
|
+
}
|
|
779
451
|
const sortRowResults$1 = createSorter((item, metric) => {
|
|
780
452
|
switch (metric) {
|
|
781
453
|
case "clicks": return item.totalClicks;
|
|
@@ -784,10 +456,10 @@ const sortRowResults$1 = createSorter((item, metric) => {
|
|
|
784
456
|
case "pageCount": return item.pages.length;
|
|
785
457
|
}
|
|
786
458
|
}, "clicks");
|
|
787
|
-
function str$
|
|
459
|
+
function str$22(v) {
|
|
788
460
|
return v == null ? "" : String(v);
|
|
789
461
|
}
|
|
790
|
-
function parseJsonList$
|
|
462
|
+
function parseJsonList$16(v) {
|
|
791
463
|
if (Array.isArray(v)) return v;
|
|
792
464
|
if (typeof v === "string" && v.length > 0) {
|
|
793
465
|
const parsed = JSON.parse(v);
|
|
@@ -827,10 +499,10 @@ function analyzeCannibalization(rows, options = {}) {
|
|
|
827
499
|
}
|
|
828
500
|
return sortRowResults$1(results, sortBy, sortOrder);
|
|
829
501
|
}
|
|
830
|
-
const cannibalizationAnalyzer = defineAnalyzer({
|
|
502
|
+
const cannibalizationAnalyzer = defineAnalyzer$1({
|
|
831
503
|
id: "cannibalization",
|
|
832
504
|
buildSql(params) {
|
|
833
|
-
const { startDate, endDate } = periodOf(params);
|
|
505
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
834
506
|
const minImpressions = params.minImpressions ?? 50;
|
|
835
507
|
const minCompetitors = 2;
|
|
836
508
|
const minQueryImpressions = (params.minImpressions ?? 50) * 2;
|
|
@@ -947,25 +619,25 @@ const cannibalizationAnalyzer = defineAnalyzer({
|
|
|
947
619
|
},
|
|
948
620
|
reduceSql(rows) {
|
|
949
621
|
const events = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
950
|
-
keyword: str$
|
|
951
|
-
totalImpressions: num(r.totalImpressions),
|
|
952
|
-
totalClicks: num(r.totalClicks),
|
|
953
|
-
competitorCount: num(r.competitorCount),
|
|
954
|
-
leaderUrl: str$
|
|
955
|
-
leaderCtr: num(r.leaderCtr),
|
|
956
|
-
leaderPosition: num(r.leaderPosition),
|
|
957
|
-
hhi: num(r.hhi),
|
|
958
|
-
fragmentation: num(r.fragmentation),
|
|
959
|
-
stolenClicks: num(r.stolenClicks),
|
|
960
|
-
severity: num(r.severity),
|
|
961
|
-
competitors: parseJsonList$
|
|
962
|
-
url: str$
|
|
963
|
-
clicks: num(c.clicks),
|
|
964
|
-
impressions: num(c.impressions),
|
|
965
|
-
ctr: num(c.ctr),
|
|
966
|
-
position: num(c.position),
|
|
967
|
-
share: num(c.share),
|
|
968
|
-
rank: num(c.rank)
|
|
622
|
+
keyword: str$22(r.keyword),
|
|
623
|
+
totalImpressions: num$1(r.totalImpressions),
|
|
624
|
+
totalClicks: num$1(r.totalClicks),
|
|
625
|
+
competitorCount: num$1(r.competitorCount),
|
|
626
|
+
leaderUrl: str$22(r.leaderUrl),
|
|
627
|
+
leaderCtr: num$1(r.leaderCtr),
|
|
628
|
+
leaderPosition: num$1(r.leaderPosition),
|
|
629
|
+
hhi: num$1(r.hhi),
|
|
630
|
+
fragmentation: num$1(r.fragmentation),
|
|
631
|
+
stolenClicks: num$1(r.stolenClicks),
|
|
632
|
+
severity: num$1(r.severity),
|
|
633
|
+
competitors: parseJsonList$16(r.competitors).map((c) => ({
|
|
634
|
+
url: str$22(c.url),
|
|
635
|
+
clicks: num$1(c.clicks),
|
|
636
|
+
impressions: num$1(c.impressions),
|
|
637
|
+
ctr: num$1(c.ctr),
|
|
638
|
+
position: num$1(c.position),
|
|
639
|
+
share: num$1(c.share),
|
|
640
|
+
rank: num$1(c.rank)
|
|
969
641
|
}))
|
|
970
642
|
}));
|
|
971
643
|
const nodeAgg = /* @__PURE__ */ new Map();
|
|
@@ -1022,7 +694,7 @@ const cannibalizationAnalyzer = defineAnalyzer({
|
|
|
1022
694
|
};
|
|
1023
695
|
},
|
|
1024
696
|
buildRows(params) {
|
|
1025
|
-
return { rows: keywordsQueryState(periodOf(params), params.limit) };
|
|
697
|
+
return { rows: keywordsQueryState(periodOf$1(params), params.limit) };
|
|
1026
698
|
},
|
|
1027
699
|
reduceRows(rows, params) {
|
|
1028
700
|
const results = analyzeCannibalization(Array.isArray(rows) ? rows : [], {
|
|
@@ -1057,10 +729,10 @@ const INTENT_PREFIXES = [
|
|
|
1057
729
|
"near me"
|
|
1058
730
|
];
|
|
1059
731
|
const WHITESPACE_RE$1 = /\s+/;
|
|
1060
|
-
function str$
|
|
732
|
+
function str$21(v) {
|
|
1061
733
|
return v == null ? "" : String(v);
|
|
1062
734
|
}
|
|
1063
|
-
function parseJsonList$
|
|
735
|
+
function parseJsonList$15(v) {
|
|
1064
736
|
if (Array.isArray(v)) return v;
|
|
1065
737
|
if (typeof v === "string" && v.length > 0) {
|
|
1066
738
|
const parsed = JSON.parse(v);
|
|
@@ -1080,7 +752,7 @@ function extractWordPrefix(keyword, wordCount = 2) {
|
|
|
1080
752
|
}
|
|
1081
753
|
function analyzeClustering(keywords, options = {}) {
|
|
1082
754
|
const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
|
|
1083
|
-
const filtered = keywords.filter((k) => num(k.impressions) >= minImpressions);
|
|
755
|
+
const filtered = keywords.filter((k) => num$1(k.impressions) >= minImpressions);
|
|
1084
756
|
const clusterMap = /* @__PURE__ */ new Map();
|
|
1085
757
|
const clusteredKeywords = /* @__PURE__ */ new Set();
|
|
1086
758
|
if (clusterBy === "intent" || clusterBy === "both") for (const kw of filtered) {
|
|
@@ -1117,9 +789,9 @@ function analyzeClustering(keywords, options = {}) {
|
|
|
1117
789
|
const clusters = [];
|
|
1118
790
|
for (const [name, data] of clusterMap) {
|
|
1119
791
|
if (data.keywords.length < minClusterSize) continue;
|
|
1120
|
-
const totalClicks = data.keywords.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
1121
|
-
const totalImpressions = data.keywords.reduce((sum, k) => sum + num(k.impressions), 0);
|
|
1122
|
-
const avgPosition = data.keywords.reduce((sum, k) => sum + num(k.position), 0) / data.keywords.length;
|
|
792
|
+
const totalClicks = data.keywords.reduce((sum, k) => sum + num$1(k.clicks), 0);
|
|
793
|
+
const totalImpressions = data.keywords.reduce((sum, k) => sum + num$1(k.impressions), 0);
|
|
794
|
+
const avgPosition = data.keywords.reduce((sum, k) => sum + num$1(k.position), 0) / data.keywords.length;
|
|
1123
795
|
clusters.push({
|
|
1124
796
|
clusterName: name,
|
|
1125
797
|
clusterType: data.type,
|
|
@@ -1136,10 +808,10 @@ function analyzeClustering(keywords, options = {}) {
|
|
|
1136
808
|
unclustered: filtered.filter((kw) => !clusteredKeywords.has(kw.query))
|
|
1137
809
|
};
|
|
1138
810
|
}
|
|
1139
|
-
const clusteringAnalyzer = defineAnalyzer({
|
|
811
|
+
const clusteringAnalyzer = defineAnalyzer$1({
|
|
1140
812
|
id: "clustering",
|
|
1141
813
|
buildSql(params) {
|
|
1142
|
-
const { startDate, endDate } = periodOf(params);
|
|
814
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
1143
815
|
const minImpressions = params.minImpressions ?? 10;
|
|
1144
816
|
const minClusterSize = params.minClusterSize ?? 2;
|
|
1145
817
|
const clusterBy = params.clusterBy ?? "both";
|
|
@@ -1205,18 +877,18 @@ const clusteringAnalyzer = defineAnalyzer({
|
|
|
1205
877
|
},
|
|
1206
878
|
reduceSql(rows) {
|
|
1207
879
|
const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
1208
|
-
clusterName: str$
|
|
1209
|
-
clusterType: str$
|
|
1210
|
-
keywordCount: num(r.keywordCount),
|
|
1211
|
-
totalClicks: num(r.totalClicks),
|
|
1212
|
-
totalImpressions: num(r.totalImpressions),
|
|
1213
|
-
avgPosition: num(r.avgPosition),
|
|
1214
|
-
keywords: parseJsonList$
|
|
1215
|
-
query: str$
|
|
1216
|
-
clicks: num(k.clicks),
|
|
1217
|
-
impressions: num(k.impressions),
|
|
1218
|
-
ctr: num(k.ctr),
|
|
1219
|
-
position: num(k.position)
|
|
880
|
+
clusterName: str$21(r.clusterName),
|
|
881
|
+
clusterType: str$21(r.clusterType),
|
|
882
|
+
keywordCount: num$1(r.keywordCount),
|
|
883
|
+
totalClicks: num$1(r.totalClicks),
|
|
884
|
+
totalImpressions: num$1(r.totalImpressions),
|
|
885
|
+
avgPosition: num$1(r.avgPosition),
|
|
886
|
+
keywords: parseJsonList$15(r.keywords).map((k) => ({
|
|
887
|
+
query: str$21(k.query),
|
|
888
|
+
clicks: num$1(k.clicks),
|
|
889
|
+
impressions: num$1(k.impressions),
|
|
890
|
+
ctr: num$1(k.ctr),
|
|
891
|
+
position: num$1(k.position)
|
|
1220
892
|
}))
|
|
1221
893
|
}));
|
|
1222
894
|
return {
|
|
@@ -1228,7 +900,7 @@ const clusteringAnalyzer = defineAnalyzer({
|
|
|
1228
900
|
};
|
|
1229
901
|
},
|
|
1230
902
|
buildRows(params) {
|
|
1231
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit) };
|
|
903
|
+
return { keywords: keywordsQueryState(periodOf$1(params), params.limit) };
|
|
1232
904
|
},
|
|
1233
905
|
reduceRows(rows, params) {
|
|
1234
906
|
const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
|
|
@@ -1242,10 +914,10 @@ const clusteringAnalyzer = defineAnalyzer({
|
|
|
1242
914
|
};
|
|
1243
915
|
}
|
|
1244
916
|
});
|
|
1245
|
-
function str$
|
|
917
|
+
function str$20(v) {
|
|
1246
918
|
return v == null ? "" : String(v);
|
|
1247
919
|
}
|
|
1248
|
-
function parseJsonList$
|
|
920
|
+
function parseJsonList$14(v) {
|
|
1249
921
|
if (Array.isArray(v)) return v;
|
|
1250
922
|
if (typeof v === "string" && v.length > 0) {
|
|
1251
923
|
const parsed = JSON.parse(v);
|
|
@@ -1306,19 +978,19 @@ function analyzeConcentration(items, options = {}) {
|
|
|
1306
978
|
/**
 * Runs the concentration analysis over page rows.
 *
 * Each page row is projected to a `{ key, clicks }` item (key = `page`,
 * clicks coerced through `num$1`) before being handed to `analyzeConcentration`.
 *
 * @param {Array<object>} pages - Rows exposing `page` and `clicks`.
 * @param {object} [options] - Forwarded unchanged to `analyzeConcentration`.
 * @returns {*} Whatever `analyzeConcentration` returns for the projected items.
 */
function analyzePageConcentration(pages, options) {
	const items = pages.map((entry) => {
		return {
			key: entry.page,
			clicks: num$1(entry.clicks)
		};
	});
	return analyzeConcentration(items, options);
}
|
|
1312
984
|
/**
 * Runs the concentration analysis over keyword rows.
 *
 * Each keyword row is projected to a `{ key, clicks }` item (key = `query`,
 * clicks coerced through `num$1`) before being handed to `analyzeConcentration`.
 *
 * @param {Array<object>} keywords - Rows exposing `query` and `clicks`.
 * @param {object} [options] - Forwarded unchanged to `analyzeConcentration`.
 * @returns {*} Whatever `analyzeConcentration` returns for the projected items.
 */
function analyzeKeywordConcentration(keywords, options) {
	const items = keywords.map((entry) => {
		return {
			key: entry.query,
			clicks: num$1(entry.clicks)
		};
	});
	return analyzeConcentration(items, options);
}
|
|
1318
|
-
const concentrationAnalyzer = defineAnalyzer({
|
|
990
|
+
const concentrationAnalyzer = defineAnalyzer$1({
|
|
1319
991
|
id: "concentration",
|
|
1320
992
|
buildSql(params) {
|
|
1321
|
-
const { startDate, endDate } = periodOf(params);
|
|
993
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
1322
994
|
const dim = params.dimension || "pages";
|
|
1323
995
|
const topN = params.topN ?? 10;
|
|
1324
996
|
const table = dim === "keywords" ? "keywords" : "pages";
|
|
@@ -1393,20 +1065,20 @@ const concentrationAnalyzer = defineAnalyzer({
|
|
|
1393
1065
|
},
|
|
1394
1066
|
reduceSql(rows, params) {
|
|
1395
1067
|
const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
|
|
1396
|
-
const topRaw = parseJsonList$
|
|
1068
|
+
const topRaw = parseJsonList$14(r.topNItems);
|
|
1397
1069
|
return {
|
|
1398
1070
|
results: [{
|
|
1399
|
-
giniCoefficient: num(r.giniCoefficient),
|
|
1400
|
-
hhi: num(r.hhi),
|
|
1401
|
-
topNConcentration: num(r.topNConcentration),
|
|
1071
|
+
giniCoefficient: num$1(r.giniCoefficient),
|
|
1072
|
+
hhi: num$1(r.hhi),
|
|
1073
|
+
topNConcentration: num$1(r.topNConcentration),
|
|
1402
1074
|
topNItems: topRaw.map((t) => ({
|
|
1403
|
-
key: str$
|
|
1404
|
-
clicks: num(t.clicks),
|
|
1405
|
-
share: num(t.share)
|
|
1075
|
+
key: str$20(t.key),
|
|
1076
|
+
clicks: num$1(t.clicks),
|
|
1077
|
+
share: num$1(t.share)
|
|
1406
1078
|
})),
|
|
1407
|
-
totalItems: num(r.totalItems),
|
|
1408
|
-
totalClicks: num(r.totalClicks),
|
|
1409
|
-
riskLevel: str$
|
|
1079
|
+
totalItems: num$1(r.totalItems),
|
|
1080
|
+
totalClicks: num$1(r.totalClicks),
|
|
1081
|
+
riskLevel: str$20(r.riskLevel)
|
|
1410
1082
|
}],
|
|
1411
1083
|
meta: {
|
|
1412
1084
|
total: 1,
|
|
@@ -1416,7 +1088,7 @@ const concentrationAnalyzer = defineAnalyzer({
|
|
|
1416
1088
|
},
|
|
1417
1089
|
buildRows(params) {
|
|
1418
1090
|
const dim = params.dimension || "pages";
|
|
1419
|
-
const period = periodOf(params);
|
|
1091
|
+
const period = periodOf$1(params);
|
|
1420
1092
|
const out = {};
|
|
1421
1093
|
if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
|
|
1422
1094
|
else out.keywords = keywordsQueryState(period, params.limit);
|
|
@@ -1436,10 +1108,10 @@ const sortResults$2 = createMetricSorter("lostClicks", {
|
|
|
1436
1108
|
declinePercent: "desc",
|
|
1437
1109
|
currentClicks: "asc"
|
|
1438
1110
|
});
|
|
1439
|
-
function str$
|
|
1111
|
+
/**
 * String coercion used when normalizing result rows.
 *
 * @param {*} v - Any value.
 * @returns {string} `""` when `v` is `null` or `undefined`; `String(v)` otherwise.
 */
function str$19(v) {
	const isMissing = v === null || v === void 0;
	return isMissing ? "" : String(v);
}
|
|
1442
|
-
function parseJsonList$
|
|
1114
|
+
function parseJsonList$13(v) {
|
|
1443
1115
|
if (Array.isArray(v)) return v;
|
|
1444
1116
|
if (typeof v === "string" && v.length > 0) {
|
|
1445
1117
|
const parsed = JSON.parse(v);
|
|
@@ -1450,13 +1122,13 @@ function parseJsonList$1(v) {
|
|
|
1450
1122
|
function analyzeDecay(input, options = {}) {
|
|
1451
1123
|
const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
|
|
1452
1124
|
const currentMap = buildPeriodMap(input.current, (r) => r.page, (r) => ({
|
|
1453
|
-
clicks: num(r.clicks),
|
|
1454
|
-
position: num(r.position)
|
|
1125
|
+
clicks: num$1(r.clicks),
|
|
1126
|
+
position: num$1(r.position)
|
|
1455
1127
|
}));
|
|
1456
1128
|
const previousMap = buildPeriodMap(input.previous, (r) => r.page, (r) => ({
|
|
1457
|
-
clicks: num(r.clicks),
|
|
1458
|
-
position: num(r.position)
|
|
1459
|
-
}), (r) => num(r.clicks) >= minPreviousClicks);
|
|
1129
|
+
clicks: num$1(r.clicks),
|
|
1130
|
+
position: num$1(r.position)
|
|
1131
|
+
}), (r) => num$1(r.clicks) >= minPreviousClicks);
|
|
1460
1132
|
const results = [];
|
|
1461
1133
|
for (const [page, prev] of previousMap) {
|
|
1462
1134
|
const curr = currentMap.get(page) || {
|
|
@@ -1478,10 +1150,10 @@ function analyzeDecay(input, options = {}) {
|
|
|
1478
1150
|
}
|
|
1479
1151
|
return sortResults$2(results, sortBy);
|
|
1480
1152
|
}
|
|
1481
|
-
const decayAnalyzer = defineAnalyzer({
|
|
1153
|
+
const decayAnalyzer = defineAnalyzer$1({
|
|
1482
1154
|
id: "decay",
|
|
1483
1155
|
buildSql(params) {
|
|
1484
|
-
const { current: cur, previous: prev } = comparisonOf(params);
|
|
1156
|
+
const { current: cur, previous: prev } = comparisonOf$1(params);
|
|
1485
1157
|
const minPreviousClicks = params.minPreviousClicks ?? 50;
|
|
1486
1158
|
const threshold = params.threshold ?? .2;
|
|
1487
1159
|
const limit = params.limit ?? 2e3;
|
|
@@ -1576,25 +1248,25 @@ const decayAnalyzer = defineAnalyzer({
|
|
|
1576
1248
|
const arr = Array.isArray(rows) ? rows : [];
|
|
1577
1249
|
return {
|
|
1578
1250
|
results: arr.map((r) => ({
|
|
1579
|
-
page: str$
|
|
1580
|
-
currentClicks: num(r.currentClicks),
|
|
1581
|
-
previousClicks: num(r.previousClicks),
|
|
1582
|
-
lostClicks: num(r.lostClicks),
|
|
1583
|
-
declinePercent: num(r.declinePercent),
|
|
1584
|
-
currentPosition: num(r.currentPosition),
|
|
1585
|
-
previousPosition: num(r.previousPosition),
|
|
1586
|
-
positionDrop: num(r.positionDrop),
|
|
1587
|
-
series: parseJsonList$
|
|
1588
|
-
week: str$
|
|
1589
|
-
clicks: num(s.clicks),
|
|
1590
|
-
impressions: num(s.impressions)
|
|
1251
|
+
page: str$19(r.page),
|
|
1252
|
+
currentClicks: num$1(r.currentClicks),
|
|
1253
|
+
previousClicks: num$1(r.previousClicks),
|
|
1254
|
+
lostClicks: num$1(r.lostClicks),
|
|
1255
|
+
declinePercent: num$1(r.declinePercent),
|
|
1256
|
+
currentPosition: num$1(r.currentPosition),
|
|
1257
|
+
previousPosition: num$1(r.previousPosition),
|
|
1258
|
+
positionDrop: num$1(r.positionDrop),
|
|
1259
|
+
series: parseJsonList$13(r.seriesJson).map((s) => ({
|
|
1260
|
+
week: str$19(s.week),
|
|
1261
|
+
clicks: num$1(s.clicks),
|
|
1262
|
+
impressions: num$1(s.impressions)
|
|
1591
1263
|
}))
|
|
1592
1264
|
})),
|
|
1593
1265
|
meta: { total: arr.length }
|
|
1594
1266
|
};
|
|
1595
1267
|
},
|
|
1596
1268
|
buildRows(params) {
|
|
1597
|
-
const { current, previous } = comparisonOf(params);
|
|
1269
|
+
const { current, previous } = comparisonOf$1(params);
|
|
1598
1270
|
return {
|
|
1599
1271
|
current: pagesQueryState(current, params.limit),
|
|
1600
1272
|
previous: pagesQueryState(previous, params.limit)
|
|
@@ -1618,10 +1290,10 @@ const decayAnalyzer = defineAnalyzer({
|
|
|
1618
1290
|
};
|
|
1619
1291
|
}
|
|
1620
1292
|
});
|
|
1621
|
-
function str$
|
|
1293
|
+
/**
 * Stringifies a value, mapping `null`/`undefined` to the empty string.
 *
 * @param {*} v - Any value.
 * @returns {string} The string form of `v`, or `""` when `v` is nullish.
 */
function str$18(v) {
	// A nullish input is replaced by "" first, so String() yields "" for it.
	return String(v ?? "");
}
|
|
1624
|
-
function parseJsonList(v) {
|
|
1296
|
+
function parseJsonList$12(v) {
|
|
1625
1297
|
if (Array.isArray(v)) return v;
|
|
1626
1298
|
if (typeof v === "string" && v.length > 0) {
|
|
1627
1299
|
const parsed = JSON.parse(v);
|
|
@@ -1633,9 +1305,9 @@ function analyzeMovers(input, options = {}) {
|
|
|
1633
1305
|
const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
|
|
1634
1306
|
const normFactor = input.normalizationFactor ?? 1;
|
|
1635
1307
|
const baselineMap = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
|
|
1636
|
-
clicks: num(r.clicks) / normFactor,
|
|
1637
|
-
impressions: num(r.impressions) / normFactor,
|
|
1638
|
-
position: num(r.position),
|
|
1308
|
+
clicks: num$1(r.clicks) / normFactor,
|
|
1309
|
+
impressions: num$1(r.impressions) / normFactor,
|
|
1310
|
+
position: num$1(r.position),
|
|
1639
1311
|
page: r.page ?? null
|
|
1640
1312
|
}));
|
|
1641
1313
|
const pageMap = /* @__PURE__ */ new Map();
|
|
@@ -1645,9 +1317,9 @@ function analyzeMovers(input, options = {}) {
|
|
|
1645
1317
|
const declining = [];
|
|
1646
1318
|
const stable = [];
|
|
1647
1319
|
for (const row of input.current) {
|
|
1648
|
-
const impressions = num(row.impressions);
|
|
1649
|
-
const clicks = num(row.clicks);
|
|
1650
|
-
const position = num(row.position);
|
|
1320
|
+
const impressions = num$1(row.impressions);
|
|
1321
|
+
const clicks = num$1(row.clicks);
|
|
1322
|
+
const position = num$1(row.position);
|
|
1651
1323
|
if (impressions < minImpressions) continue;
|
|
1652
1324
|
const baseline = baselineMap.get(row.query) || {
|
|
1653
1325
|
clicks: 0,
|
|
@@ -1695,10 +1367,10 @@ function analyzeMovers(input, options = {}) {
|
|
|
1695
1367
|
stable
|
|
1696
1368
|
};
|
|
1697
1369
|
}
|
|
1698
|
-
const moversAnalyzer = defineAnalyzer({
|
|
1370
|
+
const moversAnalyzer = defineAnalyzer$1({
|
|
1699
1371
|
id: "movers",
|
|
1700
1372
|
buildSql(params) {
|
|
1701
|
-
const { current: cur, previous: prev } = comparisonOf(params);
|
|
1373
|
+
const { current: cur, previous: prev } = comparisonOf$1(params);
|
|
1702
1374
|
const minImpressions = params.minImpressions ?? 50;
|
|
1703
1375
|
const changeThreshold = params.changeThreshold ?? .2;
|
|
1704
1376
|
const limit = params.limit ?? 2e3;
|
|
@@ -1808,23 +1480,23 @@ const moversAnalyzer = defineAnalyzer({
|
|
|
1808
1480
|
},
|
|
1809
1481
|
reduceSql(rows) {
|
|
1810
1482
|
const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
1811
|
-
keyword: str$
|
|
1812
|
-
page: r.page == null ? null : str$
|
|
1813
|
-
recentClicks: num(r.recentClicks),
|
|
1814
|
-
recentImpressions: num(r.recentImpressions),
|
|
1815
|
-
recentPosition: num(r.recentPosition),
|
|
1816
|
-
baselineClicks: Math.round(num(r.baselineClicks)),
|
|
1817
|
-
baselineImpressions: Math.round(num(r.baselineImpressions)),
|
|
1818
|
-
baselinePosition: num(r.baselinePosition),
|
|
1819
|
-
clicksChange: num(r.clicksChange),
|
|
1820
|
-
clicksChangePercent: num(r.clicksChangePercent),
|
|
1821
|
-
impressionsChangePercent: num(r.impressionsChangePercent),
|
|
1822
|
-
positionChange: num(r.positionChange),
|
|
1823
|
-
direction: str$
|
|
1824
|
-
series: parseJsonList(r.seriesJson).map((s) => ({
|
|
1825
|
-
week: str$
|
|
1826
|
-
clicks: num(s.clicks),
|
|
1827
|
-
impressions: num(s.impressions)
|
|
1483
|
+
keyword: str$18(r.keyword),
|
|
1484
|
+
page: r.page == null ? null : str$18(r.page),
|
|
1485
|
+
recentClicks: num$1(r.recentClicks),
|
|
1486
|
+
recentImpressions: num$1(r.recentImpressions),
|
|
1487
|
+
recentPosition: num$1(r.recentPosition),
|
|
1488
|
+
baselineClicks: Math.round(num$1(r.baselineClicks)),
|
|
1489
|
+
baselineImpressions: Math.round(num$1(r.baselineImpressions)),
|
|
1490
|
+
baselinePosition: num$1(r.baselinePosition),
|
|
1491
|
+
clicksChange: num$1(r.clicksChange),
|
|
1492
|
+
clicksChangePercent: num$1(r.clicksChangePercent),
|
|
1493
|
+
impressionsChangePercent: num$1(r.impressionsChangePercent),
|
|
1494
|
+
positionChange: num$1(r.positionChange),
|
|
1495
|
+
direction: str$18(r.direction),
|
|
1496
|
+
series: parseJsonList$12(r.seriesJson).map((s) => ({
|
|
1497
|
+
week: str$18(s.week),
|
|
1498
|
+
clicks: num$1(s.clicks),
|
|
1499
|
+
impressions: num$1(s.impressions)
|
|
1828
1500
|
}))
|
|
1829
1501
|
}));
|
|
1830
1502
|
const rising = normalized.filter((r) => r.direction === "rising");
|
|
@@ -1842,7 +1514,7 @@ const moversAnalyzer = defineAnalyzer({
|
|
|
1842
1514
|
};
|
|
1843
1515
|
},
|
|
1844
1516
|
buildRows(params) {
|
|
1845
|
-
const { current, previous } = comparisonOf(params);
|
|
1517
|
+
const { current, previous } = comparisonOf$1(params);
|
|
1846
1518
|
return {
|
|
1847
1519
|
current: keywordsQueryState(current, params.limit),
|
|
1848
1520
|
previous: keywordsQueryState(previous, params.limit)
|
|
@@ -1934,10 +1606,10 @@ const sortResults$1 = createMetricSorter("opportunityScore", {
|
|
|
1934
1606
|
impressions: "desc",
|
|
1935
1607
|
position: "asc"
|
|
1936
1608
|
});
|
|
1937
|
-
const opportunityAnalyzer = defineAnalyzer({
|
|
1609
|
+
const opportunityAnalyzer = defineAnalyzer$1({
|
|
1938
1610
|
id: "opportunity",
|
|
1939
1611
|
buildSql(params) {
|
|
1940
|
-
const { startDate, endDate } = periodOf(params);
|
|
1612
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
1941
1613
|
const minImpressions = params.minImpressions ?? 100;
|
|
1942
1614
|
const w1 = 1;
|
|
1943
1615
|
const w2 = 1;
|
|
@@ -2028,23 +1700,23 @@ const opportunityAnalyzer = defineAnalyzer({
|
|
|
2028
1700
|
results: arr.map((r) => ({
|
|
2029
1701
|
keyword: r.keyword == null ? "" : String(r.keyword),
|
|
2030
1702
|
page: r.page == null ? null : String(r.page),
|
|
2031
|
-
clicks: num(r.clicks),
|
|
2032
|
-
impressions: num(r.impressions),
|
|
2033
|
-
ctr: num(r.ctr),
|
|
2034
|
-
position: num(r.position),
|
|
2035
|
-
opportunityScore: num(r.opportunityScore),
|
|
2036
|
-
potentialClicks: num(r.potentialClicks),
|
|
1703
|
+
clicks: num$1(r.clicks),
|
|
1704
|
+
impressions: num$1(r.impressions),
|
|
1705
|
+
ctr: num$1(r.ctr),
|
|
1706
|
+
position: num$1(r.position),
|
|
1707
|
+
opportunityScore: num$1(r.opportunityScore),
|
|
1708
|
+
potentialClicks: num$1(r.potentialClicks),
|
|
2037
1709
|
factors: {
|
|
2038
|
-
positionScore: num(r.positionScore),
|
|
2039
|
-
impressionScore: num(r.impressionScore),
|
|
2040
|
-
ctrGapScore: num(r.ctrGapScore)
|
|
1710
|
+
positionScore: num$1(r.positionScore),
|
|
1711
|
+
impressionScore: num$1(r.impressionScore),
|
|
1712
|
+
ctrGapScore: num$1(r.ctrGapScore)
|
|
2041
1713
|
}
|
|
2042
1714
|
})),
|
|
2043
1715
|
meta: { total: arr.length }
|
|
2044
1716
|
};
|
|
2045
1717
|
},
|
|
2046
1718
|
buildRows(params) {
|
|
2047
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit) };
|
|
1719
|
+
return { keywords: keywordsQueryState(periodOf$1(params), params.limit) };
|
|
2048
1720
|
},
|
|
2049
1721
|
reduceRows(rows, params) {
|
|
2050
1722
|
const keywords = (Array.isArray(rows) ? rows : []) ?? [];
|
|
@@ -2055,10 +1727,10 @@ const opportunityAnalyzer = defineAnalyzer({
|
|
|
2055
1727
|
const sortBy = "opportunityScore";
|
|
2056
1728
|
const results = [];
|
|
2057
1729
|
for (const row of keywords) {
|
|
2058
|
-
const impressions = num(row.impressions);
|
|
2059
|
-
const position = num(row.position);
|
|
2060
|
-
const ctr = num(row.ctr);
|
|
2061
|
-
const clicks = num(row.clicks);
|
|
1730
|
+
const impressions = num$1(row.impressions);
|
|
1731
|
+
const position = num$1(row.position);
|
|
1732
|
+
const ctr = num$1(row.ctr);
|
|
1733
|
+
const clicks = num$1(row.clicks);
|
|
2062
1734
|
if (impressions < minImpressions) continue;
|
|
2063
1735
|
const positionScore = calculatePositionScore(position);
|
|
2064
1736
|
const impressionScore = calculateImpressionScore(impressions);
|
|
@@ -2097,10 +1769,10 @@ const opportunityAnalyzer = defineAnalyzer({
|
|
|
2097
1769
|
};
|
|
2098
1770
|
}
|
|
2099
1771
|
});
|
|
2100
|
-
function str(v) {
|
|
1772
|
+
/**
 * Returns `v` as a string; missing values become `""`.
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)` for non-nullish input, otherwise the empty string.
 */
function str$17(v) {
	if (v != null) return String(v);
	return "";
}
|
|
2103
|
-
function bool(v) {
|
|
1775
|
+
/**
 * Interprets a raw cell value as a boolean flag.
 *
 * Only the boolean `true`, the number `1` and the exact string `"true"` count as
 * true; every other value (including `"1"` and `"TRUE"`) is false.
 *
 * @param {*} v - Any value.
 * @returns {boolean} Whether `v` is one of the accepted truthy encodings.
 */
function bool$2(v) {
	if (v === true) return true;
	if (v === 1) return true;
	return v === "true";
}
|
|
2106
1778
|
function calculateCV(values) {
|
|
@@ -2154,10 +1826,10 @@ function analyzeSeasonality(dates, options = {}) {
|
|
|
2154
1826
|
insufficientData
|
|
2155
1827
|
};
|
|
2156
1828
|
}
|
|
2157
|
-
const seasonalityAnalyzer = defineAnalyzer({
|
|
1829
|
+
const seasonalityAnalyzer = defineAnalyzer$1({
|
|
2158
1830
|
id: "seasonality",
|
|
2159
1831
|
buildSql(params) {
|
|
2160
|
-
const { startDate, endDate } = periodOf(params);
|
|
1832
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
2161
1833
|
return {
|
|
2162
1834
|
sql: `
|
|
2163
1835
|
WITH monthly AS (
|
|
@@ -2196,15 +1868,15 @@ const seasonalityAnalyzer = defineAnalyzer({
|
|
|
2196
1868
|
reduceSql(rows) {
|
|
2197
1869
|
const arr = Array.isArray(rows) ? rows : [];
|
|
2198
1870
|
const breakdown = arr.map((r) => ({
|
|
2199
|
-
month: str(r.month),
|
|
2200
|
-
value: num(r.value),
|
|
2201
|
-
vsAverage: num(r.vsAverage),
|
|
2202
|
-
isPeak: bool(r.isPeak),
|
|
2203
|
-
isTrough: bool(r.isTrough)
|
|
1871
|
+
month: str$17(r.month),
|
|
1872
|
+
value: num$1(r.value),
|
|
1873
|
+
vsAverage: num$1(r.vsAverage),
|
|
1874
|
+
isPeak: bool$2(r.isPeak),
|
|
1875
|
+
isTrough: bool$2(r.isTrough)
|
|
2204
1876
|
}));
|
|
2205
1877
|
const first = arr[0];
|
|
2206
|
-
const strength = first ? num(first.strength) : 0;
|
|
2207
|
-
const monthCount = first ? num(first.monthCount) : 0;
|
|
1878
|
+
const strength = first ? num$1(first.strength) : 0;
|
|
1879
|
+
const monthCount = first ? num$1(first.monthCount) : 0;
|
|
2208
1880
|
const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
|
|
2209
1881
|
const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
|
|
2210
1882
|
const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
|
|
@@ -2222,7 +1894,7 @@ const seasonalityAnalyzer = defineAnalyzer({
|
|
|
2222
1894
|
};
|
|
2223
1895
|
},
|
|
2224
1896
|
buildRows(params) {
|
|
2225
|
-
return { dates: datesQueryState(periodOf(params), params.limit) };
|
|
1897
|
+
return { dates: datesQueryState(periodOf$1(params), params.limit) };
|
|
2226
1898
|
},
|
|
2227
1899
|
reduceRows(rows, params) {
|
|
2228
1900
|
const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
|
|
@@ -2233,7 +1905,7 @@ const seasonalityAnalyzer = defineAnalyzer({
|
|
|
2233
1905
|
}
|
|
2234
1906
|
});
|
|
2235
1907
|
// Fallback row limit (25,000) used by strikingDistanceAnalyzer.buildRows when params.limit is not set.
const DEFAULT_ROW_LIMIT$1 = 25e3;
|
|
2236
|
-
const strikingDistanceAnalyzer = defineAnalyzer({
|
|
1908
|
+
const strikingDistanceAnalyzer = defineAnalyzer$1({
|
|
2237
1909
|
id: "striking-distance",
|
|
2238
1910
|
reduce(rows, params) {
|
|
2239
1911
|
const arr = Array.isArray(rows) ? rows : [];
|
|
@@ -2244,10 +1916,10 @@ const strikingDistanceAnalyzer = defineAnalyzer({
|
|
|
2244
1916
|
const limit = params.limit ?? 1e3;
|
|
2245
1917
|
const results = [];
|
|
2246
1918
|
for (const row of arr) {
|
|
2247
|
-
const position = num(row.position);
|
|
2248
|
-
const impressions = num(row.impressions);
|
|
2249
|
-
const ctr = num(row.ctr);
|
|
2250
|
-
const clicks = num(row.clicks);
|
|
1919
|
+
const position = num$1(row.position);
|
|
1920
|
+
const impressions = num$1(row.impressions);
|
|
1921
|
+
const ctr = num$1(row.ctr);
|
|
1922
|
+
const clicks = num$1(row.clicks);
|
|
2251
1923
|
if (position < minPosition || position > maxPosition) continue;
|
|
2252
1924
|
if (impressions < minImpressions) continue;
|
|
2253
1925
|
if (ctr > maxCtr) continue;
|
|
@@ -2275,7 +1947,7 @@ const strikingDistanceAnalyzer = defineAnalyzer({
|
|
|
2275
1947
|
};
|
|
2276
1948
|
},
|
|
2277
1949
|
buildSql(params) {
|
|
2278
|
-
const { startDate, endDate } = periodOf(params);
|
|
1950
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
2279
1951
|
return {
|
|
2280
1952
|
sql: `
|
|
2281
1953
|
SELECT
|
|
@@ -2297,15 +1969,15 @@ const strikingDistanceAnalyzer = defineAnalyzer({
|
|
|
2297
1969
|
};
|
|
2298
1970
|
},
|
|
2299
1971
|
buildRows(params) {
|
|
2300
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit ?? DEFAULT_ROW_LIMIT$1) };
|
|
1972
|
+
return { keywords: keywordsQueryState(periodOf$1(params), params.limit ?? DEFAULT_ROW_LIMIT$1) };
|
|
2301
1973
|
}
|
|
2302
1974
|
});
|
|
2303
1975
|
// Fallback row limit (25,000) used by zeroClickAnalyzer.buildRows when params.limit is not set.
const DEFAULT_ROW_LIMIT = 25e3;
|
|
2304
1976
|
// Sorter created from an accessor returning item.impressions and the key "impressions".
// createSorter is defined outside this view; presumably the result orders rows by impressions — TODO confirm.
const sortRowResults = createSorter((item) => item.impressions, "impressions");
|
|
2305
|
-
const zeroClickAnalyzer = defineAnalyzer({
|
|
1977
|
+
const zeroClickAnalyzer = defineAnalyzer$1({
|
|
2306
1978
|
id: "zero-click",
|
|
2307
1979
|
buildSql(params) {
|
|
2308
|
-
const { startDate, endDate } = periodOf(params);
|
|
1980
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
2309
1981
|
const minImpressions = params.minImpressions ?? 1e3;
|
|
2310
1982
|
const maxCtr = params.maxCtr ?? .03;
|
|
2311
1983
|
const maxPosition = params.maxPosition ?? 10;
|
|
@@ -2365,11 +2037,11 @@ const zeroClickAnalyzer = defineAnalyzer({
|
|
|
2365
2037
|
results: arr.map((r) => ({
|
|
2366
2038
|
query: r.query == null ? "" : String(r.query),
|
|
2367
2039
|
page: r.page == null ? "" : String(r.page),
|
|
2368
|
-
clicks: num(r.clicks),
|
|
2369
|
-
impressions: num(r.impressions),
|
|
2370
|
-
ctr: num(r.ctr),
|
|
2371
|
-
position: num(r.position),
|
|
2372
|
-
missedClicks: num(r.missedClicks)
|
|
2040
|
+
clicks: num$1(r.clicks),
|
|
2041
|
+
impressions: num$1(r.impressions),
|
|
2042
|
+
ctr: num$1(r.ctr),
|
|
2043
|
+
position: num$1(r.position),
|
|
2044
|
+
missedClicks: num$1(r.missedClicks)
|
|
2373
2045
|
})),
|
|
2374
2046
|
meta: {
|
|
2375
2047
|
total: arr.length,
|
|
@@ -2380,7 +2052,7 @@ const zeroClickAnalyzer = defineAnalyzer({
|
|
|
2380
2052
|
};
|
|
2381
2053
|
},
|
|
2382
2054
|
buildRows(params) {
|
|
2383
|
-
const period = periodOf(params);
|
|
2055
|
+
const period = periodOf$1(params);
|
|
2384
2056
|
const limit = params.limit ?? DEFAULT_ROW_LIMIT;
|
|
2385
2057
|
return { rows: gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState() };
|
|
2386
2058
|
},
|
|
@@ -2430,142 +2102,1487 @@ const ROW_ANALYZERS = [
|
|
|
2430
2102
|
cannibalizationAnalyzer.rows,
|
|
2431
2103
|
zeroClickAnalyzer.rows
|
|
2432
2104
|
];
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
2452
|
-
|
|
2453
|
-
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2481
|
-
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
|
|
2495
|
-
|
|
2496
|
-
|
|
2497
|
-
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2105
|
+
/**
 * Coerces a raw cell value to a number.
 *
 * - numbers are returned unchanged (including NaN and +/-Infinity);
 * - bigints are converted with `Number()`;
 * - `null`/`undefined` become 0;
 * - anything else goes through `Number()`, and a non-finite result becomes 0.
 *
 * @param {*} v - Any value.
 * @returns {number} The numeric form of `v` following the rules above.
 */
function num$6(v) {
	switch (typeof v) {
		case "number":
			return v;
		case "bigint":
			return Number(v);
		default: {
			if (v == null) return 0;
			const coerced = Number(v);
			if (!Number.isFinite(coerced)) return 0;
			return coerced;
		}
	}
}
|
|
2112
|
+
/**
 * Converts a row field to a string for the bayesian-ctr result objects.
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)` when `v` is not nullish, otherwise `""`.
 */
function str$16(v) {
	return v != null ? String(v) : "";
}
|
|
2115
|
+
/**
 * Analyzer "bayesian-ctr": shrinks the observed click-through rate of each
 * (query, url) pair toward a Beta prior estimated per position bucket.
 *
 * buildSql produces one SQL statement over the "page_keywords" table partitions;
 * reduceSql turns the returned rows into plain result objects plus summary counts.
 * defineAnalyzer$1 is imported from @gscdump/engine/analyzer; how it consumes this
 * descriptor is not visible in this file.
 */
const bayesianCtrAnalyzer = defineAnalyzer$1({
|
|
2116
|
+
id: "bayesian-ctr",
|
|
2117
|
+
/**
 * Builds the SQL text, its positional bind parameters and the partition selection.
 *
 * Reads from params: the period (resolved by periodOf$1), minImpressions
 * (default 50) and limit (default 300).
 *
 * CTE pipeline of the query:
 *  - entity: aggregates per (query, url) inside the date range, keeps pairs with
 *    SUM(impressions) >= minImpressions whose position expression is <= 30, and
 *    assigns bucket = ROUND(LEAST(position, 30)).
 *  - bucket_mu: per bucket, the entity count and the impression-weighted mean CTR (mu).
 *  - bucket_var: per bucket, the impression-weighted variance of CTR around mu,
 *    floored at 1e-9.
 *  - priors: alpha = mu * k and beta = (1 - mu) * k with k = mu * (1 - mu) / v - 1,
 *    each clamped to at least 0.5; falls back to alpha = 2.0, beta = 48.0 when the
 *    bucket has fewer than priorMinEntities entities, mu is not strictly between
 *    0 and 1, or k is not positive.
 *  - posterior: alpha_post = alpha + clicks, beta_post = beta + (impressions - clicks).
 *  - scored: mean and standard deviation of the Beta(alpha_post, beta_post) posterior.
 * The final SELECT adds a mean +/- 1.96 * sd interval clamped to [0, 1], the deltas
 * against the observed CTR, a significance ratio and a classification label
 * ('overperforming' / 'underperforming' / 'expected'); rows are ordered by
 * significance descending (NULLs last) and capped at `limit`.
 *
 * NOTE(review): bucket_prior_mean is always the bucket's mu, including the case
 * where the fallback alpha/beta are used.
 * The METRIC_EXPR fragments come from @gscdump/engine/sql-fragments; their exact SQL
 * is not shown here. {{FILES}} is presumably substituted by the engine with the
 * resolved partition files — TODO confirm.
 */
buildSql(params) {
|
|
2118
|
+
const { startDate, endDate } = periodOf$1(params);
|
|
2119
|
+
const minImpressions = params.minImpressions ?? 50;
|
|
2120
|
+
const limit = params.limit ?? 300;
|
|
2121
|
+
// Minimum number of entities a bucket needs before its own moment-based prior is used.
const priorMinEntities = 5;
|
|
2122
|
+
return {
|
|
2123
|
+
sql: `
|
|
2124
|
+
WITH entity AS (
|
|
2125
|
+
SELECT
|
|
2126
|
+
query,
|
|
2127
|
+
url,
|
|
2128
|
+
${METRIC_EXPR.clicks} AS clicks,
|
|
2129
|
+
${METRIC_EXPR.impressions} AS impressions,
|
|
2130
|
+
${METRIC_EXPR.ctr} AS observed_ctr,
|
|
2131
|
+
${METRIC_EXPR.position} AS position,
|
|
2132
|
+
CAST(ROUND(LEAST(${METRIC_EXPR.position}, 30)) AS INTEGER) AS bucket
|
|
2133
|
+
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
2134
|
+
WHERE date >= ? AND date <= ?
|
|
2135
|
+
AND query IS NOT NULL AND query <> ''
|
|
2136
|
+
AND url IS NOT NULL AND url <> ''
|
|
2137
|
+
GROUP BY query, url
|
|
2138
|
+
HAVING SUM(impressions) >= ?
|
|
2139
|
+
AND ${METRIC_EXPR.position} <= 30
|
|
2140
|
+
),
|
|
2141
|
+
bucket_mu AS (
|
|
2142
|
+
SELECT
|
|
2143
|
+
bucket,
|
|
2144
|
+
COUNT(*) AS n_entities,
|
|
2145
|
+
SUM(observed_ctr * impressions) / NULLIF(SUM(impressions), 0) AS mu,
|
|
2146
|
+
SUM(impressions) AS total_impressions
|
|
2147
|
+
FROM entity
|
|
2148
|
+
GROUP BY bucket
|
|
2149
|
+
),
|
|
2150
|
+
bucket_var AS (
|
|
2151
|
+
SELECT
|
|
2152
|
+
e.bucket,
|
|
2153
|
+
GREATEST(
|
|
2154
|
+
SUM(e.impressions * POWER(e.observed_ctr - b.mu, 2))
|
|
2155
|
+
/ NULLIF(SUM(e.impressions), 0),
|
|
2156
|
+
1e-9
|
|
2157
|
+
) AS v
|
|
2158
|
+
FROM entity e
|
|
2159
|
+
JOIN bucket_mu b USING (bucket)
|
|
2160
|
+
GROUP BY e.bucket
|
|
2161
|
+
),
|
|
2162
|
+
priors AS (
|
|
2163
|
+
SELECT
|
|
2164
|
+
m.bucket,
|
|
2165
|
+
m.n_entities,
|
|
2166
|
+
m.mu,
|
|
2167
|
+
v.v,
|
|
2168
|
+
CASE
|
|
2169
|
+
WHEN m.n_entities >= ${Number(priorMinEntities)}
|
|
2170
|
+
AND v.v > 0
|
|
2171
|
+
AND m.mu > 0 AND m.mu < 1
|
|
2172
|
+
AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
|
|
2173
|
+
THEN GREATEST(0.5, m.mu * (m.mu * (1.0 - m.mu) / v.v - 1.0))
|
|
2174
|
+
ELSE 2.0
|
|
2175
|
+
END AS alpha,
|
|
2176
|
+
CASE
|
|
2177
|
+
WHEN m.n_entities >= ${Number(priorMinEntities)}
|
|
2178
|
+
AND v.v > 0
|
|
2179
|
+
AND m.mu > 0 AND m.mu < 1
|
|
2180
|
+
AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
|
|
2181
|
+
THEN GREATEST(0.5, (1.0 - m.mu) * (m.mu * (1.0 - m.mu) / v.v - 1.0))
|
|
2182
|
+
ELSE 48.0
|
|
2183
|
+
END AS beta
|
|
2184
|
+
FROM bucket_mu m
|
|
2185
|
+
JOIN bucket_var v USING (bucket)
|
|
2186
|
+
),
|
|
2187
|
+
posterior AS (
|
|
2188
|
+
SELECT
|
|
2189
|
+
e.query,
|
|
2190
|
+
e.url,
|
|
2191
|
+
e.clicks,
|
|
2192
|
+
e.impressions,
|
|
2193
|
+
e.observed_ctr,
|
|
2194
|
+
e.position,
|
|
2195
|
+
e.bucket,
|
|
2196
|
+
p.alpha AS prior_alpha,
|
|
2197
|
+
p.beta AS prior_beta,
|
|
2198
|
+
p.mu AS bucket_prior_mean,
|
|
2199
|
+
p.alpha + e.clicks AS alpha_post,
|
|
2200
|
+
p.beta + (e.impressions - e.clicks) AS beta_post
|
|
2201
|
+
FROM entity e
|
|
2202
|
+
JOIN priors p USING (bucket)
|
|
2203
|
+
),
|
|
2204
|
+
scored AS (
|
|
2205
|
+
SELECT *,
|
|
2206
|
+
alpha_post / (alpha_post + beta_post) AS posterior_mean,
|
|
2207
|
+
SQRT((alpha_post * beta_post)
|
|
2208
|
+
/ (POWER(alpha_post + beta_post, 2) * (alpha_post + beta_post + 1))) AS posterior_sd
|
|
2209
|
+
FROM posterior
|
|
2210
|
+
)
|
|
2211
|
+
SELECT
|
|
2212
|
+
query AS keyword,
|
|
2213
|
+
url AS page,
|
|
2214
|
+
clicks,
|
|
2215
|
+
impressions,
|
|
2216
|
+
observed_ctr AS observedCtr,
|
|
2217
|
+
position,
|
|
2218
|
+
bucket,
|
|
2219
|
+
prior_alpha AS priorAlpha,
|
|
2220
|
+
prior_beta AS priorBeta,
|
|
2221
|
+
bucket_prior_mean AS bucketPriorMean,
|
|
2222
|
+
posterior_mean AS posteriorMean,
|
|
2223
|
+
posterior_sd AS posteriorSd,
|
|
2224
|
+
GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) AS ciLow,
|
|
2225
|
+
LEAST(1.0, posterior_mean + 1.96 * posterior_sd) AS ciHigh,
|
|
2226
|
+
posterior_mean - observed_ctr AS shrinkageDelta,
|
|
2227
|
+
(posterior_mean - observed_ctr) * impressions AS expectedClicksDelta,
|
|
2228
|
+
ABS(observed_ctr - posterior_mean) / NULLIF(posterior_sd, 0) AS significance,
|
|
2229
|
+
CASE
|
|
2230
|
+
WHEN observed_ctr > LEAST(1.0, posterior_mean + 1.96 * posterior_sd) THEN 'overperforming'
|
|
2231
|
+
WHEN observed_ctr < GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) THEN 'underperforming'
|
|
2232
|
+
ELSE 'expected'
|
|
2233
|
+
END AS classification
|
|
2234
|
+
FROM scored
|
|
2235
|
+
ORDER BY significance DESC NULLS LAST
|
|
2236
|
+
LIMIT ${Number(limit)}
|
|
2237
|
+
`,
|
|
2238
|
+
// Bound positionally to the three "?" placeholders: date >= ?, date <= ?, SUM(impressions) >= ?.
params: [
|
|
2239
|
+
startDate,
|
|
2240
|
+
endDate,
|
|
2241
|
+
minImpressions
|
|
2242
|
+
],
|
|
2243
|
+
current: {
|
|
2244
|
+
table: "page_keywords",
|
|
2245
|
+
partitions: enumeratePartitions(startDate, endDate)
|
|
2246
|
+
}
|
|
2247
|
+
};
|
|
2248
|
+
},
|
|
2249
|
+
/**
 * Normalizes the SQL result rows into result objects and summary metadata.
 *
 * A non-array `rows` is treated as empty. String fields go through str$16 and
 * numeric fields through num$6 (so a NULL significance becomes 0). meta counts the
 * rows per classification and echoes the minImpressions threshold (default 50).
 */
reduceSql(rows, params) {
|
|
2250
|
+
const arr = Array.isArray(rows) ? rows : [];
|
|
2251
|
+
const minImpressions = params.minImpressions ?? 50;
|
|
2252
|
+
const results = arr.map((r) => ({
|
|
2253
|
+
keyword: str$16(r.keyword),
|
|
2254
|
+
page: str$16(r.page),
|
|
2255
|
+
clicks: num$6(r.clicks),
|
|
2256
|
+
impressions: num$6(r.impressions),
|
|
2257
|
+
observedCtr: num$6(r.observedCtr),
|
|
2258
|
+
position: num$6(r.position),
|
|
2259
|
+
bucket: num$6(r.bucket),
|
|
2260
|
+
priorAlpha: num$6(r.priorAlpha),
|
|
2261
|
+
priorBeta: num$6(r.priorBeta),
|
|
2262
|
+
bucketPriorMean: num$6(r.bucketPriorMean),
|
|
2263
|
+
posteriorMean: num$6(r.posteriorMean),
|
|
2264
|
+
posteriorSd: num$6(r.posteriorSd),
|
|
2265
|
+
ciLow: num$6(r.ciLow),
|
|
2266
|
+
ciHigh: num$6(r.ciHigh),
|
|
2267
|
+
shrinkageDelta: num$6(r.shrinkageDelta),
|
|
2268
|
+
expectedClicksDelta: num$6(r.expectedClicksDelta),
|
|
2269
|
+
significance: num$6(r.significance),
|
|
2270
|
+
classification: str$16(r.classification)
|
|
2271
|
+
}));
|
|
2272
|
+
const under = results.filter((r) => r.classification === "underperforming").length;
|
|
2273
|
+
const over = results.filter((r) => r.classification === "overperforming").length;
|
|
2274
|
+
return {
|
|
2275
|
+
results,
|
|
2276
|
+
meta: {
|
|
2277
|
+
total: results.length,
|
|
2278
|
+
underperforming: under,
|
|
2279
|
+
overperforming: over,
|
|
2280
|
+
expected: results.length - under - over,
|
|
2281
|
+
minImpressions
|
|
2282
|
+
}
|
|
2283
|
+
};
|
|
2284
|
+
}
|
|
2285
|
+
});
|
|
2286
|
+
// Number of unrolled rank-update steps: the bipartite-pagerank buildSql emits one ranks_<i> CTE per step.
const BIPARTITE_PAGERANK_ITERATIONS = 25;
|
|
2287
|
+
// Damping factor d in each rank update: (1 - d) / n + d * SUM(edge weight * previous rank).
const BIPARTITE_PAGERANK_DAMPING = .85;
|
|
2288
|
+
/**
 * Coerces a value to a string, with nullish input mapped to `""`.
 *
 * @param {*} v - Any value.
 * @returns {string} The empty string for `null`/`undefined`, else `String(v)`.
 */
function str$15(v) {
	let text = "";
	if (v != null) {
		text = String(v);
	}
	return text;
}
|
|
2291
|
+
/**
 * Reads a list out of a column that may hold an array or a JSON-encoded array.
 *
 * @param {*} v - An array, a JSON string, or anything else.
 * @returns {Array} `v` itself when it is already an array; the parsed value when
 *   `v` is a non-empty JSON string that decodes to an array; otherwise `[]`.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$11(v) {
	if (Array.isArray(v)) return v;
	// Only non-empty strings are worth decoding; everything else is "no list".
	if (typeof v !== "string" || v.length === 0) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
|
|
2299
|
+
/**
 * "bipartite-pagerank" analyzer.
 *
 * Ranks queries and URLs by a damped power iteration over the query <-> URL
 * impression graph, computed entirely inside one SQL statement: every
 * iteration is its own CTE (`ranks_1` … `ranks_N`), each reading the previous
 * one. `buildSql` produces the statement, `reduceSql` shapes the returned rows.
 */
const bipartitePagerankAnalyzer = defineAnalyzer$1({
  id: "bipartite-pagerank",
  /**
   * Builds the SQL plan.
   *
   * @param params - Analyzer parameters. Reads `minImpressions` (default 50,
   *   minimum summed impressions for a query/url edge) and `limit` (default 50,
   *   rows returned per side); the date window comes from `periodOf$1(params)`.
   * @returns `{ sql, params, current }` where `params` binds, in order, the
   *   start date, end date and edge impression floor, and `current` names the
   *   `page_keywords` table with the partitions for the window.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf$1(params);
    const minImpressions = params.minImpressions ?? 50;
    const topQueries = 1e3;
    const topUrls = 500;
    const limit = params.limit ?? 50;
    const bridgingEdgeThreshold = .05;
    const anchoringEdgeThreshold = .05;
    const iterations = BIPARTITE_PAGERANK_ITERATIONS;
    const d = BIPARTITE_PAGERANK_DAMPING;
    // One CTE per iteration. Both sides are updated from the previous
    // iteration's ranks: queries pull rank from URLs and URLs from queries.
    const iterCtes = [];
    for (let i = 1; i <= iterations; i++) {
      iterCtes.push(`
        ranks_${i} AS (
          SELECT
            'q' AS kind,
            e.qid AS id,
            (1.0 - ${d}) / (SELECT n FROM query_count)
              + ${d} * SUM(e.w_u_to_q * r.rank) AS rank
          FROM u_to_q_weights e
          JOIN ranks_${i - 1} r ON r.kind = 'u' AND r.id = e.uid
          GROUP BY e.qid
          UNION ALL
          SELECT
            'u' AS kind,
            e.uid AS id,
            (1.0 - ${d}) / (SELECT n FROM url_count)
              + ${d} * SUM(e.w_q_to_u * r.rank) AS rank
          FROM q_to_u_weights e
          JOIN ranks_${i - 1} r ON r.kind = 'q' AND r.id = e.qid
          GROUP BY e.uid
        )`);
    }
    // Per-step L1 distance between consecutive rank vectors (convergence trace).
    const deltaParts = [];
    for (let i = 1; i <= iterations; i++) {
      deltaParts.push(`
          SELECT ${i} AS step,
            (SELECT COALESCE(SUM(ABS(a.rank - b.rank)), 0.0)
             FROM ranks_${i} a
             JOIN ranks_${i - 1} b USING (kind, id)) AS l1`);
    }
    return {
      // `{{FILES}}` is left as a placeholder in the statement; presumably the
      // engine substitutes the parquet file list — not visible in this block.
      sql: `
        WITH edges0 AS (
          SELECT
            query AS qid,
            url AS uid,
            CAST(SUM(impressions) AS DOUBLE) AS impressions
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        ),
        -- Top-N caps per side keep the iteration tractable.
        query_totals AS (
          SELECT qid, SUM(impressions) AS tot
          FROM edges0 GROUP BY qid
        ),
        url_totals AS (
          SELECT uid, SUM(impressions) AS tot
          FROM edges0 GROUP BY uid
        ),
        top_queries AS (
          SELECT qid FROM query_totals
          ORDER BY tot DESC, qid ASC LIMIT ${Number(topQueries)}
        ),
        top_urls AS (
          SELECT uid FROM url_totals
          ORDER BY tot DESC, uid ASC LIMIT ${Number(topUrls)}
        ),
        edges AS (
          SELECT e.qid, e.uid, e.impressions
          FROM edges0 e
          JOIN top_queries tq USING (qid)
          JOIN top_urls tu USING (uid)
        ),
        query_nodes AS (SELECT DISTINCT qid FROM edges),
        url_nodes AS (SELECT DISTINCT uid FROM edges),
        query_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM query_nodes),
        url_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM url_nodes),
        -- Row-stochastic transition weights in each direction. For q->u the
        -- weights out of a query sum to 1; symmetric for u->q.
        q_out AS (SELECT qid, SUM(impressions) AS s FROM edges GROUP BY qid),
        u_out AS (SELECT uid, SUM(impressions) AS s FROM edges GROUP BY uid),
        q_to_u_weights AS (
          SELECT e.qid, e.uid,
            e.impressions / NULLIF(q.s, 0) AS w_q_to_u
          FROM edges e JOIN q_out q USING (qid)
        ),
        u_to_q_weights AS (
          SELECT e.qid, e.uid,
            e.impressions / NULLIF(u.s, 0) AS w_u_to_q
          FROM edges e JOIN u_out u USING (uid)
        ),
        -- Seed: uniform distribution per side. Total mass = 2 (one unit per side).
        ranks_0 AS (
          SELECT 'q' AS kind, q.qid AS id, 1.0 / (SELECT n FROM query_count) AS rank
          FROM query_nodes q
          UNION ALL
          SELECT 'u' AS kind, u.uid AS id, 1.0 / (SELECT n FROM url_count) AS rank
          FROM url_nodes u
        ),
        ${iterCtes.join(",\n")},
        final_ranks AS (SELECT * FROM ranks_${iterations}),
        -- Hub/anchor diagnostics computed from raw edge mass (not rank). A
        -- query "bridges" URLs it sends >= ${bridgingEdgeThreshold} of its mass
        -- to; a URL "anchors" queries that contribute >= ${anchoringEdgeThreshold}
        -- of its incoming mass.
        q_bridging AS (
          SELECT qid, COUNT(*) AS bridging
          FROM q_to_u_weights
          WHERE w_q_to_u >= ${bridgingEdgeThreshold}
          GROUP BY qid
        ),
        u_anchoring AS (
          SELECT uid, COUNT(*) AS anchoring
          FROM u_to_q_weights
          WHERE w_u_to_q >= ${anchoringEdgeThreshold}
          GROUP BY uid
        ),
        q_degree AS (
          SELECT qid, COUNT(*) AS degree, SUM(impressions) AS impressions
          FROM edges GROUP BY qid
        ),
        u_degree AS (
          SELECT uid, COUNT(*) AS degree, SUM(impressions) AS impressions
          FROM edges GROUP BY uid
        ),
        deltas AS (
          ${deltaParts.join("\n UNION ALL\n")}
        ),
        -- The id tie-break makes the top-N cut reproducible when ranks tie,
        -- matching the tie-break already used by top_queries / top_urls.
        query_rows AS (
          SELECT
            'query' AS kind, f.id, f.rank,
            COALESCE(b.bridging, 0) AS bridging,
            0 AS anchoring,
            COALESCE(qd.degree, 0) AS degree,
            COALESCE(qd.impressions, 0) AS impressions
          FROM final_ranks f
          LEFT JOIN q_bridging b ON b.qid = f.id
          LEFT JOIN q_degree qd ON qd.qid = f.id
          WHERE f.kind = 'q'
          ORDER BY f.rank DESC, f.id ASC
          LIMIT ${Number(limit)}
        ),
        url_rows AS (
          SELECT
            'url' AS kind, f.id, f.rank,
            0 AS bridging,
            COALESCE(a.anchoring, 0) AS anchoring,
            COALESCE(ud.degree, 0) AS degree,
            COALESCE(ud.impressions, 0) AS impressions
          FROM final_ranks f
          LEFT JOIN u_anchoring a ON a.uid = f.id
          LEFT JOIN u_degree ud ON ud.uid = f.id
          WHERE f.kind = 'u'
          ORDER BY f.rank DESC, f.id ASC
          LIMIT ${Number(limit)}
        ),
        nodes AS (
          SELECT * FROM query_rows
          UNION ALL
          SELECT * FROM url_rows
        ),
        counts AS (
          SELECT
            (SELECT n FROM query_count) AS q_count,
            (SELECT n FROM url_count) AS u_count
        ),
        deltas_json AS (
          SELECT to_json(list({ 'step': step, 'l1': l1 } ORDER BY step)) AS dj
          FROM deltas
        )
        SELECT
          n.kind,
          n.id,
          n.rank,
          n.bridging,
          n.anchoring,
          n.degree,
          n.impressions,
          c.q_count AS queryCount,
          c.u_count AS urlCount,
          dj.dj AS deltasJson
        FROM nodes n
        CROSS JOIN counts c
        CROSS JOIN deltas_json dj
        ORDER BY n.kind, n.rank DESC, n.id ASC
      `,
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Shapes the SQL rows into the analyzer result.
   *
   * `queryCount`, `urlCount` and `deltasJson` are repeated on every row by the
   * cross joins, so they are read from the first row only. With no rows the
   * counts are 0 and `deltas` is empty.
   *
   * @param rows - Rows returned for the statement built by `buildSql`.
   * @returns `{ results, meta }`; `meta.convergenceDelta` is the L1 change of
   *   the last iteration (0 when no deltas are available).
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const iterations = BIPARTITE_PAGERANK_ITERATIONS;
    const d = BIPARTITE_PAGERANK_DAMPING;
    const results = arr.map((r) => ({
      kind: str$15(r.kind),
      id: str$15(r.id),
      rank: num$1(r.rank),
      bridging: num$1(r.bridging),
      anchoring: num$1(r.anchoring),
      degree: num$1(r.degree),
      impressions: num$1(r.impressions)
    }));
    const first = arr[0] ?? {};
    const queryCount = num$1(first.queryCount);
    const urlCount = num$1(first.urlCount);
    const deltas = parseJsonList$11(first.deltasJson).map((e) => ({
      step: num$1(e.step),
      l1: num$1(e.l1)
    }));
    const convergenceDelta = deltas.length > 0 ? deltas[deltas.length - 1].l1 : 0;
    return {
      results,
      meta: {
        total: results.length,
        convergenceDelta,
        iterations,
        damping: d,
        queryCount,
        urlCount,
        deltas
      }
    };
  }
});
|
|
2534
|
+
/**
 * Coerces a row cell to a number.
 *
 * Numbers pass through unchanged, bigints are converted with `Number`,
 * `null`/`undefined` become 0, and anything else is coerced with `Number`,
 * falling back to 0 when the coerced value is not finite.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The numeric value.
 */
function num$5(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default:
      break;
  }
  if (v === null || v === undefined) return 0;
  const coerced = Number(v);
  if (Number.isFinite(coerced)) return coerced;
  return 0;
}
|
|
2541
|
+
/**
 * Stringifies a row cell.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} "" for `null`/`undefined`, otherwise `String(v)`.
 */
function str$14(v) {
  const missing = v === null || v === undefined;
  return missing ? "" : String(v);
}
|
|
2544
|
+
/**
 * Normalises a list-valued cell into a JavaScript array.
 *
 * Arrays are returned untouched. Non-empty strings are parsed as JSON and kept
 * only when the parsed value is an array. Every other input — including a
 * string that is not valid JSON — yields an empty array, so one corrupt cell
 * degrades to "no entries" instead of aborting the caller.
 *
 * @param {unknown} v - An array, or a JSON-encoded array string.
 * @returns {unknown[]} The list, or `[]` when none can be recovered.
 */
function parseJsonList$10(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  try {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    // Only malformed JSON is tolerated; anything unexpected still surfaces.
    if (!(err instanceof SyntaxError)) throw err;
    return [];
  }
}
|
|
2552
|
+
/**
 * "change-point" analyzer.
 *
 * For every (query, page) pair with enough daily observations, finds the
 * single split of its daily series that maximises a Gaussian log-likelihood
 * ratio between "one segment" and "two segments", and reports pairs whose best
 * ratio exceeds a threshold.
 */
const changePointAnalyzer = defineAnalyzer$1({
  id: "change-point",
  /**
   * Builds the SQL plan.
   *
   * @param params - Analyzer parameters. Reads `startDate` (default
   *   `daysAgo(93)`), `endDate` (default `defaultEndDate()`), `threshold`
   *   (default 10, minimum LLR), `minImpressions` (default 50, per pair over
   *   the window), `metric` ("clicks" | "impressions", anything else means
   *   "position") and `limit` (default 100).
   * @returns `{ sql, params, current }`; `params` binds start date, end date
   *   and the impression floor, `current` targets the `page_keywords` table.
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    const minDays = 21;
    const minSide = 7;
    const threshold = params.threshold ?? 10;
    const minImpressions = params.minImpressions ?? 50;
    // Whitelisted before being spliced into SQL below.
    const metric = params.metric === "clicks" || params.metric === "impressions" ? params.metric : "position";
    const limit = params.limit ?? 100;
    const valueExpr = metric === "position" ? METRIC_EXPR.position : `CAST(SUM(${metric}) AS DOUBLE)`;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url AS page,
            date,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${valueExpr} AS value
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
          HAVING SUM(impressions) >= 1
        ),
        entity_stats AS (
          SELECT query, page,
            COUNT(*) AS n_total,
            SUM(impressions) AS total_impressions,
            SUM(value) AS sum_total,
            SUM(value * value) AS sumsq_total
          FROM daily
          GROUP BY query, page
          HAVING COUNT(*) >= ${Number(minDays)}
            AND SUM(impressions) >= ?
        ),
        filtered AS (
          SELECT d.*,
            e.n_total, e.sum_total, e.sumsq_total, e.total_impressions
          FROM daily d
          JOIN entity_stats e USING (query, page)
        ),
        -- Running prefix sums: the "left" segment ends at (and includes) the
        -- current row; the "right" segment is everything after it.
        cumulated AS (
          SELECT *,
            COUNT(*) OVER w AS n_left,
            SUM(value) OVER w AS sum_left,
            SUM(value * value) OVER w AS sumsq_left
          FROM filtered
          WINDOW w AS (
            PARTITION BY query, page
            ORDER BY date
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
          )
        ),
        -- Variances are floored at 1e-9 so LN() below stays finite.
        llr_scored AS (
          SELECT *,
            (n_total - n_left) AS n_right,
            (sum_total - sum_left) AS sum_right,
            (sumsq_total - sumsq_left) AS sumsq_right,
            GREATEST(
              (sumsq_left / NULLIF(n_left, 0))
                - (sum_left / NULLIF(n_left, 0)) * (sum_left / NULLIF(n_left, 0)),
              1e-9
            ) AS var_left,
            GREATEST(
              ((sumsq_total - sumsq_left) / NULLIF(n_total - n_left, 0))
                - ((sum_total - sum_left) / NULLIF(n_total - n_left, 0))
                * ((sum_total - sum_left) / NULLIF(n_total - n_left, 0)),
              1e-9
            ) AS var_right,
            GREATEST(
              (sumsq_total / NULLIF(n_total, 0))
                - (sum_total / NULLIF(n_total, 0)) * (sum_total / NULLIF(n_total, 0)),
              1e-9
            ) AS var_single
          FROM cumulated
        ),
        llr AS (
          SELECT *,
            CASE
              WHEN n_left >= ${Number(minSide)} AND (n_total - n_left) >= ${Number(minSide)}
                THEN n_total * LN(var_single)
                  - n_left * LN(var_left)
                  - (n_total - n_left) * LN(var_right)
              ELSE NULL
            END AS llr
          FROM llr_scored
        ),
        best AS (
          SELECT query, page, n_total, total_impressions,
            arg_max(date, llr) AS change_date,
            MAX(llr) AS best_llr,
            arg_max(sum_left / NULLIF(n_left, 0), llr) AS left_mean,
            arg_max((sum_total - sum_left) / NULLIF(n_total - n_left, 0), llr) AS right_mean,
            arg_max(sqrt(var_left), llr) AS left_std,
            arg_max(sqrt(var_right), llr) AS right_std
          FROM llr
          WHERE llr IS NOT NULL
          GROUP BY query, page, n_total, total_impressions
          HAVING MAX(llr) > ${Number(threshold)}
        ),
        -- Only pairs that survived into best need a serialised series; the
        -- join avoids building JSON for every pair in the window.
        series AS (
          SELECT query, page,
            to_json(list({
              'date': strftime(date, '%Y-%m-%d'),
              'value': value
            } ORDER BY date)) AS seriesJson
          FROM daily
          JOIN best USING (query, page)
          GROUP BY query, page
        )
        SELECT
          b.query AS keyword,
          b.page,
          CAST(b.n_total AS DOUBLE) AS totalDays,
          CAST(b.total_impressions AS DOUBLE) AS totalImpressions,
          strftime(b.change_date, '%Y-%m-%d') AS changeDate,
          b.best_llr AS llr,
          b.left_mean AS leftMean,
          b.right_mean AS rightMean,
          (b.right_mean - b.left_mean) AS delta,
          b.left_std AS leftStddev,
          b.right_std AS rightStddev,
          s.seriesJson
        FROM best b
        LEFT JOIN series s USING (query, page)
        -- Tie-break on the pair so the LIMIT cut is reproducible.
        ORDER BY b.best_llr DESC, b.query ASC, b.page ASC
        LIMIT ${Number(limit)}
      `,
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Shapes the SQL rows into the analyzer result.
   *
   * `direction` is "improved" when the mean moved in the favourable direction
   * for the metric (down for position, up for clicks/impressions) and
   * "worsened" otherwise — a zero delta therefore counts as "worsened".
   *
   * @param rows - Rows returned for the statement built by `buildSql`.
   * @param params - The same analyzer parameters (`threshold`, `metric`).
   * @returns `{ results, meta }` with per-direction counts in `meta`.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const threshold = params.threshold ?? 10;
    const metric = params.metric === "clicks" || params.metric === "impressions" ? params.metric : "position";
    const lowerIsBetter = metric === "position";
    const results = arr.map((r) => {
      const delta = num$5(r.delta);
      const improved = lowerIsBetter ? delta < 0 : delta > 0;
      return {
        keyword: str$14(r.keyword),
        page: str$14(r.page),
        totalDays: num$5(r.totalDays),
        totalImpressions: num$5(r.totalImpressions),
        changeDate: str$14(r.changeDate),
        llr: num$5(r.llr),
        leftMean: num$5(r.leftMean),
        rightMean: num$5(r.rightMean),
        delta,
        leftStddev: num$5(r.leftStddev),
        rightStddev: num$5(r.rightStddev),
        direction: improved ? "improved" : "worsened",
        series: parseJsonList$10(r.seriesJson).map((s) => ({
          date: str$14(s.date),
          value: num$5(s.value)
        }))
      };
    });
    return {
      results,
      meta: {
        total: results.length,
        metric,
        threshold,
        improved: results.filter((r) => r.direction === "improved").length,
        worsened: results.filter((r) => r.direction === "worsened").length
      }
    };
  }
});
|
|
2734
|
+
/**
 * Coerces a row cell to a number.
 *
 * Numbers pass through unchanged, bigints are converted with `Number`,
 * `null`/`undefined` become 0, and anything else is coerced with `Number`,
 * falling back to 0 when the coerced value is not finite.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The numeric value.
 */
function num$4(v) {
  const kind = typeof v;
  if (kind === "number") return v;
  if (kind === "bigint") return Number(v);
  if (v === null || v === undefined) return 0;
  const coerced = Number(v);
  return Number.isFinite(coerced) ? coerced : 0;
}
|
|
2741
|
+
/**
 * Stringifies a row cell.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} "" for `null`/`undefined`, otherwise `String(v)`.
 */
function str$13(v) {
  if (v === undefined || v === null) {
    return "";
  }
  return String(v);
}
|
|
2744
|
+
/**
 * Resolves the look-back window shared by both phases of the content-velocity
 * analyzer: `days` clamped to [7, 365] (90 when missing or not a number) and
 * the start date obtained by stepping that many UTC days back from the end
 * date reported by `periodOf$1(params)`.
 */
function resolveContentVelocityWindow(params) {
  const requested = Number(params.days ?? 90);
  // A NaN would survive Math.min/Math.max and produce an Invalid Date below.
  const days = Math.min(Math.max(Number.isNaN(requested) ? 90 : requested, 7), 365);
  const { endDate } = periodOf$1(params);
  const start = new Date(endDate);
  start.setUTCDate(start.getUTCDate() - days);
  return { days, startDate: toIsoDate(start), endDate };
}
/**
 * "content-velocity" analyzer.
 *
 * Reports, per ISO week, how many distinct queries had impressions and how
 * many of them were seen for the first time inside the analysed window, then
 * summarises whether the rate of new queries is speeding up or slowing down.
 */
const contentVelocityAnalyzer = defineAnalyzer$1({
  id: "content-velocity",
  /**
   * Builds the SQL plan.
   *
   * @param params - Analyzer parameters. Reads `days` (default 90, clamped to
   *   7–365); the end date comes from `periodOf$1(params)`.
   * @returns `{ sql, params, current }`; `params` binds start and end date,
   *   `current` targets the `keywords` table.
   */
  buildSql(params) {
    const { startDate, endDate } = resolveContentVelocityWindow(params);
    return {
      sql: `
        WITH src AS (
          SELECT query, date
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        -- "First seen" is relative to the window, not to all history.
        first_seen AS (
          SELECT query, MIN(date) AS first_date FROM src GROUP BY query
        ),
        per_week AS (
          SELECT
            strftime(CAST(date AS DATE), '%G-W%V') AS week,
            MIN(date) AS week_start,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS totalKeywords
          FROM src
          GROUP BY week
        ),
        new_per_week AS (
          SELECT
            strftime(CAST(first_date AS DATE), '%G-W%V') AS week,
            CAST(COUNT(*) AS DOUBLE) AS newKeywords
          FROM first_seen
          GROUP BY week
        )
        SELECT
          pw.week AS week,
          COALESCE(npw.newKeywords, 0) AS newKeywords,
          pw.totalKeywords AS totalKeywords
        FROM per_week pw
        LEFT JOIN new_per_week npw ON pw.week = npw.week
        ORDER BY pw.week ASC
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Shapes the weekly rows and derives the trend summary.
   *
   * The weeks are split in half (the later half gets the extra week when the
   * count is odd). The trend is "accelerating"/"decelerating" when the
   * difference between the two halves' average new-keyword counts exceeds
   * `max(1, 15% of the overall weekly average)`, otherwise "stable".
   *
   * @param rows - Rows returned for the statement built by `buildSql`.
   * @param params - The same analyzer parameters (`days`, period).
   * @returns `{ results, meta }` with `meta.summary`, `days` and the window.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { days, startDate, endDate } = resolveContentVelocityWindow(params);
    const weekly = arr.map((r) => ({
      week: str$13(r.week),
      newKeywords: num$4(r.newKeywords),
      totalKeywords: num$4(r.totalKeywords)
    }));
    const sumNew = (weeks) => weeks.reduce((s, w) => s + w.newKeywords, 0);
    const total = sumNew(weekly);
    const avg = weekly.length > 0 ? total / weekly.length : 0;
    const mid = Math.floor(weekly.length / 2);
    const laterCount = weekly.length - mid;
    const firstAvg = mid > 0 ? sumNew(weekly.slice(0, mid)) / mid : 0;
    const laterAvg = laterCount > 0 ? sumNew(weekly.slice(mid)) / laterCount : 0;
    const diff = laterAvg - firstAvg;
    const threshold = Math.max(1, avg * .15);
    return {
      results: weekly,
      meta: {
        summary: {
          totalNewKeywords: total,
          avgPerWeek: avg,
          trend: diff > threshold ? "accelerating" : diff < -threshold ? "decelerating" : "stable"
        },
        days,
        startDate,
        endDate
      }
    };
  }
});
|
|
2825
|
+
/**
 * Coerces a row cell to a number.
 *
 * Numbers pass through unchanged, bigints are converted with `Number`,
 * `null`/`undefined` become 0, and anything else is coerced with `Number`,
 * falling back to 0 when the coerced value is not finite.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The numeric value.
 */
function num$3(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default:
      break;
  }
  if (v === null || v === undefined) return 0;
  const coerced = Number(v);
  if (!Number.isFinite(coerced)) return 0;
  return coerced;
}
|
|
2832
|
+
/**
 * Stringifies a row cell.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} "" for `null`/`undefined`, otherwise `String(v)`.
 */
function str$12(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
2835
|
+
/**
 * Interprets a row cell as a boolean flag.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {boolean} `true` only for `true`, the number `1`, or the string
 *   "true"; every other value is `false`.
 */
function bool$1(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
|
|
2838
|
+
/**
 * Normalises a list-valued cell into a JavaScript array.
 *
 * Arrays are returned untouched. Non-empty strings are parsed as JSON and kept
 * only when the parsed value is an array. Every other input — including a
 * string that is not valid JSON — yields an empty array, so one corrupt cell
 * degrades to "no entries" instead of aborting the caller.
 *
 * @param {unknown} v - An array, or a JSON-encoded array string.
 * @returns {unknown[]} The list, or `[]` when none can be recovered.
 */
function parseJsonList$9(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  try {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    // Only malformed JSON is tolerated; anything unexpected still surfaces.
    if (!(err instanceof SyntaxError)) throw err;
    return [];
  }
}
|
|
2846
|
+
/**
 * "ctr-anomaly" analyzer.
 *
 * Flags (query, page) pairs whose daily CTR repeatedly falls well below its own
 * trailing baseline while average position stays roughly where it was, and
 * estimates the clicks lost on those days.
 */
const ctrAnomalyAnalyzer = defineAnalyzer$1({
  id: "ctr-anomaly",
  /**
   * Builds the SQL plan.
   *
   * @param params - Analyzer parameters. Reads `startDate` (default
   *   `daysAgo(93)`), `endDate` (default `defaultEndDate()`), `minImpressions`
   *   (default 5, per pair per day), `threshold` (default 2, absolute z-score
   *   needed for a breach) and `limit` (default 200).
   * @returns `{ sql, params, current }`; `params` binds start date, end date
   *   and the daily impression floor, `current` targets `page_keywords`.
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    const minDailyImpressions = params.minImpressions ?? 5;
    const minRollingN = 14;
    const zThreshold = params.threshold ?? 2;
    const maxPositionDelta = 1.5;
    const minBreachDays = 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url AS page,
            date,
            ${METRIC_EXPR.clicks} AS day_clicks,
            ${METRIC_EXPR.impressions} AS day_impressions,
            ${METRIC_EXPR.ctr} AS day_ctr,
            ${METRIC_EXPR.position} AS day_position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
          HAVING SUM(impressions) >= ?
        ),
        -- Baseline = up to 28 preceding observed rows, excluding the current
        -- one. The frame counts rows, not calendar days.
        rolled AS (
          SELECT *,
            AVG(day_ctr) OVER w AS rolling_ctr,
            STDDEV_POP(day_ctr) OVER w AS rolling_stddev,
            AVG(day_position) OVER w AS rolling_position,
            COUNT(*) OVER w AS rolling_n
          FROM daily
          WINDOW w AS (
            PARTITION BY query, page
            ORDER BY date
            ROWS BETWEEN 28 PRECEDING AND 1 PRECEDING
          )
        ),
        flagged AS (
          SELECT *,
            CASE
              WHEN rolling_n >= ${Number(minRollingN)} AND rolling_stddev > 0
                THEN (day_ctr - rolling_ctr) / rolling_stddev
              ELSE 0.0
            END AS z_score,
            CASE
              WHEN rolling_position IS NULL THEN 0.0
              ELSE ABS(day_position - rolling_position)
            END AS position_delta
          FROM rolled
        ),
        -- Caller-supplied threshold is coerced with Number() before being
        -- spliced into the statement, like every other interpolated value.
        breaches AS (
          SELECT *,
            CASE
              WHEN ABS(z_score) >= ${Number(zThreshold)}
                AND position_delta <= ${Number(maxPositionDelta)}
                AND rolling_n >= ${Number(minRollingN)}
              THEN true ELSE false
            END AS is_breach
          FROM flagged
        ),
        per_entity AS (
          SELECT
            query, page,
            COUNT(*) FILTER (WHERE is_breach AND z_score < 0) AS breach_days_down,
            COUNT(*) FILTER (WHERE is_breach AND z_score > 0) AS breach_days_up,
            SUM(CASE
              WHEN is_breach AND z_score < 0
                THEN (rolling_ctr - day_ctr) * day_impressions
              ELSE 0.0
            END) AS clicks_lost,
            SUM(CASE
              WHEN is_breach AND z_score < 0
                THEN ABS(z_score) * day_impressions
              ELSE 0.0
            END) AS severity_raw,
            MAX(CASE WHEN is_breach THEN ABS(z_score) ELSE 0.0 END) AS max_z,
            AVG(rolling_ctr) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_ctr,
            AVG(rolling_position) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_position,
            SUM(day_impressions) AS total_impressions,
            SUM(day_clicks) AS total_clicks
          FROM breaches
          GROUP BY query, page
          HAVING COUNT(*) FILTER (WHERE is_breach AND z_score < 0) >= ${Number(minBreachDays)}
        ),
        series AS (
          SELECT query, page,
            to_json(list({
              'date': strftime(date, '%Y-%m-%d'),
              'ctr': day_ctr,
              'position': day_position,
              'impressions': day_impressions,
              'rollingCtr': rolling_ctr,
              'rollingStddev': rolling_stddev,
              'z': z_score,
              'breach': is_breach AND z_score < 0
            } ORDER BY date)) AS seriesJson
          FROM breaches
          GROUP BY query, page
        )
        SELECT
          e.query AS keyword,
          e.page,
          CAST(e.breach_days_down AS DOUBLE) AS breachDaysDown,
          CAST(e.breach_days_up AS DOUBLE) AS breachDaysUp,
          CAST(ROUND(e.clicks_lost) AS DOUBLE) AS clicksLost,
          e.severity_raw AS severityRaw,
          e.max_z AS maxZ,
          e.baseline_ctr AS baselineCtr,
          e.baseline_position AS baselinePosition,
          e.total_impressions AS totalImpressions,
          e.total_clicks AS totalClicks,
          s.seriesJson
        FROM per_entity e
        LEFT JOIN series s USING (query, page)
        -- clicksLost is rounded, so ties are common; break them on the pair
        -- to keep the LIMIT cut reproducible.
        ORDER BY clicksLost DESC, e.query ASC, e.page ASC
        LIMIT ${Number(limit)}
      `,
      params: [
        startDate,
        endDate,
        minDailyImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Shapes the SQL rows into the analyzer result.
   *
   * In each series point `rollingCtr` / `rollingStddev` stay `null` when the
   * row had no baseline; all other numeric fields fall back to 0.
   *
   * @param rows - Rows returned for the statement built by `buildSql`.
   * @param params - The same analyzer parameters (`threshold`).
   * @returns `{ results, meta }`; `meta.totalBreachDays` counts downward
   *   breach days only.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minRollingN = 14;
    const zThreshold = params.threshold ?? 2;
    const anomalies = arr.map((r) => ({
      keyword: str$12(r.keyword),
      page: str$12(r.page),
      breachDaysDown: num$3(r.breachDaysDown),
      breachDaysUp: num$3(r.breachDaysUp),
      clicksLost: num$3(r.clicksLost),
      severity: num$3(r.severityRaw),
      maxZ: num$3(r.maxZ),
      baselineCtr: num$3(r.baselineCtr),
      baselinePosition: num$3(r.baselinePosition),
      totalImpressions: num$3(r.totalImpressions),
      totalClicks: num$3(r.totalClicks),
      series: parseJsonList$9(r.seriesJson).map((s) => ({
        date: str$12(s.date),
        ctr: num$3(s.ctr),
        position: num$3(s.position),
        impressions: num$3(s.impressions),
        rollingCtr: s.rollingCtr == null ? null : num$3(s.rollingCtr),
        rollingStddev: s.rollingStddev == null ? null : num$3(s.rollingStddev),
        z: num$3(s.z),
        breach: bool$1(s.breach)
      }))
    }));
    const totalClicksLost = anomalies.reduce((s, a) => s + a.clicksLost, 0);
    const totalBreachDays = anomalies.reduce((s, a) => s + a.breachDaysDown, 0);
    return {
      results: anomalies,
      meta: {
        total: anomalies.length,
        totalClicksLost,
        totalBreachDays,
        zThreshold,
        minRollingN
      }
    };
  }
});
|
|
3020
|
+
/**
 * Coerces a row cell to a number.
 *
 * Numbers pass through unchanged, bigints are converted with `Number`,
 * `null`/`undefined` become 0, and anything else is coerced with `Number`,
 * falling back to 0 when the coerced value is not finite.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The numeric value.
 */
function num$2(v) {
  const kind = typeof v;
  if (kind === "number") return v;
  if (kind === "bigint") return Number(v);
  if (v === null || v === undefined) return 0;
  const coerced = Number(v);
  return Number.isFinite(coerced) ? coerced : 0;
}
|
|
3027
|
+
/**
 * Stringifies a row cell.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} "" for `null`/`undefined`, otherwise `String(v)`.
 */
function str$11(v) {
  const missing = v === null || v === undefined;
  return missing ? "" : String(v);
}
|
|
3030
|
+
/**
 * Normalises a list-valued cell into a JavaScript array.
 *
 * Arrays are returned untouched. Non-empty strings are parsed as JSON and kept
 * only when the parsed value is an array. Every other input — including a
 * string that is not valid JSON — yields an empty array, so one corrupt cell
 * degrades to "no entries" instead of aborting the caller.
 *
 * @param {unknown} v - An array, or a JSON-encoded array string.
 * @returns {unknown[]} The list, or `[]` when none can be recovered.
 */
function parseJsonList$8(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  try {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    // Only malformed JSON is tolerated; anything unexpected still surfaces.
    if (!(err instanceof SyntaxError)) throw err;
    return [];
  }
}
|
|
3038
|
+
/**
 * "ctr-curve" analyzer.
 *
 * Produces a CTR-by-position curve (rows bucketed by average position) plus
 * the 50 queries whose CTR deviates most from the average CTR of their
 * position band. The statement returns a single row carrying two JSON arrays.
 */
const ctrCurveAnalyzer = defineAnalyzer$1({
  id: "ctr-curve",
  /**
   * Builds the SQL plan.
   *
   * @param params - Analyzer parameters; only the date window from
   *   `periodOf$1(params)` is used.
   * @returns `{ sql, params, current }`; `params` binds start and end date,
   *   `current` targets the `keywords` table.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf$1(params);
    return {
      sql: `
        WITH src AS (
          SELECT
            query,
            clicks,
            impressions,
            sum_position,
            (sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        curve AS (
          SELECT
            CASE
              WHEN avg_pos <= 1.5 THEN '1'
              WHEN avg_pos <= 2.5 THEN '2'
              WHEN avg_pos <= 3.5 THEN '3'
              WHEN avg_pos <= 5.5 THEN '4-5'
              WHEN avg_pos <= 10.5 THEN '6-10'
              WHEN avg_pos <= 20.5 THEN '11-20'
              ELSE '20+'
            END AS bucket,
            AVG(CAST(clicks AS DOUBLE) / NULLIF(impressions, 0)) AS avgCtr,
            AVG(avg_pos) AS medianPosition,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
            ${METRIC_EXPR.clicks} AS totalClicks,
            ${METRIC_EXPR.impressions} AS totalImpressions
          FROM src
          GROUP BY bucket
        ),
        ks AS (
          SELECT
            query,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position,
            CASE
              WHEN ${METRIC_EXPR.position} <= 3.5 THEN 'top3'
              WHEN ${METRIC_EXPR.position} <= 10.5 THEN 'page1'
              WHEN ${METRIC_EXPR.position} <= 20.5 THEN 'page2'
              ELSE 'deep'
            END AS band
          FROM src
          GROUP BY query
          HAVING SUM(impressions) >= 20
        ),
        band_avg AS (
          SELECT band, AVG(ctr) AS band_avg_ctr FROM ks GROUP BY band
        ),
        outliers AS (
          SELECT
            ks.query, ks.clicks, ks.impressions, ks.ctr, ks.position,
            ba.band_avg_ctr AS expectedCtr,
            ks.ctr - ba.band_avg_ctr AS ctrDiff
          FROM ks JOIN band_avg ba ON ks.band = ba.band
          -- Tie-break on query so the top-50 cut is reproducible.
          ORDER BY ABS(ks.ctr - ba.band_avg_ctr) DESC, ks.query ASC
          LIMIT 50
        )
        -- list() has no inherent order, so both arrays are ordered inside the
        -- aggregate: buckets by position, outliers by deviation.
        SELECT
          (SELECT to_json(list({
            'bucket': bucket,
            'avgCtr': avgCtr,
            'medianPosition': medianPosition,
            'keywordCount': keywordCount,
            'totalClicks': totalClicks,
            'totalImpressions': totalImpressions
          } ORDER BY medianPosition ASC)) FROM curve) AS curve_json,
          (SELECT to_json(list({
            'query': query,
            'clicks': clicks,
            'impressions': impressions,
            'ctr': ctr,
            'position': position,
            'expectedCtr': expectedCtr,
            'ctrDiff': ctrDiff
          } ORDER BY ABS(ctrDiff) DESC, query ASC)) FROM outliers) AS outliers_json
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decodes the two JSON arrays of the single result row.
   *
   * NOTE(review): `medianPosition` is populated from `AVG(avg_pos)` in the
   * statement above, i.e. it is a mean despite its name — confirm whether
   * consumers expect a true median before changing either side.
   *
   * @param rows - Rows returned for the statement built by `buildSql`; only
   *   the first row is read.
   * @param params - The same analyzer parameters (date window).
   * @returns `{ results, meta }`: `results` is the curve, `meta` holds up to 25
   *   over- and 25 under-performing outliers plus the window dates.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf$1(params);
    const row = arr[0] ?? {};
    const curve = parseJsonList$8(row.curve_json).map((r) => ({
      bucket: str$11(r.bucket),
      avgCtr: num$2(r.avgCtr),
      medianPosition: num$2(r.medianPosition),
      keywordCount: num$2(r.keywordCount),
      totalClicks: num$2(r.totalClicks),
      totalImpressions: num$2(r.totalImpressions)
    }));
    const outliers = parseJsonList$8(row.outliers_json).map((r) => ({
      query: str$11(r.query),
      clicks: num$2(r.clicks),
      impressions: num$2(r.impressions),
      ctr: num$2(r.ctr),
      position: num$2(r.position),
      expectedCtr: num$2(r.expectedCtr),
      ctrDiff: num$2(r.ctrDiff)
    }));
    return {
      results: curve,
      meta: {
        overperforming: outliers.filter((o) => o.ctrDiff > 0).slice(0, 25),
        underperforming: outliers.filter((o) => o.ctrDiff < 0).slice(0, 25),
        startDate,
        endDate
      }
    };
  }
});
|
|
3160
|
+
/**
 * Stringifies a value, turning `null` / `undefined` into the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$10(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
3163
|
+
/**
 * Normalizes a JSON-list column value into an array.
 *
 * Arrays are returned untouched, non-empty strings are JSON-parsed (a parse
 * error propagates), and anything that is not a list yields `[]`.
 *
 * @param {unknown} v - Array, JSON text, or anything else.
 * @returns {unknown[]} The list, or an empty array.
 */
function parseJsonList$7(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
3171
|
+
/**
 * "dark-traffic" analyzer.
 *
 * Compares clicks recorded on the page table (`{{FILES}}`) with clicks that are
 * attributed to a query (keywords / page_keywords tables). The SQL returns a
 * single row with three JSON columns: site-wide page totals, site-wide keyword
 * totals, and the 50 pages with the most unattributed ("dark") clicks.
 */
const darkTrafficAnalyzer = defineAnalyzer$1({
  id: "dark-traffic",
  buildSql(params) {
    const { startDate, endDate } = periodOf$1(params);
    const sql = `
      WITH page_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      kw_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      per_page AS (
        SELECT url, SUM(clicks) AS page_clicks
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
        HAVING SUM(clicks) > 0
      ),
      per_page_kw AS (
        SELECT url, SUM(clicks) AS attributed_clicks, COUNT(DISTINCT query) AS kw_count
        FROM read_parquet({{FILES_PAGE_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
      ),
      page_rows AS (
        SELECT
          p.url AS url,
          CAST(p.page_clicks AS DOUBLE) AS totalClicks,
          CAST(COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS attributedClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS darkClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE)
            / NULLIF(p.page_clicks, 0) AS darkPercent,
          CAST(COALESCE(k.kw_count, 0) AS DOUBLE) AS keywordCount
        FROM per_page p
        LEFT JOIN per_page_kw k ON p.url = k.url
        WHERE p.page_clicks - COALESCE(k.attributed_clicks, 0) > 0
        ORDER BY darkClicks DESC
        LIMIT 50
      )
      SELECT
        (SELECT to_json({
          'totalClicks': CAST(total_clicks AS DOUBLE),
          'totalImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM page_totals) AS page_totals_json,
        (SELECT to_json({
          'attributedClicks': CAST(total_clicks AS DOUBLE),
          'attributedImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM kw_totals) AS kw_totals_json,
        (SELECT to_json(list({
          'url': url,
          'totalClicks': totalClicks,
          'attributedClicks': attributedClicks,
          'darkClicks': darkClicks,
          'darkPercent': darkPercent,
          'keywordCount': keywordCount
        })) FROM page_rows) AS pages_json
    `;
    // Each of the four date-filtered CTEs consumes one (start, end) pair, in order.
    const range = [startDate, endDate];
    return {
      sql,
      params: [...range, ...range, ...range, ...range],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      },
      extraFiles: {
        KEYWORDS: {
          table: "keywords",
          partitions: enumeratePartitions(startDate, endDate)
        },
        PAGE_KEYWORDS: {
          table: "page_keywords",
          partitions: enumeratePartitions(startDate, endDate)
        }
      }
    };
  },
  reduceSql(rows, params) {
    const list = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf$1(params);
    const [row = {}] = [list[0] ?? {}];
    // JSON object columns may arrive as text or already decoded.
    const decodeObject = (raw) => {
      if (typeof raw === "string") return JSON.parse(raw);
      return raw ?? {};
    };
    const pageTotals = decodeObject(row.page_totals_json);
    const kwTotals = decodeObject(row.kw_totals_json);
    const totalClicks = num$1(pageTotals.totalClicks);
    const totalImpressions = num$1(pageTotals.totalImpressions);
    const attributedClicks = num$1(kwTotals.attributedClicks);
    const attributedImpressions = num$1(kwTotals.attributedImpressions);
    // Site-wide dark clicks are clamped so over-attribution never goes negative.
    const darkClicks = Math.max(0, totalClicks - attributedClicks);
    let darkPercent = 0;
    if (totalClicks > 0) darkPercent = darkClicks / totalClicks;
    const results = parseJsonList$7(row.pages_json).map((page) => ({
      url: str$10(page.url),
      totalClicks: num$1(page.totalClicks),
      attributedClicks: num$1(page.attributedClicks),
      darkClicks: num$1(page.darkClicks),
      darkPercent: num$1(page.darkPercent),
      keywordCount: num$1(page.keywordCount)
    }));
    const summary = {
      totalClicks,
      attributedClicks,
      darkClicks,
      darkPercent,
      totalImpressions,
      attributedImpressions
    };
    return {
      results,
      meta: {
        summary,
        startDate,
        endDate
      }
    };
  }
});
|
|
3295
|
+
/**
 * Asserts that `input` looks like a BuilderState (an object carrying a
 * `dimensions` array) and returns it unchanged.
 *
 * @param {unknown} input - Candidate value, taken from `params.q` by callers.
 * @param {string} tool - Tool name used as the error-message prefix.
 * @returns {object} The same `input` reference.
 * @throws {Error} When `input` is missing or has no `dimensions` array.
 */
function requireBuilderState(input, tool) {
  const hasDimensions = Boolean(input) && typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (hasDimensions) return input;
  throw new Error(`${tool}: params.q is required (BuilderState)`);
}
|
|
3299
|
+
/**
 * Validates an optional BuilderState parameter.
 *
 * @param {unknown} input - Candidate value; `null` / `undefined` mean "not supplied".
 * @param {string} tool - Tool name used as the error-message prefix.
 * @param {string} key - Parameter name reported in the error message.
 * @returns {object|null} `null` when absent, otherwise the same `input` reference.
 * @throws {Error} When a value is supplied but has no `dimensions` array.
 */
function optionalBuilderState(input, tool, key) {
  if (input === null || input === undefined) return null;
  const hasDimensions = typeof input === "object" && "dimensions" in input && Array.isArray(input.dimensions);
  if (hasDimensions) return input;
  throw new Error(`${tool}: params.${key} must be a BuilderState`);
}
|
|
3304
|
+
/** Column names whose values are converted to JS numbers by `coerceNumericCols`. */
const NUMERIC_METRIC_COLS = [
  "clicks", "impressions", "ctr", "position",
  "prevClicks", "prevImpressions", "prevCtr", "prevPosition",
  "variantCount", "totalCount"
];
/**
 * Returns a shallow copy of `row` in which every present, non-nullish metric
 * column listed in `NUMERIC_METRIC_COLS` has been passed through `Number()`.
 * Other columns, and nullish metric values, are copied as-is.
 *
 * @param {object} row - Result row; not mutated.
 * @returns {object} The coerced copy.
 */
function coerceNumericCols(row) {
  const copy = { ...row };
  NUMERIC_METRIC_COLS.forEach((name) => {
    if (!(name in copy)) return;
    const current = copy[name];
    if (current === null || current === undefined) return;
    copy[name] = Number(current);
  });
  return copy;
}
|
|
3321
|
+
/**
 * Shapes raw data-query rows plus side-query results into `{ results, meta }`.
 *
 * With `opts.hasPrev` the total row count is read from the `count` side query
 * (falling back to the number of rows); otherwise it is read from the
 * `totalCount` column of the first row and the total/helper columns are
 * stripped from every row. Totals always come from the `totals` side query.
 *
 * @param {object[]} rows - Main query rows.
 * @param {object} [extras] - Side-query rows keyed by name (`totals`, `count`, `canonicalExtras`).
 * @param {{ hasPrev: boolean }} opts - Whether a comparison period was queried.
 * @returns {{ results: unknown, meta: { totalCount: number, totals: object } }}
 */
function shapeDataQuery(rows, extras, opts) {
  let cleaned;
  let totalCount;
  if (opts.hasPrev) {
    cleaned = rows.map((row) => coerceNumericCols(row));
    const countRow = extras?.count?.[0];
    totalCount = Number(countRow?.total ?? cleaned.length);
  } else {
    totalCount = Number(rows[0]?.totalCount ?? 0);
    // Drop the per-row total/helper columns before coercion.
    cleaned = rows.map(({ totalCount: _tc, totalClicks: _tclk, totalImpressions: _timp, totalCtr: _tctr, totalPosition: _tpos, sum_position: _sp, ...rest }) => coerceNumericCols(rest));
  }
  const totalsRow = extras?.totals?.[0] ?? {};
  const metric = (key) => Number(totalsRow[key] ?? 0);
  const totals = {
    clicks: metric("clicks"),
    impressions: metric("impressions"),
    ctr: metric("ctr"),
    position: metric("position")
  };
  const extrasResults = extras?.canonicalExtras ? [{
    key: "canonicalExtras",
    results: extras.canonicalExtras
  }] : [];
  return {
    results: mergeExtras(cleaned, extrasResults),
    meta: {
      totalCount,
      totals
    }
  };
}
|
|
3355
|
+
/**
 * Builds the execution plan for the "data-query" tool.
 *
 * `params.q` must be a BuilderState without the `date` dimension. An optional
 * `params.qc` BuilderState switches the main statement to the comparison SQL
 * and adds a `count` side query. A `totals` side query and any extras queries
 * are always included.
 *
 * @param {object} params - Tool parameters (`q`, optional `qc`, optional `comparisonFilter`).
 * @param {object} options - Resolver options; `options.adapter.inferTable` picks the table key.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `q` is not a BuilderState, `qc` is malformed, or `q` uses the `date` dimension.
 */
function buildDataQueryPlan(params, options) {
  const state = requireBuilderState(params.q, "data-query");
  if (state.dimensions.includes("date")) throw new Error("data-query: date dimension not supported; use data-detail");
  const prev = optionalBuilderState(params.qc, "data-query", "qc");
  const totals = buildTotalsSql(state, options);
  const extraQueries = [{
    name: "totals",
    sql: totals.sql,
    params: totals.params
  }];
  for (const extra of buildExtrasQueries(state, options)) {
    extraQueries.push({
      name: extra.key,
      sql: extra.sql,
      params: extra.params
    });
  }
  const tableKey = options.adapter.inferTable(state.dimensions);
  let main;
  if (prev) {
    const comparison = resolveComparisonSQL(state, prev, options, params.comparisonFilter);
    extraQueries.push({
      name: "count",
      sql: comparison.countSql,
      params: comparison.countParams
    });
    main = comparison;
  } else {
    main = resolveToSQLOptimized(state, options);
  }
  const hasPrev = Boolean(prev);
  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev })
  };
}
|
|
3395
|
+
/**
 * Builds the execution plan for the "data-detail" tool (a per-date series).
 *
 * `params.q` must be a BuilderState that includes the `date` dimension. A
 * `totals` side query is always issued; a `prevTotals` side query is added when
 * `params.qc` is supplied. The returned `shape` coerces metric columns, pads
 * the series over the filter's date range when both bounds are known, and
 * reports totals (and previous totals) in `meta`.
 *
 * @param {object} params - Tool parameters (`q`, optional `qc`).
 * @param {object} options - Resolver options; `options.adapter.inferTable` picks the table key.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `q` is not a BuilderState, `qc` is malformed, or `date` is not among the dimensions.
 */
function buildDataDetailPlan(params, options) {
  const state = requireBuilderState(params.q, "data-detail");
  if (!state.dimensions.includes("date")) throw new Error("data-detail: `date` dimension is required");
  const main = resolveToSQL(state, options);
  const totals = buildTotalsSql(state, options);
  const prev = optionalBuilderState(params.qc, "data-detail", "qc");
  const extraQueries = [{
    name: "totals",
    sql: totals.sql,
    params: totals.params
  }];
  if (prev) {
    const previousTotals = buildTotalsSql(prev, options);
    extraQueries.push({
      name: "prevTotals",
      sql: previousTotals.sql,
      params: previousTotals.params
    });
  }
  const tableKey = options.adapter.inferTable(state.dimensions);
  const { startDate: rangeStart, endDate: rangeEnd } = extractDateRange(state.filter);
  // Converts one totals row into plain numbers, defaulting missing metrics to 0.
  const readTotals = (source) => ({
    clicks: Number(source.clicks ?? 0),
    impressions: Number(source.impressions ?? 0),
    ctr: Number(source.ctr ?? 0),
    position: Number(source.position ?? 0)
  });
  const shape = (rows, _params, extras) => {
    const coerced = rows.map((row) => coerceNumericCols(row));
    let daily = coerced;
    if (rangeStart && rangeEnd) {
      daily = padTimeseries$1(coerced, {
        startDate: rangeStart,
        endDate: rangeEnd
      });
    }
    const meta = { totals: readTotals(extras?.totals?.[0] ?? {}) };
    if (extras?.prevTotals) meta.previousTotals = readTotals(extras.prevTotals[0] ?? {});
    return {
      results: daily,
      meta
    };
  };
  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape
  };
}
|
|
3450
|
+
/**
 * Token → canonical-token map applied by `normalizeQuery`.
 * A token mapped to the empty string is dropped from the normalized query.
 */
const SYNONYMS = {
  // → "validator"
  checker: "validator", tester: "validator", verifier: "validator",
  // → "validate"
  verify: "validate", check: "validate", test: "validate", checking: "validate", testing: "validate",
  // → "generator"
  creator: "generator", builder: "generator", maker: "generator",
  // → "generate"
  create: "generate", build: "generate", make: "generate",
  // → "search"
  lookup: "search", finder: "search", find: "search",
  // filler words, removed
  online: "", free: ""
};
|
|
3471
|
+
/** Tokens ending in "s" that `depluralize` returns unchanged (not plurals, or brand/tech names). */
const NO_STRIP_S = new Set([
  // short words and acronyms
  "css", "js", "ts", "os", "as", "is", "us", "has", "was", "its", "this", "yes", "no", "bus", "gas",
  "dns", "rss", "sms", "gps", "aws", "sas", "cms", "ios",
  // -ss
  "less", "loss", "miss", "pass", "class", "access", "process", "express", "address", "cross", "press",
  "stress", "progress", "success", "business", "wordpress",
  // -us
  "status", "radius", "nexus", "focus", "bonus", "campus", "census", "corpus", "nucleus", "stimulus",
  "terminus", "versus", "virus", "surplus", "cactus",
  // -is
  "analysis", "basis", "thesis", "crisis", "axis", "genesis", "synopsis", "diagnosis", "emphasis",
  "hypothesis", "synthesis", "parenthesis", "redis", "apis",
  // -os
  "chaos", "demos", "logos", "photos", "videos",
  // -js
  "nuxtjs", "nextjs", "nodejs", "reactjs", "vuejs", "angularjs", "expressjs", "nestjs", "threejs",
  "alpinejs", "solidjs", "sveltejs", "dejs", "bunjs", "denojs",
  // miscellaneous
  "canvas", "atlas", "alias", "bias", "perhaps", "whereas", "kubernetes", "sass", "postgres", "always",
  "across",
  // -ous
  "previous", "various", "serious", "famous", "anonymous", "continuous", "dangerous", "generous",
  "obvious", "numerous", "curious", "nervous", "conscious"
]);
|
|
3585
|
+
function depluralize(token) {
|
|
2569
3586
|
if (token.length <= 3) return token;
|
|
2570
3587
|
if (NO_STRIP_S.has(token)) return token;
|
|
2571
3588
|
if (token.endsWith("ies") && token.length > 4) return `${token.slice(0, -3)}y`;
|
|
@@ -2574,124 +3591,1599 @@ function depluralize(token) {
|
|
|
2574
3591
|
if (token.endsWith("s") && !token.endsWith("ss")) return token.slice(0, -1);
|
|
2575
3592
|
return token;
|
|
2576
3593
|
}
|
|
2577
|
-
const SEPARATOR_RE = /[-_/.@#:+]+/g;
|
|
2578
|
-
const WHITESPACE_RE = /\s+/g;
|
|
2579
|
-
function normalizeQuery(query) {
|
|
2580
|
-
return query.toLowerCase().replace(SEPARATOR_RE, " ").replace(WHITESPACE_RE, " ").trim().split(" ").filter(Boolean).map((token) => SYNONYMS[token] ?? token).filter(Boolean).map(depluralize).sort().join(" ");
|
|
3594
|
+
/** Runs of punctuation that are treated as word separators. */
const SEPARATOR_RE = /[-_/.@#:+]+/g;
/** Runs of whitespace, collapsed to a single space. */
const WHITESPACE_RE = /\s+/g;
/**
 * Produces an order-insensitive canonical form of a search query.
 *
 * Steps: lowercase, replace separator punctuation and whitespace runs with a
 * single space, split into tokens, apply `SYNONYMS` (a token mapped to "" is
 * dropped), depluralize each token, sort, and join with single spaces.
 *
 * @param {string} query - Raw query text.
 * @returns {string} The normalized query key.
 */
function normalizeQuery(query) {
  const tokens = query
    .toLowerCase()
    .replace(SEPARATOR_RE, " ")
    .replace(WHITESPACE_RE, " ")
    .trim()
    .split(" ")
    .filter(Boolean);
  return tokens
    // Own-property check: a plain `SYNONYMS[token]` lookup would also return
    // inherited members, so the token "constructor" resolved to the `Object`
    // function and corrupted the normalized key.
    .map((token) => (Object.prototype.hasOwnProperty.call(SYNONYMS, token) ? SYNONYMS[token] : token))
    .filter(Boolean)
    .map(depluralize)
    .sort()
    .join(" ");
}
|
|
3599
|
+
/**
 * "data-detail" analyzer: thin wrapper around `buildDataDetailPlan` using the
 * `pgResolverAdapter`. The plan is rebuilt in `reduceSql` to obtain its `shape`
 * function, which receives the side-query rows from `ctx.extras`.
 */
const dataDetailAnalyzer = defineAnalyzer$1({
  id: "data-detail",
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  reduceSql(rows, params, ctx) {
    const list = Array.isArray(rows) ? rows : [];
    const plan = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(list, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
|
|
3623
|
+
/**
 * "data-query" analyzer: thin wrapper around `buildDataQueryPlan` using the
 * `pgResolverAdapter`. The plan is rebuilt in `reduceSql` to obtain its `shape`
 * function, which receives the side-query rows from `ctx.extras`.
 */
const dataQueryAnalyzer = defineAnalyzer$1({
  id: "data-query",
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  reduceSql(rows, params, ctx) {
    const list = Array.isArray(rows) ? rows : [];
    const plan = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(list, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
|
|
3647
|
+
/**
 * Stringifies a value, turning `null` / `undefined` into the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$9(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
3650
|
+
/**
 * "device-gap" analyzer.
 *
 * Queries per-date, per-device metrics from the "devices" table and pairs the
 * DESKTOP and MOBILE rows of each date. A side missing for a date is reported
 * as all-zero metrics. Gaps are `desktop.ctr - mobile.ctr` and
 * `mobile.position - desktop.position`. The summary averages the gaps over all
 * dates and labels the trend by comparing the mean gap of the first seven
 * dates with that of the last seven.
 */
const deviceGapAnalyzer = defineAnalyzer$1({
  id: "device-gap",
  buildSql(params) {
    const { startDate, endDate } = periodOf$1(params);
    const sql = `
      SELECT
        date,
        device,
        ${METRIC_EXPR.clicks} AS clicks,
        ${METRIC_EXPR.impressions} AS impressions,
        ${METRIC_EXPR.ctr} AS ctr,
        ${METRIC_EXPR.position} AS position
      FROM read_parquet({{FILES}}, union_by_name = true)
      WHERE date >= ? AND date <= ?
      GROUP BY date, device
      ORDER BY date ASC
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "devices",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf$1(params);
    // Group the DESKTOP / MOBILE metrics by date; other devices are ignored.
    const perDate = /* @__PURE__ */ new Map();
    for (const raw of input) {
      const date = str$9(raw.date);
      const device = str$9(raw.device).toUpperCase();
      const metrics = {
        clicks: num$1(raw.clicks),
        impressions: num$1(raw.impressions),
        ctr: num$1(raw.ctr),
        position: num$1(raw.position)
      };
      const sides = perDate.get(date) ?? {};
      if (device === "DESKTOP") sides.desktop = metrics;
      else if (device === "MOBILE") sides.mobile = metrics;
      perDate.set(date, sides);
    }
    const zero = {
      clicks: 0,
      impressions: 0,
      ctr: 0,
      position: 0
    };
    const ordered = [...perDate.entries()].sort(([a], [b]) => a.localeCompare(b));
    const daily = ordered.map(([date, sides]) => {
      const desktop = sides.desktop ?? zero;
      const mobile = sides.mobile ?? zero;
      return {
        date,
        desktop,
        mobile,
        gaps: {
          ctrGap: desktop.ctr - mobile.ctr,
          positionGap: mobile.position - desktop.position
        }
      };
    });
    // Mean gaps over daily[from, to); zeros for an empty window.
    const meanGaps = (from, to) => {
      const window = daily.slice(from, to);
      if (window.length === 0) return {
        ctr: 0,
        pos: 0
      };
      let ctrSum = 0;
      let posSum = 0;
      for (const day of window) {
        ctrSum += day.gaps.ctrGap;
        posSum += day.gaps.positionGap;
      }
      return {
        ctr: ctrSum / window.length,
        pos: posSum / window.length
      };
    };
    const first = meanGaps(0, 7);
    const last = meanGaps(Math.max(0, daily.length - 7), daily.length);
    // A shrinking absolute gap is "improving"; changes under .005 are "stable".
    const trendOf = (before, after) => {
      const delta = Math.abs(after) - Math.abs(before);
      if (Math.abs(delta) < .005) return "stable";
      return delta < 0 ? "improving" : "worsening";
    };
    let ctrGapTotal = 0;
    let positionGapTotal = 0;
    for (const day of daily) ctrGapTotal += day.gaps.ctrGap;
    for (const day of daily) positionGapTotal += day.gaps.positionGap;
    const divisor = Math.max(1, daily.length);
    return {
      results: daily,
      meta: {
        summary: {
          avgCtrGap: ctrGapTotal / divisor,
          avgPositionGap: positionGapTotal / divisor,
          ctrGapTrend: trendOf(first.ctr, last.ctr),
          positionGapTrend: trendOf(first.pos, last.pos)
        },
        startDate,
        endDate
      }
    };
  }
});
|
|
3758
|
+
/**
 * Stringifies a value, turning `null` / `undefined` into the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$8(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
3761
|
+
/**
 * Normalizes a JSON-list column value into an array.
 *
 * Arrays are returned untouched, non-empty strings are JSON-parsed (a parse
 * error propagates), and anything that is not a list yields `[]`.
 *
 * @param {unknown} v - Array, JSON text, or anything else.
 * @returns {unknown[]} The list, or an empty array.
 */
function parseJsonList$6(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
3769
|
+
/** Tokens excluded from intent-atlas clustering; interpolated into the SQL as a `NOT IN (...)` list. */
const INTENT_ATLAS_STOP_WORDS = [
  // articles and forms of "to be"
  "the", "a", "an", "is", "are", "was", "were", "be", "been",
  // prepositions and conjunctions
  "of", "to", "in", "for", "on", "and", "or", "with", "at", "by", "from", "into", "about", "as", "so",
  "than", "then",
  // determiners and pronouns
  "that", "this", "my", "your", "our", "their", "his", "her", "its", "me", "you",
  // question words and auxiliaries
  "what", "how", "why", "when", "where", "who", "which", "do", "does"
];
|
|
3817
|
+
/**
 * "intent-atlas" analyzer.
 *
 * Clusters queries from the "keywords" table by their two highest-weighted
 * tokens. Tokens come from splitting the lowercased query on whitespace,
 * keeping those of length >= 3 that are not stop words; a token's weight is its
 * summed impressions, and tokens under 50 impressions are ignored. Queries with
 * fewer than two surviving tokens are left unclustered.
 *
 * Parameters (all optional): `startDate` (default `daysAgo(90)`), `endDate`
 * (default `defaultEndDate()`), `minImpressions` (20), `minClusterSize` (3),
 * `limit` (200).
 */
const intentAtlasAnalyzer = defineAnalyzer$1({
  id: "intent-atlas",
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(90);
    const minQueryImpressions = params.minImpressions ?? 20;
    const minClusterSize = params.minClusterSize ?? 3;
    const minTokenImpressions = 50;
    const limit = params.limit ?? 200;
    const stopList = INTENT_ATLAS_STOP_WORDS.map((word) => `'${word}'`).join(", ");
    // Thresholds interpolated into the text are forced through Number().
    const sql = `
      WITH queries AS (
        SELECT
          query,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
        GROUP BY query
        HAVING SUM(impressions) >= ?
      ),
      tokens AS (
        SELECT q.query, q.impressions, q.clicks, q.position,
          LOWER(t.token) AS token
        FROM queries q,
          unnest(regexp_split_to_array(LOWER(q.query), '\\s+')) AS t(token)
        WHERE LENGTH(t.token) >= 3
          AND LOWER(t.token) NOT IN (${stopList})
      ),
      token_weights AS (
        SELECT token,
          SUM(impressions) AS token_impressions,
          COUNT(DISTINCT query) AS query_count
        FROM tokens
        GROUP BY token
        HAVING SUM(impressions) >= ${Number(minTokenImpressions)}
      ),
      ranked_tokens AS (
        SELECT t.query, t.token, tw.token_impressions,
          ROW_NUMBER() OVER (
            PARTITION BY t.query
            ORDER BY tw.token_impressions DESC, t.token ASC
          ) AS rnk
        FROM tokens t
        JOIN token_weights tw USING (token)
      ),
      cluster_keys AS (
        SELECT query,
          array_to_string(list(token ORDER BY token), ' + ') AS cluster_key
        FROM ranked_tokens
        WHERE rnk <= 2
        GROUP BY query
        HAVING COUNT(*) >= 2
      ),
      clustered AS (
        SELECT q.query, q.impressions, q.clicks, q.position, ck.cluster_key
        FROM queries q
        JOIN cluster_keys ck USING (query)
      )
      SELECT
        cluster_key AS clusterKey,
        COUNT(*) AS keywordCount,
        SUM(impressions) AS totalImpressions,
        SUM(clicks) AS totalClicks,
        SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr,
        AVG(position) AS avgPosition,
        to_json(list({
          'query': query,
          'impressions': impressions,
          'clicks': clicks,
          'position': position
        } ORDER BY impressions DESC)) AS keywords
      FROM clustered
      GROUP BY cluster_key
      HAVING COUNT(*) >= ${Number(minClusterSize)}
      ORDER BY totalImpressions DESC
      LIMIT ${Number(limit)}
    `;
    return {
      sql,
      params: [
        startDate,
        endDate,
        minQueryImpressions
      ],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows) {
    const input = Array.isArray(rows) ? rows : [];
    // Keep at most 25 member keywords per cluster, in the order delivered.
    const toKeyword = (k) => ({
      query: str$8(k.query),
      impressions: num$1(k.impressions),
      clicks: num$1(k.clicks),
      position: num$1(k.position)
    });
    const clusters = input.map((raw) => ({
      clusterKey: str$8(raw.clusterKey),
      keywordCount: num$1(raw.keywordCount),
      totalImpressions: num$1(raw.totalImpressions),
      totalClicks: num$1(raw.totalClicks),
      ctr: num$1(raw.ctr),
      avgPosition: num$1(raw.avgPosition),
      keywords: parseJsonList$6(raw.keywords).slice(0, 25).map((k) => toKeyword(k))
    }));
    let totalImpressions = 0;
    let totalKeywords = 0;
    for (const cluster of clusters) {
      totalImpressions += cluster.totalImpressions;
      totalKeywords += cluster.keywordCount;
    }
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalImpressions,
        totalKeywords
      }
    };
  }
});
|
|
3936
|
+
/**
 * Stringifies a value, turning `null` / `undefined` into the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$7(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
3939
|
+
/**
 * Normalizes a JSON-list column value into an array.
 *
 * Arrays are returned untouched, non-empty strings are JSON-parsed (a parse
 * error propagates), and anything that is not a list yields `[]`.
 *
 * @param {unknown} v - Array, JSON text, or anything else.
 * @returns {unknown[]} The list, or an empty array.
 */
function parseJsonList$5(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
3947
|
+
/**
 * "keyword-breadth" analyzer.
 *
 * Counts the distinct queries per URL in the "page_keywords" table (rows with
 * impressions > 0) and returns, as JSON columns of a single row: the
 * distribution of pages over keyword-count buckets, up to 20 "fragile" pages
 * (<= 2 keywords and >= 5 clicks), up to 20 "authority" pages (>= 20
 * keywords), and overall statistics.
 */
const keywordBreadthAnalyzer = defineAnalyzer$1({
  id: "keyword-breadth",
  buildSql(params) {
    const { startDate, endDate } = periodOf$1(params);
    const sql = `
      WITH per_page AS (
        SELECT
          url,
          CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ? AND impressions > 0
        GROUP BY url
      ),
      bucketed AS (
        SELECT
          CASE
            WHEN keywordCount = 1 THEN '1'
            WHEN keywordCount BETWEEN 2 AND 5 THEN '2-5'
            WHEN keywordCount BETWEEN 6 AND 15 THEN '6-15'
            WHEN keywordCount BETWEEN 16 AND 50 THEN '16-50'
            ELSE '50+'
          END AS bucket,
          MIN(keywordCount) AS sort_key,
          CAST(COUNT(*) AS DOUBLE) AS pageCount
        FROM per_page
        GROUP BY bucket
      ),
      fragile AS (
        SELECT url, keywordCount, clicks, impressions
        FROM per_page
        WHERE keywordCount <= 2 AND clicks >= 5
        ORDER BY clicks DESC
        LIMIT 20
      ),
      authority AS (
        SELECT url, keywordCount, clicks, impressions
        FROM per_page
        WHERE keywordCount >= 20
        ORDER BY keywordCount DESC
        LIMIT 20
      ),
      stats AS (
        SELECT
          CAST(COUNT(*) AS DOUBLE) AS totalPages,
          CAST(AVG(keywordCount) AS DOUBLE) AS avgKeywordsPerPage,
          CAST(SUM(CASE WHEN keywordCount <= 2 THEN 1 ELSE 0 END) AS DOUBLE) AS fragileCount,
          CAST(SUM(CASE WHEN keywordCount >= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS authorityCount
        FROM per_page
      )
      SELECT
        (SELECT to_json(list({ 'bucket': bucket, 'pageCount': pageCount, 'sortKey': sort_key })
          ORDER BY sort_key ASC) FROM bucketed) AS distribution_json,
        (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions })) FROM fragile) AS fragile_json,
        (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions })) FROM authority) AS authority_json,
        (SELECT to_json({
          'totalPages': totalPages,
          'avgKeywordsPerPage': avgKeywordsPerPage,
          'fragileCount': fragileCount,
          'authorityCount': authorityCount
        }) FROM stats) AS stats_json
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const list = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf$1(params);
    const row = list[0] ?? {};
    // Buckets are re-sorted here by their smallest keyword count.
    const buckets = parseJsonList$5(row.distribution_json);
    buckets.sort((a, b) => num$1(a.sortKey) - num$1(b.sortKey));
    const distribution = buckets.map((entry) => ({
      bucket: str$7(entry.bucket),
      pageCount: num$1(entry.pageCount)
    }));
    const toPage = (entry) => ({
      url: str$7(entry.url),
      keywordCount: num$1(entry.keywordCount),
      clicks: num$1(entry.clicks),
      impressions: num$1(entry.impressions)
    });
    const fragilePages = parseJsonList$5(row.fragile_json).map((entry) => toPage(entry));
    const authorityPages = parseJsonList$5(row.authority_json).map((entry) => toPage(entry));
    // The stats column may arrive as JSON text or already decoded.
    let stats;
    if (typeof row.stats_json === "string") stats = JSON.parse(row.stats_json);
    else stats = row.stats_json ?? {};
    return {
      results: distribution,
      meta: {
        fragilePages,
        authorityPages,
        summary: {
          totalPages: num$1(stats.totalPages),
          avgKeywordsPerPage: num$1(stats.avgKeywordsPerPage),
          fragileCount: num$1(stats.fragileCount),
          authorityCount: num$1(stats.authorityCount)
        },
        startDate,
        endDate
      }
    };
  }
});
|
|
4056
|
+
/**
 * Coerces a value to a string, mapping `null` and `undefined` to "".
 * @param {unknown} v - value to stringify
 * @returns {string} `String(v)`, or "" when `v` is nullish
 */
function str$6(v) {
	if (v === null || v === void 0) return "";
	return String(v);
}
|
|
4059
|
+
/**
 * Normalizes a JSON-list cell into an array.
 *
 * Arrays are returned as-is; non-empty strings are JSON-parsed and kept only
 * when the parsed value is an array; anything else yields an empty array.
 * @param {unknown} v - array, JSON string, or other value
 * @returns {Array} the list, or [] when no list could be obtained
 * @throws {SyntaxError} when `v` is a non-empty string that is not valid JSON
 */
function parseJsonList$4(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	const decoded = JSON.parse(v);
	if (Array.isArray(decoded)) return decoded;
	return [];
}
|
|
4067
|
+
/**
 * Normalizes scatter points and thins long series.
 *
 * Every point is converted to `{ rank, impressions, clicks, query }`. Series of
 * 80 points or fewer are returned whole. Longer series keep the first 10
 * points, then keep a later point only when its rank has reached a threshold
 * that grows by 15% after each kept point.
 *
 * NOTE(review): the threshold starts at 1.15 while the stepped region starts
 * at index 10. If ranks are consecutive from 1 (as the caller's ROW_NUMBER
 * suggests), a run of points right after the first 10 is kept unconditionally
 * until the threshold catches up — confirm this is intended.
 *
 * @param {Array<object>} points - raw points, presumably ordered by rank ascending
 * @returns {Array<{rank: number, impressions: number, clicks: number, query: string}>}
 */
function downsampleLogRank(points) {
	const normalized = points.map(({ rank, impressions, clicks, query }) => ({
		rank: num$1(rank),
		impressions: num$1(impressions),
		clicks: num$1(clicks),
		query: str$6(query)
	}));
	if (normalized.length <= 80) return normalized;
	const kept = normalized.slice(0, 10);
	let threshold = 1.15;
	for (let i = 10; i < normalized.length; i++) {
		const candidate = normalized[i];
		// Written as a negated >= so a NaN rank is skipped, exactly as before.
		if (!(candidate.rank >= threshold)) continue;
		kept.push(candidate);
		threshold *= 1.15;
	}
	return kept;
}
|
|
4085
|
+
/**
 * Analyzer "long-tail": per page, fits a straight line to ln(impressions)
 * versus ln(rank) over the page's queries and labels the page by the slope.
 *
 * Reads the "page_keywords" table for the period returned by `periodOf$1`.
 */
const longTailAnalyzer = defineAnalyzer$1({
	id: "long-tail",
	/**
	 * Builds the SQL text, positional parameters and partition list.
	 * @param {object} params - reads `minImpressions` (default 5) and `limit`
	 *   (default 100); the date range comes from `periodOf$1(params)`
	 * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf$1(params);
		// Pages with fewer qualifying queries than this are dropped (HAVING in `fit`).
		const minQueries = 10;
		const minQueryImpressions = params.minImpressions ?? 5;
		const limit = params.limit ?? 100;
		return {
			// CTE overview:
			//   page_queries - impressions/clicks per (url, query), filtered by the `?` impression floor
			//   ranked       - per-page rank by impressions DESC, ties broken by query ASC
			//   log_space    - adds LN(rnk) and LN(impressions)
			//   fit          - per-page regression (slope, intercept, r2) plus totals and head share
			//   scatter      - per-page JSON list of (rank, impressions, clicks, query) ordered by rank
			// The final SELECT labels slope > -0.6 'flat-tail', slope > -1.2 'balanced',
			// anything else 'head-heavy'.
			// The three `?` placeholders bind, in order, to startDate, endDate and
			// minQueryImpressions; minQueries and limit are inlined through Number().
			sql: `
WITH page_queries AS (
SELECT
url AS page,
query,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.clicks} AS clicks
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY url, query
HAVING SUM(impressions) >= ?
),
ranked AS (
SELECT
page, query, impressions, clicks,
ROW_NUMBER() OVER (PARTITION BY page ORDER BY impressions DESC, query ASC) AS rnk
FROM page_queries
),
log_space AS (
SELECT *,
LN(rnk) AS log_rank,
LN(impressions) AS log_impr
FROM ranked
),
fit AS (
SELECT
page,
COUNT(*) AS query_count,
SUM(impressions) AS total_impressions,
SUM(clicks) AS total_clicks,
REGR_SLOPE(log_impr, log_rank) AS slope,
REGR_INTERCEPT(log_impr, log_rank) AS intercept,
REGR_R2(log_impr, log_rank) AS r2,
MAX(impressions) AS head_impressions,
MAX(CASE WHEN rnk = 1 THEN impressions END) / NULLIF(SUM(impressions), 0) AS head_share
FROM log_space
GROUP BY page
HAVING COUNT(*) >= ${Number(minQueries)}
),
scatter AS (
SELECT
l.page,
to_json(list({
'rank': l.rnk,
'impressions': l.impressions,
'clicks': l.clicks,
'query': l.query
} ORDER BY l.rnk)) AS pointsJson
FROM log_space l
JOIN fit f USING (page)
GROUP BY l.page
)
SELECT
f.page,
f.query_count AS queryCount,
f.total_impressions AS totalImpressions,
f.total_clicks AS totalClicks,
f.slope AS slope,
f.intercept AS intercept,
f.r2 AS r2,
f.head_impressions AS headImpressions,
f.head_share AS headShare,
s.pointsJson AS pointsJson,
CASE
WHEN f.slope > -0.6 THEN 'flat-tail'
WHEN f.slope > -1.2 THEN 'balanced'
ELSE 'head-heavy'
END AS fingerprint
FROM fit f
LEFT JOIN scatter s USING (page)
ORDER BY f.total_impressions DESC
LIMIT ${Number(limit)}
`,
			params: [
				startDate,
				endDate,
				minQueryImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Shapes the SQL rows into per-page results plus summary metadata.
	 * @param {unknown} rows - query rows; any non-array value is treated as empty
	 * @returns {{results: Array<object>, meta: {total: number, fingerprints: object, avgSlope: number}}}
	 */
	reduceSql(rows) {
		const results = (Array.isArray(rows) ? rows : []).map((r) => ({
			page: str$6(r.page),
			queryCount: num$1(r.queryCount),
			totalImpressions: num$1(r.totalImpressions),
			totalClicks: num$1(r.totalClicks),
			slope: num$1(r.slope),
			intercept: num$1(r.intercept),
			r2: num$1(r.r2),
			headImpressions: num$1(r.headImpressions),
			headShare: num$1(r.headShare),
			fingerprint: str$6(r.fingerprint),
			// The full scatter is thinned before it is returned to the caller.
			points: downsampleLogRank(parseJsonList$4(r.pointsJson))
		}));
		// Keys mirror the three labels the SQL CASE expression can emit.
		const counts = {
			"flat-tail": 0,
			"balanced": 0,
			"head-heavy": 0
		};
		for (const r of results) counts[r.fingerprint]++;
		return {
			results,
			meta: {
				total: results.length,
				fingerprints: counts,
				// Mean slope over the returned pages; 0 when there are none.
				avgSlope: results.length > 0 ? results.reduce((s, r) => s + r.slope, 0) / results.length : 0
			}
		};
	}
});
|
|
4209
|
+
/**
 * String coercion that turns `null`/`undefined` into the empty string.
 * @param {unknown} v - value to stringify
 * @returns {string} "" for nullish input, otherwise `String(v)`
 */
function str$5(v) {
	return String(v ?? "");
}
|
|
4212
|
+
/**
 * Analyzer "position-distribution": for each date, counts rows of the
 * "keywords" table falling into four average-position buckets
 * (<= 3, 3-10, 10-20, > 20).
 */
const positionDistributionAnalyzer = defineAnalyzer$1({
	id: "position-distribution",
	/**
	 * Builds the SQL text, positional parameters and partition list.
	 * @param {object} params - the date range is taken from `periodOf$1(params)`
	 * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf$1(params);
		return {
			// `pos` derives avg_pos per row as sum_position / impressions + 1, using only
			// rows with impressions > 0. The outer query counts rows per bucket per date.
			// NOTE(review): counts are row counts; presumably one row per keyword per
			// day — confirm against the table layout.
			sql: `
WITH pos AS (
SELECT
date,
(sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ? AND impressions > 0
)
SELECT
date,
CAST(SUM(CASE WHEN avg_pos <= 3 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_1_3,
CAST(SUM(CASE WHEN avg_pos > 3 AND avg_pos <= 10 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_4_10,
CAST(SUM(CASE WHEN avg_pos > 10 AND avg_pos <= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_11_20,
CAST(SUM(CASE WHEN avg_pos > 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_20_plus,
CAST(COUNT(*) AS DOUBLE) AS total
FROM pos
GROUP BY date
ORDER BY date ASC
`,
			params: [startDate, endDate],
			current: {
				table: "keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Converts the per-date rows to plain numbers and echoes the period.
	 * @param {unknown} rows - query rows; any non-array value is treated as empty
	 * @param {object} params - same params passed to `buildSql`
	 * @returns {{results: Array<object>, meta: {total: number, startDate: *, endDate: *}}}
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const { startDate, endDate } = periodOf$1(params);
		return {
			results: arr.map((r) => ({
				// The SQL returns `date` unformatted, so the string form depends on how
				// the driver materializes it.
				date: str$5(r.date),
				pos_1_3: num$1(r.pos_1_3),
				pos_4_10: num$1(r.pos_4_10),
				pos_11_20: num$1(r.pos_11_20),
				pos_20_plus: num$1(r.pos_20_plus),
				total: num$1(r.total)
			})),
			meta: {
				// Number of dates returned, not the sum of the per-date totals.
				total: arr.length,
				startDate,
				endDate
			}
		};
	}
});
|
|
4263
|
+
/**
 * Returns the string form of a value, with nullish input mapped to "".
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$4(v) {
	const isMissing = v === null || v === undefined;
	return isMissing ? "" : String(v);
}
|
|
4266
|
+
/**
 * Analyzer "position-volatility": scores each page/day by how spread out and
 * how shifted its query positions are, then returns the daily series for the
 * pages with the highest average score.
 *
 * Reads the "page_keywords" table for the period returned by `periodOf$1`.
 */
const positionVolatilityAnalyzer = defineAnalyzer$1({
	id: "position-volatility",
	/**
	 * Builds the SQL text, positional parameters and partition list.
	 * @param {object} params - reads `topN` (default 30), `minImpressions`
	 *   (default 10) and `minWeeksWithData` (default 7)
	 * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf$1(params);
		const topN = params.topN ?? 30;
		const minDayImpressions = params.minImpressions ?? 10;
		// NOTE(review): the parameter is named in weeks but is compared against a
		// count of page/day rows (HAVING COUNT(*) in `top_pages`) — confirm the unit.
		const minDays = params.minWeeksWithData ?? 7;
		return {
			// CTE overview:
			//   query_day  - impressions and position per (url, query, date)
			//   daily      - per page/day: impression-weighted avg position, population
			//                stddev of query positions, best and worst position; days
			//                below the `?` impression floor are dropped
			//   with_shift - absolute change in avg_position versus the previous retained
			//                row for the page (0 for the first row); the previous row is
			//                not necessarily the previous calendar day
			//   scored     - volatility = pos_stddev + dod_shift
			//   top_pages  - pages with at least `?` rows, top `topN` by average volatility
			// Placeholders bind, in order, to startDate, endDate, minDayImpressions, minDays.
			sql: `
WITH query_day AS (
SELECT
url AS page,
query,
date,
${METRIC_EXPR.impressions} AS q_impressions,
${METRIC_EXPR.position} AS q_position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY url, query, date
HAVING SUM(impressions) >= 1
),
daily AS (
SELECT
page, date,
COUNT(*) AS query_count,
SUM(q_impressions) AS day_impressions,
SUM(q_position * q_impressions) / NULLIF(SUM(q_impressions), 0) AS avg_position,
COALESCE(STDDEV_POP(q_position), 0.0) AS pos_stddev,
MIN(q_position) AS best_position,
MAX(q_position) AS worst_position
FROM query_day
GROUP BY page, date
HAVING SUM(q_impressions) >= ?
),
with_shift AS (
SELECT *,
LAG(avg_position) OVER (PARTITION BY page ORDER BY date) AS prev_position,
COALESCE(
ABS(avg_position - LAG(avg_position) OVER (PARTITION BY page ORDER BY date)),
0.0
) AS dod_shift
FROM daily
),
scored AS (
SELECT *,
pos_stddev + dod_shift AS volatility
FROM with_shift
),
top_pages AS (
SELECT page,
SUM(day_impressions) AS total_impressions,
AVG(volatility) AS avg_volatility,
MAX(volatility) AS peak_volatility,
COUNT(*) AS days_with_data
FROM scored
GROUP BY page
HAVING COUNT(*) >= ?
ORDER BY avg_volatility DESC
LIMIT ${Number(topN)}
)
SELECT
s.page,
strftime(s.date, '%Y-%m-%d') AS date,
s.query_count AS queryCount,
s.day_impressions AS dayImpressions,
s.avg_position AS avgPosition,
s.pos_stddev AS posStddev,
s.best_position AS bestPosition,
s.worst_position AS worstPosition,
s.dod_shift AS dodShift,
s.volatility AS volatility,
t.avg_volatility AS pageAvgVolatility,
t.peak_volatility AS pagePeakVolatility,
t.total_impressions AS pageTotalImpressions
FROM scored s
JOIN top_pages t USING (page)
ORDER BY t.avg_volatility DESC, s.date ASC
`,
			params: [
				startDate,
				endDate,
				minDayImpressions,
				minDays
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Groups the flat page/day rows into one entry per page.
	 * @param {unknown} rows - query rows; any non-array value is treated as empty
	 * @returns {{results: Array<object>, meta: {total: number, dates: string[], maxVolatility: number}}}
	 */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		const byPage = /* @__PURE__ */ new Map();
		const allDates = /* @__PURE__ */ new Set();
		for (const r of arr) {
			const page = str$4(r.page);
			const date = str$4(r.date);
			allDates.add(date);
			// Page-level aggregates repeat on every row; the first row seen seeds them.
			const entry = byPage.get(page) ?? {
				page,
				avgVolatility: num$1(r.pageAvgVolatility),
				peakVolatility: num$1(r.pagePeakVolatility),
				totalImpressions: num$1(r.pageTotalImpressions),
				days: []
			};
			entry.days.push({
				date,
				queryCount: num$1(r.queryCount),
				dayImpressions: num$1(r.dayImpressions),
				avgPosition: num$1(r.avgPosition),
				posStddev: num$1(r.posStddev),
				bestPosition: num$1(r.bestPosition),
				worstPosition: num$1(r.worstPosition),
				dodShift: num$1(r.dodShift),
				volatility: num$1(r.volatility)
			});
			byPage.set(page, entry);
		}
		const pages = [...byPage.values()].sort((a, b) => b.avgVolatility - a.avgVolatility);
		// Dates are '%Y-%m-%d' strings from the SQL, so the default sort is chronological.
		const dates = [...allDates].sort();
		const maxVolatility = pages.reduce((m, p) => Math.max(m, p.peakVolatility), 0);
		return {
			results: pages,
			meta: {
				total: pages.length,
				dates,
				maxVolatility
			}
		};
	}
});
|
|
4399
|
+
/**
 * Stringifies a value; `null` and `undefined` become "".
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$3(v) {
	if (v != null) return String(v);
	return "";
}
|
|
4402
|
+
/**
 * Returns `v` as an array: arrays pass through, non-empty strings are parsed
 * as JSON (non-array results are discarded), and everything else gives [].
 * @param {unknown} v - array, JSON string, or other value
 * @returns {Array}
 * @throws {SyntaxError} when `v` is a non-empty string that is not valid JSON
 */
function parseJsonList$3(v) {
	if (Array.isArray(v)) return v;
	const isNonEmptyText = typeof v === "string" && v.length > 0;
	const candidate = isNonEmptyText ? JSON.parse(v) : null;
	return Array.isArray(candidate) ? candidate : [];
}
|
|
4410
|
+
/**
 * Analyzer "query-migration": finds (page, query) pairs present in the
 * previous period but not the current one ("lost"), pairs present only in the
 * current period ("gained"), and links a lost pair to a gained pair on a
 * different page when the queries are equal or within a small edit distance.
 * Linked pairs are aggregated into source-page -> target-page edges.
 *
 * Reads the "page_keywords" table for both periods.
 */
const queryMigrationAnalyzer = defineAnalyzer$1({
	id: "query-migration",
	/**
	 * Builds the SQL text, positional parameters and partition lists.
	 * @param {object} params - reads `prevStartDate`, `prevEndDate`,
	 *   `minImpressions` (default 20) and `limit` (default 200); the current
	 *   period comes from `periodOf$1(params)`
	 * @returns {{sql: string, params: Array, current: object, previous: object}}
	 */
	buildSql(params) {
		const cur = periodOf$1(params);
		let prevStart = params.prevStartDate;
		let prevEnd = params.prevEndDate;
		// If either bound is missing, both are derived: the previous period ends
		// one MS_PER_DAY before the current start and covers the same time span.
		// (reduceSql repeats this derivation and must stay in sync with it.)
		if (prevStart == null || prevEnd == null) {
			const curStartMs = new Date(cur.startDate).getTime();
			const span = new Date(cur.endDate).getTime() - curStartMs;
			prevEnd = toIsoDate(new Date(curStartMs - MS_PER_DAY));
			prevStart = toIsoDate(new Date(curStartMs - MS_PER_DAY - span));
		}
		const minImpressions = params.minImpressions ?? 20;
		const limit = params.limit ?? 200;
		// Used both as the length-difference pre-filter and as the edit-distance cap.
		const maxLevenshtein = 2;
		return {
			// CTE overview:
			//   cur / prev - metrics per (query, url) for each period, filtered by the impression floor
			//   lost       - prev pairs with no identical (page, query) pair in cur
			//   gained     - cur pairs with no identical (page, query) pair in prev
			//   matched    - lost x gained on different pages where the queries are equal
			//                ('exact') or within maxLevenshtein edits ('fuzzy');
			//                absorbed = LEAST of the two impression counts
			//   edges      - per (source_page, target_page): summed weight, counts, example list
			// Placeholders bind, in order, to cur.startDate, cur.endDate, minImpressions,
			// prevStart, prevEnd, minImpressions.
			// NOTE(review): {{FILES}} / {{FILES_PREV}} are presumably filled from the
			// `current` / `previous` descriptors below — confirm in the engine.
			sql: `
WITH cur AS (
SELECT query, url AS page,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
prev AS (
SELECT query, url AS page,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
lost AS (
SELECT p.page AS source_page, p.query AS source_query, p.impressions AS source_impressions
FROM prev p
LEFT JOIN cur c ON p.page = c.page AND p.query = c.query
WHERE c.query IS NULL
),
gained AS (
SELECT c.page AS target_page, c.query AS target_query, c.impressions AS target_impressions
FROM cur c
LEFT JOIN prev p ON p.page = c.page AND p.query = c.query
WHERE p.query IS NULL
),
matched AS (
SELECT
l.source_page, l.source_query, l.source_impressions,
g.target_page, g.target_query, g.target_impressions,
CASE
WHEN l.source_query = g.target_query THEN 'exact'
ELSE 'fuzzy'
END AS match_type,
LEAST(l.source_impressions, g.target_impressions) AS absorbed_impressions
FROM lost l
JOIN gained g
ON l.source_page <> g.target_page
AND ABS(LENGTH(l.source_query) - LENGTH(g.target_query)) <= ${maxLevenshtein}
AND (
l.source_query = g.target_query
OR levenshtein(l.source_query, g.target_query) <= ${maxLevenshtein}
)
),
edges AS (
SELECT
source_page, target_page,
SUM(absorbed_impressions) AS weight,
COUNT(*) AS query_count,
SUM(CASE WHEN match_type = 'exact' THEN 1 ELSE 0 END) AS exact_count,
to_json(list({
'sourceQuery': source_query,
'targetQuery': target_query,
'absorbed': absorbed_impressions,
'matchType': match_type
} ORDER BY absorbed_impressions DESC)) AS examplesJson
FROM matched
GROUP BY source_page, target_page
)
SELECT *
FROM edges
ORDER BY weight DESC
LIMIT ${Number(limit)}
`,
			params: [
				cur.startDate,
				cur.endDate,
				minImpressions,
				prevStart,
				prevEnd,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prevStart, prevEnd)
			}
		};
	},
	/**
	 * Shapes edge rows and derives per-page node totals.
	 * @param {unknown} rows - query rows; any non-array value is treated as empty
	 * @param {object} params - same params passed to `buildSql`
	 * @returns {{results: Array<object>, meta: {total: number, totalAbsorbed: number, period: object, nodes: Array<object>}}}
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const cur = periodOf$1(params);
		let prevStart = params.prevStartDate;
		let prevEnd = params.prevEndDate;
		// Same previous-period derivation as buildSql, repeated so that meta.period
		// reports the range that was actually queried.
		if (prevStart == null || prevEnd == null) {
			const curStartMs = new Date(cur.startDate).getTime();
			const span = new Date(cur.endDate).getTime() - curStartMs;
			prevEnd = toIsoDate(new Date(curStartMs - MS_PER_DAY));
			prevStart = toIsoDate(new Date(curStartMs - MS_PER_DAY - span));
		}
		const edges = arr.map((r) => ({
			sourcePage: str$3(r.source_page),
			targetPage: str$3(r.target_page),
			weight: num$1(r.weight),
			queryCount: num$1(r.query_count),
			exactCount: num$1(r.exact_count),
			// Every match is either exact or fuzzy, so fuzzy is the remainder.
			fuzzyCount: num$1(r.query_count) - num$1(r.exact_count),
			// Only the first 8 examples are kept; the SQL orders them by absorbed DESC.
			examples: parseJsonList$3(r.examplesJson).slice(0, 8).map((e) => ({
				sourceQuery: str$3(e.sourceQuery),
				targetQuery: str$3(e.targetQuery),
				absorbed: num$1(e.absorbed),
				matchType: str$3(e.matchType)
			}))
		}));
		// Accumulate outgoing/incoming edge weight per page URL.
		const nodeAgg = /* @__PURE__ */ new Map();
		for (const e of edges) {
			const src = nodeAgg.get(e.sourcePage) ?? {
				url: e.sourcePage,
				outgoing: 0,
				incoming: 0
			};
			src.outgoing += e.weight;
			nodeAgg.set(e.sourcePage, src);
			const tgt = nodeAgg.get(e.targetPage) ?? {
				url: e.targetPage,
				outgoing: 0,
				incoming: 0
			};
			tgt.incoming += e.weight;
			nodeAgg.set(e.targetPage, tgt);
		}
		const nodes = [...nodeAgg.values()];
		const totalAbsorbed = edges.reduce((s, e) => s + e.weight, 0);
		return {
			results: edges,
			meta: {
				total: edges.length,
				totalAbsorbed,
				period: {
					current: cur,
					previous: {
						startDate: prevStart,
						endDate: prevEnd
					}
				},
				nodes
			}
		};
	}
});
|
|
4580
|
+
/**
 * Converts a value to text, treating `null`/`undefined` as the empty string.
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$2(v) {
	const present = v ?? "";
	return String(present);
}
|
|
4583
|
+
/**
 * Interprets a loosely-typed flag as a boolean.
 * Only the exact values `true`, `1` and "true" count as true.
 * @param {unknown} v - flag value
 * @returns {boolean}
 */
function bool(v) {
	switch (v) {
		case true:
		case 1:
		case "true": return true;
		default: return false;
	}
}
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
|
|
2585
|
-
|
|
4586
|
+
/**
 * Coerces a JSON-list cell to an array.
 *
 * An array input is returned unchanged. A non-empty string is parsed with
 * `JSON.parse` and returned if the result is an array. All other inputs, and
 * parsed values that are not arrays, produce [].
 * @param {unknown} v - array, JSON string, or other value
 * @returns {Array}
 * @throws {SyntaxError} when `v` is a non-empty string that is not valid JSON
 */
function parseJsonList$2(v) {
	if (Array.isArray(v)) return v;
	let list = [];
	if (typeof v === "string" && v.length > 0) {
		const parsed = JSON.parse(v);
		if (Array.isArray(parsed)) list = parsed;
	}
	return list;
}
|
|
2587
|
-
const
|
|
2588
|
-
"
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2604
|
-
|
|
2605
|
-
|
|
2606
|
-
|
|
2607
|
-
|
|
2608
|
-
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
4594
|
+
/**
 * Analyzer "stl-decompose": splits each (query, page) daily series into a
 * moving-average trend, a day-of-week seasonal component and a residual, and
 * flags days whose residual exceeds two population standard deviations.
 *
 * Reads the "page_keywords" table.
 */
const stlDecomposeAnalyzer = defineAnalyzer$1({
	id: "stl-decompose",
	/**
	 * Builds the SQL text, positional parameters and partition list.
	 * @param {object} params - reads `endDate` (default `defaultEndDate()`),
	 *   `startDate` (default `daysAgo(93)`), `minImpressions` (default 100),
	 *   `metric` ("clicks", otherwise "impressions") and `limit` (default 100)
	 * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
	 */
	buildSql(params) {
		const endDate = params.endDate ?? defaultEndDate();
		const startDate = params.startDate ?? daysAgo(93);
		const minImpressions = params.minImpressions ?? 100;
		// Series with fewer daily rows than this are excluded (HAVING in `entity_stats`).
		const minDays = 21;
		// Restricted to two known column names because it is interpolated into the SQL.
		const metric = params.metric === "clicks" ? "clicks" : "impressions";
		const limit = params.limit ?? 100;
		return {
			// CTE overview:
			//   daily             - clicks, impressions and `observed` (the chosen metric) per (query, url, date)
			//   entity_stats      - series with >= minDays rows and >= `?` total impressions
			//   filtered          - daily rows restricted to those series
			//   trended           - trend = mean of `observed` over a 7-row centred window,
			//                       NULL unless the window holds exactly 7 rows
			//   detrended         - observed - trend, plus dayofweek(date)
			//   seasonal_raw      - mean detrended value per day of week within a series
			//   seasonal_centered - seasonal = seasonal_dow minus its series-wide mean
			//   residualed        - observed - trend - seasonal (NULL if either part is NULL)
			//   scored            - anomaly when |residual| > 2 * STDDEV_POP(residual) of the series
			//   per_entity        - variances, anomaly count and regression slope per series
			//   series            - per-series JSON list of daily points ordered by date
			// The trend window is defined in ROWS, so gaps in dates are not accounted for.
			// Placeholders bind, in order, to startDate, endDate, minImpressions.
			sql: `
WITH daily AS (
SELECT
query,
url AS page,
date,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
CAST(SUM(${metric}) AS DOUBLE) AS observed
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url, date
),
entity_stats AS (
SELECT query, page,
COUNT(*) AS days,
SUM(impressions) AS total_impressions
FROM daily
GROUP BY query, page
HAVING COUNT(*) >= ${Number(minDays)}
AND SUM(impressions) >= ?
),
filtered AS (
SELECT d.*
FROM daily d
JOIN entity_stats e USING (query, page)
),
trended AS (
SELECT *,
CASE
WHEN COUNT(*) OVER w = 7
THEN AVG(observed) OVER w
ELSE NULL
END AS trend
FROM filtered
WINDOW w AS (
PARTITION BY query, page
ORDER BY date
ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
)
),
detrended AS (
SELECT *,
observed - trend AS detrended,
dayofweek(date) AS dow
FROM trended
),
seasonal_raw AS (
SELECT *,
AVG(detrended) OVER (PARTITION BY query, page, dow) AS seasonal_dow
FROM detrended
),
seasonal_centered AS (
SELECT *,
seasonal_dow - AVG(seasonal_dow) OVER (PARTITION BY query, page) AS seasonal
FROM seasonal_raw
),
residualed AS (
SELECT *,
CASE
WHEN trend IS NULL OR seasonal IS NULL THEN NULL
ELSE observed - trend - seasonal
END AS residual
FROM seasonal_centered
),
scored AS (
SELECT *,
STDDEV_POP(residual) OVER (PARTITION BY query, page) AS resid_std,
CASE
WHEN residual IS NOT NULL
AND STDDEV_POP(residual) OVER (PARTITION BY query, page) > 0
AND ABS(residual) > 2.0 * STDDEV_POP(residual) OVER (PARTITION BY query, page)
THEN true ELSE false
END AS anomaly
FROM residualed
),
per_entity AS (
SELECT query, page,
COUNT(*) AS days,
SUM(impressions) AS total_impressions,
VAR_POP(detrended) AS var_detrended,
VAR_POP(seasonal) AS var_seasonal,
VAR_POP(residual) AS var_residual,
COUNT(*) FILTER (WHERE anomaly) AS residual_anomalies,
REGR_SLOPE(observed, epoch(date) / 86400.0) AS trend_slope
FROM scored
GROUP BY query, page
),
series AS (
SELECT query, page,
to_json(list({
'date': strftime(date, '%Y-%m-%d'),
'observed': observed,
'trend': trend,
'seasonal': seasonal,
'residual': residual,
'anomaly': anomaly
} ORDER BY date)) AS seriesJson
FROM scored
GROUP BY query, page
)
SELECT
e.query AS keyword,
e.page,
CAST(e.total_impressions AS DOUBLE) AS totalImpressions,
CAST(e.days AS DOUBLE) AS days,
CASE
WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
ELSE LEAST(e.var_seasonal / NULLIF(e.var_detrended, 0), 1.0)
END AS seasonalStrength,
CASE
WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
ELSE GREATEST(0.0, 1.0 - e.var_residual / NULLIF(e.var_detrended, 0))
END AS trendStrength,
CAST(e.residual_anomalies AS DOUBLE) AS residualAnomalies,
COALESCE(e.trend_slope, 0.0) AS trendSlope,
s.seriesJson
FROM per_entity e
LEFT JOIN series s USING (query, page)
ORDER BY seasonalStrength DESC, ABS(COALESCE(e.trend_slope, 0.0)) DESC
LIMIT ${Number(limit)}
`,
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Converts rows into per-series results with decoded daily points.
	 * @param {unknown} rows - query rows; any non-array value is treated as empty
	 * @param {object} params - same params passed to `buildSql`; only `metric` is read
	 * @returns {{results: Array<object>, meta: {total: number, metric: string, avgSeasonalStrength: number}}}
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		// Mirrors the metric selection in buildSql so meta reports what was queried.
		const metric = params.metric === "clicks" ? "clicks" : "impressions";
		const results = arr.map((r) => ({
			keyword: str$2(r.keyword),
			page: str$2(r.page),
			totalImpressions: num$1(r.totalImpressions),
			days: num$1(r.days),
			seasonalStrength: num$1(r.seasonalStrength),
			trendStrength: num$1(r.trendStrength),
			residualAnomalies: num$1(r.residualAnomalies),
			trendSlope: num$1(r.trendSlope),
			series: parseJsonList$2(r.seriesJson).map((s) => ({
				date: str$2(s.date),
				observed: num$1(s.observed),
				// trend/seasonal/residual stay null where the SQL produced NULL
				// (for example at the edges of the moving-average window).
				trend: s.trend == null ? null : num$1(s.trend),
				seasonal: s.seasonal == null ? null : num$1(s.seasonal),
				residual: s.residual == null ? null : num$1(s.residual),
				anomaly: bool(s.anomaly)
			}))
		}));
		return {
			results,
			meta: {
				total: results.length,
				metric,
				// Mean seasonal strength over the returned series; 0 when there are none.
				avgSeasonalStrength: results.length > 0 ? results.reduce((a, r) => a + r.seasonalStrength, 0) / results.length : 0
			}
		};
	}
});
|
|
4770
|
+
/**
 * Safe string conversion: nullish values yield "", everything else `String(v)`.
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$1(v) {
	if (typeof v === "undefined" || v === null) {
		return "";
	}
	return String(v);
}
|
|
2618
|
-
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
|
|
2623
|
-
}
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
|
|
2643
|
-
|
|
2644
|
-
|
|
2645
|
-
|
|
2646
|
-
|
|
4773
|
+
/**
 * Reads a list from a value that may already be an array or may be a JSON
 * string encoding one. Anything that does not resolve to an array gives [].
 * @param {unknown} v - array, JSON string, or other value
 * @returns {Array}
 * @throws {SyntaxError} when `v` is a non-empty string that is not valid JSON
 */
function parseJsonList$1(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string") return [];
	if (v.length === 0) return [];
	const result = JSON.parse(v);
	return Array.isArray(result) ? result : [];
}
|
|
4781
|
+
/**
 * SQL analyzer "survival": measures how long (query, url) pairs stay in the
 * top 10 and summarises that as per-cohort survival curves.
 *
 * `buildSql` produces one parameterised statement over the `page_keywords`
 * table; `reduceSql` converts the returned rows into typed results plus
 * summary metadata.
 */
const survivalAnalyzer = defineAnalyzer$1({
	id: "survival",
	/**
	 * Build the survival query.
	 *
	 * CTE pipeline, in order:
	 * - `daily`: one row per (query, url, date) inside the date window, skipping
	 *   null/empty query or url, kept only when `SUM(impressions)` reaches
	 *   `minImpressions`.
	 * - `classified`: flags rows whose `day_position` is <= 10 as `in_top10`.
	 * - `transitions`: marks `is_entry = 1` when a row is in the top 10 and the
	 *   previous row for the same (query, url) is missing or was not in the
	 *   top 10. LAG compares against the previous *available row*, so dates
	 *   absent from `daily` do not by themselves end a run.
	 * - `run_ids`: over top-10 rows only, a running sum of `is_entry` numbers
	 *   each consecutive run.
	 * - `window_bounds`: min/max date observed in `daily`.
	 * - `episodes_raw`: one row per run with entry/exit dates and
	 *   `tenure = DATEDIFF('day', entry, exit) + 1`.
	 * - `episodes`: adds `censored` (run still alive within 2 days of the
	 *   window end) and `cohort` (first URL path segment, or 'home').
	 * - `episodes_all`: every episode twice, once under its cohort and once
	 *   under the synthetic '__all__' cohort.
	 * - `cohort_totals`: per-cohort episode count.
	 *   NOTE(review): no later CTE or the final SELECT references it.
	 * - `events`: per (cohort, tenure), `d_t` = non-censored endings and
	 *   `n_ending_at_t` = all endings.
	 * - `km`: `at_risk` = episodes whose tenure is >= the current tenure.
	 * - `km_surv`: product-limit (Kaplan-Meier style) survival, computed as
	 *   EXP(SUM(LN(...))) with each factor floored at 1e-9 to avoid LN(0).
	 * - `curve_agg`: the curve as a JSON list per cohort.
	 * - `cohort_stats`: episode count and share of censored episodes.
	 *
	 * @param {object} params - Optional `startDate`, `endDate`, `minImpressions`.
	 * @returns {object} `{ sql, params, current }` where `params` binds the
	 *   three `?` placeholders in order (start date, end date, min impressions).
	 */
	buildSql(params) {
		const endDate = params.endDate ?? defaultEndDate();
		// NOTE(review): the default start comes from daysAgo(183), not from endDate.
		// Presumably daysAgo is relative to "today"; if so, a caller passing only
		// an old endDate gets a start after the end. Confirm against daysAgo.
		const startDate = params.startDate ?? daysAgo(183);
		const minImpressions = params.minImpressions ?? 5;
		return {
			sql: `
      WITH daily AS (
        SELECT
          query,
          url,
          date,
          ${METRIC_EXPR.clicks} AS day_clicks,
          ${METRIC_EXPR.impressions} AS day_impressions,
          ${METRIC_EXPR.position} AS day_position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
        HAVING SUM(impressions) >= ?
      ),
      classified AS (
        SELECT *,
          (day_position <= 10) AS in_top10
        FROM daily
      ),
      transitions AS (
        SELECT *,
          CASE
            WHEN in_top10 AND (LAG(in_top10) OVER w IS NULL OR NOT LAG(in_top10) OVER w)
            THEN 1 ELSE 0
          END AS is_entry
        FROM classified
        WINDOW w AS (PARTITION BY query, url ORDER BY date)
      ),
      run_ids AS (
        SELECT *,
          SUM(is_entry) OVER (PARTITION BY query, url ORDER BY date) AS run_id
        FROM transitions
        WHERE in_top10
      ),
      window_bounds AS (
        SELECT MIN(date) AS window_start, MAX(date) AS window_end FROM daily
      ),
      episodes_raw AS (
        SELECT
          query, url, run_id,
          MIN(date) AS entry_date,
          MAX(date) AS exit_date,
          DATEDIFF('day', MIN(date), MAX(date)) + 1 AS tenure
        FROM run_ids
        GROUP BY query, url, run_id
      ),
      episodes AS (
        SELECT
          e.query, e.url, e.run_id, e.entry_date, e.exit_date, e.tenure,
          (e.exit_date >= wb.window_end - INTERVAL 2 DAY) AS censored,
          CASE
            WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?(/[^/?#]*)', 1) = '/' OR e.url = '/'
              THEN 'home'
            WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1) = ''
              THEN 'home'
            ELSE regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1)
          END AS cohort
        FROM episodes_raw e
        CROSS JOIN window_bounds wb
      ),
      episodes_all AS (
        SELECT query, url, tenure, censored, cohort FROM episodes
        UNION ALL
        SELECT query, url, tenure, censored, '__all__' AS cohort FROM episodes
      ),
      cohort_totals AS (
        SELECT cohort, COUNT(*) AS n_total
        FROM episodes_all
        GROUP BY cohort
      ),
      events AS (
        SELECT
          cohort,
          tenure,
          COUNT(*) FILTER (WHERE NOT censored) AS d_t,
          COUNT(*) AS n_ending_at_t
        FROM episodes_all
        GROUP BY cohort, tenure
      ),
      km AS (
        SELECT
          e.cohort,
          e.tenure,
          e.d_t,
          e.n_ending_at_t,
          SUM(e.n_ending_at_t) OVER (PARTITION BY e.cohort ORDER BY e.tenure DESC
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS at_risk
        FROM events e
      ),
      km_surv AS (
        SELECT
          cohort, tenure, d_t, at_risk,
          EXP(SUM(LN(GREATEST(1.0 - CAST(d_t AS DOUBLE) / NULLIF(at_risk, 0), 1e-9)))
            OVER (PARTITION BY cohort ORDER BY tenure
              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) AS survival
        FROM km
      ),
      curve_agg AS (
        SELECT
          cohort,
          to_json(list({
            'tenure': tenure,
            'survival': survival,
            'atRisk': at_risk,
            'events': d_t
          } ORDER BY tenure)) AS curveJson
        FROM km_surv
        GROUP BY cohort
      ),
      cohort_stats AS (
        SELECT
          ea.cohort,
          COUNT(*) AS episode_count,
          AVG(CASE WHEN ea.censored THEN 1.0 ELSE 0.0 END) AS censoring_rate
        FROM episodes_all ea
        GROUP BY ea.cohort
      )
      SELECT
        cs.cohort,
        cs.episode_count AS episodeCount,
        cs.censoring_rate AS censoringRate,
        ca.curveJson
      FROM cohort_stats cs
      LEFT JOIN curve_agg ca USING (cohort)
      ORDER BY cs.cohort
    `,
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Turn the SQL rows into per-cohort results.
	 *
	 * @param {unknown} rows - Rows from the query; anything that is not an
	 *   array is treated as an empty result.
	 * @param {object} params - The same params passed to `buildSql`; only the
	 *   dates are read, to report the window length.
	 * @returns {object} `{ results, meta }` where each result carries `cohort`,
	 *   `episodeCount`, `censoringRate`, `medianTenure` and `curve`.
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const endDate = params.endDate ?? defaultEndDate();
		const startDate = params.startDate ?? daysAgo(183);
		// Inclusive day count between the two dates (hence the + 1).
		const windowDays = Math.round((new Date(endDate).getTime() - new Date(startDate).getTime()) / MS_PER_DAY) + 1;
		const results = arr.map((r) => {
			const curve = parseJsonList$1(r.curveJson).map((p) => ({
				tenure: num$1(p.tenure),
				survival: num$1(p.survival),
				atRisk: num$1(p.atRisk),
				events: num$1(p.events)
			}));
			// Median tenure = where the curve first reaches 0.5 or below.
			let medianTenure = 0;
			for (let i = 0; i < curve.length; i++) {
				const cur = curve[i];
				if (cur.survival <= .5) {
					if (i === 0) medianTenure = cur.tenure;
					else {
						// Linear interpolation between the previous point and this one.
						const prev = curve[i - 1];
						const span = prev.survival - cur.survival;
						const frac = span > 0 ? (prev.survival - .5) / span : 0;
						medianTenure = prev.tenure + frac * (cur.tenure - prev.tenure);
					}
					break;
				}
			}
			// The curve never dropped to 0.5: report the longest observed tenure.
			const last = curve[curve.length - 1];
			if (medianTenure === 0 && last && last.survival > .5) medianTenure = last.tenure;
			return {
				cohort: str$1(r.cohort),
				episodeCount: num$1(r.episodeCount),
				censoringRate: num$1(r.censoringRate),
				medianTenure,
				curve
			};
		});
		return {
			results,
			meta: {
				// '__all__' is the synthetic cohort holding every episode once.
				totalEpisodes: results.find((r) => r.cohort === "__all__")?.episodeCount ?? 0,
				cohortCount: results.filter((r) => r.cohort !== "__all__").length,
				windowDays
			}
		};
	}
});
|
|
4973
|
+
/**
 * Stringify a value, treating `null` and `undefined` as the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` when `v` is nullish, otherwise `String(v)`.
 */
function str(v) {
	if (v === null || v === void 0) return "";
	return String(v);
}
|
|
2648
|
-
function
|
|
2649
|
-
return
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
]
|
|
4976
|
+
/**
 * Normalise a value that should hold a JSON list into a real array.
 *
 * Arrays pass through untouched. Non-empty strings are parsed as JSON and
 * accepted only when the parsed value is an array. Any other input yields
 * `[]`, including malformed JSON.
 *
 * @param {unknown} v - An array, a JSON-encoded array string, or anything else.
 * @returns {Array} The list, or an empty array when no list can be recovered.
 */
function parseJsonList(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	let parsed;
	try {
		parsed = JSON.parse(v);
	} catch (err) {
		// Degrade to an empty list, as for every other unusable input, instead
		// of letting one corrupt row abort the whole reduce step.
		if (err instanceof SyntaxError) return [];
		throw err;
	}
	return Array.isArray(parsed) ? parsed : [];
}
|
|
2656
|
-
const
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
4984
|
+
/**
 * SQL analyzer "trends": buckets clicks/impressions by week per page or per
 * query and labels each entity as accelerating, growing, steady, declining
 * or cratering.
 */
const trendsAnalyzer = defineAnalyzer$1({
	id: "trends",
	/**
	 * Build the weekly trend query.
	 *
	 * CTE pipeline, in order:
	 * - `bucketed`: one row per (entity, week), where week is
	 *   `date_trunc('week', date)` and entity is `query` or `url`.
	 * - `with_meta`: adds a zero-based `week_idx`, the entity's `n_weeks`, and
	 *   `is_first_half` (`week_idx < n_weeks / 2`).
	 * - `agg`: per-entity totals, a regression slope of clicks over `week_idx`
	 *   (0.0 when undefined), first/second half click sums, average position
	 *   and the weekly series as JSON; filtered by minimum impressions and
	 *   minimum weeks with data.
	 * - `classified`: `growthRatio` = second-half / first-half clicks, with
	 *   10.0 when only the second half has clicks and 1.0 when neither does.
	 * The final SELECT derives the `trend` label and orders rows accelerating,
	 * cratering, growing, declining, steady, then by distance of `growthRatio`
	 * from 1 and by total clicks.
	 *
	 * @param {object} params - Optional `weeks`, `startDate`, `endDate`,
	 *   `minImpressions`, `minWeeksWithData`, `limit`, `dimension`.
	 * @returns {object} `{ sql, params, current }`; `params` binds the four `?`
	 *   placeholders in order.
	 */
	buildSql(params) {
		const weeks = params.weeks ?? 28;
		// `||` (not `??`) so an empty-string date also falls back to the default.
		const endDate = params.endDate || defaultEndDate();
		// Default start is (weeks * 7 - 1) days before endDate, i.e. an inclusive
		// span of weeks * 7 days.
		const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
		const minImpressions = params.minImpressions ?? 100;
		const minWeeksWithData = params.minWeeksWithData ?? Math.max(2, Math.floor(weeks / 4));
		const limit = params.limit ?? 500;
		// Anything other than "keywords" is treated as "pages".
		const dim = params.dimension === "keywords" ? "keywords" : "pages";
		// Same value as `dim`; names the table the partitions are read from.
		const table = dim === "keywords" ? "keywords" : "pages";
		// NOTE(review): `COUNT(*) ... / 2` below decides the half split. If `/` is
		// fractional division in the target engine (as in recent DuckDB), an odd
		// week count puts the middle week in the first half — confirm intended.
		// LIMIT is inlined rather than bound; Number() keeps it numeric.
		return {
			sql: `
      WITH bucketed AS (
        SELECT
          ${dim === "keywords" ? "query" : "url"} AS entity,
          date_trunc('week', CAST(date AS DATE)) AS week,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          SUM(sum_position) AS sum_position_sum
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY entity, week
      ),
      with_meta AS (
        SELECT
          entity, week, clicks, impressions, sum_position_sum,
          ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1 AS week_idx,
          COUNT(*) OVER (PARTITION BY entity) AS n_weeks,
          (ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1)
            < (COUNT(*) OVER (PARTITION BY entity) / 2) AS is_first_half
        FROM bucketed
      ),
      agg AS (
        SELECT
          entity,
          SUM(clicks) AS totalClicks,
          SUM(impressions) AS totalImpressions,
          any_value(n_weeks) AS weeksWithData,
          COALESCE(regr_slope(clicks, CAST(week_idx AS DOUBLE)), 0.0) AS slope,
          SUM(CASE WHEN is_first_half THEN clicks ELSE 0 END) AS firstHalfClicks,
          SUM(CASE WHEN NOT is_first_half THEN clicks ELSE 0 END) AS secondHalfClicks,
          SUM(sum_position_sum) / NULLIF(SUM(impressions), 0) + 1 AS avgPosition,
          to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
        FROM with_meta
        GROUP BY entity
        HAVING SUM(impressions) >= ? AND any_value(n_weeks) >= ?
      ),
      classified AS (
        SELECT
          *,
          CASE
            WHEN firstHalfClicks = 0 AND secondHalfClicks > 0 THEN 10.0
            WHEN firstHalfClicks = 0 THEN 1.0
            ELSE secondHalfClicks / firstHalfClicks
          END AS growthRatio
        FROM agg
      )
      SELECT
        entity,
        totalClicks,
        totalImpressions,
        weeksWithData,
        slope,
        growthRatio,
        avgPosition,
        CASE
          WHEN growthRatio >= 1.5 AND slope > 0 THEN 'accelerating'
          WHEN growthRatio >= 1.1 AND slope >= 0 THEN 'growing'
          WHEN growthRatio < 0.5 THEN 'cratering'
          WHEN growthRatio < 0.9 AND slope < 0 THEN 'declining'
          ELSE 'steady'
        END AS trend,
        seriesJson
      FROM classified
      ORDER BY
        CASE
          WHEN growthRatio >= 1.5 AND slope > 0 THEN 0
          WHEN growthRatio < 0.5 THEN 1
          WHEN growthRatio >= 1.1 AND slope >= 0 THEN 2
          WHEN growthRatio < 0.9 AND slope < 0 THEN 3
          ELSE 4
        END,
        ABS(growthRatio - 1) DESC,
        totalClicks DESC
      LIMIT ${Number(limit)}
    `,
			params: [
				startDate,
				endDate,
				minImpressions,
				minWeeksWithData
			],
			current: {
				table,
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Turn the SQL rows into typed trend results plus summary metadata.
	 *
	 * @param {unknown} rows - Rows from the query; anything that is not an
	 *   array is treated as an empty result.
	 * @param {object} params - The same params passed to `buildSql`; the
	 *   defaults are re-derived here so `meta` reports the effective window.
	 * @returns {object} `{ results, meta }`. Each result is keyed by `query`
	 *   (keywords) or `page` (pages); `meta.counts` tallies results per trend.
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const weeks = params.weeks ?? 28;
		const endDate = params.endDate || defaultEndDate();
		const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
		const dim = params.dimension === "keywords" ? "keywords" : "pages";
		const results = arr.map((r) => {
			const series = parseJsonList(r.seriesJson).map((s) => ({
				week: str(s.week),
				clicks: num$1(s.clicks),
				impressions: num$1(s.impressions)
			}));
			return {
				// The entity column is exposed under a dimension-specific key.
				[dim === "keywords" ? "query" : "page"]: str(r.entity),
				totalClicks: num$1(r.totalClicks),
				totalImpressions: num$1(r.totalImpressions),
				weeksWithData: num$1(r.weeksWithData),
				slope: num$1(r.slope),
				growthRatio: num$1(r.growthRatio),
				avgPosition: num$1(r.avgPosition),
				trend: str(r.trend),
				series
			};
		});
		// Pre-seeded so every known label appears even with a zero count.
		const counts = {
			accelerating: 0,
			growing: 0,
			steady: 0,
			declining: 0,
			cratering: 0
		};
		// `?? 0` also tallies any label that is not pre-seeded above.
		for (const r of results) counts[r.trend] = (counts[r.trend] ?? 0) + 1;
		return {
			results,
			meta: {
				total: results.length,
				dimension: dim,
				weeks: Number(weeks),
				startDate,
				endDate,
				counts
			}
		};
	}
});
|
|
5132
|
+
/**
 * The `.sql` member of every built-in analyzer, listed alphabetically by
 * analyzer name.
 */
const SQL_ANALYZERS = [
	bayesianCtrAnalyzer.sql,
	bipartitePagerankAnalyzer.sql,
	brandAnalyzer.sql,
	cannibalizationAnalyzer.sql,
	changePointAnalyzer.sql,
	clusteringAnalyzer.sql,
	concentrationAnalyzer.sql,
	contentVelocityAnalyzer.sql,
	ctrAnomalyAnalyzer.sql,
	ctrCurveAnalyzer.sql,
	darkTrafficAnalyzer.sql,
	dataDetailAnalyzer.sql,
	dataQueryAnalyzer.sql,
	decayAnalyzer.sql,
	deviceGapAnalyzer.sql,
	intentAtlasAnalyzer.sql,
	keywordBreadthAnalyzer.sql,
	longTailAnalyzer.sql,
	moversAnalyzer.sql,
	opportunityAnalyzer.sql,
	positionDistributionAnalyzer.sql,
	positionVolatilityAnalyzer.sql,
	queryMigrationAnalyzer.sql,
	seasonalityAnalyzer.sql,
	stlDecomposeAnalyzer.sql,
	strikingDistanceAnalyzer.sql,
	survivalAnalyzer.sql,
	trendsAnalyzer.sql,
	zeroClickAnalyzer.sql
];
|
|
5163
|
+
/**
 * Registry built from the bundled analyzers: `ROW_ANALYZERS` as the `rows`
 * set and `SQL_ANALYZERS` as the `sql` set.
 */
const defaultAnalyzerRegistry = createAnalyzerRegistry$1({
	rows: ROW_ANALYZERS,
	sql: SQL_ANALYZERS
});
|
|
5167
|
+
/**
 * Run an analyzer against an attached-table source using the default registry.
 *
 * @param {object} runner - Passed through to `createAttachedTableSource`.
 * @param {object} opts - Source options; an optional `signal` is checked
 *   before any work starts.
 * @param {object} params - Analyzer parameters forwarded to the runner.
 * @returns {Promise<unknown>} Whatever `runAnalyzerFromSource` resolves to.
 */
async function analyzeInBrowser(runner, opts, params) {
	const { signal } = opts;
	// Bail out before building the source if the caller already aborted.
	if (signal !== null && signal !== void 0) signal.throwIfAborted();
	const attachedSource = createAttachedTableSource(runner, opts);
	return runAnalyzerFromSource$1(attachedSource, params, defaultAnalyzerRegistry);
}
|
|
5171
|
+
/**
 * Create a query source that answers from the engine when the requested date
 * range is already synced, and otherwise proxies to the live source when the
 * query can be proxied.
 *
 * @param {object} opts
 * @param {object} opts.engine - Engine-backed source; supplies `capabilities`,
 *   `queryRows` and optionally `executeSql`.
 * @param {object} opts.live - Live source used as the fallback for `queryRows`.
 * @param {object} opts.site - Sync state; `oldestDateSynced` and
 *   `newestDateSynced` bound the range the engine can answer.
 * @returns {object} A source named "composite-engine-live".
 */
function createCompositeSource(opts) {
	const { engine, live, site } = opts;
	// True only when both ends of the query's date range are known and fall
	// inside the synced window. Dates are compared as strings — presumably ISO
	// YYYY-MM-DD, where lexicographic order matches date order; verify.
	function rangeCovered(state) {
		const { startDate, endDate } = extractDateRange(state.filter);
		return !!(startDate && endDate && site.oldestDateSynced && site.newestDateSynced && startDate >= site.oldestDateSynced && endDate <= site.newestDateSynced);
	}
	// Forward through a wrapper so `this` is still `engine` when the method
	// runs; copying the bare function would detach it. A missing executeSql is
	// passed through unchanged so the composite does not advertise SQL support
	// the engine lacks.
	const executeSql = typeof engine.executeSql === "function" ? (...args) => engine.executeSql(...args) : engine.executeSql;
	return {
		name: "composite-engine-live",
		capabilities: engine.capabilities,
		async queryRows(state) {
			if (!rangeCovered(state) && canProxyToGsc(state)) return live.queryRows(state);
			return engine.queryRows(state);
		},
		executeSql
	};
}
|
|
2689
|
-
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
|
|
2690
|
-
return runAnalyzerFromSource(createEngineQuerySource({
|
|
2691
|
-
engine: deps.engine,
|
|
2692
|
-
ctx
|
|
2693
|
-
}), params, registry);
|
|
2694
|
-
}
|
|
2695
5187
|
const IN_MEMORY_DEFAULT_CAPABILITIES = {
|
|
2696
5188
|
regex: true,
|
|
2697
5189
|
multiDataset: true,
|
|
@@ -2712,10 +5204,10 @@ function analyzeStrikingDistance(keywords, options = {}) {
|
|
|
2712
5204
|
const { minPosition = 4, maxPosition = 20, minImpressions = 100, maxCtr = .05, sortBy = "potentialClicks", sortOrder = "desc" } = options;
|
|
2713
5205
|
const results = [];
|
|
2714
5206
|
for (const row of keywords) {
|
|
2715
|
-
const position = num(row.position);
|
|
2716
|
-
const impressions = num(row.impressions);
|
|
2717
|
-
const ctr = num(row.ctr);
|
|
2718
|
-
const clicks = num(row.clicks);
|
|
5207
|
+
const position = num$1(row.position);
|
|
5208
|
+
const impressions = num$1(row.impressions);
|
|
5209
|
+
const ctr = num$1(row.ctr);
|
|
5210
|
+
const clicks = num$1(row.clicks);
|
|
2719
5211
|
if (position < minPosition || position > maxPosition) continue;
|
|
2720
5212
|
if (impressions < minImpressions) continue;
|
|
2721
5213
|
if (ctr > maxCtr) continue;
|
|
@@ -2732,31 +5224,14 @@ function analyzeStrikingDistance(keywords, options = {}) {
|
|
|
2732
5224
|
}
|
|
2733
5225
|
return sortResults(results, sortBy, sortOrder);
|
|
2734
5226
|
}
|
|
2735
|
-
function typedQuery(state) {
|
|
2736
|
-
return { state };
|
|
2737
|
-
}
|
|
2738
|
-
function isTypedQuery(value) {
|
|
2739
|
-
return "state" in value;
|
|
2740
|
-
}
|
|
2741
|
-
async function queryRows(source, query) {
|
|
2742
|
-
const state = isTypedQuery(query) ? query.state : query;
|
|
2743
|
-
return await source.queryRows(state);
|
|
2744
|
-
}
|
|
2745
|
-
async function queryComparisonRows(source, current, previous) {
|
|
2746
|
-
const [currentRows, previousRows] = await Promise.all([queryRows(source, current), queryRows(source, previous)]);
|
|
2747
|
-
return {
|
|
2748
|
-
current: currentRows,
|
|
2749
|
-
previous: previousRows
|
|
2750
|
-
};
|
|
2751
|
-
}
|
|
2752
5227
|
/**
 * Build the typed keywords query for a period.
 *
 * @param {object} period - Period passed to `keywordsQueryState`.
 * @param {number} limit - Row limit passed to `keywordsQueryState`.
 * @returns {object} The state wrapped by `typedQuery`.
 */
function keywordQuery(period, limit) {
	const state = keywordsQueryState(period, limit);
	return typedQuery$1(state);
}
|
|
2755
5230
|
/**
 * Build the typed pages query for a period.
 *
 * @param {object} period - Period passed to `pagesQueryState`.
 * @param {number} limit - Row limit passed to `pagesQueryState`.
 * @returns {object} The state wrapped by `typedQuery`.
 */
function pageQuery(period, limit) {
	const state = pagesQueryState(period, limit);
	return typedQuery$1(state);
}
|
|
2758
5233
|
/**
 * Build the typed dates query for a period.
 *
 * @param {object} period - Period passed to `datesQueryState`.
 * @param {number} limit - Row limit passed to `datesQueryState`.
 * @returns {object} The state wrapped by `typedQuery`.
 */
function dateQuery(period, limit) {
	const state = datesQueryState(period, limit);
	return typedQuery$1(state);
}
|
|
2761
5236
|
function definePortableAnalyzer(definition) {
|
|
2762
5237
|
return definition;
|
|
@@ -2764,7 +5239,7 @@ function definePortableAnalyzer(definition) {
|
|
|
2764
5239
|
/**
 * Resolve every query a portable analyzer declares, then run the analyzer on
 * the resulting rows.
 *
 * @param {object} source - Query source the rows are fetched from.
 * @param {object} definition - Analyzer with `requiredQueries(input, limit)`
 *   and `run(rowsByKey, options)`.
 * @param {unknown} input - Passed to `requiredQueries`.
 * @param {unknown} options - Passed to `run`.
 * @param {number} [limit=25e3] - Row limit passed to `requiredQueries`.
 * @returns {Promise<unknown>} The value returned by `definition.run`.
 */
async function runPortableAnalyzer(source, definition, input, options, limit = 25e3) {
	const specsByKey = definition.requiredQueries(input, limit);
	// All queries are started up front and awaited together.
	const pending = Object.entries(specsByKey).map(async ([key, spec]) => {
		const rows = await queryRows$1(source, spec);
		return [key, rows];
	});
	const rowsByKey = Object.fromEntries(await Promise.all(pending));
	return definition.run(rowsByKey, options);
}
|
|
2770
5245
|
const PORTABLE_ANALYZERS = {
|
|
@@ -2826,9 +5301,9 @@ const PORTABLE_ANALYZERS = {
|
|
|
2826
5301
|
async function queryAnalyticsFromSource(source, period, options = {}) {
|
|
2827
5302
|
const limit = options.limit ?? 25e3;
|
|
2828
5303
|
const [keywords, pages, dates] = await Promise.all([
|
|
2829
|
-
queryRows(source, keywordQuery(period, limit)),
|
|
2830
|
-
queryRows(source, pageQuery(period, limit)),
|
|
2831
|
-
queryRows(source, dateQuery(period, limit))
|
|
5304
|
+
queryRows$1(source, keywordQuery(period, limit)),
|
|
5305
|
+
queryRows$1(source, pageQuery(period, limit)),
|
|
5306
|
+
queryRows$1(source, dateQuery(period, limit))
|
|
2832
5307
|
]);
|
|
2833
5308
|
return {
|
|
2834
5309
|
keywords,
|
|
@@ -2870,4 +5345,4 @@ async function analyzeDecayFromSource(source, periods, options) {
|
|
|
2870
5345
|
/**
 * Run the portable "movers" analyzer against a query source.
 *
 * @param {object} source - Query source to read rows from.
 * @param {object} periods - Input passed to the analyzer's `requiredQueries`.
 * @param {object} options - Options passed to the analyzer's `run`.
 * @returns {Promise<unknown>} The analyzer result.
 */
async function analyzeMoversFromSource(source, periods, options) {
	const { movers } = PORTABLE_ANALYZERS;
	return runPortableAnalyzer(source, movers, periods, options);
}
|
|
2873
|
-
export { AnalyzerCapabilityError, ROW_ANALYZERS, analyzeActionPriority, analyzeActionPriorityFromSource, analyzeBrandSegmentation, analyzeBrandSegmentationFromSource, analyzeCannibalization, analyzeClustering, analyzeClusteringFromSource, analyzeConcentration, analyzeDecay, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentration, analyzeKeywordConcentrationFromSource, analyzeMovers, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentration, analyzePageConcentrationFromSource, analyzeSeasonality, analyzeSeasonalityFromSource, analyzeStrikingDistance, analyzeStrikingDistanceFromSource, comparisonOf, createAnalyzerRegistry,
|
|
5348
|
+
export { AnalyzerCapabilityError, ENGINE_QUERY_CAPABILITIES, IN_MEMORY_DEFAULT_CAPABILITIES, ROW_ANALYZERS, SQL_ANALYZERS, analyzeActionPriority, analyzeActionPriorityFromSource, analyzeBrandSegmentation, analyzeBrandSegmentationFromSource, analyzeCannibalization, analyzeClustering, analyzeClusteringFromSource, analyzeConcentration, analyzeDecay, analyzeDecayFromSource, analyzeFromSource, analyzeInBrowser, analyzeKeywordConcentration, analyzeKeywordConcentrationFromSource, analyzeMovers, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentration, analyzePageConcentrationFromSource, analyzeSeasonality, analyzeSeasonalityFromSource, analyzeStrikingDistance, analyzeStrikingDistanceFromSource, comparisonOf, createAnalyzerRegistry, createCompositeSource, createEngineQuerySource, createInMemoryQuerySource, createSorter, defaultAnalyzerRegistry, defineAnalyzer, isSqlQuerySource, mergePriorityActions, normalizePriorityActions, normalizeQuery, num, padTimeseries, periodOf, queryAnalyticsFromSource, queryComparisonFromSource, queryComparisonRows, queryRows, resolveWindow, rewriteForTableSource, runAnalyzerFromSource, runAnalyzerWithEngine, scorePriorityActions, typedQuery, windowToComparisonPeriod, windowToPeriod };
|