@gscdump/analysis 0.6.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,50 +1,11 @@
1
- import { SQL_ANALYZERS } from "@gscdump/engine-duckdb-node";
1
+ import { createAnalyzerRegistry, defineAnalyzer } from "@gscdump/engine/analyzer";
2
+ import { num } from "@gscdump/engine/analysis-types";
3
+ import { comparisonOf, defaultEndDate, padTimeseries, periodOf } from "@gscdump/engine/period";
2
4
  import { enumeratePartitions } from "@gscdump/engine/planner";
3
5
  import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
4
- import { between, date, gsc, page, query } from "gscdump/query";
5
- import { daysAgo } from "gscdump";
6
- function createAnalyzerRegistry(init = {}) {
7
- const byId = /* @__PURE__ */ new Map();
8
- for (const a of init.rows ?? []) {
9
- const entry = byId.get(a.id) ?? {};
10
- entry.rows = a;
11
- byId.set(a.id, entry);
12
- }
13
- for (const a of init.sql ?? []) {
14
- const entry = byId.get(a.id) ?? {};
15
- entry.sql = a;
16
- byId.set(a.id, entry);
17
- }
18
- const listAnalyzerIds = () => [...byId.keys()].sort();
19
- const getAnalyzerVariants = (id) => byId.get(id);
20
- const resolveAnalyzer = (id, sourceSupportsSql) => {
21
- const variants = byId.get(id);
22
- if (!variants) return void 0;
23
- if (sourceSupportsSql) return variants.sql ?? variants.rows;
24
- return variants.rows;
25
- };
26
- const listAnalyzersFor = (sourceSupportsSql) => {
27
- const out = [];
28
- for (const id of listAnalyzerIds()) {
29
- const a = resolveAnalyzer(id, sourceSupportsSql);
30
- if (a) out.push(a);
31
- }
32
- return out;
33
- };
34
- const listAnalyzerIdsFor = (source) => {
35
- const sourceSupportsSql = typeof source.executeSql === "function";
36
- const out = [];
37
- for (const id of listAnalyzerIds()) if (resolveAnalyzer(id, sourceSupportsSql)) out.push(id);
38
- return out;
39
- };
40
- return {
41
- listAnalyzerIds,
42
- getAnalyzerVariants,
43
- resolveAnalyzer,
44
- listAnalyzersFor,
45
- listAnalyzerIdsFor
46
- };
47
- }
6
+ import { between, date, extractDateRange, gsc, page, query } from "gscdump/query";
7
+ import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
8
+ import { buildExtrasQueries, buildTotalsSql, mergeExtras, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized } from "@gscdump/engine/resolver";
48
9
  const DEFAULT_LIMIT$1 = 25e3;
49
10
  function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
50
11
  return gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
@@ -55,118 +16,10 @@ function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
55
16
  function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
56
17
  return gsc.select(date).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
57
18
  }
58
- const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];
59
- function defineAnalyzer(opts) {
60
- const { id, reduce, reduceSql, reduceRows, buildSql, buildRows, sqlRequires = DEFAULT_SQL_REQUIRES, rowsRequires = [] } = opts;
61
- const sqlReducer = reduceSql ?? reduce;
62
- const rowsReducer = reduceRows ?? reduce;
63
- if (buildSql && !sqlReducer) throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
64
- if (buildRows && !rowsReducer) throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);
65
- const wrap = (fn) => (rows, params, ctx) => {
66
- return fn(Array.isArray(rows) ? rows : pickSingle(rows) ?? rows, params, ctx);
67
- };
68
- return {
69
- id,
70
- sql: buildSql && sqlReducer ? {
71
- id,
72
- requires: sqlRequires,
73
- build(params) {
74
- const spec = buildSql(params);
75
- return {
76
- kind: "sql",
77
- sql: spec.sql,
78
- params: spec.params,
79
- current: spec.current,
80
- previous: spec.previous,
81
- extraFiles: spec.extraFiles,
82
- extraQueries: spec.extraQueries,
83
- requiresAttachedTables: spec.requiresAttachedTables
84
- };
85
- },
86
- reduce(rows, ctx) {
87
- const { results, meta } = wrap(sqlReducer)(rows, ctx.params, { extras: ctx.extras });
88
- return {
89
- results,
90
- meta
91
- };
92
- }
93
- } : void 0,
94
- rows: buildRows && rowsReducer ? {
95
- id,
96
- requires: rowsRequires,
97
- build(params) {
98
- const queries = buildRows(params);
99
- return {
100
- kind: "rows",
101
- queries: Object.fromEntries(Object.entries(queries).map(([k, state]) => [k, { state }]))
102
- };
103
- },
104
- reduce(rows, ctx) {
105
- const { results, meta } = wrap(rowsReducer)(rows, ctx.params, {});
106
- return {
107
- results,
108
- meta
109
- };
110
- }
111
- } : void 0
112
- };
113
- }
114
- function pickSingle(rows) {
115
- const keys = Object.keys(rows);
116
- return keys.length === 1 ? rows[keys[0]] : void 0;
117
- }
118
- function defaultEndDate() {
119
- return daysAgo(3);
120
- }
121
- function defaultStartDate() {
122
- return daysAgo(31);
123
- }
124
- function periodOf(params) {
125
- return {
126
- startDate: params.startDate || defaultStartDate(),
127
- endDate: params.endDate || defaultEndDate()
128
- };
129
- }
130
- function comparisonOf(params) {
131
- if (!params.prevStartDate || !params.prevEndDate) throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
132
- return {
133
- current: periodOf(params),
134
- previous: {
135
- startDate: params.prevStartDate,
136
- endDate: params.prevEndDate
137
- }
138
- };
139
- }
140
- function num(v) {
141
- if (typeof v === "number") return v;
142
- if (typeof v === "bigint") return Number(v);
143
- if (v == null) return 0;
144
- return Number(v);
145
- }
146
- function buildPeriodMap(rows, key, value, filter) {
147
- const out = /* @__PURE__ */ new Map();
148
- for (const row of rows) {
149
- if (filter && !filter(row)) continue;
150
- out.set(key(row), value(row));
151
- }
152
- return out;
153
- }
154
- function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
155
- return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
156
- const mult = sortOrder === "desc" ? -1 : 1;
157
- return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
158
- };
159
- }
160
- function createMetricSorter(defaultMetric, orderByMetric) {
161
- return (items, sortBy = defaultMetric) => {
162
- const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
163
- return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
164
- };
165
- }
166
19
  function escapeRegexAlt(s) {
167
20
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
168
21
  }
169
- function str$6(v) {
22
+ function str$23(v) {
170
23
  return v == null ? "" : String(v);
171
24
  }
172
25
  function analyzeBrandSegmentation(keywords, options) {
@@ -238,13 +91,13 @@ const brandAnalyzer = defineAnalyzer({
238
91
  },
239
92
  reduceSql(rows) {
240
93
  const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
241
- query: str$6(r.query),
242
- page: r.page == null ? void 0 : str$6(r.page),
94
+ query: str$23(r.query),
95
+ page: r.page == null ? void 0 : str$23(r.page),
243
96
  clicks: num(r.clicks),
244
97
  impressions: num(r.impressions),
245
98
  ctr: num(r.ctr),
246
99
  position: num(r.position),
247
- segment: str$6(r.segment)
100
+ segment: str$23(r.segment)
248
101
  }));
249
102
  let brandClicks = 0;
250
103
  let nonBrandClicks = 0;
@@ -293,6 +146,26 @@ const brandAnalyzer = defineAnalyzer({
293
146
  };
294
147
  }
295
148
  });
149
+ function buildPeriodMap(rows, key, value, filter) {
150
+ const out = /* @__PURE__ */ new Map();
151
+ for (const row of rows) {
152
+ if (filter && !filter(row)) continue;
153
+ out.set(key(row), value(row));
154
+ }
155
+ return out;
156
+ }
157
+ function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
158
+ return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
159
+ const mult = sortOrder === "desc" ? -1 : 1;
160
+ return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
161
+ };
162
+ }
163
+ function createMetricSorter(defaultMetric, orderByMetric) {
164
+ return (items, sortBy = defaultMetric) => {
165
+ const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
166
+ return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
167
+ };
168
+ }
296
169
  const sortRowResults$1 = createSorter((item, metric) => {
297
170
  switch (metric) {
298
171
  case "clicks": return item.totalClicks;
@@ -301,10 +174,10 @@ const sortRowResults$1 = createSorter((item, metric) => {
301
174
  case "pageCount": return item.pages.length;
302
175
  }
303
176
  }, "clicks");
304
- function str$5(v) {
177
+ function str$22(v) {
305
178
  return v == null ? "" : String(v);
306
179
  }
307
- function parseJsonList$4(v) {
180
+ function parseJsonList$16(v) {
308
181
  if (Array.isArray(v)) return v;
309
182
  if (typeof v === "string" && v.length > 0) {
310
183
  const parsed = JSON.parse(v);
@@ -464,19 +337,19 @@ const cannibalizationAnalyzer = defineAnalyzer({
464
337
  },
465
338
  reduceSql(rows) {
466
339
  const events = (Array.isArray(rows) ? rows : []).map((r) => ({
467
- keyword: str$5(r.keyword),
340
+ keyword: str$22(r.keyword),
468
341
  totalImpressions: num(r.totalImpressions),
469
342
  totalClicks: num(r.totalClicks),
470
343
  competitorCount: num(r.competitorCount),
471
- leaderUrl: str$5(r.leaderUrl),
344
+ leaderUrl: str$22(r.leaderUrl),
472
345
  leaderCtr: num(r.leaderCtr),
473
346
  leaderPosition: num(r.leaderPosition),
474
347
  hhi: num(r.hhi),
475
348
  fragmentation: num(r.fragmentation),
476
349
  stolenClicks: num(r.stolenClicks),
477
350
  severity: num(r.severity),
478
- competitors: parseJsonList$4(r.competitors).map((c) => ({
479
- url: str$5(c.url),
351
+ competitors: parseJsonList$16(r.competitors).map((c) => ({
352
+ url: str$22(c.url),
480
353
  clicks: num(c.clicks),
481
354
  impressions: num(c.impressions),
482
355
  ctr: num(c.ctr),
@@ -574,10 +447,10 @@ const INTENT_PREFIXES = [
574
447
  "near me"
575
448
  ];
576
449
  const WHITESPACE_RE = /\s+/;
577
- function str$4(v) {
450
+ function str$21(v) {
578
451
  return v == null ? "" : String(v);
579
452
  }
580
- function parseJsonList$3(v) {
453
+ function parseJsonList$15(v) {
581
454
  if (Array.isArray(v)) return v;
582
455
  if (typeof v === "string" && v.length > 0) {
583
456
  const parsed = JSON.parse(v);
@@ -722,14 +595,14 @@ const clusteringAnalyzer = defineAnalyzer({
722
595
  },
723
596
  reduceSql(rows) {
724
597
  const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
725
- clusterName: str$4(r.clusterName),
726
- clusterType: str$4(r.clusterType),
598
+ clusterName: str$21(r.clusterName),
599
+ clusterType: str$21(r.clusterType),
727
600
  keywordCount: num(r.keywordCount),
728
601
  totalClicks: num(r.totalClicks),
729
602
  totalImpressions: num(r.totalImpressions),
730
603
  avgPosition: num(r.avgPosition),
731
- keywords: parseJsonList$3(r.keywords).map((k) => ({
732
- query: str$4(k.query),
604
+ keywords: parseJsonList$15(r.keywords).map((k) => ({
605
+ query: str$21(k.query),
733
606
  clicks: num(k.clicks),
734
607
  impressions: num(k.impressions),
735
608
  ctr: num(k.ctr),
@@ -759,10 +632,10 @@ const clusteringAnalyzer = defineAnalyzer({
759
632
  };
760
633
  }
761
634
  });
762
- function str$3(v) {
635
+ function str$20(v) {
763
636
  return v == null ? "" : String(v);
764
637
  }
765
- function parseJsonList$2(v) {
638
+ function parseJsonList$14(v) {
766
639
  if (Array.isArray(v)) return v;
767
640
  if (typeof v === "string" && v.length > 0) {
768
641
  const parsed = JSON.parse(v);
@@ -910,20 +783,20 @@ const concentrationAnalyzer = defineAnalyzer({
910
783
  },
911
784
  reduceSql(rows, params) {
912
785
  const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
913
- const topRaw = parseJsonList$2(r.topNItems);
786
+ const topRaw = parseJsonList$14(r.topNItems);
914
787
  return {
915
788
  results: [{
916
789
  giniCoefficient: num(r.giniCoefficient),
917
790
  hhi: num(r.hhi),
918
791
  topNConcentration: num(r.topNConcentration),
919
792
  topNItems: topRaw.map((t) => ({
920
- key: str$3(t.key),
793
+ key: str$20(t.key),
921
794
  clicks: num(t.clicks),
922
795
  share: num(t.share)
923
796
  })),
924
797
  totalItems: num(r.totalItems),
925
798
  totalClicks: num(r.totalClicks),
926
- riskLevel: str$3(r.riskLevel)
799
+ riskLevel: str$20(r.riskLevel)
927
800
  }],
928
801
  meta: {
929
802
  total: 1,
@@ -953,10 +826,10 @@ const sortResults$1 = createMetricSorter("lostClicks", {
953
826
  declinePercent: "desc",
954
827
  currentClicks: "asc"
955
828
  });
956
- function str$2(v) {
829
+ function str$19(v) {
957
830
  return v == null ? "" : String(v);
958
831
  }
959
- function parseJsonList$1(v) {
832
+ function parseJsonList$13(v) {
960
833
  if (Array.isArray(v)) return v;
961
834
  if (typeof v === "string" && v.length > 0) {
962
835
  const parsed = JSON.parse(v);
@@ -1093,7 +966,7 @@ const decayAnalyzer = defineAnalyzer({
1093
966
  const arr = Array.isArray(rows) ? rows : [];
1094
967
  return {
1095
968
  results: arr.map((r) => ({
1096
- page: str$2(r.page),
969
+ page: str$19(r.page),
1097
970
  currentClicks: num(r.currentClicks),
1098
971
  previousClicks: num(r.previousClicks),
1099
972
  lostClicks: num(r.lostClicks),
@@ -1101,8 +974,8 @@ const decayAnalyzer = defineAnalyzer({
1101
974
  currentPosition: num(r.currentPosition),
1102
975
  previousPosition: num(r.previousPosition),
1103
976
  positionDrop: num(r.positionDrop),
1104
- series: parseJsonList$1(r.seriesJson).map((s) => ({
1105
- week: str$2(s.week),
977
+ series: parseJsonList$13(r.seriesJson).map((s) => ({
978
+ week: str$19(s.week),
1106
979
  clicks: num(s.clicks),
1107
980
  impressions: num(s.impressions)
1108
981
  }))
@@ -1139,10 +1012,10 @@ function percentDifference(current, previous) {
1139
1012
  if (previous === 0) return current > 0 ? 100 : 0;
1140
1013
  return (current - previous) / previous * 100;
1141
1014
  }
1142
- function str$1(v) {
1015
+ function str$18(v) {
1143
1016
  return v == null ? "" : String(v);
1144
1017
  }
1145
- function parseJsonList(v) {
1018
+ function parseJsonList$12(v) {
1146
1019
  if (Array.isArray(v)) return v;
1147
1020
  if (typeof v === "string" && v.length > 0) {
1148
1021
  const parsed = JSON.parse(v);
@@ -1329,8 +1202,8 @@ const moversAnalyzer = defineAnalyzer({
1329
1202
  },
1330
1203
  reduceSql(rows) {
1331
1204
  const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
1332
- keyword: str$1(r.keyword),
1333
- page: r.page == null ? null : str$1(r.page),
1205
+ keyword: str$18(r.keyword),
1206
+ page: r.page == null ? null : str$18(r.page),
1334
1207
  recentClicks: num(r.recentClicks),
1335
1208
  recentImpressions: num(r.recentImpressions),
1336
1209
  recentPosition: num(r.recentPosition),
@@ -1341,9 +1214,9 @@ const moversAnalyzer = defineAnalyzer({
1341
1214
  clicksChangePercent: num(r.clicksChangePercent),
1342
1215
  impressionsChangePercent: num(r.impressionsChangePercent),
1343
1216
  positionChange: num(r.positionChange),
1344
- direction: str$1(r.direction),
1345
- series: parseJsonList(r.seriesJson).map((s) => ({
1346
- week: str$1(s.week),
1217
+ direction: str$18(r.direction),
1218
+ series: parseJsonList$12(r.seriesJson).map((s) => ({
1219
+ week: str$18(s.week),
1347
1220
  clicks: num(s.clicks),
1348
1221
  impressions: num(s.impressions)
1349
1222
  }))
@@ -1618,10 +1491,10 @@ const opportunityAnalyzer = defineAnalyzer({
1618
1491
  };
1619
1492
  }
1620
1493
  });
1621
- function str(v) {
1494
+ function str$17(v) {
1622
1495
  return v == null ? "" : String(v);
1623
1496
  }
1624
- function bool(v) {
1497
+ function bool$2(v) {
1625
1498
  return v === true || v === 1 || v === "true";
1626
1499
  }
1627
1500
  function calculateCV(values) {
@@ -1717,11 +1590,11 @@ const seasonalityAnalyzer = defineAnalyzer({
1717
1590
  reduceSql(rows) {
1718
1591
  const arr = Array.isArray(rows) ? rows : [];
1719
1592
  const breakdown = arr.map((r) => ({
1720
- month: str(r.month),
1593
+ month: str$17(r.month),
1721
1594
  value: num(r.value),
1722
1595
  vsAverage: num(r.vsAverage),
1723
- isPeak: bool(r.isPeak),
1724
- isTrough: bool(r.isTrough)
1596
+ isPeak: bool$2(r.isPeak),
1597
+ isTrough: bool$2(r.isTrough)
1725
1598
  }));
1726
1599
  const first = arr[0];
1727
1600
  const strength = first ? num(first.strength) : 0;
@@ -1939,19 +1812,2928 @@ const zeroClickAnalyzer = defineAnalyzer({
1939
1812
  };
1940
1813
  }
1941
1814
  });
1815
+ const ROW_ANALYZERS = [
1816
+ strikingDistanceAnalyzer.rows,
1817
+ opportunityAnalyzer.rows,
1818
+ brandAnalyzer.rows,
1819
+ concentrationAnalyzer.rows,
1820
+ clusteringAnalyzer.rows,
1821
+ seasonalityAnalyzer.rows,
1822
+ moversAnalyzer.rows,
1823
+ decayAnalyzer.rows,
1824
+ cannibalizationAnalyzer.rows,
1825
+ zeroClickAnalyzer.rows
1826
+ ];
1827
+ function num$5(v) {
1828
+ if (typeof v === "number") return v;
1829
+ if (typeof v === "bigint") return Number(v);
1830
+ if (v == null) return 0;
1831
+ const n = Number(v);
1832
+ return Number.isFinite(n) ? n : 0;
1833
+ }
1834
+ function str$16(v) {
1835
+ return v == null ? "" : String(v);
1836
+ }
1837
+ const bayesianCtrAnalyzer = defineAnalyzer({
1838
+ id: "bayesian-ctr",
1839
+ buildSql(params) {
1840
+ const { startDate, endDate } = periodOf(params);
1841
+ const minImpressions = params.minImpressions ?? 50;
1842
+ const limit = params.limit ?? 300;
1843
+ const priorMinEntities = 5;
1844
+ return {
1845
+ sql: `
1846
+ WITH entity AS (
1847
+ SELECT
1848
+ query,
1849
+ url,
1850
+ ${METRIC_EXPR.clicks} AS clicks,
1851
+ ${METRIC_EXPR.impressions} AS impressions,
1852
+ ${METRIC_EXPR.ctr} AS observed_ctr,
1853
+ ${METRIC_EXPR.position} AS position,
1854
+ CAST(ROUND(LEAST(${METRIC_EXPR.position}, 30)) AS INTEGER) AS bucket
1855
+ FROM read_parquet({{FILES}}, union_by_name = true)
1856
+ WHERE date >= ? AND date <= ?
1857
+ AND query IS NOT NULL AND query <> ''
1858
+ AND url IS NOT NULL AND url <> ''
1859
+ GROUP BY query, url
1860
+ HAVING SUM(impressions) >= ?
1861
+ AND ${METRIC_EXPR.position} <= 30
1862
+ ),
1863
+ bucket_mu AS (
1864
+ SELECT
1865
+ bucket,
1866
+ COUNT(*) AS n_entities,
1867
+ SUM(observed_ctr * impressions) / NULLIF(SUM(impressions), 0) AS mu,
1868
+ SUM(impressions) AS total_impressions
1869
+ FROM entity
1870
+ GROUP BY bucket
1871
+ ),
1872
+ bucket_var AS (
1873
+ SELECT
1874
+ e.bucket,
1875
+ GREATEST(
1876
+ SUM(e.impressions * POWER(e.observed_ctr - b.mu, 2))
1877
+ / NULLIF(SUM(e.impressions), 0),
1878
+ 1e-9
1879
+ ) AS v
1880
+ FROM entity e
1881
+ JOIN bucket_mu b USING (bucket)
1882
+ GROUP BY e.bucket
1883
+ ),
1884
+ priors AS (
1885
+ SELECT
1886
+ m.bucket,
1887
+ m.n_entities,
1888
+ m.mu,
1889
+ v.v,
1890
+ CASE
1891
+ WHEN m.n_entities >= ${Number(priorMinEntities)}
1892
+ AND v.v > 0
1893
+ AND m.mu > 0 AND m.mu < 1
1894
+ AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
1895
+ THEN GREATEST(0.5, m.mu * (m.mu * (1.0 - m.mu) / v.v - 1.0))
1896
+ ELSE 2.0
1897
+ END AS alpha,
1898
+ CASE
1899
+ WHEN m.n_entities >= ${Number(priorMinEntities)}
1900
+ AND v.v > 0
1901
+ AND m.mu > 0 AND m.mu < 1
1902
+ AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
1903
+ THEN GREATEST(0.5, (1.0 - m.mu) * (m.mu * (1.0 - m.mu) / v.v - 1.0))
1904
+ ELSE 48.0
1905
+ END AS beta
1906
+ FROM bucket_mu m
1907
+ JOIN bucket_var v USING (bucket)
1908
+ ),
1909
+ posterior AS (
1910
+ SELECT
1911
+ e.query,
1912
+ e.url,
1913
+ e.clicks,
1914
+ e.impressions,
1915
+ e.observed_ctr,
1916
+ e.position,
1917
+ e.bucket,
1918
+ p.alpha AS prior_alpha,
1919
+ p.beta AS prior_beta,
1920
+ p.mu AS bucket_prior_mean,
1921
+ p.alpha + e.clicks AS alpha_post,
1922
+ p.beta + (e.impressions - e.clicks) AS beta_post
1923
+ FROM entity e
1924
+ JOIN priors p USING (bucket)
1925
+ ),
1926
+ scored AS (
1927
+ SELECT *,
1928
+ alpha_post / (alpha_post + beta_post) AS posterior_mean,
1929
+ SQRT((alpha_post * beta_post)
1930
+ / (POWER(alpha_post + beta_post, 2) * (alpha_post + beta_post + 1))) AS posterior_sd
1931
+ FROM posterior
1932
+ )
1933
+ SELECT
1934
+ query AS keyword,
1935
+ url AS page,
1936
+ clicks,
1937
+ impressions,
1938
+ observed_ctr AS observedCtr,
1939
+ position,
1940
+ bucket,
1941
+ prior_alpha AS priorAlpha,
1942
+ prior_beta AS priorBeta,
1943
+ bucket_prior_mean AS bucketPriorMean,
1944
+ posterior_mean AS posteriorMean,
1945
+ posterior_sd AS posteriorSd,
1946
+ GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) AS ciLow,
1947
+ LEAST(1.0, posterior_mean + 1.96 * posterior_sd) AS ciHigh,
1948
+ posterior_mean - observed_ctr AS shrinkageDelta,
1949
+ (posterior_mean - observed_ctr) * impressions AS expectedClicksDelta,
1950
+ ABS(observed_ctr - posterior_mean) / NULLIF(posterior_sd, 0) AS significance,
1951
+ CASE
1952
+ WHEN observed_ctr > LEAST(1.0, posterior_mean + 1.96 * posterior_sd) THEN 'overperforming'
1953
+ WHEN observed_ctr < GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) THEN 'underperforming'
1954
+ ELSE 'expected'
1955
+ END AS classification
1956
+ FROM scored
1957
+ ORDER BY significance DESC NULLS LAST
1958
+ LIMIT ${Number(limit)}
1959
+ `,
1960
+ params: [
1961
+ startDate,
1962
+ endDate,
1963
+ minImpressions
1964
+ ],
1965
+ current: {
1966
+ table: "page_keywords",
1967
+ partitions: enumeratePartitions(startDate, endDate)
1968
+ }
1969
+ };
1970
+ },
1971
+ reduceSql(rows, params) {
1972
+ const arr = Array.isArray(rows) ? rows : [];
1973
+ const minImpressions = params.minImpressions ?? 50;
1974
+ const results = arr.map((r) => ({
1975
+ keyword: str$16(r.keyword),
1976
+ page: str$16(r.page),
1977
+ clicks: num$5(r.clicks),
1978
+ impressions: num$5(r.impressions),
1979
+ observedCtr: num$5(r.observedCtr),
1980
+ position: num$5(r.position),
1981
+ bucket: num$5(r.bucket),
1982
+ priorAlpha: num$5(r.priorAlpha),
1983
+ priorBeta: num$5(r.priorBeta),
1984
+ bucketPriorMean: num$5(r.bucketPriorMean),
1985
+ posteriorMean: num$5(r.posteriorMean),
1986
+ posteriorSd: num$5(r.posteriorSd),
1987
+ ciLow: num$5(r.ciLow),
1988
+ ciHigh: num$5(r.ciHigh),
1989
+ shrinkageDelta: num$5(r.shrinkageDelta),
1990
+ expectedClicksDelta: num$5(r.expectedClicksDelta),
1991
+ significance: num$5(r.significance),
1992
+ classification: str$16(r.classification)
1993
+ }));
1994
+ const under = results.filter((r) => r.classification === "underperforming").length;
1995
+ const over = results.filter((r) => r.classification === "overperforming").length;
1996
+ return {
1997
+ results,
1998
+ meta: {
1999
+ total: results.length,
2000
+ underperforming: under,
2001
+ overperforming: over,
2002
+ expected: results.length - under - over,
2003
+ minImpressions
2004
+ }
2005
+ };
2006
+ }
2007
+ });
2008
+ const BIPARTITE_PAGERANK_ITERATIONS = 25;
2009
+ const BIPARTITE_PAGERANK_DAMPING = .85;
2010
+ function str$15(v) {
2011
+ return v == null ? "" : String(v);
2012
+ }
2013
+ function parseJsonList$11(v) {
2014
+ if (Array.isArray(v)) return v;
2015
+ if (typeof v === "string" && v.length > 0) {
2016
+ const parsed = JSON.parse(v);
2017
+ return Array.isArray(parsed) ? parsed : [];
2018
+ }
2019
+ return [];
2020
+ }
2021
+ const bipartitePagerankAnalyzer = defineAnalyzer({
2022
+ id: "bipartite-pagerank",
2023
+ buildSql(params) {
2024
+ const { startDate, endDate } = periodOf(params);
2025
+ const minImpressions = params.minImpressions ?? 50;
2026
+ const topQueries = 1e3;
2027
+ const topUrls = 500;
2028
+ const limit = params.limit ?? 50;
2029
+ const bridgingEdgeThreshold = .05;
2030
+ const anchoringEdgeThreshold = .05;
2031
+ const iterations = BIPARTITE_PAGERANK_ITERATIONS;
2032
+ const d = BIPARTITE_PAGERANK_DAMPING;
2033
+ const iterCtes = [];
2034
+ for (let i = 1; i <= iterations; i++) iterCtes.push(`
2035
+ ranks_${i} AS (
2036
+ SELECT
2037
+ 'q' AS kind,
2038
+ e.qid AS id,
2039
+ (1.0 - ${d}) / (SELECT n FROM query_count)
2040
+ + ${d} * SUM(e.w_u_to_q * r.rank) AS rank
2041
+ FROM u_to_q_weights e
2042
+ JOIN ranks_${i - 1} r ON r.kind = 'u' AND r.id = e.uid
2043
+ GROUP BY e.qid
2044
+ UNION ALL
2045
+ SELECT
2046
+ 'u' AS kind,
2047
+ e.uid AS id,
2048
+ (1.0 - ${d}) / (SELECT n FROM url_count)
2049
+ + ${d} * SUM(e.w_q_to_u * r.rank) AS rank
2050
+ FROM q_to_u_weights e
2051
+ JOIN ranks_${i - 1} r ON r.kind = 'q' AND r.id = e.qid
2052
+ GROUP BY e.uid
2053
+ )`);
2054
+ const deltaParts = [];
2055
+ for (let i = 1; i <= iterations; i++) deltaParts.push(`
2056
+ SELECT ${i} AS step,
2057
+ (SELECT COALESCE(SUM(ABS(a.rank - b.rank)), 0.0)
2058
+ FROM ranks_${i} a
2059
+ JOIN ranks_${i - 1} b USING (kind, id)) AS l1`);
2060
+ return {
2061
+ sql: `
2062
+ WITH edges0 AS (
2063
+ SELECT
2064
+ query AS qid,
2065
+ url AS uid,
2066
+ CAST(SUM(impressions) AS DOUBLE) AS impressions
2067
+ FROM read_parquet({{FILES}}, union_by_name = true)
2068
+ WHERE date >= ? AND date <= ?
2069
+ AND query IS NOT NULL AND query <> ''
2070
+ AND url IS NOT NULL AND url <> ''
2071
+ GROUP BY query, url
2072
+ HAVING SUM(impressions) >= ?
2073
+ ),
2074
+ -- Top-N caps per side keep the iteration tractable.
2075
+ query_totals AS (
2076
+ SELECT qid, SUM(impressions) AS tot
2077
+ FROM edges0 GROUP BY qid
2078
+ ),
2079
+ url_totals AS (
2080
+ SELECT uid, SUM(impressions) AS tot
2081
+ FROM edges0 GROUP BY uid
2082
+ ),
2083
+ top_queries AS (
2084
+ SELECT qid FROM query_totals
2085
+ ORDER BY tot DESC, qid ASC LIMIT ${Number(topQueries)}
2086
+ ),
2087
+ top_urls AS (
2088
+ SELECT uid FROM url_totals
2089
+ ORDER BY tot DESC, uid ASC LIMIT ${Number(topUrls)}
2090
+ ),
2091
+ edges AS (
2092
+ SELECT e.qid, e.uid, e.impressions
2093
+ FROM edges0 e
2094
+ JOIN top_queries tq USING (qid)
2095
+ JOIN top_urls tu USING (uid)
2096
+ ),
2097
+ query_nodes AS (SELECT DISTINCT qid FROM edges),
2098
+ url_nodes AS (SELECT DISTINCT uid FROM edges),
2099
+ query_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM query_nodes),
2100
+ url_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM url_nodes),
2101
+ -- Row-stochastic transition weights in each direction. For q->u the
2102
+ -- weights out of a query sum to 1; symmetric for u->q.
2103
+ q_out AS (SELECT qid, SUM(impressions) AS s FROM edges GROUP BY qid),
2104
+ u_out AS (SELECT uid, SUM(impressions) AS s FROM edges GROUP BY uid),
2105
+ q_to_u_weights AS (
2106
+ SELECT e.qid, e.uid,
2107
+ e.impressions / NULLIF(q.s, 0) AS w_q_to_u
2108
+ FROM edges e JOIN q_out q USING (qid)
2109
+ ),
2110
+ u_to_q_weights AS (
2111
+ SELECT e.qid, e.uid,
2112
+ e.impressions / NULLIF(u.s, 0) AS w_u_to_q
2113
+ FROM edges e JOIN u_out u USING (uid)
2114
+ ),
2115
+ -- Seed: uniform distribution per side. Total mass = 2 (one unit per side).
2116
+ ranks_0 AS (
2117
+ SELECT 'q' AS kind, q.qid AS id, 1.0 / (SELECT n FROM query_count) AS rank
2118
+ FROM query_nodes q
2119
+ UNION ALL
2120
+ SELECT 'u' AS kind, u.uid AS id, 1.0 / (SELECT n FROM url_count) AS rank
2121
+ FROM url_nodes u
2122
+ ),
2123
+ ${iterCtes.join(",\n")},
2124
+ final_ranks AS (SELECT * FROM ranks_${iterations}),
2125
+ -- Hub/anchor diagnostics computed from raw edge mass (not rank). A
2126
+ -- query "bridges" URLs it sends >= ${bridgingEdgeThreshold} of its mass
2127
+ -- to; a URL "anchors" queries that contribute >= ${anchoringEdgeThreshold}
2128
+ -- of its incoming mass.
2129
+ q_bridging AS (
2130
+ SELECT qid, COUNT(*) AS bridging
2131
+ FROM q_to_u_weights
2132
+ WHERE w_q_to_u >= ${bridgingEdgeThreshold}
2133
+ GROUP BY qid
2134
+ ),
2135
+ u_anchoring AS (
2136
+ SELECT uid, COUNT(*) AS anchoring
2137
+ FROM u_to_q_weights
2138
+ WHERE w_u_to_q >= ${anchoringEdgeThreshold}
2139
+ GROUP BY uid
2140
+ ),
2141
+ q_degree AS (
2142
+ SELECT qid, COUNT(*) AS degree, SUM(impressions) AS impressions
2143
+ FROM edges GROUP BY qid
2144
+ ),
2145
+ u_degree AS (
2146
+ SELECT uid, COUNT(*) AS degree, SUM(impressions) AS impressions
2147
+ FROM edges GROUP BY uid
2148
+ ),
2149
+ deltas AS (
2150
+ ${deltaParts.join("\n UNION ALL\n")}
2151
+ ),
2152
+ query_rows AS (
2153
+ SELECT
2154
+ 'query' AS kind, f.id, f.rank,
2155
+ COALESCE(b.bridging, 0) AS bridging,
2156
+ 0 AS anchoring,
2157
+ COALESCE(qd.degree, 0) AS degree,
2158
+ COALESCE(qd.impressions, 0) AS impressions
2159
+ FROM final_ranks f
2160
+ LEFT JOIN q_bridging b ON b.qid = f.id
2161
+ LEFT JOIN q_degree qd ON qd.qid = f.id
2162
+ WHERE f.kind = 'q'
2163
+ ORDER BY f.rank DESC
2164
+ LIMIT ${Number(limit)}
2165
+ ),
2166
+ url_rows AS (
2167
+ SELECT
2168
+ 'url' AS kind, f.id, f.rank,
2169
+ 0 AS bridging,
2170
+ COALESCE(a.anchoring, 0) AS anchoring,
2171
+ COALESCE(ud.degree, 0) AS degree,
2172
+ COALESCE(ud.impressions, 0) AS impressions
2173
+ FROM final_ranks f
2174
+ LEFT JOIN u_anchoring a ON a.uid = f.id
2175
+ LEFT JOIN u_degree ud ON ud.uid = f.id
2176
+ WHERE f.kind = 'u'
2177
+ ORDER BY f.rank DESC
2178
+ LIMIT ${Number(limit)}
2179
+ ),
2180
+ nodes AS (
2181
+ SELECT * FROM query_rows
2182
+ UNION ALL
2183
+ SELECT * FROM url_rows
2184
+ ),
2185
+ counts AS (
2186
+ SELECT
2187
+ (SELECT n FROM query_count) AS q_count,
2188
+ (SELECT n FROM url_count) AS u_count
2189
+ ),
2190
+ deltas_json AS (
2191
+ SELECT to_json(list({ 'step': step, 'l1': l1 } ORDER BY step)) AS dj
2192
+ FROM deltas
2193
+ )
2194
+ SELECT
2195
+ n.kind,
2196
+ n.id,
2197
+ n.rank,
2198
+ n.bridging,
2199
+ n.anchoring,
2200
+ n.degree,
2201
+ n.impressions,
2202
+ c.q_count AS queryCount,
2203
+ c.u_count AS urlCount,
2204
+ dj.dj AS deltasJson
2205
+ FROM nodes n
2206
+ CROSS JOIN counts c
2207
+ CROSS JOIN deltas_json dj
2208
+ ORDER BY n.kind, n.rank DESC
2209
+ `,
2210
+ params: [
2211
+ startDate,
2212
+ endDate,
2213
+ minImpressions
2214
+ ],
2215
+ current: {
2216
+ table: "page_keywords",
2217
+ partitions: enumeratePartitions(startDate, endDate)
2218
+ }
2219
+ };
2220
+ },
2221
+ reduceSql(rows) {
2222
+ const arr = Array.isArray(rows) ? rows : [];
2223
+ const iterations = BIPARTITE_PAGERANK_ITERATIONS;
2224
+ const d = BIPARTITE_PAGERANK_DAMPING;
2225
+ const results = arr.map((r) => ({
2226
+ kind: str$15(r.kind),
2227
+ id: str$15(r.id),
2228
+ rank: num(r.rank),
2229
+ bridging: num(r.bridging),
2230
+ anchoring: num(r.anchoring),
2231
+ degree: num(r.degree),
2232
+ impressions: num(r.impressions)
2233
+ }));
2234
+ const first = arr[0] ?? {};
2235
+ const queryCount = num(first.queryCount);
2236
+ const urlCount = num(first.urlCount);
2237
+ const deltas = parseJsonList$11(first.deltasJson).map((e) => ({
2238
+ step: num(e.step),
2239
+ l1: num(e.l1)
2240
+ }));
2241
+ const convergenceDelta = deltas.length > 0 ? deltas[deltas.length - 1].l1 : 0;
2242
+ return {
2243
+ results,
2244
+ meta: {
2245
+ total: results.length,
2246
+ convergenceDelta,
2247
+ iterations,
2248
+ damping: d,
2249
+ queryCount,
2250
+ urlCount,
2251
+ deltas
2252
+ }
2253
+ };
2254
+ }
2255
+ });
2256
+ function num$4(v) {
2257
+ if (typeof v === "number") return v;
2258
+ if (typeof v === "bigint") return Number(v);
2259
+ if (v == null) return 0;
2260
+ const n = Number(v);
2261
+ return Number.isFinite(n) ? n : 0;
2262
+ }
2263
+ function str$14(v) {
2264
+ return v == null ? "" : String(v);
2265
+ }
2266
+ function parseJsonList$10(v) {
2267
+ if (Array.isArray(v)) return v;
2268
+ if (typeof v === "string" && v.length > 0) {
2269
+ const parsed = JSON.parse(v);
2270
+ return Array.isArray(parsed) ? parsed : [];
2271
+ }
2272
+ return [];
2273
+ }
2274
/**
 * "change-point" analyzer.
 *
 * For every (query, url) pair with enough daily observations, the SQL scores
 * each candidate split date with
 * `n_total*ln(var_single) - n_left*ln(var_left) - n_right*ln(var_right)`
 * and keeps the date with the highest score when it exceeds `threshold`.
 *
 * Params read: `startDate` (default `daysAgo(93)`), `endDate` (default
 * `defaultEndDate()`), `threshold` (default 10), `minImpressions` (default 50,
 * bound as a query parameter), `metric` ("clicks" | "impressions", anything
 * else means "position") and `limit` (default 100).
 */
const changePointAnalyzer = defineAnalyzer({
  id: "change-point",
  buildSql(params) {
    const windowEnd = params.endDate ?? defaultEndDate();
    const windowStart = params.startDate ?? daysAgo(93);
    // Minimum number of daily rows per entity, and per side of a split.
    const minObservedDays = 21;
    const minSegmentDays = 7;
    const llrCutoff = Number(params.threshold ?? 10);
    const impressionFloor = params.minImpressions ?? 50;
    const seriesMetric = ["clicks", "impressions"].includes(params.metric) ? params.metric : "position";
    const rowCap = Number(params.limit ?? 100);
    let metricSql;
    if (seriesMetric === "position") {
      metricSql = METRIC_EXPR.position;
    } else {
      metricSql = `CAST(SUM(${seriesMetric}) AS DOUBLE)`;
    }
    const sql = `
      WITH daily AS (
        SELECT
          query,
          url AS page,
          date,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          ${metricSql} AS value
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
        HAVING SUM(impressions) >= 1
      ),
      entity_stats AS (
        SELECT query, page,
          COUNT(*) AS n_total,
          SUM(impressions) AS total_impressions,
          SUM(value) AS sum_total,
          SUM(value * value) AS sumsq_total
        FROM daily
        GROUP BY query, page
        HAVING COUNT(*) >= ${minObservedDays}
          AND SUM(impressions) >= ?
      ),
      filtered AS (
        SELECT d.*,
          e.n_total, e.sum_total, e.sumsq_total, e.total_impressions
        FROM daily d
        JOIN entity_stats e USING (query, page)
      ),
      cumulated AS (
        SELECT *,
          COUNT(*) OVER w AS n_left,
          SUM(value) OVER w AS sum_left,
          SUM(value * value) OVER w AS sumsq_left
        FROM filtered
        WINDOW w AS (
          PARTITION BY query, page
          ORDER BY date
          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        )
      ),
      llr_scored AS (
        SELECT *,
          (n_total - n_left) AS n_right,
          (sum_total - sum_left) AS sum_right,
          (sumsq_total - sumsq_left) AS sumsq_right,
          GREATEST(
            (sumsq_left / NULLIF(n_left, 0))
            - (sum_left / NULLIF(n_left, 0)) * (sum_left / NULLIF(n_left, 0)),
            1e-9
          ) AS var_left,
          GREATEST(
            ((sumsq_total - sumsq_left) / NULLIF(n_total - n_left, 0))
            - ((sum_total - sum_left) / NULLIF(n_total - n_left, 0))
            * ((sum_total - sum_left) / NULLIF(n_total - n_left, 0)),
            1e-9
          ) AS var_right,
          GREATEST(
            (sumsq_total / NULLIF(n_total, 0))
            - (sum_total / NULLIF(n_total, 0)) * (sum_total / NULLIF(n_total, 0)),
            1e-9
          ) AS var_single
        FROM cumulated
      ),
      llr AS (
        SELECT *,
          CASE
            WHEN n_left >= ${minSegmentDays} AND (n_total - n_left) >= ${minSegmentDays}
            THEN n_total * LN(var_single)
              - n_left * LN(var_left)
              - (n_total - n_left) * LN(var_right)
            ELSE NULL
          END AS llr
        FROM llr_scored
      ),
      best AS (
        SELECT query, page, n_total, total_impressions,
          arg_max(date, llr) AS change_date,
          MAX(llr) AS best_llr,
          arg_max(sum_left / NULLIF(n_left, 0), llr) AS left_mean,
          arg_max((sum_total - sum_left) / NULLIF(n_total - n_left, 0), llr) AS right_mean,
          arg_max(sqrt(var_left), llr) AS left_std,
          arg_max(sqrt(var_right), llr) AS right_std
        FROM llr
        WHERE llr IS NOT NULL
        GROUP BY query, page, n_total, total_impressions
        HAVING MAX(llr) > ${llrCutoff}
      ),
      series AS (
        SELECT query, page,
          to_json(list({
            'date': strftime(date, '%Y-%m-%d'),
            'value': value
          } ORDER BY date)) AS seriesJson
        FROM daily
        GROUP BY query, page
      )
      SELECT
        b.query AS keyword,
        b.page,
        CAST(b.n_total AS DOUBLE) AS totalDays,
        CAST(b.total_impressions AS DOUBLE) AS totalImpressions,
        strftime(b.change_date, '%Y-%m-%d') AS changeDate,
        b.best_llr AS llr,
        b.left_mean AS leftMean,
        b.right_mean AS rightMean,
        (b.right_mean - b.left_mean) AS delta,
        b.left_std AS leftStddev,
        b.right_std AS rightStddev,
        s.seriesJson
      FROM best b
      LEFT JOIN series s USING (query, page)
      ORDER BY b.best_llr DESC
      LIMIT ${rowCap}
    `;
    return {
      sql,
      // Bound in placeholder order: date range, then the impressions floor.
      params: [windowStart, windowEnd, impressionFloor],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(windowStart, windowEnd)
      }
    };
  },
  reduceSql(rows, params) {
    const inputRows = Array.isArray(rows) ? rows : [];
    const threshold = params.threshold ?? 10;
    const metric = ["clicks", "impressions"].includes(params.metric) ? params.metric : "position";
    // A falling value counts as an improvement only for "position".
    const lowerIsBetter = metric === "position";
    const results = [];
    let improvedCount = 0;
    for (const row of inputRows) {
      const delta = num$4(row.delta);
      const improved = lowerIsBetter ? delta < 0 : delta > 0;
      if (improved) improvedCount += 1;
      results.push({
        keyword: str$14(row.keyword),
        page: str$14(row.page),
        totalDays: num$4(row.totalDays),
        totalImpressions: num$4(row.totalImpressions),
        changeDate: str$14(row.changeDate),
        llr: num$4(row.llr),
        leftMean: num$4(row.leftMean),
        rightMean: num$4(row.rightMean),
        delta,
        leftStddev: num$4(row.leftStddev),
        rightStddev: num$4(row.rightStddev),
        // A delta of exactly 0 is reported as "worsened".
        direction: improved ? "improved" : "worsened",
        series: parseJsonList$10(row.seriesJson).map((point) => ({
          date: str$14(point.date),
          value: num$4(point.value)
        }))
      });
    }
    return {
      results,
      meta: {
        total: results.length,
        metric,
        threshold,
        improved: improvedCount,
        worsened: results.length - improvedCount
      }
    };
  }
});
2456
/**
 * Turn a raw SQL cell into a JS number.
 *
 * - numbers pass through unchanged (NaN/Infinity included);
 * - null/undefined become 0;
 * - bigints are converted with `Number`;
 * - everything else is converted with `Number` and replaced by 0 unless finite.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The numeric reading of `v`.
 */
function num$3(v) {
  const kind = typeof v;
  if (kind === "number") return v;
  if (v === null || v === undefined) return 0;
  const converted = Number(v);
  // Bigints skip the finiteness check, exactly like plain numbers do.
  if (kind === "bigint") return converted;
  return Number.isFinite(converted) ? converted : 0;
}
2463
/**
 * Convert a cell to a string; null and undefined become "".
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} String form of `v`.
 */
function str$13(v) {
  const missing = v === undefined || v === null;
  return missing ? "" : String(v);
}
2466
/**
 * Resolve the look-back window shared by `buildSql` and `reduceSql`.
 *
 * `params.days` is clamped to [7, 365] and defaults to 90. A value that does
 * not convert to a number (NaN) also falls back to 90; previously NaN survived
 * the clamp and produced an Invalid Date start.
 *
 * @param {object} params - Analyzer params (`days` plus whatever `periodOf` reads).
 * @returns {{ days: number, startDate: string, endDate: string }}
 */
function resolveContentVelocityWindow(params) {
  const requested = Number(params.days ?? 90);
  const days = Math.min(Math.max(Number.isNaN(requested) ? 90 : requested, 7), 365);
  const { endDate } = periodOf(params);
  const start = new Date(endDate);
  start.setUTCDate(start.getUTCDate() - days);
  return { days, startDate: toIsoDate(start), endDate };
}

/**
 * "content-velocity" analyzer.
 *
 * The SQL groups rows with impressions > 0 by ISO week (`%G-W%V`) and reports,
 * per week, the number of distinct queries seen and the number of queries
 * whose first appearance inside the window falls in that week. The reducer
 * adds a summary and a trend label obtained by comparing the average of the
 * second half of the weeks against the first half.
 */
const contentVelocityAnalyzer = defineAnalyzer({
  id: "content-velocity",
  buildSql(params) {
    const { startDate, endDate } = resolveContentVelocityWindow(params);
    return {
      sql: `
        WITH src AS (
          SELECT query, date
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        first_seen AS (
          SELECT query, MIN(date) AS first_date FROM src GROUP BY query
        ),
        per_week AS (
          SELECT
            strftime(CAST(date AS DATE), '%G-W%V') AS week,
            MIN(date) AS week_start,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS totalKeywords
          FROM src
          GROUP BY week
        ),
        new_per_week AS (
          SELECT
            strftime(CAST(first_date AS DATE), '%G-W%V') AS week,
            CAST(COUNT(*) AS DOUBLE) AS newKeywords
          FROM first_seen
          GROUP BY week
        )
        SELECT
          pw.week AS week,
          COALESCE(npw.newKeywords, 0) AS newKeywords,
          pw.totalKeywords AS totalKeywords
        FROM per_week pw
        LEFT JOIN new_per_week npw ON pw.week = npw.week
        ORDER BY pw.week ASC
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { days, startDate, endDate } = resolveContentVelocityWindow(params);
    const weekly = arr.map((r) => ({
      week: str$13(r.week),
      newKeywords: num$3(r.newKeywords),
      totalKeywords: num$3(r.totalKeywords)
    }));
    const sumNew = (list) => list.reduce((sum, w) => sum + w.newKeywords, 0);
    const meanNew = (list) => (list.length > 0 ? sumNew(list) / list.length : 0);
    const total = sumNew(weekly);
    const avg = weekly.length > 0 ? total / weekly.length : 0;
    // Compare the later half of the window against the earlier half.
    const mid = Math.floor(weekly.length / 2);
    const diff = meanNew(weekly.slice(mid)) - meanNew(weekly.slice(0, mid));
    // The halves must differ by at least one keyword, or 15% of the weekly
    // average if that is larger, before the trend is called non-stable.
    const threshold = Math.max(1, avg * 0.15);
    let trend = "stable";
    if (diff > threshold) trend = "accelerating";
    else if (diff < -threshold) trend = "decelerating";
    return {
      results: weekly,
      meta: {
        summary: {
          totalNewKeywords: total,
          avgPerWeek: avg,
          trend
        },
        days,
        startDate,
        endDate
      }
    };
  }
});
2547
/**
 * Read a raw cell as a JS number.
 *
 * Numbers (including NaN/Infinity) are returned unchanged and bigints are
 * converted with `Number`. Any other value is converted with `Number`, with
 * null/undefined and non-finite results mapped to 0.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} Numeric value, or 0 when none can be derived.
 */
function num$2(v) {
  if (typeof v === "number") return v;
  if (typeof v === "bigint") return Number(v);
  const parsed = v == null ? 0 : Number(v);
  return Number.isFinite(parsed) ? parsed : 0;
}
2554
/**
 * String form of a cell, with null/undefined collapsed to "".
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} `String(v)` or the empty string.
 */
function str$12(v) {
  if (v == null) return "";
  return String(v);
}
2557
/**
 * Interpret a cell as a boolean flag.
 *
 * Only the boolean `true`, the number `1` and the string "true" count as
 * true; every other value (including "1" and `1n`) is false.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {boolean} Whether `v` is one of the accepted truthy encodings.
 */
function bool$1(v) {
  if (v === true) return true;
  return v === 1 || v === "true";
}
2560
/**
 * Decode a cell expected to contain a JSON array.
 *
 * An array input is returned unchanged. A non-empty string is parsed as JSON
 * and returned only if it decodes to an array. All other inputs give `[]`.
 *
 * Text that is not valid JSON also gives `[]` rather than throwing, in line
 * with the helper's existing "fall back to an empty list" behaviour for every
 * other unusable input.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {unknown[]} Decoded array or an empty array.
 */
function parseJsonList$9(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  let decoded;
  try {
    decoded = JSON.parse(v);
  } catch {
    // JSON.parse only throws SyntaxError: the cell is unusable, so no list.
    return [];
  }
  return Array.isArray(decoded) ? decoded : [];
}
2568
/**
 * Coerce the caller-supplied z-score threshold to a finite number.
 *
 * The threshold is interpolated into the SQL text, so it must never be passed
 * through as a raw string. Values that do not convert to a finite number fall
 * back to the default of 2.
 *
 * @param {unknown} raw - `params.threshold` as received.
 * @returns {number} A finite threshold.
 */
function resolveCtrAnomalyZThreshold(raw) {
  const parsed = Number(raw ?? 2);
  return Number.isFinite(parsed) ? parsed : 2;
}

/**
 * "ctr-anomaly" analyzer.
 *
 * For each (query, url) daily series the SQL compares the day's CTR with the
 * mean and population standard deviation of up to 28 preceding rows. A day is
 * a breach when |z| >= threshold, the position moved by at most 1.5 from the
 * rolling position, and at least 14 preceding rows were available. Entities
 * need at least 2 downward breach days to be reported.
 *
 * Params read: `startDate` (default `daysAgo(93)`), `endDate` (default
 * `defaultEndDate()`), `minImpressions` (per-day floor, default 5, bound as a
 * query parameter), `threshold` (default 2) and `limit` (default 200).
 */
const ctrAnomalyAnalyzer = defineAnalyzer({
  id: "ctr-anomaly",
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    const minDailyImpressions = params.minImpressions ?? 5;
    const minRollingN = 14;
    // Numeric coercion keeps arbitrary text out of the generated SQL.
    const zThreshold = resolveCtrAnomalyZThreshold(params.threshold);
    const maxPositionDelta = 1.5;
    const minBreachDays = 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url AS page,
            date,
            ${METRIC_EXPR.clicks} AS day_clicks,
            ${METRIC_EXPR.impressions} AS day_impressions,
            ${METRIC_EXPR.ctr} AS day_ctr,
            ${METRIC_EXPR.position} AS day_position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
          HAVING SUM(impressions) >= ?
        ),
        rolled AS (
          SELECT *,
            AVG(day_ctr) OVER w AS rolling_ctr,
            STDDEV_POP(day_ctr) OVER w AS rolling_stddev,
            AVG(day_position) OVER w AS rolling_position,
            COUNT(*) OVER w AS rolling_n
          FROM daily
          WINDOW w AS (
            PARTITION BY query, page
            ORDER BY date
            ROWS BETWEEN 28 PRECEDING AND 1 PRECEDING
          )
        ),
        flagged AS (
          SELECT *,
            CASE
              WHEN rolling_n >= ${Number(minRollingN)} AND rolling_stddev > 0
              THEN (day_ctr - rolling_ctr) / rolling_stddev
              ELSE 0.0
            END AS z_score,
            CASE
              WHEN rolling_position IS NULL THEN 0.0
              ELSE ABS(day_position - rolling_position)
            END AS position_delta
          FROM rolled
        ),
        breaches AS (
          SELECT *,
            CASE
              WHEN ABS(z_score) >= ${zThreshold}
                AND position_delta <= ${maxPositionDelta}
                AND rolling_n >= ${Number(minRollingN)}
              THEN true ELSE false
            END AS is_breach
          FROM flagged
        ),
        per_entity AS (
          SELECT
            query, page,
            COUNT(*) FILTER (WHERE is_breach AND z_score < 0) AS breach_days_down,
            COUNT(*) FILTER (WHERE is_breach AND z_score > 0) AS breach_days_up,
            SUM(CASE
              WHEN is_breach AND z_score < 0
              THEN (rolling_ctr - day_ctr) * day_impressions
              ELSE 0.0
            END) AS clicks_lost,
            SUM(CASE
              WHEN is_breach AND z_score < 0
              THEN ABS(z_score) * day_impressions
              ELSE 0.0
            END) AS severity_raw,
            MAX(CASE WHEN is_breach THEN ABS(z_score) ELSE 0.0 END) AS max_z,
            AVG(rolling_ctr) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_ctr,
            AVG(rolling_position) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_position,
            SUM(day_impressions) AS total_impressions,
            SUM(day_clicks) AS total_clicks
          FROM breaches
          GROUP BY query, page
          HAVING COUNT(*) FILTER (WHERE is_breach AND z_score < 0) >= ${Number(minBreachDays)}
        ),
        series AS (
          SELECT query, page,
            to_json(list({
              'date': strftime(date, '%Y-%m-%d'),
              'ctr': day_ctr,
              'position': day_position,
              'impressions': day_impressions,
              'rollingCtr': rolling_ctr,
              'rollingStddev': rolling_stddev,
              'z': z_score,
              'breach': is_breach AND z_score < 0
            } ORDER BY date)) AS seriesJson
          FROM breaches
          GROUP BY query, page
        )
        SELECT
          e.query AS keyword,
          e.page,
          CAST(e.breach_days_down AS DOUBLE) AS breachDaysDown,
          CAST(e.breach_days_up AS DOUBLE) AS breachDaysUp,
          CAST(ROUND(e.clicks_lost) AS DOUBLE) AS clicksLost,
          e.severity_raw AS severityRaw,
          e.max_z AS maxZ,
          e.baseline_ctr AS baselineCtr,
          e.baseline_position AS baselinePosition,
          e.total_impressions AS totalImpressions,
          e.total_clicks AS totalClicks,
          s.seriesJson
        FROM per_entity e
        LEFT JOIN series s USING (query, page)
        ORDER BY clicksLost DESC
        LIMIT ${Number(limit)}
      `,
      params: [
        startDate,
        endDate,
        minDailyImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minRollingN = 14;
    // Report the threshold the SQL actually applied.
    const zThreshold = resolveCtrAnomalyZThreshold(params.threshold);
    const anomalies = arr.map((r) => ({
      keyword: str$12(r.keyword),
      page: str$12(r.page),
      breachDaysDown: num$2(r.breachDaysDown),
      breachDaysUp: num$2(r.breachDaysUp),
      clicksLost: num$2(r.clicksLost),
      severity: num$2(r.severityRaw),
      maxZ: num$2(r.maxZ),
      baselineCtr: num$2(r.baselineCtr),
      baselinePosition: num$2(r.baselinePosition),
      totalImpressions: num$2(r.totalImpressions),
      totalClicks: num$2(r.totalClicks),
      series: parseJsonList$9(r.seriesJson).map((s) => ({
        date: str$12(s.date),
        ctr: num$2(s.ctr),
        position: num$2(s.position),
        impressions: num$2(s.impressions),
        // Rolling stats stay null when the SQL produced no value for them.
        rollingCtr: s.rollingCtr == null ? null : num$2(s.rollingCtr),
        rollingStddev: s.rollingStddev == null ? null : num$2(s.rollingStddev),
        z: num$2(s.z),
        breach: bool$1(s.breach)
      }))
    }));
    let totalClicksLost = 0;
    let totalBreachDays = 0;
    for (const anomaly of anomalies) {
      totalClicksLost += anomaly.clicksLost;
      totalBreachDays += anomaly.breachDaysDown;
    }
    return {
      results: anomalies,
      meta: {
        total: anomalies.length,
        totalClicksLost,
        totalBreachDays,
        zThreshold,
        minRollingN
      }
    };
  }
});
2742
/**
 * Numeric reading of a raw cell.
 *
 * null/undefined give 0, numbers are returned as-is (NaN/Infinity included),
 * bigints go through `Number`, and any other value goes through `Number` with
 * non-finite results replaced by 0.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {number} The derived number.
 */
function num$1(v) {
  if (v == null) {
    return 0;
  }
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default: {
      const candidate = Number(v);
      return Number.isFinite(candidate) ? candidate : 0;
    }
  }
}
2749
/**
 * Render a cell as text; null and undefined render as "".
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} Text form of `v`.
 */
function str$11(v) {
  return v === null || v === undefined ? "" : String(v);
}
2752
/**
 * Coerce a cell into an array.
 *
 * Accepts either an actual array (returned unchanged) or a non-empty JSON
 * string that decodes to an array. Anything else produces `[]`, and that now
 * includes strings that are not valid JSON: previously `JSON.parse` would
 * throw out of the reducer calling this helper.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {unknown[]} The list, or an empty array when none is available.
 */
function parseJsonList$8(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  try {
    const value = JSON.parse(v);
    if (Array.isArray(value)) return value;
  } catch {
    // JSON.parse only throws SyntaxError; fall through to the empty list.
  }
  return [];
}
2760
/**
 * "ctr-curve" analyzer.
 *
 * The SQL returns a single row with two JSON columns:
 * - `curve_json`: per position bucket ('1', '2', '3', '4-5', '6-10', '11-20',
 *   '20+') the average CTR, average position (exposed as `medianPosition`
 *   although it is computed with AVG), distinct keyword count and totals;
 * - `outliers_json`: up to 50 queries (>= 20 impressions) whose CTR differs
 *   most from the average CTR of their position band.
 *
 * Neither `list()` aggregate carries an ORDER BY, so the reducer does not rely
 * on element order: it sorts buckets into their canonical order and sorts
 * outliers by |ctrDiff| descending before taking the top 25 of each sign.
 */
const ctrCurveAnalyzer = defineAnalyzer({
  id: "ctr-curve",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
        WITH src AS (
          SELECT
            query,
            clicks,
            impressions,
            sum_position,
            (sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        curve AS (
          SELECT
            CASE
              WHEN avg_pos <= 1.5 THEN '1'
              WHEN avg_pos <= 2.5 THEN '2'
              WHEN avg_pos <= 3.5 THEN '3'
              WHEN avg_pos <= 5.5 THEN '4-5'
              WHEN avg_pos <= 10.5 THEN '6-10'
              WHEN avg_pos <= 20.5 THEN '11-20'
              ELSE '20+'
            END AS bucket,
            AVG(CAST(clicks AS DOUBLE) / NULLIF(impressions, 0)) AS avgCtr,
            AVG(avg_pos) AS medianPosition,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
            ${METRIC_EXPR.clicks} AS totalClicks,
            ${METRIC_EXPR.impressions} AS totalImpressions
          FROM src
          GROUP BY bucket
        ),
        ks AS (
          SELECT
            query,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position,
            CASE
              WHEN ${METRIC_EXPR.position} <= 3.5 THEN 'top3'
              WHEN ${METRIC_EXPR.position} <= 10.5 THEN 'page1'
              WHEN ${METRIC_EXPR.position} <= 20.5 THEN 'page2'
              ELSE 'deep'
            END AS band
          FROM src
          GROUP BY query
          HAVING SUM(impressions) >= 20
        ),
        band_avg AS (
          SELECT band, AVG(ctr) AS band_avg_ctr FROM ks GROUP BY band
        ),
        outliers AS (
          SELECT
            ks.query, ks.clicks, ks.impressions, ks.ctr, ks.position,
            ba.band_avg_ctr AS expectedCtr,
            ks.ctr - ba.band_avg_ctr AS ctrDiff
          FROM ks JOIN band_avg ba ON ks.band = ba.band
          ORDER BY ABS(ks.ctr - ba.band_avg_ctr) DESC
          LIMIT 50
        )
        SELECT
          (SELECT to_json(list({
            'bucket': bucket,
            'avgCtr': avgCtr,
            'medianPosition': medianPosition,
            'keywordCount': keywordCount,
            'totalClicks': totalClicks,
            'totalImpressions': totalImpressions
          })) FROM curve) AS curve_json,
          (SELECT to_json(list({
            'query': query,
            'clicks': clicks,
            'impressions': impressions,
            'ctr': ctr,
            'position': position,
            'expectedCtr': expectedCtr,
            'ctrDiff': ctrDiff
          })) FROM outliers) AS outliers_json
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const row = arr[0] ?? {};
    // Canonical display order of the buckets emitted by the `curve` CTE.
    const bucketOrder = ["1", "2", "3", "4-5", "6-10", "11-20", "20+"];
    const bucketRank = (bucket) => {
      const index = bucketOrder.indexOf(bucket);
      // Unknown labels sort after the known ones (stable among themselves).
      return index === -1 ? bucketOrder.length : index;
    };
    const curve = parseJsonList$8(row.curve_json)
      .map((r) => ({
        bucket: str$11(r.bucket),
        avgCtr: num$1(r.avgCtr),
        medianPosition: num$1(r.medianPosition),
        keywordCount: num$1(r.keywordCount),
        totalClicks: num$1(r.totalClicks),
        totalImpressions: num$1(r.totalImpressions)
      }))
      .sort((a, b) => bucketRank(a.bucket) - bucketRank(b.bucket));
    const outliers = parseJsonList$8(row.outliers_json)
      .map((r) => ({
        query: str$11(r.query),
        clicks: num$1(r.clicks),
        impressions: num$1(r.impressions),
        ctr: num$1(r.ctr),
        position: num$1(r.position),
        expectedCtr: num$1(r.expectedCtr),
        ctrDiff: num$1(r.ctrDiff)
      }))
      // Largest deviation first, so the slices below keep the strongest outliers.
      .sort((a, b) => Math.abs(b.ctrDiff) - Math.abs(a.ctrDiff));
    return {
      results: curve,
      meta: {
        overperforming: outliers.filter((o) => o.ctrDiff > 0).slice(0, 25),
        underperforming: outliers.filter((o) => o.ctrDiff < 0).slice(0, 25),
        startDate,
        endDate
      }
    };
  }
});
2882
/**
 * Text form of a cell; null/undefined are rendered as the empty string.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} `String(v)` or "".
 */
function str$10(v) {
  if (v === undefined || v === null) return "";
  return String(v);
}
2885
/**
 * Return the array carried by a cell, or `[]` when there is none.
 *
 * Arrays are passed through untouched. Non-empty strings are decoded as JSON
 * and used only when the decoded value is an array. Other inputs yield `[]`,
 * as does a string that fails to parse, so a malformed cell degrades to
 * "no rows" instead of throwing a SyntaxError from the reducer.
 *
 * @param {unknown} v - Raw cell value.
 * @returns {unknown[]} The list found in `v`, or an empty array.
 */
function parseJsonList$7(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  try {
    const parsed = JSON.parse(v);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    // JSON.parse only throws SyntaxError; treat the cell as having no list.
    return [];
  }
}
2893
/**
 * "dark-traffic" analyzer.
 *
 * Compares clicks recorded at page level with clicks that can be attributed
 * to keywords. The SQL reads three file sets (`{{FILES}}` for pages,
 * `{{FILES_KEYWORDS}}` and `{{FILES_PAGE_KEYWORDS}}`) over the same date range
 * and returns one row holding three JSON columns: page totals, keyword totals
 * and the 50 pages with the most unattributed ("dark") clicks.
 */
const darkTrafficAnalyzer = defineAnalyzer({
  id: "dark-traffic",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const sql = `
      WITH page_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      kw_totals AS (
        SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
        FROM read_parquet({{FILES_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
      ),
      per_page AS (
        SELECT url, SUM(clicks) AS page_clicks
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
        HAVING SUM(clicks) > 0
      ),
      per_page_kw AS (
        SELECT url, SUM(clicks) AS attributed_clicks, COUNT(DISTINCT query) AS kw_count
        FROM read_parquet({{FILES_PAGE_KEYWORDS}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY url
      ),
      page_rows AS (
        SELECT
          p.url AS url,
          CAST(p.page_clicks AS DOUBLE) AS totalClicks,
          CAST(COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS attributedClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS darkClicks,
          CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE)
            / NULLIF(p.page_clicks, 0) AS darkPercent,
          CAST(COALESCE(k.kw_count, 0) AS DOUBLE) AS keywordCount
        FROM per_page p
        LEFT JOIN per_page_kw k ON p.url = k.url
        WHERE p.page_clicks - COALESCE(k.attributed_clicks, 0) > 0
        ORDER BY darkClicks DESC
        LIMIT 50
      )
      SELECT
        (SELECT to_json({
          'totalClicks': CAST(total_clicks AS DOUBLE),
          'totalImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM page_totals) AS page_totals_json,
        (SELECT to_json({
          'attributedClicks': CAST(total_clicks AS DOUBLE),
          'attributedImpressions': CAST(total_impressions AS DOUBLE)
        }) FROM kw_totals) AS kw_totals_json,
        (SELECT to_json(list({
          'url': url,
          'totalClicks': totalClicks,
          'attributedClicks': attributedClicks,
          'darkClicks': darkClicks,
          'darkPercent': darkPercent,
          'keywordCount': keywordCount
        })) FROM page_rows) AS pages_json
    `;
    // One (start, end) pair for each of the four date-filtered CTEs above.
    const boundParams = [];
    for (let cte = 0; cte < 4; cte += 1) {
      boundParams.push(startDate, endDate);
    }
    return {
      sql,
      params: boundParams,
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      },
      extraFiles: {
        KEYWORDS: {
          table: "keywords",
          partitions: enumeratePartitions(startDate, endDate)
        },
        PAGE_KEYWORDS: {
          table: "page_keywords",
          partitions: enumeratePartitions(startDate, endDate)
        }
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const [row = {}] = arr.length > 0 ? [arr[0] ?? {}] : [];
    // JSON cells may arrive as text or already decoded; absent cells become {}.
    const asObject = (cell) => {
      if (typeof cell === "string") return JSON.parse(cell);
      return cell ?? {};
    };
    const pageTotals = asObject(row.page_totals_json);
    const kwTotals = asObject(row.kw_totals_json);
    const totalClicks = num(pageTotals.totalClicks);
    const totalImpressions = num(pageTotals.totalImpressions);
    const attributedClicks = num(kwTotals.attributedClicks);
    const attributedImpressions = num(kwTotals.attributedImpressions);
    // Clamped at zero: attributed clicks can exceed the page-level total.
    const darkClicks = Math.max(0, totalClicks - attributedClicks);
    let darkPercent = 0;
    if (totalClicks > 0) {
      darkPercent = darkClicks / totalClicks;
    }
    const results = parseJsonList$7(row.pages_json).map((page) => ({
      url: str$10(page.url),
      totalClicks: num(page.totalClicks),
      attributedClicks: num(page.attributedClicks),
      darkClicks: num(page.darkClicks),
      darkPercent: num(page.darkPercent),
      keywordCount: num(page.keywordCount)
    }));
    return {
      results,
      meta: {
        summary: {
          totalClicks,
          attributedClicks,
          darkClicks,
          darkPercent,
          totalImpressions,
          attributedImpressions
        },
        startDate,
        endDate
      }
    };
  }
});
3017
/**
 * Assert that `input` looks like a BuilderState and return it.
 *
 * The only structural check is that `input` is a non-null object owning (or
 * inheriting) a `dimensions` property that is an array.
 *
 * @param {unknown} input - Candidate value (normally `params.q`).
 * @param {string} tool - Analyzer id used as the error-message prefix.
 * @returns {object} `input`, unchanged.
 * @throws {Error} When `input` fails the check.
 */
function requireBuilderState(input, tool) {
  const looksLikeState = Boolean(input)
    && typeof input === "object"
    && "dimensions" in input
    && Array.isArray(input.dimensions);
  if (!looksLikeState) {
    throw new Error(`${tool}: params.q is required (BuilderState)`);
  }
  return input;
}
3021
/**
 * Validate an optional BuilderState.
 *
 * null/undefined mean "not provided" and yield `null`. Any other value must be
 * an object with an array-valued `dimensions` property.
 *
 * @param {unknown} input - Candidate value (e.g. `params.qc`).
 * @param {string} tool - Analyzer id used as the error-message prefix.
 * @param {string} key - Name of the params key, used in the error message.
 * @returns {object|null} `input` when valid, `null` when absent.
 * @throws {Error} When `input` is present but not shaped like a BuilderState.
 */
function optionalBuilderState(input, tool, key) {
  if (input === null || input === undefined) {
    return null;
  }
  const wellFormed = typeof input === "object"
    && "dimensions" in input
    && Array.isArray(input.dimensions);
  if (wellFormed) {
    return input;
  }
  throw new Error(`${tool}: params.${key} must be a BuilderState`);
}
3026
/**
 * Column names that `coerceNumericCols` converts to JS numbers when present
 * and non-null on a result row.
 */
const NUMERIC_METRIC_COLS = [
  // Current-period metrics.
  "clicks", "impressions", "ctr", "position",
  // Comparison-period metrics.
  "prevClicks", "prevImpressions", "prevCtr", "prevPosition",
  // Counters.
  "variantCount", "totalCount"
];
3038
/**
 * Return a shallow copy of `row` in which every column listed in
 * `NUMERIC_METRIC_COLS` that is present and non-null has been passed through
 * `Number()`. Other columns, and null/undefined metric values, are left as-is.
 *
 * @param {object} row - Result row.
 * @returns {object} New object; `row` itself is not modified.
 */
function coerceNumericCols(row) {
  const copy = { ...row };
  NUMERIC_METRIC_COLS.forEach((name) => {
    if (!(name in copy)) return;
    const value = copy[name];
    if (value != null) {
      copy[name] = Number(value);
    }
  });
  return copy;
}
3043
/**
 * Shape the rows of a "data-query" plan into `{ results, meta }`.
 *
 * With `opts.hasPrev` (comparison query) rows are only numerically coerced and
 * the total comes from the `count` extra query, falling back to the row count.
 * Without it the total is read from the first row's `totalCount` column, and
 * the window/helper columns (`totalCount`, `totalClicks`, `totalImpressions`,
 * `totalCtr`, `totalPosition`, `sum_position`) are removed from every row.
 *
 * @param {object[]} rows - Main query rows.
 * @param {object|undefined} extras - Extra query results keyed by name
 *   (`totals`, `count`, `canonicalExtras` are read here).
 * @param {{ hasPrev: boolean }} opts - Whether the plan was a comparison.
 * @returns {{ results: unknown, meta: { totalCount: number, totals: object } }}
 */
function shapeDataQuery(rows, extras, opts) {
  const shapeComparison = () => {
    const coerced = rows.map((row) => coerceNumericCols(row));
    return {
      cleaned: coerced,
      totalCount: Number(extras?.count?.[0]?.total ?? coerced.length)
    };
  };
  const shapePlain = () => {
    const head = rows[0];
    const count = Number(head?.totalCount ?? 0);
    const stripped = rows.map((raw) => {
      // Drop the helper columns; only the remaining fields reach the caller.
      const {
        totalCount: _tc,
        totalClicks: _tclk,
        totalImpressions: _timp,
        totalCtr: _tctr,
        totalPosition: _tpos,
        sum_position: _sp,
        ...rest
      } = raw;
      return coerceNumericCols(rest);
    });
    return { cleaned: stripped, totalCount: count };
  };
  const { cleaned, totalCount } = opts.hasPrev ? shapeComparison() : shapePlain();
  const totalsRow = extras?.totals?.[0] ?? {};
  const totals = {
    clicks: Number(totalsRow.clicks ?? 0),
    impressions: Number(totalsRow.impressions ?? 0),
    ctr: Number(totalsRow.ctr ?? 0),
    position: Number(totalsRow.position ?? 0)
  };
  const extrasResults = extras?.canonicalExtras
    ? [{ key: "canonicalExtras", results: extras.canonicalExtras }]
    : [];
  return {
    results: mergeExtras(cleaned, extrasResults),
    meta: {
      totalCount,
      totals
    }
  };
}
3077
/**
 * Build the execution plan for the "data-query" analyzer.
 *
 * `params.q` (required BuilderState) must not include the `date` dimension.
 * When `params.qc` is supplied the main statement is the comparison SQL and a
 * `count` extra query is appended; otherwise the optimized single-period SQL
 * is used. A `totals` extra query and any queries from `buildExtrasQueries`
 * are always included.
 *
 * @param {object} params - Analyzer params (`q`, optional `qc`, `comparisonFilter`).
 * @param {object} options - Resolver options; `options.adapter.inferTable` is called here.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `q`/`qc` are malformed or `q` includes the `date` dimension.
 */
function buildDataQueryPlan(params, options) {
  const current = requireBuilderState(params.q, "data-query");
  if (current.dimensions.includes("date")) {
    throw new Error("data-query: date dimension not supported; use data-detail");
  }
  const previous = optionalBuilderState(params.qc, "data-query", "qc");
  const totalsQuery = buildTotalsSql(current, options);
  const extraQueries = [
    { name: "totals", sql: totalsQuery.sql, params: totalsQuery.params }
  ];
  buildExtrasQueries(current, options).forEach((extra) => {
    extraQueries.push({ name: extra.key, sql: extra.sql, params: extra.params });
  });
  const tableKey = options.adapter.inferTable(current.dimensions);
  const hasPrev = Boolean(previous);
  let mainQuery;
  if (hasPrev) {
    const comparison = resolveComparisonSQL(current, previous, options, params.comparisonFilter);
    // The comparison path obtains its row total from a dedicated count query.
    extraQueries.push({
      name: "count",
      sql: comparison.countSql,
      params: comparison.countParams
    });
    mainQuery = comparison;
  } else {
    mainQuery = resolveToSQLOptimized(current, options);
  }
  return {
    tableKey,
    sql: mainQuery.sql,
    params: mainQuery.params,
    extraQueries,
    shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev })
  };
}
3117
/**
 * Build the execution plan for the "data-detail" analyzer.
 *
 * `params.q` (required BuilderState) must include the `date` dimension. The
 * plan carries the main SQL, a `totals` extra query and, when `params.qc` is
 * present, a `prevTotals` extra query. The returned `shape` callback coerces
 * metric columns, pads the series with `padTimeseries` when the filter yields
 * both a start and an end date, and attaches totals to `meta`.
 *
 * @param {object} params - Analyzer params (`q`, optional `qc`).
 * @param {object} options - Resolver options; `options.adapter.inferTable` is called here.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `q`/`qc` are malformed or `q` lacks the `date` dimension.
 */
function buildDataDetailPlan(params, options) {
  const current = requireBuilderState(params.q, "data-detail");
  if (!current.dimensions.includes("date")) {
    throw new Error("data-detail: `date` dimension is required");
  }
  const mainQuery = resolveToSQL(current, options);
  const currentTotals = buildTotalsSql(current, options);
  const previous = optionalBuilderState(params.qc, "data-detail", "qc");
  const extraQueries = [
    { name: "totals", sql: currentTotals.sql, params: currentTotals.params }
  ];
  if (previous) {
    const previousTotals = buildTotalsSql(previous, options);
    extraQueries.push({
      name: "prevTotals",
      sql: previousTotals.sql,
      params: previousTotals.params
    });
  }
  const tableKey = options.adapter.inferTable(current.dimensions);
  const { startDate: rangeStart, endDate: rangeEnd } = extractDateRange(current.filter);
  // Normalise one totals row into the four numeric metrics, defaulting to 0.
  const toTotals = (row) => ({
    clicks: Number(row.clicks ?? 0),
    impressions: Number(row.impressions ?? 0),
    ctr: Number(row.ctr ?? 0),
    position: Number(row.position ?? 0)
  });
  return {
    tableKey,
    sql: mainQuery.sql,
    params: mainQuery.params,
    extraQueries,
    shape: (rows, _params, extras) => {
      const coerced = rows.map((row) => coerceNumericCols(row));
      let daily = coerced;
      if (rangeStart && rangeEnd) {
        daily = padTimeseries(coerced, {
          startDate: rangeStart,
          endDate: rangeEnd
        });
      }
      const meta = { totals: toTotals(extras?.totals?.[0] ?? {}) };
      // previousTotals is present only when the prevTotals query produced a result set.
      if (extras?.prevTotals) {
        meta.previousTotals = toTotals(extras.prevTotals[0] ?? {});
      }
      return {
        results: daily,
        meta
      };
    }
  };
}
3172
/**
 * "data-detail" analyzer: a thin adapter around `buildDataDetailPlan` using
 * `pgResolverAdapter`. `buildSql` exposes the plan's SQL and extra queries
 * with an empty partition list and `requiresAttachedTables: true`; `reduceSql`
 * rebuilds the plan from the same params and applies its `shape` callback to
 * the rows and `ctx.extras`.
 */
const dataDetailAnalyzer = defineAnalyzer({
  id: "data-detail",
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  reduceSql(rows, params, ctx) {
    const safeRows = Array.isArray(rows) ? rows : [];
    const plan = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(safeRows, params, ctx.extras);
    // Only results and meta are forwarded, whatever else shape() returns.
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
3196
/**
 * "data-query" analyzer: a thin adapter around `buildDataQueryPlan` using
 * `pgResolverAdapter`. `buildSql` exposes the plan's SQL and extra queries
 * with an empty partition list and `requiresAttachedTables: true`; `reduceSql`
 * rebuilds the plan from the same params and applies its `shape` callback to
 * the rows and `ctx.extras`.
 */
const dataQueryAnalyzer = defineAnalyzer({
  id: "data-query",
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: {
        table: tableKey,
        partitions: []
      },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  reduceSql(rows, params, ctx) {
    const safeRows = Array.isArray(rows) ? rows : [];
    const plan = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(safeRows, params, ctx.extras);
    // Only results and meta are forwarded, whatever else shape() returns.
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
3220
/**
 * String form of a cell; null and undefined map to "".
 *
 * @param {unknown} v - Raw cell value.
 * @returns {string} `String(v)` or the empty string.
 */
function str$9(v) {
  const isMissing = v == null;
  if (isMissing) return "";
  return String(v);
}
3223
/**
 * "device-gap" analyzer.
 *
 * SQL phase: aggregates clicks / impressions / ctr / position per
 * (date, device) from the "devices" table over the requested period.
 *
 * Reduce phase: pairs the DESKTOP and MOBILE rows of each date, derives
 *   ctrGap      = desktop.ctr - mobile.ctr
 *   positionGap = mobile.position - desktop.position
 * and summarises the overall average gaps plus a trend label obtained by
 * comparing the first seven and last seven days of the series.
 */
const deviceGapAnalyzer = defineAnalyzer({
  id: "device-gap",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const sql = `
      SELECT
        date,
        device,
        ${METRIC_EXPR.clicks} AS clicks,
        ${METRIC_EXPR.impressions} AS impressions,
        ${METRIC_EXPR.ctr} AS ctr,
        ${METRIC_EXPR.position} AS position
      FROM read_parquet({{FILES}}, union_by_name = true)
      WHERE date >= ? AND date <= ?
      GROUP BY date, device
      ORDER BY date ASC
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "devices",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const { startDate, endDate } = periodOf(params);

    // Group the per-device metrics by date. Rows for any other device still
    // register their date, but contribute no metrics to either side.
    const sidesByDate = new Map();
    for (const r of Array.isArray(rows) ? rows : []) {
      const date = str$9(r.date);
      const device = str$9(r.device).toUpperCase();
      const metrics = {
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position)
      };
      const sides = sidesByDate.get(date) ?? {};
      if (device === "DESKTOP") sides.desktop = metrics;
      else if (device === "MOBILE") sides.mobile = metrics;
      sidesByDate.set(date, sides);
    }

    // A side with no row for a given date is reported as all-zero metrics.
    const zero = { clicks: 0, impressions: 0, ctr: 0, position: 0 };
    const daily = Array.from(sidesByDate)
      .sort((x, y) => x[0].localeCompare(y[0]))
      .map(([date, sides]) => {
        const desktop = sides.desktop ?? zero;
        const mobile = sides.mobile ?? zero;
        return {
          date,
          desktop,
          mobile,
          gaps: {
            ctrGap: desktop.ctr - mobile.ctr,
            positionGap: mobile.position - desktop.position
          }
        };
      });

    // Mean ctr / position gap over a run of days; an empty run averages to 0.
    const averageGaps = (days) => {
      let ctr = 0;
      let pos = 0;
      for (const day of days) {
        ctr += day.gaps.ctrGap;
        pos += day.gaps.positionGap;
      }
      if (days.length === 0) return { ctr: 0, pos: 0 };
      return { ctr: ctr / days.length, pos: pos / days.length };
    };

    const overall = averageGaps(daily);
    const firstWeek = averageGaps(daily.slice(0, 7));
    const lastWeek = averageGaps(daily.slice(Math.max(0, daily.length - 7), daily.length));

    // The trend is judged on gap magnitude: a shrinking |gap| is "improving",
    // a growing one "worsening"; changes smaller than 0.005 count as "stable".
    const trendOf = (firstVal, lastVal) => {
      const delta = Math.abs(lastVal) - Math.abs(firstVal);
      if (Math.abs(delta) < .005) return "stable";
      if (delta < 0) return "improving";
      return "worsening";
    };

    return {
      results: daily,
      meta: {
        summary: {
          avgCtrGap: overall.ctr,
          avgPositionGap: overall.pos,
          ctrGapTrend: trendOf(firstWeek.ctr, lastWeek.ctr),
          positionGapTrend: trendOf(firstWeek.pos, lastWeek.pos)
        },
        startDate,
        endDate
      }
    };
  }
});
3331
/** Returns `String(v)`, or "" when `v` is null or undefined. */
function str$8(v) {
  return String(v ?? "");
}
3334
/**
 * Normalises a JSON list column to an array.
 *
 * Arrays are returned as-is; non-empty strings are JSON-parsed and kept only
 * if the result is an array; everything else yields []. Malformed JSON
 * propagates the `JSON.parse` error.
 */
function parseJsonList$6(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
3342
/**
 * Words excluded from intent-atlas tokenisation.
 *
 * The intent-atlas analyzer interpolates each entry into its SQL wrapped in
 * single quotes without escaping, so entries must not contain a single quote.
 */
const INTENT_ATLAS_STOP_WORDS = [
  // articles and forms of "to be"
  "the", "a", "an", "is", "are", "was", "were", "be", "been",
  // prepositions and conjunctions
  "of", "to", "in", "for", "on", "and", "or", "with", "at", "by", "from", "into", "about",
  "as", "so", "than", "then", "that", "this",
  // pronouns and possessives
  "my", "your", "our", "their", "his", "her", "its", "me", "you",
  // question words and auxiliaries
  "what", "how", "why", "when", "where", "who", "which", "do", "does"
];
3390
/**
 * "intent-atlas" analyzer.
 *
 * SQL phase (table "keywords"):
 *   1. aggregates each query over the period and keeps those meeting the
 *      minimum impressions;
 *   2. splits queries into lower-cased whitespace tokens, dropping tokens
 *      shorter than 3 characters and the stop words;
 *   3. weights tokens by summed impressions (tokens under 50 are dropped);
 *   4. keys every query by its two heaviest tokens, joined with " + " in
 *      alphabetical order (queries with fewer than two surviving tokens are
 *      left out);
 *   5. aggregates the clusters, keeps those with at least `minClusterSize`
 *      queries and returns the top `limit` by impressions.
 *
 * Reduce phase: coerces the rows, truncates each cluster's keyword list to
 * 25 entries and totals impressions / keyword counts over returned clusters.
 */
const intentAtlasAnalyzer = defineAnalyzer({
  id: "intent-atlas",
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(90);
    const minQueryImpressions = params.minImpressions ?? 20;
    const minClusterSize = params.minClusterSize ?? 3;
    const limit = params.limit ?? 200;
    // Fixed floor on summed impressions for a token to take part in clustering.
    const minTokenImpressions = 50;
    const quotedStopWords = [];
    for (const word of INTENT_ATLAS_STOP_WORDS) quotedStopWords.push(`'${word}'`);
    const stopList = quotedStopWords.join(", ");
    const sql = `
      WITH queries AS (
        SELECT
          query,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
        GROUP BY query
        HAVING SUM(impressions) >= ?
      ),
      tokens AS (
        SELECT q.query, q.impressions, q.clicks, q.position,
          LOWER(t.token) AS token
        FROM queries q,
          unnest(regexp_split_to_array(LOWER(q.query), '\\s+')) AS t(token)
        WHERE LENGTH(t.token) >= 3
          AND LOWER(t.token) NOT IN (${stopList})
      ),
      token_weights AS (
        SELECT token,
          SUM(impressions) AS token_impressions,
          COUNT(DISTINCT query) AS query_count
        FROM tokens
        GROUP BY token
        HAVING SUM(impressions) >= ${Number(minTokenImpressions)}
      ),
      ranked_tokens AS (
        SELECT t.query, t.token, tw.token_impressions,
          ROW_NUMBER() OVER (
            PARTITION BY t.query
            ORDER BY tw.token_impressions DESC, t.token ASC
          ) AS rnk
        FROM tokens t
        JOIN token_weights tw USING (token)
      ),
      cluster_keys AS (
        SELECT query,
          array_to_string(list(token ORDER BY token), ' + ') AS cluster_key
        FROM ranked_tokens
        WHERE rnk <= 2
        GROUP BY query
        HAVING COUNT(*) >= 2
      ),
      clustered AS (
        SELECT q.query, q.impressions, q.clicks, q.position, ck.cluster_key
        FROM queries q
        JOIN cluster_keys ck USING (query)
      )
      SELECT
        cluster_key AS clusterKey,
        COUNT(*) AS keywordCount,
        SUM(impressions) AS totalImpressions,
        SUM(clicks) AS totalClicks,
        SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr,
        AVG(position) AS avgPosition,
        to_json(list({
          'query': query,
          'impressions': impressions,
          'clicks': clicks,
          'position': position
        } ORDER BY impressions DESC)) AS keywords
      FROM clustered
      GROUP BY cluster_key
      HAVING COUNT(*) >= ${Number(minClusterSize)}
      ORDER BY totalImpressions DESC
      LIMIT ${Number(limit)}
    `;
    return {
      sql,
      params: [startDate, endDate, minQueryImpressions],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows) {
    const clusters = [];
    let totalImpressions = 0;
    let totalKeywords = 0;
    for (const r of Array.isArray(rows) ? rows : []) {
      // Only the first 25 keywords of each cluster are carried into the result.
      const keywords = parseJsonList$6(r.keywords).slice(0, 25).map((k) => ({
        query: str$8(k.query),
        impressions: num(k.impressions),
        clicks: num(k.clicks),
        position: num(k.position)
      }));
      const cluster = {
        clusterKey: str$8(r.clusterKey),
        keywordCount: num(r.keywordCount),
        totalImpressions: num(r.totalImpressions),
        totalClicks: num(r.totalClicks),
        ctr: num(r.ctr),
        avgPosition: num(r.avgPosition),
        keywords
      };
      totalImpressions += cluster.totalImpressions;
      totalKeywords += cluster.keywordCount;
      clusters.push(cluster);
    }
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalImpressions,
        totalKeywords
      }
    };
  }
});
3509
/** Converts `v` to a string, treating null/undefined as "". */
function str$7(v) {
  if (v == null) {
    return "";
  }
  return String(v);
}
3512
/**
 * Returns `v` as an array: arrays pass through, non-empty strings are
 * JSON-parsed (non-array results become []), anything else becomes [].
 * Invalid JSON throws from `JSON.parse`.
 */
function parseJsonList$5(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const value = JSON.parse(v);
  if (Array.isArray(value)) return value;
  return [];
}
3520
/**
 * "keyword-breadth" analyzer.
 *
 * SQL phase (table "page_keywords"): counts the distinct queries per URL
 * over the period and returns a single row with four JSON columns:
 *   - distribution_json: page counts per keyword-count bucket
 *     (1, 2-5, 6-15, 16-50, 50+);
 *   - fragile_json: up to 20 pages with <= 2 keywords and >= 5 clicks,
 *     ordered by clicks descending;
 *   - authority_json: up to 20 pages with >= 20 keywords, ordered by
 *     keyword count descending;
 *   - stats_json: overall totals and averages.
 *
 * Reduce phase: decodes the JSON columns into typed results and metadata.
 *
 * Changes from the previous revision:
 *   - the ORDER BY modifier of the distribution list was attached to the
 *     scalar to_json() call; it now sits inside the list() aggregate, like
 *     every other ordered list in this file;
 *   - the fragile / authority lists are ordered explicitly inside list()
 *     instead of relying on the row order of their CTEs;
 *   - a stats_json value that decodes to null no longer throws.
 */
const keywordBreadthAnalyzer = defineAnalyzer({
  id: "keyword-breadth",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
        WITH per_page AS (
          SELECT
            url,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
          GROUP BY url
        ),
        bucketed AS (
          SELECT
            CASE
              WHEN keywordCount = 1 THEN '1'
              WHEN keywordCount BETWEEN 2 AND 5 THEN '2-5'
              WHEN keywordCount BETWEEN 6 AND 15 THEN '6-15'
              WHEN keywordCount BETWEEN 16 AND 50 THEN '16-50'
              ELSE '50+'
            END AS bucket,
            MIN(keywordCount) AS sort_key,
            CAST(COUNT(*) AS DOUBLE) AS pageCount
          FROM per_page
          GROUP BY bucket
        ),
        fragile AS (
          SELECT url, keywordCount, clicks, impressions
          FROM per_page
          WHERE keywordCount <= 2 AND clicks >= 5
          ORDER BY clicks DESC
          LIMIT 20
        ),
        authority AS (
          SELECT url, keywordCount, clicks, impressions
          FROM per_page
          WHERE keywordCount >= 20
          ORDER BY keywordCount DESC
          LIMIT 20
        ),
        stats AS (
          SELECT
            CAST(COUNT(*) AS DOUBLE) AS totalPages,
            CAST(AVG(keywordCount) AS DOUBLE) AS avgKeywordsPerPage,
            CAST(SUM(CASE WHEN keywordCount <= 2 THEN 1 ELSE 0 END) AS DOUBLE) AS fragileCount,
            CAST(SUM(CASE WHEN keywordCount >= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS authorityCount
          FROM per_page
        )
        SELECT
          (SELECT to_json(list({ 'bucket': bucket, 'pageCount': pageCount, 'sortKey': sort_key }
            ORDER BY sort_key ASC)) FROM bucketed) AS distribution_json,
          (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions }
            ORDER BY clicks DESC)) FROM fragile) AS fragile_json,
          (SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions }
            ORDER BY keywordCount DESC)) FROM authority) AS authority_json,
          (SELECT to_json({
            'totalPages': totalPages,
            'avgKeywordsPerPage': avgKeywordsPerPage,
            'fragileCount': fragileCount,
            'authorityCount': authorityCount
          }) FROM stats) AS stats_json
      `,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    // The query returns a single row; an empty result decodes to empty lists and zeros.
    const row = arr[0] ?? {};
    // Sorted again on the client so bucket order does not depend on the engine.
    const distribution = parseJsonList$5(row.distribution_json)
      .sort((a, b) => num(a.sortKey) - num(b.sortKey))
      .map((r) => ({
        bucket: str$7(r.bucket),
        pageCount: num(r.pageCount)
      }));
    const toPage = (r) => ({
      url: str$7(r.url),
      keywordCount: num(r.keywordCount),
      clicks: num(r.clicks),
      impressions: num(r.impressions)
    });
    const fragile = parseJsonList$5(row.fragile_json).map(toPage);
    const authority = parseJsonList$5(row.authority_json).map(toPage);
    // stats_json may arrive as JSON text or as an already-decoded object; a
    // missing or null value falls back to an empty object.
    const decodedStats = typeof row.stats_json === "string" ? JSON.parse(row.stats_json) : row.stats_json;
    const stats = decodedStats ?? {};
    return {
      results: distribution,
      meta: {
        fragilePages: fragile,
        authorityPages: authority,
        summary: {
          totalPages: num(stats.totalPages),
          avgKeywordsPerPage: num(stats.avgKeywordsPerPage),
          fragileCount: num(stats.fragileCount),
          authorityCount: num(stats.authorityCount)
        },
        startDate,
        endDate
      }
    };
  }
});
3629
/** String form of `v`; null and undefined map to "". */
function str$6(v) {
  const missing = v === undefined || v === null;
  return missing ? "" : String(v);
}
3632
/**
 * Coerces a value to an array. Arrays are returned unchanged; non-empty
 * strings are parsed as JSON and kept only when they decode to an array;
 * every other input produces []. `JSON.parse` errors are not caught.
 */
function parseJsonList$4(v) {
  if (Array.isArray(v)) {
    return v;
  }
  if (typeof v === "string" && v !== "") {
    const list = JSON.parse(v);
    return Array.isArray(list) ? list : [];
  }
  return [];
}
3640
/**
 * Thins a rank-ordered scatter series.
 *
 * Every point is first coerced to `{ rank, impressions, clicks, query }`.
 * Series of at most 80 points are returned whole. Longer series keep the
 * first 10 points, then keep a later point only when its rank has reached a
 * threshold that grows by 15% after each kept point.
 *
 * NOTE(review): the threshold starts at 1.15 although the first candidate is
 * the 11th point, so a run of points right after the top 10 is kept
 * back-to-back before the 15% spacing takes effect. Confirm whether the
 * threshold was meant to start from the 10th point's rank.
 */
function downsampleLogRank(points) {
  const normalized = points.map((p) => ({
    rank: num(p.rank),
    impressions: num(p.impressions),
    clicks: num(p.clicks),
    query: str$6(p.query)
  }));
  if (normalized.length <= 80) return normalized;
  const sampled = normalized.slice(0, 10);
  let threshold = 1.15;
  for (let i = 10; i < normalized.length; i++) {
    const point = normalized[i];
    if (!(point.rank >= threshold)) continue;
    sampled.push(point);
    threshold *= 1.15;
  }
  return sampled;
}
3658
/**
 * "long-tail" analyzer.
 *
 * SQL phase (table "page_keywords"): ranks each page's queries by
 * impressions, fits a linear regression of ln(impressions) on ln(rank) per
 * page (pages with fewer than 10 qualifying queries are dropped) and labels
 * the slope:
 *   slope > -0.6  -> 'flat-tail'
 *   slope > -1.2  -> 'balanced'
 *   otherwise     -> 'head-heavy'
 * Each page also carries its rank/impressions scatter as JSON.
 *
 * Reduce phase: coerces the rows, down-samples each scatter, counts the
 * fingerprints and averages the slope.
 */
const longTailAnalyzer = defineAnalyzer({
  id: "long-tail",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minQueryImpressions = params.minImpressions ?? 5;
    const limit = params.limit ?? 100;
    // Fixed minimum number of queries a page needs before a fit is attempted.
    const minQueries = 10;
    const sql = `
      WITH page_queries AS (
        SELECT
          url AS page,
          query,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.clicks} AS clicks
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY url, query
        HAVING SUM(impressions) >= ?
      ),
      ranked AS (
        SELECT
          page, query, impressions, clicks,
          ROW_NUMBER() OVER (PARTITION BY page ORDER BY impressions DESC, query ASC) AS rnk
        FROM page_queries
      ),
      log_space AS (
        SELECT *,
          LN(rnk) AS log_rank,
          LN(impressions) AS log_impr
        FROM ranked
      ),
      fit AS (
        SELECT
          page,
          COUNT(*) AS query_count,
          SUM(impressions) AS total_impressions,
          SUM(clicks) AS total_clicks,
          REGR_SLOPE(log_impr, log_rank) AS slope,
          REGR_INTERCEPT(log_impr, log_rank) AS intercept,
          REGR_R2(log_impr, log_rank) AS r2,
          MAX(impressions) AS head_impressions,
          MAX(CASE WHEN rnk = 1 THEN impressions END) / NULLIF(SUM(impressions), 0) AS head_share
        FROM log_space
        GROUP BY page
        HAVING COUNT(*) >= ${Number(minQueries)}
      ),
      scatter AS (
        SELECT
          l.page,
          to_json(list({
            'rank': l.rnk,
            'impressions': l.impressions,
            'clicks': l.clicks,
            'query': l.query
          } ORDER BY l.rnk)) AS pointsJson
        FROM log_space l
        JOIN fit f USING (page)
        GROUP BY l.page
      )
      SELECT
        f.page,
        f.query_count AS queryCount,
        f.total_impressions AS totalImpressions,
        f.total_clicks AS totalClicks,
        f.slope AS slope,
        f.intercept AS intercept,
        f.r2 AS r2,
        f.head_impressions AS headImpressions,
        f.head_share AS headShare,
        s.pointsJson AS pointsJson,
        CASE
          WHEN f.slope > -0.6 THEN 'flat-tail'
          WHEN f.slope > -1.2 THEN 'balanced'
          ELSE 'head-heavy'
        END AS fingerprint
      FROM fit f
      LEFT JOIN scatter s USING (page)
      ORDER BY f.total_impressions DESC
      LIMIT ${Number(limit)}
    `;
    return {
      sql,
      params: [startDate, endDate, minQueryImpressions],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows) {
    const inputRows = Array.isArray(rows) ? rows : [];
    const results = inputRows.map((r) => {
      const points = downsampleLogRank(parseJsonList$4(r.pointsJson));
      return {
        page: str$6(r.page),
        queryCount: num(r.queryCount),
        totalImpressions: num(r.totalImpressions),
        totalClicks: num(r.totalClicks),
        slope: num(r.slope),
        intercept: num(r.intercept),
        r2: num(r.r2),
        headImpressions: num(r.headImpressions),
        headShare: num(r.headShare),
        fingerprint: str$6(r.fingerprint),
        points
      };
    });
    // Tally pages per fingerprint and accumulate the slope for the average.
    const fingerprints = {
      "flat-tail": 0,
      "balanced": 0,
      "head-heavy": 0
    };
    let slopeSum = 0;
    for (const page of results) {
      fingerprints[page.fingerprint]++;
      slopeSum += page.slope;
    }
    const avgSlope = results.length > 0 ? slopeSum / results.length : 0;
    return {
      results,
      meta: {
        total: results.length,
        fingerprints,
        avgSlope
      }
    };
  }
});
3782
/** Stringifies a value, with null/undefined collapsing to "". */
function str$5(v) {
  if (v === undefined || v === null) return "";
  return String(v);
}
3785
/**
 * "position-distribution" analyzer.
 *
 * SQL phase (table "keywords"): for every row with impressions in the
 * period, derives `sum_position / impressions + 1` and counts, per date, how
 * many rows land in each band: <= 3, (3, 10], (10, 20] and > 20, plus the
 * daily total.
 *
 * Reduce phase: coerces each daily row and reports the number of days
 * returned together with the period.
 */
const positionDistributionAnalyzer = defineAnalyzer({
  id: "position-distribution",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const sql = `
      WITH pos AS (
        SELECT
          date,
          (sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ? AND impressions > 0
      )
      SELECT
        date,
        CAST(SUM(CASE WHEN avg_pos <= 3 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_1_3,
        CAST(SUM(CASE WHEN avg_pos > 3 AND avg_pos <= 10 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_4_10,
        CAST(SUM(CASE WHEN avg_pos > 10 AND avg_pos <= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_11_20,
        CAST(SUM(CASE WHEN avg_pos > 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_20_plus,
        CAST(COUNT(*) AS DOUBLE) AS total
      FROM pos
      GROUP BY date
      ORDER BY date ASC
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows, params) {
    const inputRows = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const results = [];
    for (const r of inputRows) {
      results.push({
        date: str$5(r.date),
        pos_1_3: num(r.pos_1_3),
        pos_4_10: num(r.pos_4_10),
        pos_11_20: num(r.pos_11_20),
        pos_20_plus: num(r.pos_20_plus),
        total: num(r.total)
      });
    }
    return {
      results,
      meta: {
        // Number of daily rows returned, not the sum of the per-day totals.
        total: inputRows.length,
        startDate,
        endDate
      }
    };
  }
});
3836
/** `String(v)` with null and undefined normalised to the empty string. */
function str$4(v) {
  return String(v ?? "");
}
3839
/**
 * "position-volatility" analyzer.
 *
 * SQL phase (table "page_keywords"):
 *   - aggregates impressions and position per (page, query, date);
 *   - rolls those up per (page, date): impression-weighted average position,
 *     population stddev of the query positions, best / worst position;
 *     days below the minimum impressions are dropped;
 *   - defines daily volatility as that stddev plus the absolute change in
 *     average position versus the page's previous retained day;
 *   - keeps the `topN` pages by average volatility among pages with at least
 *     the required number of days, and returns their daily rows.
 *
 * Reduce phase: groups the daily rows per page, collects every distinct date
 * and reports the highest peak volatility.
 *
 * The minimum day count is read from `params.minWeeksWithData` (default 7).
 */
const positionVolatilityAnalyzer = defineAnalyzer({
  id: "position-volatility",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const topN = params.topN ?? 30;
    const minDayImpressions = params.minImpressions ?? 10;
    const minDays = params.minWeeksWithData ?? 7;
    const sql = `
      WITH query_day AS (
        SELECT
          url AS page,
          query,
          date,
          ${METRIC_EXPR.impressions} AS q_impressions,
          ${METRIC_EXPR.position} AS q_position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY url, query, date
        HAVING SUM(impressions) >= 1
      ),
      daily AS (
        SELECT
          page, date,
          COUNT(*) AS query_count,
          SUM(q_impressions) AS day_impressions,
          SUM(q_position * q_impressions) / NULLIF(SUM(q_impressions), 0) AS avg_position,
          COALESCE(STDDEV_POP(q_position), 0.0) AS pos_stddev,
          MIN(q_position) AS best_position,
          MAX(q_position) AS worst_position
        FROM query_day
        GROUP BY page, date
        HAVING SUM(q_impressions) >= ?
      ),
      with_shift AS (
        SELECT *,
          LAG(avg_position) OVER (PARTITION BY page ORDER BY date) AS prev_position,
          COALESCE(
            ABS(avg_position - LAG(avg_position) OVER (PARTITION BY page ORDER BY date)),
            0.0
          ) AS dod_shift
        FROM daily
      ),
      scored AS (
        SELECT *,
          pos_stddev + dod_shift AS volatility
        FROM with_shift
      ),
      top_pages AS (
        SELECT page,
          SUM(day_impressions) AS total_impressions,
          AVG(volatility) AS avg_volatility,
          MAX(volatility) AS peak_volatility,
          COUNT(*) AS days_with_data
        FROM scored
        GROUP BY page
        HAVING COUNT(*) >= ?
        ORDER BY avg_volatility DESC
        LIMIT ${Number(topN)}
      )
      SELECT
        s.page,
        strftime(s.date, '%Y-%m-%d') AS date,
        s.query_count AS queryCount,
        s.day_impressions AS dayImpressions,
        s.avg_position AS avgPosition,
        s.pos_stddev AS posStddev,
        s.best_position AS bestPosition,
        s.worst_position AS worstPosition,
        s.dod_shift AS dodShift,
        s.volatility AS volatility,
        t.avg_volatility AS pageAvgVolatility,
        t.peak_volatility AS pagePeakVolatility,
        t.total_impressions AS pageTotalImpressions
      FROM scored s
      JOIN top_pages t USING (page)
      ORDER BY t.avg_volatility DESC, s.date ASC
    `;
    return {
      sql,
      params: [startDate, endDate, minDayImpressions, minDays],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows) {
    const pagesByUrl = new Map();
    const seenDates = new Set();
    for (const r of Array.isArray(rows) ? rows : []) {
      const page = str$4(r.page);
      const date = str$4(r.date);
      seenDates.add(date);
      // The page-level aggregates repeat on every row; take them from the first one seen.
      let entry = pagesByUrl.get(page);
      if (entry == null) {
        entry = {
          page,
          avgVolatility: num(r.pageAvgVolatility),
          peakVolatility: num(r.pagePeakVolatility),
          totalImpressions: num(r.pageTotalImpressions),
          days: []
        };
      }
      entry.days.push({
        date,
        queryCount: num(r.queryCount),
        dayImpressions: num(r.dayImpressions),
        avgPosition: num(r.avgPosition),
        posStddev: num(r.posStddev),
        bestPosition: num(r.bestPosition),
        worstPosition: num(r.worstPosition),
        dodShift: num(r.dodShift),
        volatility: num(r.volatility)
      });
      pagesByUrl.set(page, entry);
    }
    const pages = Array.from(pagesByUrl.values()).sort((a, b) => b.avgVolatility - a.avgVolatility);
    const dates = Array.from(seenDates).sort();
    let maxVolatility = 0;
    for (const p of pages) maxVolatility = Math.max(maxVolatility, p.peakVolatility);
    return {
      results: pages,
      meta: {
        total: pages.length,
        dates,
        maxVolatility
      }
    };
  }
});
3972
/** Returns "" for null/undefined, otherwise the string form of `v`. */
function str$3(v) {
  if (v == null) {
    return "";
  }
  return String(v);
}
3975
/**
 * Array-or-JSON-text to array. Arrays are passed through; a non-empty string
 * is parsed with `JSON.parse` (which may throw) and used only when it decodes
 * to an array; all other inputs give [].
 */
function parseJsonList$3(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string") return [];
  if (v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
3983
/**
 * Resolves the comparison ("previous") period for the query-migration analyzer.
 *
 * When both `params.prevStartDate` and `params.prevEndDate` are supplied they
 * are used verbatim. Otherwise, including when only one of them is supplied,
 * both are derived: the previous period ends the day before the current one
 * starts and spans the same number of days.
 *
 * @param {object} params analyzer params (reads `prevStartDate` / `prevEndDate`)
 * @param {{ startDate: string, endDate: string }} cur the current period
 * @returns {{ startDate: string, endDate: string }} the previous period
 */
function queryMigrationPreviousPeriod(params, cur) {
  const { prevStartDate, prevEndDate } = params;
  if (prevStartDate != null && prevEndDate != null) {
    return { startDate: prevStartDate, endDate: prevEndDate };
  }
  const curStartMs = new Date(cur.startDate).getTime();
  const span = new Date(cur.endDate).getTime() - curStartMs;
  const prevEndMs = curStartMs - MS_PER_DAY;
  return {
    startDate: toIsoDate(new Date(prevEndMs - span)),
    endDate: toIsoDate(new Date(prevEndMs))
  };
}
/**
 * "query-migration" analyzer.
 *
 * SQL phase (table "page_keywords", current and previous partitions):
 *   - `cur` / `prev`: (query, page) pairs meeting the impressions threshold
 *     in each period;
 *   - `lost`: pairs present in `prev` but not in `cur`; `gained`: the reverse;
 *   - `matched`: a lost pair is linked to a gained pair on a different page
 *     when the queries are identical ('exact') or within Levenshtein distance
 *     2 ('fuzzy'); the absorbed impressions are the smaller of the two sides;
 *   - `edges`: matches aggregated per (source page, target page), ordered by
 *     absorbed impressions and capped at `limit`.
 *
 * Reduce phase: coerces the edges (keeping at most 8 examples each), sums the
 * outgoing / incoming weight per page into `nodes`, and reports both periods.
 *
 * Both phases resolve the previous period through
 * `queryMigrationPreviousPeriod`, so the SQL bind params and the reported
 * metadata cannot drift apart.
 */
const queryMigrationAnalyzer = defineAnalyzer({
  id: "query-migration",
  buildSql(params) {
    const cur = periodOf(params);
    const { startDate: prevStart, endDate: prevEnd } = queryMigrationPreviousPeriod(params, cur);
    const minImpressions = params.minImpressions ?? 20;
    const limit = params.limit ?? 200;
    // Maximum edit distance, also used as a cheap length-difference prefilter.
    const maxLevenshtein = 2;
    return {
      sql: `
        WITH cur AS (
          SELECT query, url AS page,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        ),
        prev AS (
          SELECT query, url AS page,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        ),
        lost AS (
          SELECT p.page AS source_page, p.query AS source_query, p.impressions AS source_impressions
          FROM prev p
          LEFT JOIN cur c ON p.page = c.page AND p.query = c.query
          WHERE c.query IS NULL
        ),
        gained AS (
          SELECT c.page AS target_page, c.query AS target_query, c.impressions AS target_impressions
          FROM cur c
          LEFT JOIN prev p ON p.page = c.page AND p.query = c.query
          WHERE p.query IS NULL
        ),
        matched AS (
          SELECT
            l.source_page, l.source_query, l.source_impressions,
            g.target_page, g.target_query, g.target_impressions,
            CASE
              WHEN l.source_query = g.target_query THEN 'exact'
              ELSE 'fuzzy'
            END AS match_type,
            LEAST(l.source_impressions, g.target_impressions) AS absorbed_impressions
          FROM lost l
          JOIN gained g
            ON l.source_page <> g.target_page
            AND ABS(LENGTH(l.source_query) - LENGTH(g.target_query)) <= ${maxLevenshtein}
            AND (
              l.source_query = g.target_query
              OR levenshtein(l.source_query, g.target_query) <= ${maxLevenshtein}
            )
        ),
        edges AS (
          SELECT
            source_page, target_page,
            SUM(absorbed_impressions) AS weight,
            COUNT(*) AS query_count,
            SUM(CASE WHEN match_type = 'exact' THEN 1 ELSE 0 END) AS exact_count,
            to_json(list({
              'sourceQuery': source_query,
              'targetQuery': target_query,
              'absorbed': absorbed_impressions,
              'matchType': match_type
            } ORDER BY absorbed_impressions DESC)) AS examplesJson
          FROM matched
          GROUP BY source_page, target_page
        )
        SELECT *
        FROM edges
        ORDER BY weight DESC
        LIMIT ${Number(limit)}
      `,
      params: [
        cur.startDate,
        cur.endDate,
        minImpressions,
        prevStart,
        prevEnd,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "page_keywords",
        partitions: enumeratePartitions(prevStart, prevEnd)
      }
    };
  },
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const cur = periodOf(params);
    const previous = queryMigrationPreviousPeriod(params, cur);
    const edges = arr.map((r) => ({
      sourcePage: str$3(r.source_page),
      targetPage: str$3(r.target_page),
      weight: num(r.weight),
      queryCount: num(r.query_count),
      exactCount: num(r.exact_count),
      // Every match is either exact or fuzzy, so fuzzy is the remainder.
      fuzzyCount: num(r.query_count) - num(r.exact_count),
      examples: parseJsonList$3(r.examplesJson).slice(0, 8).map((e) => ({
        sourceQuery: str$3(e.sourceQuery),
        targetQuery: str$3(e.targetQuery),
        absorbed: num(e.absorbed),
        matchType: str$3(e.matchType)
      }))
    }));
    // Per-page totals of absorbed impressions flowing out of and into the page.
    const nodeAgg = new Map();
    const nodeFor = (url) => {
      let node = nodeAgg.get(url);
      if (node == null) {
        node = { url, outgoing: 0, incoming: 0 };
        nodeAgg.set(url, node);
      }
      return node;
    };
    let totalAbsorbed = 0;
    for (const e of edges) {
      nodeFor(e.sourcePage).outgoing += e.weight;
      nodeFor(e.targetPage).incoming += e.weight;
      totalAbsorbed += e.weight;
    }
    return {
      results: edges,
      meta: {
        total: edges.length,
        totalAbsorbed,
        period: {
          current: cur,
          previous
        },
        nodes: [...nodeAgg.values()]
      }
    };
  }
});
4153
/** Coerces to string; null and undefined yield "". */
function str$2(v) {
  const absent = v === null || v === undefined;
  return absent ? "" : String(v);
}
4156
/**
 * Interprets a flag value: only `true`, the number `1` and the string
 * "true" count as true; every other value is false.
 */
function bool(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
4159
/**
 * Reads a list that may arrive as an array or as JSON text. Arrays are
 * returned untouched; non-empty strings are parsed (errors from `JSON.parse`
 * propagate) and accepted only if they decode to an array; otherwise [].
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) return v;
  const parseable = typeof v === "string" && v.length > 0;
  const decoded = parseable ? JSON.parse(v) : null;
  return Array.isArray(decoded) ? decoded : [];
}
4167
+ const stlDecomposeAnalyzer = defineAnalyzer({
4168
+ id: "stl-decompose",
4169
+ buildSql(params) {
4170
+ const endDate = params.endDate ?? defaultEndDate();
4171
+ const startDate = params.startDate ?? daysAgo(93);
4172
+ const minImpressions = params.minImpressions ?? 100;
4173
+ const minDays = 21;
4174
+ const metric = params.metric === "clicks" ? "clicks" : "impressions";
4175
+ const limit = params.limit ?? 100;
4176
+ return {
4177
+ sql: `
4178
+ WITH daily AS (
4179
+ SELECT
4180
+ query,
4181
+ url AS page,
4182
+ date,
4183
+ ${METRIC_EXPR.clicks} AS clicks,
4184
+ ${METRIC_EXPR.impressions} AS impressions,
4185
+ CAST(SUM(${metric}) AS DOUBLE) AS observed
4186
+ FROM read_parquet({{FILES}}, union_by_name = true)
4187
+ WHERE date >= ? AND date <= ?
4188
+ AND query IS NOT NULL AND query <> ''
4189
+ AND url IS NOT NULL AND url <> ''
4190
+ GROUP BY query, url, date
4191
+ ),
4192
+ entity_stats AS (
4193
+ SELECT query, page,
4194
+ COUNT(*) AS days,
4195
+ SUM(impressions) AS total_impressions
4196
+ FROM daily
4197
+ GROUP BY query, page
4198
+ HAVING COUNT(*) >= ${Number(minDays)}
4199
+ AND SUM(impressions) >= ?
4200
+ ),
4201
+ filtered AS (
4202
+ SELECT d.*
4203
+ FROM daily d
4204
+ JOIN entity_stats e USING (query, page)
4205
+ ),
4206
+ trended AS (
4207
+ SELECT *,
4208
+ CASE
4209
+ WHEN COUNT(*) OVER w = 7
4210
+ THEN AVG(observed) OVER w
4211
+ ELSE NULL
4212
+ END AS trend
4213
+ FROM filtered
4214
+ WINDOW w AS (
4215
+ PARTITION BY query, page
4216
+ ORDER BY date
4217
+ ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
4218
+ )
4219
+ ),
4220
+ detrended AS (
4221
+ SELECT *,
4222
+ observed - trend AS detrended,
4223
+ dayofweek(date) AS dow
4224
+ FROM trended
4225
+ ),
4226
+ seasonal_raw AS (
4227
+ SELECT *,
4228
+ AVG(detrended) OVER (PARTITION BY query, page, dow) AS seasonal_dow
4229
+ FROM detrended
4230
+ ),
4231
+ seasonal_centered AS (
4232
+ SELECT *,
4233
+ seasonal_dow - AVG(seasonal_dow) OVER (PARTITION BY query, page) AS seasonal
4234
+ FROM seasonal_raw
4235
+ ),
4236
+ residualed AS (
4237
+ SELECT *,
4238
+ CASE
4239
+ WHEN trend IS NULL OR seasonal IS NULL THEN NULL
4240
+ ELSE observed - trend - seasonal
4241
+ END AS residual
4242
+ FROM seasonal_centered
4243
+ ),
4244
+ scored AS (
4245
+ SELECT *,
4246
+ STDDEV_POP(residual) OVER (PARTITION BY query, page) AS resid_std,
4247
+ CASE
4248
+ WHEN residual IS NOT NULL
4249
+ AND STDDEV_POP(residual) OVER (PARTITION BY query, page) > 0
4250
+ AND ABS(residual) > 2.0 * STDDEV_POP(residual) OVER (PARTITION BY query, page)
4251
+ THEN true ELSE false
4252
+ END AS anomaly
4253
+ FROM residualed
4254
+ ),
4255
+ per_entity AS (
4256
+ SELECT query, page,
4257
+ COUNT(*) AS days,
4258
+ SUM(impressions) AS total_impressions,
4259
+ VAR_POP(detrended) AS var_detrended,
4260
+ VAR_POP(seasonal) AS var_seasonal,
4261
+ VAR_POP(residual) AS var_residual,
4262
+ COUNT(*) FILTER (WHERE anomaly) AS residual_anomalies,
4263
+ REGR_SLOPE(observed, epoch(date) / 86400.0) AS trend_slope
4264
+ FROM scored
4265
+ GROUP BY query, page
4266
+ ),
4267
+ series AS (
4268
+ SELECT query, page,
4269
+ to_json(list({
4270
+ 'date': strftime(date, '%Y-%m-%d'),
4271
+ 'observed': observed,
4272
+ 'trend': trend,
4273
+ 'seasonal': seasonal,
4274
+ 'residual': residual,
4275
+ 'anomaly': anomaly
4276
+ } ORDER BY date)) AS seriesJson
4277
+ FROM scored
4278
+ GROUP BY query, page
4279
+ )
4280
+ SELECT
4281
+ e.query AS keyword,
4282
+ e.page,
4283
+ CAST(e.total_impressions AS DOUBLE) AS totalImpressions,
4284
+ CAST(e.days AS DOUBLE) AS days,
4285
+ CASE
4286
+ WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
4287
+ ELSE LEAST(e.var_seasonal / NULLIF(e.var_detrended, 0), 1.0)
4288
+ END AS seasonalStrength,
4289
+ CASE
4290
+ WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
4291
+ ELSE GREATEST(0.0, 1.0 - e.var_residual / NULLIF(e.var_detrended, 0))
4292
+ END AS trendStrength,
4293
+ CAST(e.residual_anomalies AS DOUBLE) AS residualAnomalies,
4294
+ COALESCE(e.trend_slope, 0.0) AS trendSlope,
4295
+ s.seriesJson
4296
+ FROM per_entity e
4297
+ LEFT JOIN series s USING (query, page)
4298
+ ORDER BY seasonalStrength DESC, ABS(COALESCE(e.trend_slope, 0.0)) DESC
4299
+ LIMIT ${Number(limit)}
4300
+ `,
4301
+ params: [
4302
+ startDate,
4303
+ endDate,
4304
+ minImpressions
4305
+ ],
4306
+ current: {
4307
+ table: "page_keywords",
4308
+ partitions: enumeratePartitions(startDate, endDate)
4309
+ }
4310
+ };
4311
+ },
4312
+ reduceSql(rows, params) {
4313
+ const arr = Array.isArray(rows) ? rows : [];
4314
+ const metric = params.metric === "clicks" ? "clicks" : "impressions";
4315
+ const results = arr.map((r) => ({
4316
+ keyword: str$2(r.keyword),
4317
+ page: str$2(r.page),
4318
+ totalImpressions: num(r.totalImpressions),
4319
+ days: num(r.days),
4320
+ seasonalStrength: num(r.seasonalStrength),
4321
+ trendStrength: num(r.trendStrength),
4322
+ residualAnomalies: num(r.residualAnomalies),
4323
+ trendSlope: num(r.trendSlope),
4324
+ series: parseJsonList$2(r.seriesJson).map((s) => ({
4325
+ date: str$2(s.date),
4326
+ observed: num(s.observed),
4327
+ trend: s.trend == null ? null : num(s.trend),
4328
+ seasonal: s.seasonal == null ? null : num(s.seasonal),
4329
+ residual: s.residual == null ? null : num(s.residual),
4330
+ anomaly: bool(s.anomaly)
4331
+ }))
4332
+ }));
4333
+ return {
4334
+ results,
4335
+ meta: {
4336
+ total: results.length,
4337
+ metric,
4338
+ avgSeasonalStrength: results.length > 0 ? results.reduce((a, r) => a + r.seasonalStrength, 0) / results.length : 0
4339
+ }
4340
+ };
4341
+ }
4342
+ });
4343
/**
 * Coerce a value to a string, treating `null` and `undefined` as empty.
 *
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` when `v` is null or undefined, otherwise `String(v)`.
 */
function str$1(v) {
	if (v === null || v === void 0) return "";
	return String(v);
}
4346
/**
 * Normalize a value that should hold a JSON list into an array.
 *
 * Arrays are returned as-is (same reference). Non-empty strings are parsed
 * with `JSON.parse`; the parsed value is returned only when it is an array.
 * Everything else yields an empty array.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {unknown[]} The list, or `[]` when `v` does not describe one.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$1(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
4354
/**
 * "survival" analyzer: how long (query, url) pairs stay in the top 10.
 *
 * The SQL aggregates rows per query/url/date, keeps days meeting the
 * impressions threshold, and marks a day as "in top 10" when its position is
 * <= 10. Consecutive in-top-10 observations form an episode whose tenure is
 * the inclusive day span between its first and last date. An episode is
 * flagged as censored when it ends within two days of the latest observed
 * date. Episodes are grouped into cohorts by the first URL path segment
 * ('home' for the root path) and also pooled under the '__all__' cohort; a
 * product-limit survival curve is then computed per cohort.
 *
 * Params read: `startDate` (default `daysAgo(183)`), `endDate` (default
 * `defaultEndDate()`), `minImpressions` (default 5).
 */
const survivalAnalyzer = defineAnalyzer({
	id: "survival",
	buildSql(params) {
		const endDate = params.endDate ?? defaultEndDate();
		const startDate = params.startDate ?? daysAgo(183);
		const minImpressions = params.minImpressions ?? 5;
		// Positional placeholders, in order: start date, end date, min impressions.
		const sql = `
WITH daily AS (
SELECT
query,
url,
date,
${METRIC_EXPR.clicks} AS day_clicks,
${METRIC_EXPR.impressions} AS day_impressions,
${METRIC_EXPR.position} AS day_position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url, date
HAVING SUM(impressions) >= ?
),
classified AS (
SELECT *,
(day_position <= 10) AS in_top10
FROM daily
),
transitions AS (
SELECT *,
CASE
WHEN in_top10 AND (LAG(in_top10) OVER w IS NULL OR NOT LAG(in_top10) OVER w)
THEN 1 ELSE 0
END AS is_entry
FROM classified
WINDOW w AS (PARTITION BY query, url ORDER BY date)
),
run_ids AS (
SELECT *,
SUM(is_entry) OVER (PARTITION BY query, url ORDER BY date) AS run_id
FROM transitions
WHERE in_top10
),
window_bounds AS (
SELECT MIN(date) AS window_start, MAX(date) AS window_end FROM daily
),
episodes_raw AS (
SELECT
query, url, run_id,
MIN(date) AS entry_date,
MAX(date) AS exit_date,
DATEDIFF('day', MIN(date), MAX(date)) + 1 AS tenure
FROM run_ids
GROUP BY query, url, run_id
),
episodes AS (
SELECT
e.query, e.url, e.run_id, e.entry_date, e.exit_date, e.tenure,
(e.exit_date >= wb.window_end - INTERVAL 2 DAY) AS censored,
CASE
WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?(/[^/?#]*)', 1) = '/' OR e.url = '/'
THEN 'home'
WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1) = ''
THEN 'home'
ELSE regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1)
END AS cohort
FROM episodes_raw e
CROSS JOIN window_bounds wb
),
episodes_all AS (
SELECT query, url, tenure, censored, cohort FROM episodes
UNION ALL
SELECT query, url, tenure, censored, '__all__' AS cohort FROM episodes
),
cohort_totals AS (
SELECT cohort, COUNT(*) AS n_total
FROM episodes_all
GROUP BY cohort
),
events AS (
SELECT
cohort,
tenure,
COUNT(*) FILTER (WHERE NOT censored) AS d_t,
COUNT(*) AS n_ending_at_t
FROM episodes_all
GROUP BY cohort, tenure
),
km AS (
SELECT
e.cohort,
e.tenure,
e.d_t,
e.n_ending_at_t,
SUM(e.n_ending_at_t) OVER (PARTITION BY e.cohort ORDER BY e.tenure DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS at_risk
FROM events e
),
km_surv AS (
SELECT
cohort, tenure, d_t, at_risk,
EXP(SUM(LN(GREATEST(1.0 - CAST(d_t AS DOUBLE) / NULLIF(at_risk, 0), 1e-9)))
OVER (PARTITION BY cohort ORDER BY tenure
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) AS survival
FROM km
),
curve_agg AS (
SELECT
cohort,
to_json(list({
'tenure': tenure,
'survival': survival,
'atRisk': at_risk,
'events': d_t
} ORDER BY tenure)) AS curveJson
FROM km_surv
GROUP BY cohort
),
cohort_stats AS (
SELECT
ea.cohort,
COUNT(*) AS episode_count,
AVG(CASE WHEN ea.censored THEN 1.0 ELSE 0.0 END) AS censoring_rate
FROM episodes_all ea
GROUP BY ea.cohort
)
SELECT
cs.cohort,
cs.episode_count AS episodeCount,
cs.censoring_rate AS censoringRate,
ca.curveJson
FROM cohort_stats cs
LEFT JOIN curve_agg ca USING (cohort)
ORDER BY cs.cohort
`;
		return {
			sql,
			params: [startDate, endDate, minImpressions],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	reduceSql(rows, params) {
		const list = Array.isArray(rows) ? rows : [];
		const endDate = params.endDate ?? defaultEndDate();
		const startDate = params.startDate ?? daysAgo(183);
		// Inclusive number of days between the resolved start and end dates.
		const spanMs = new Date(endDate).getTime() - new Date(startDate).getTime();
		const windowDays = Math.round(spanMs / MS_PER_DAY) + 1;
		const results = list.map((row) => {
			const curve = parseJsonList$1(row.curveJson).map((point) => ({
				tenure: num(point.tenure),
				survival: num(point.survival),
				atRisk: num(point.atRisk),
				events: num(point.events)
			}));
			// Median tenure: first point where survival falls to 0.5 or below,
			// linearly interpolated against the preceding point when there is one.
			let medianTenure = 0;
			const crossing = curve.findIndex((point) => point.survival <= .5);
			if (crossing === 0) medianTenure = curve[0].tenure;
			else if (crossing > 0) {
				const before = curve[crossing - 1];
				const after = curve[crossing];
				const drop = before.survival - after.survival;
				const frac = drop > 0 ? (before.survival - .5) / drop : 0;
				medianTenure = before.tenure + frac * (after.tenure - before.tenure);
			}
			// Survival never reached 0.5: fall back to the longest tenure on the curve.
			const tail = curve[curve.length - 1];
			if (medianTenure === 0 && tail && tail.survival > .5) medianTenure = tail.tenure;
			return {
				cohort: str$1(row.cohort),
				episodeCount: num(row.episodeCount),
				censoringRate: num(row.censoringRate),
				medianTenure,
				curve
			};
		});
		const pooled = results.find((entry) => entry.cohort === "__all__");
		const namedCohorts = results.filter((entry) => entry.cohort !== "__all__");
		return {
			results,
			meta: {
				totalEpisodes: pooled?.episodeCount ?? 0,
				cohortCount: namedCohorts.length,
				windowDays
			}
		};
	}
});
4546
/**
 * Stringify a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {unknown} v - Value to convert.
 * @returns {string} `""` for null/undefined, otherwise `String(v)`.
 */
function str(v) {
	if (v === null || v === void 0) return "";
	return String(v);
}
4549
/**
 * Return `v` as a list: arrays pass through unchanged, non-empty strings are
 * JSON-parsed and kept only if the result is an array, anything else is `[]`.
 *
 * @param {unknown} v - An array, a JSON-encoded string, or any other value.
 * @returns {unknown[]} The resulting list (possibly empty).
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList(v) {
	if (Array.isArray(v)) return v;
	const isNonEmptyString = typeof v === "string" && v.length > 0;
	if (!isNonEmptyString) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
4557
/**
 * "trends" analyzer: weekly click trajectory per page or per keyword.
 *
 * The SQL buckets rows by entity and week, keeps entities meeting the
 * impressions and weeks-with-data thresholds, and derives for each entity a
 * regression slope of clicks over the week index plus a growth ratio
 * (second-half clicks over first-half clicks; 10.0 when growth starts from
 * zero, 1.0 when both halves have no clicks). Each entity is labelled
 * 'accelerating', 'growing', 'cratering', 'declining' or 'steady'.
 *
 * Params read: `weeks` (default 28), `endDate` (default `defaultEndDate()`),
 * `startDate` (default: `weeks * 7 - 1` days before `endDate`),
 * `minImpressions` (default 100), `minWeeksWithData` (default
 * `max(2, floor(weeks / 4))`), `limit` (default 500), and `dimension`
 * ("keywords", anything else means pages).
 */
const trendsAnalyzer = defineAnalyzer({
	id: "trends",
	buildSql(params) {
		const weeks = params.weeks ?? 28;
		const endDate = params.endDate || defaultEndDate();
		const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
		const minImpressions = params.minImpressions ?? 100;
		const minWeeksWithData = params.minWeeksWithData ?? Math.max(2, Math.floor(weeks / 4));
		const limit = params.limit ?? 500;
		const byKeyword = params.dimension === "keywords";
		const table = byKeyword ? "keywords" : "pages";
		// Column that identifies the entity in the selected table.
		const entityColumn = byKeyword ? "query" : "url";
		// Positional placeholders, in order: start date, end date,
		// min impressions, min weeks with data. LIMIT is inlined as a number.
		const sql = `
WITH bucketed AS (
SELECT
${entityColumn} AS entity,
date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
SUM(sum_position) AS sum_position_sum
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY entity, week
),
with_meta AS (
SELECT
entity, week, clicks, impressions, sum_position_sum,
ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1 AS week_idx,
COUNT(*) OVER (PARTITION BY entity) AS n_weeks,
(ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1)
< (COUNT(*) OVER (PARTITION BY entity) / 2) AS is_first_half
FROM bucketed
),
agg AS (
SELECT
entity,
SUM(clicks) AS totalClicks,
SUM(impressions) AS totalImpressions,
any_value(n_weeks) AS weeksWithData,
COALESCE(regr_slope(clicks, CAST(week_idx AS DOUBLE)), 0.0) AS slope,
SUM(CASE WHEN is_first_half THEN clicks ELSE 0 END) AS firstHalfClicks,
SUM(CASE WHEN NOT is_first_half THEN clicks ELSE 0 END) AS secondHalfClicks,
SUM(sum_position_sum) / NULLIF(SUM(impressions), 0) + 1 AS avgPosition,
to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM with_meta
GROUP BY entity
HAVING SUM(impressions) >= ? AND any_value(n_weeks) >= ?
),
classified AS (
SELECT
*,
CASE
WHEN firstHalfClicks = 0 AND secondHalfClicks > 0 THEN 10.0
WHEN firstHalfClicks = 0 THEN 1.0
ELSE secondHalfClicks / firstHalfClicks
END AS growthRatio
FROM agg
)
SELECT
entity,
totalClicks,
totalImpressions,
weeksWithData,
slope,
growthRatio,
avgPosition,
CASE
WHEN growthRatio >= 1.5 AND slope > 0 THEN 'accelerating'
WHEN growthRatio >= 1.1 AND slope >= 0 THEN 'growing'
WHEN growthRatio < 0.5 THEN 'cratering'
WHEN growthRatio < 0.9 AND slope < 0 THEN 'declining'
ELSE 'steady'
END AS trend,
seriesJson
FROM classified
ORDER BY
CASE
WHEN growthRatio >= 1.5 AND slope > 0 THEN 0
WHEN growthRatio < 0.5 THEN 1
WHEN growthRatio >= 1.1 AND slope >= 0 THEN 2
WHEN growthRatio < 0.9 AND slope < 0 THEN 3
ELSE 4
END,
ABS(growthRatio - 1) DESC,
totalClicks DESC
LIMIT ${Number(limit)}
`;
		return {
			sql,
			params: [startDate, endDate, minImpressions, minWeeksWithData],
			current: {
				table,
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	reduceSql(rows, params) {
		const list = Array.isArray(rows) ? rows : [];
		const weeks = params.weeks ?? 28;
		const endDate = params.endDate || defaultEndDate();
		const startDate = params.startDate || toIsoDate(/* @__PURE__ */ new Date(Date.parse(endDate) - (weeks * 7 - 1) * MS_PER_DAY));
		const dim = params.dimension === "keywords" ? "keywords" : "pages";
		// Result rows expose the entity as `query` for keywords and `page` for pages.
		const entityKey = dim === "keywords" ? "query" : "page";
		const results = list.map((row) => {
			const series = parseJsonList(row.seriesJson).map((point) => ({
				week: str(point.week),
				clicks: num(point.clicks),
				impressions: num(point.impressions)
			}));
			return {
				[entityKey]: str(row.entity),
				totalClicks: num(row.totalClicks),
				totalImpressions: num(row.totalImpressions),
				weeksWithData: num(row.weeksWithData),
				slope: num(row.slope),
				growthRatio: num(row.growthRatio),
				avgPosition: num(row.avgPosition),
				trend: str(row.trend),
				series
			};
		});
		// Tally of results per trend label; known labels start at zero.
		const counts = {
			accelerating: 0,
			growing: 0,
			steady: 0,
			declining: 0,
			cratering: 0
		};
		results.forEach((entry) => {
			counts[entry.trend] = (counts[entry.trend] ?? 0) + 1;
		});
		return {
			results,
			meta: {
				total: results.length,
				dimension: dim,
				weeks: Number(weeks),
				startDate,
				endDate,
				counts
			}
		};
	}
});
1942
4705
/**
 * Default analyzer registry: the row-based analyzers from `ROW_ANALYZERS`
 * plus the `.sql` variant of every analyzer listed below.
 */
const defaultAnalyzerRegistry = createAnalyzerRegistry({
	rows: ROW_ANALYZERS,
	sql: [
		bayesianCtrAnalyzer,
		bipartitePagerankAnalyzer,
		brandAnalyzer,
		cannibalizationAnalyzer,
		changePointAnalyzer,
		clusteringAnalyzer,
		concentrationAnalyzer,
		contentVelocityAnalyzer,
		ctrAnomalyAnalyzer,
		ctrCurveAnalyzer,
		darkTrafficAnalyzer,
		dataDetailAnalyzer,
		dataQueryAnalyzer,
		decayAnalyzer,
		deviceGapAnalyzer,
		intentAtlasAnalyzer,
		keywordBreadthAnalyzer,
		longTailAnalyzer,
		moversAnalyzer,
		opportunityAnalyzer,
		positionDistributionAnalyzer,
		positionVolatilityAnalyzer,
		queryMigrationAnalyzer,
		seasonalityAnalyzer,
		stlDecomposeAnalyzer,
		strikingDistanceAnalyzer,
		survivalAnalyzer,
		trendsAnalyzer,
		zeroClickAnalyzer
	].map((analyzer) => analyzer.sql)
});
1957
4739
  export { defaultAnalyzerRegistry };