@gscdump/analysis 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/analyzer/index.d.mts +893 -0
- package/dist/analyzer/index.mjs +4944 -0
- package/dist/default-registry.d.mts +93 -0
- package/dist/default-registry.mjs +1957 -0
- package/dist/index.d.mts +620 -0
- package/dist/index.mjs +2873 -0
- package/dist/period/index.d.mts +57 -0
- package/dist/period/index.mjs +150 -0
- package/dist/query/index.d.mts +26 -0
- package/dist/query/index.mjs +340 -0
- package/dist/semantic/index.d.mts +70 -0
- package/dist/semantic/index.mjs +391 -0
- package/dist/source/index.d.mts +427 -0
- package/dist/source/index.mjs +1865 -0
- package/package.json +86 -0
|
@@ -0,0 +1,4944 @@
|
|
|
1
|
+
import { enumeratePartitions } from "@gscdump/engine/planner";
|
|
2
|
+
import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
|
|
3
|
+
import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
|
|
4
|
+
import { between, date, extractDateRange, gsc, page, query } from "gscdump/query";
|
|
5
|
+
import { buildExtrasQueries, buildTotalsSql, mergeExtras, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized } from "@gscdump/engine/resolver";
|
|
6
|
+
/** Capability names a SQL variant declares when the caller passes no `sqlRequires`. */
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Assembles an analyzer descriptor with an optional SQL variant and an
 * optional row-query variant.
 *
 * @param {object} opts
 * @param {string} opts.id - Identifier copied onto the descriptor and onto each variant.
 * @param {Function} [opts.reduce] - Reducer used by any variant lacking a dedicated one.
 * @param {Function} [opts.reduceSql] - Reducer for the SQL variant (takes precedence over `reduce`).
 * @param {Function} [opts.reduceRows] - Reducer for the rows variant (takes precedence over `reduce`).
 * @param {Function} [opts.buildSql] - `(params) => spec`; its presence enables the SQL variant.
 * @param {Function} [opts.buildRows] - `(params) => { [name]: state }`; its presence enables the rows variant.
 * @param {string[]} [opts.sqlRequires] - Defaults to `DEFAULT_SQL_REQUIRES`.
 * @param {string[]} [opts.rowsRequires] - Defaults to `[]`.
 * @returns {{ id: string, sql: (object|undefined), rows: (object|undefined) }}
 * @throws {Error} When a builder is given without any reducer that could serve it.
 */
function defineAnalyzer(opts) {
  const {
    id,
    reduce,
    reduceSql,
    reduceRows,
    buildSql,
    buildRows,
    sqlRequires = DEFAULT_SQL_REQUIRES,
    rowsRequires = [],
  } = opts;

  const sqlReducer = reduceSql ?? reduce;
  const rowsReducer = reduceRows ?? reduce;

  if (buildSql && !sqlReducer) {
    throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
  }
  if (buildRows && !rowsReducer) {
    throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);
  }

  // Reducers are handed an array when one is available: arrays pass straight
  // through, and a single-key object is unwrapped to that key's value. When
  // nothing can be unwrapped the original object is forwarded untouched.
  const runReducer = (reducer, rows, params, ctx) => {
    const input = Array.isArray(rows) ? rows : pickSingle(rows) ?? rows;
    const { results, meta } = reducer(input, params, ctx);
    // Only `results` and `meta` are surfaced; anything else the reducer returns is dropped.
    return { results, meta };
  };

  let sqlVariant;
  if (buildSql && sqlReducer) {
    sqlVariant = {
      id,
      requires: sqlRequires,
      build(params) {
        const {
          sql,
          params: sqlParams,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables,
        } = buildSql(params);
        return {
          kind: "sql",
          sql,
          params: sqlParams,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables,
        };
      },
      reduce(rows, ctx) {
        return runReducer(sqlReducer, rows, ctx.params, { extras: ctx.extras });
      },
    };
  }

  let rowsVariant;
  if (buildRows && rowsReducer) {
    rowsVariant = {
      id,
      requires: rowsRequires,
      build(params) {
        const named = Object.entries(buildRows(params));
        // Each named query state is boxed as `{ state }`.
        const queries = Object.fromEntries(named.map(([name, state]) => [name, { state }]));
        return { kind: "rows", queries };
      },
      reduce(rows, ctx) {
        return runReducer(rowsReducer, rows, ctx.params, {});
      },
    };
  }

  return { id, sql: sqlVariant, rows: rowsVariant };
}
|
|
62
|
+
/**
 * Returns the value stored under an object's only own enumerable key.
 *
 * @param {object} rows - Object to inspect.
 * @returns {*} The sole value, or `undefined` when the object has zero or several keys.
 */
function pickSingle(rows) {
  const names = Object.keys(rows);
  if (names.length !== 1) return undefined;
  const [only] = names;
  return rows[only];
}
|
|
66
|
+
/**
 * Default end of the analysis window: whatever `daysAgo(3)` yields.
 * NOTE(review): the 3-day offset presumably allows for Search Console
 * reporting lag — confirm.
 */
function defaultEndDate() {
  const offsetDays = 3;
  return daysAgo(offsetDays);
}
|
|
69
|
+
/**
 * Default start of the analysis window: whatever `daysAgo(31)` yields.
 */
function defaultStartDate() {
  const offsetDays = 31;
  return daysAgo(offsetDays);
}
|
|
72
|
+
/**
 * Resolves the current analysis period from analyzer params.
 *
 * Any falsy `startDate` / `endDate` (missing, `null`, or an empty string) is
 * replaced by the corresponding default.
 *
 * @param {{ startDate?: string, endDate?: string }} params
 * @returns {{ startDate: string, endDate: string }}
 */
function periodOf(params) {
  const { startDate: from, endDate: to } = params;
  return {
    startDate: from ? from : defaultStartDate(),
    endDate: to ? to : defaultEndDate(),
  };
}
|
|
78
|
+
/**
 * Resolves the current and previous periods for a comparison analysis.
 *
 * @param {{ type?: string, prevStartDate?: string, prevEndDate?: string }} params
 * @returns {{ current: { startDate: string, endDate: string }, previous: { startDate: string, endDate: string } }}
 * @throws {Error} When either previous-period bound is missing or otherwise falsy.
 */
function comparisonOf(params) {
  const { prevStartDate, prevEndDate } = params;
  const hasPrevious = Boolean(prevStartDate) && Boolean(prevEndDate);
  if (!hasPrevious) {
    throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
  }
  const previous = { startDate: prevStartDate, endDate: prevEndDate };
  return { current: periodOf(params), previous };
}
|
|
88
|
+
/** Metric values used for a day with no rows when the caller passes no `fill`. */
const DEFAULT_FILL = {
  clicks: 0,
  impressions: 0,
  ctr: 0,
  position: 0
};

/**
 * Produces a gap-free daily series between `startDate` and `endDate` (inclusive).
 *
 * Rows are grouped by `String(row[dateKey])`. The range is then walked one UTC
 * day at a time: days that have rows contribute all of them in input order,
 * days without rows contribute one synthetic `{ ...fill, [dateKey]: day }`.
 * Rows whose date key falls outside the walked range are not emitted.
 *
 * @param {Iterable<object>} rows - Source rows.
 * @param {object} options
 * @param {string} options.startDate - First day, as the date part of an ISO timestamp.
 * @param {string} options.endDate - Last day (inclusive), same format.
 * @param {string} [options.dateKey="date"] - Property holding each row's day.
 * @param {object} [options.fill=DEFAULT_FILL] - Template for synthetic rows.
 * @returns {object[]} Existing and synthetic rows in ascending day order.
 * @throws {Error} When either bound does not parse as a date.
 */
function padTimeseries(rows, options) {
  const { startDate, endDate } = options;
  const dateKey = options.dateKey ?? "date";
  const fill = options.fill ?? DEFAULT_FILL;

  const byDate = new Map();
  for (const row of rows) {
    const day = String(row[dateKey]);
    const bucket = byDate.get(day);
    if (bucket) bucket.push(row);
    else byDate.set(day, [row]);
  }

  // Both bounds are pinned to UTC midnight so adding MS_PER_DAY always lands on a day boundary.
  const start = new Date(`${startDate}T00:00:00Z`);
  const end = new Date(`${endDate}T00:00:00Z`);
  if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);

  const result = [];
  const endMs = end.getTime();
  for (let cursorMs = start.getTime(); cursorMs <= endMs; cursorMs += MS_PER_DAY) {
    const dateStr = toIsoDate(new Date(cursorMs));
    const existing = byDate.get(dateStr);
    if (existing) {
      // Append one row at a time: `push(...existing)` passes the whole bucket as
      // call arguments and throws RangeError once a bucket exceeds the engine limit.
      for (const row of existing) result.push(row);
    } else {
      result.push({
        ...fill,
        [dateKey]: dateStr
      });
    }
  }
  return result;
}
|
|
120
|
+
/**
 * Coerces a loosely typed cell value to a JavaScript number.
 *
 * Numbers are returned untouched (including NaN), bigints are converted,
 * `null`/`undefined` become 0, and anything else goes through `Number()`
 * with non-finite outcomes replaced by 0.
 *
 * @param {*} v
 * @returns {number}
 */
function num$5(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default: {
      if (v === null || v === undefined) return 0;
      const coerced = Number(v);
      return Number.isFinite(coerced) ? coerced : 0;
    }
  }
}
|
|
127
|
+
/**
 * Stringifies a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {*} v
 * @returns {string}
 */
function str$23(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
130
|
+
/**
 * "bayesian-ctr" analyzer: scores each (query, url) pair's CTR against a Beta
 * prior fitted to its position bucket.
 *
 * What the SQL does:
 *  - `entity`: aggregates per (query, url) over the date range, keeps pairs at
 *    or above the impressions threshold with position <= 30, and assigns a
 *    bucket = position rounded to an integer (capped at 30).
 *  - `bucket_mu` / `bucket_var`: impression-weighted CTR mean and variance per
 *    bucket (variance floored at 1e-9).
 *  - `priors`: method-of-moments Beta(alpha, beta) per bucket, each floored at
 *    0.5; falls back to alpha = 2, beta = 48 when the bucket has fewer than 5
 *    entities or the moments are unusable.
 *  - `posterior` / `scored`: adds clicks to alpha and non-click impressions to
 *    beta, then derives the posterior mean and standard deviation.
 *  - Final select: mean ± 1.96 sd interval clamped to [0, 1], shrinkage and
 *    expected-click deltas, a significance ratio, and a classification label;
 *    ordered by significance and truncated to `limit`.
 */
const bayesianCtrAnalyzer = defineAnalyzer({
  id: "bayesian-ctr",

  /**
   * @param {{ startDate?: string, endDate?: string, minImpressions?: number, limit?: number }} params
   * @returns {{ sql: string, params: Array, current: { table: string, partitions: * } }}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const limit = params.limit ?? 300;
    // Buckets with fewer entities than this use the fixed fallback prior.
    const minEntitiesForFittedPrior = 5;

    const sql = `
WITH entity AS (
SELECT
query,
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS observed_ctr,
${METRIC_EXPR.position} AS position,
CAST(ROUND(LEAST(${METRIC_EXPR.position}, 30)) AS INTEGER) AS bucket
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
AND ${METRIC_EXPR.position} <= 30
),
bucket_mu AS (
SELECT
bucket,
COUNT(*) AS n_entities,
SUM(observed_ctr * impressions) / NULLIF(SUM(impressions), 0) AS mu,
SUM(impressions) AS total_impressions
FROM entity
GROUP BY bucket
),
bucket_var AS (
SELECT
e.bucket,
GREATEST(
SUM(e.impressions * POWER(e.observed_ctr - b.mu, 2))
/ NULLIF(SUM(e.impressions), 0),
1e-9
) AS v
FROM entity e
JOIN bucket_mu b USING (bucket)
GROUP BY e.bucket
),
priors AS (
SELECT
m.bucket,
m.n_entities,
m.mu,
v.v,
CASE
WHEN m.n_entities >= ${Number(minEntitiesForFittedPrior)}
AND v.v > 0
AND m.mu > 0 AND m.mu < 1
AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
THEN GREATEST(0.5, m.mu * (m.mu * (1.0 - m.mu) / v.v - 1.0))
ELSE 2.0
END AS alpha,
CASE
WHEN m.n_entities >= ${Number(minEntitiesForFittedPrior)}
AND v.v > 0
AND m.mu > 0 AND m.mu < 1
AND (m.mu * (1.0 - m.mu) / v.v - 1.0) > 0
THEN GREATEST(0.5, (1.0 - m.mu) * (m.mu * (1.0 - m.mu) / v.v - 1.0))
ELSE 48.0
END AS beta
FROM bucket_mu m
JOIN bucket_var v USING (bucket)
),
posterior AS (
SELECT
e.query,
e.url,
e.clicks,
e.impressions,
e.observed_ctr,
e.position,
e.bucket,
p.alpha AS prior_alpha,
p.beta AS prior_beta,
p.mu AS bucket_prior_mean,
p.alpha + e.clicks AS alpha_post,
p.beta + (e.impressions - e.clicks) AS beta_post
FROM entity e
JOIN priors p USING (bucket)
),
scored AS (
SELECT *,
alpha_post / (alpha_post + beta_post) AS posterior_mean,
SQRT((alpha_post * beta_post)
/ (POWER(alpha_post + beta_post, 2) * (alpha_post + beta_post + 1))) AS posterior_sd
FROM posterior
)
SELECT
query AS keyword,
url AS page,
clicks,
impressions,
observed_ctr AS observedCtr,
position,
bucket,
prior_alpha AS priorAlpha,
prior_beta AS priorBeta,
bucket_prior_mean AS bucketPriorMean,
posterior_mean AS posteriorMean,
posterior_sd AS posteriorSd,
GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) AS ciLow,
LEAST(1.0, posterior_mean + 1.96 * posterior_sd) AS ciHigh,
posterior_mean - observed_ctr AS shrinkageDelta,
(posterior_mean - observed_ctr) * impressions AS expectedClicksDelta,
ABS(observed_ctr - posterior_mean) / NULLIF(posterior_sd, 0) AS significance,
CASE
WHEN observed_ctr > LEAST(1.0, posterior_mean + 1.96 * posterior_sd) THEN 'overperforming'
WHEN observed_ctr < GREATEST(0.0, posterior_mean - 1.96 * posterior_sd) THEN 'underperforming'
ELSE 'expected'
END AS classification
FROM scored
ORDER BY significance DESC NULLS LAST
LIMIT ${Number(limit)}
`;

    // Placeholder order: date lower bound, date upper bound, HAVING impressions threshold.
    const bindings = [startDate, endDate, minImpressions];
    const partitions = enumeratePartitions(startDate, endDate);
    return {
      sql,
      params: bindings,
      current: { table: "page_keywords", partitions },
    };
  },

  /**
   * Normalizes result rows to plain numbers/strings and tallies the labels.
   *
   * @param {*} rows - Result rows; anything that is not an array is treated as empty.
   * @param {{ minImpressions?: number }} params
   * @returns {{ results: object[], meta: object }}
   */
  reduceSql(rows, params) {
    const source = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 50;

    const results = [];
    let underperforming = 0;
    let overperforming = 0;
    for (const r of source) {
      const classification = str$23(r.classification);
      if (classification === "underperforming") underperforming += 1;
      if (classification === "overperforming") overperforming += 1;
      results.push({
        keyword: str$23(r.keyword),
        page: str$23(r.page),
        clicks: num$5(r.clicks),
        impressions: num$5(r.impressions),
        observedCtr: num$5(r.observedCtr),
        position: num$5(r.position),
        bucket: num$5(r.bucket),
        priorAlpha: num$5(r.priorAlpha),
        priorBeta: num$5(r.priorBeta),
        bucketPriorMean: num$5(r.bucketPriorMean),
        posteriorMean: num$5(r.posteriorMean),
        posteriorSd: num$5(r.posteriorSd),
        ciLow: num$5(r.ciLow),
        ciHigh: num$5(r.ciHigh),
        shrinkageDelta: num$5(r.shrinkageDelta),
        expectedClicksDelta: num$5(r.expectedClicksDelta),
        significance: num$5(r.significance),
        classification,
      });
    }

    const total = results.length;
    return {
      results,
      meta: {
        total,
        underperforming,
        overperforming,
        expected: total - underperforming - overperforming,
        minImpressions,
      },
    };
  },
});
|
|
301
|
+
/**
 * Coerces a loosely typed cell value to a JavaScript number.
 *
 * Numbers are returned untouched, bigints are converted, and `null` /
 * `undefined` become 0. Any other value goes through `Number()`; when that
 * does not produce a finite number (e.g. a non-numeric string) the result is
 * 0 rather than NaN, so one bad cell cannot poison downstream sums and
 * comparisons. This matches the behaviour of the sibling `num$5` helper.
 *
 * @param {*} v
 * @returns {number}
 */
function num$4(v) {
  if (typeof v === "number") return v;
  if (typeof v === "bigint") return Number(v);
  if (v == null) return 0;
  const n = Number(v);
  return Number.isFinite(n) ? n : 0;
}
|
|
307
|
+
/**
 * Indexes rows into a Map, optionally skipping rows a predicate rejects.
 * When several rows share a key, the last one wins.
 *
 * @param {Iterable<object>} rows
 * @param {(row: object) => *} key - Derives the map key.
 * @param {(row: object) => *} value - Derives the stored value.
 * @param {(row: object) => boolean} [filter] - Rows for which this is falsy are skipped.
 * @returns {Map<*, *>}
 */
function buildPeriodMap(rows, key, value, filter) {
  const index = new Map();
  for (const row of rows) {
    const accepted = !filter || Boolean(filter(row));
    if (accepted) {
      index.set(key(row), value(row));
    }
  }
  return index;
}
|
|
315
|
+
/**
 * Creates a non-mutating sorter driven by a metric accessor.
 *
 * @param {(item: *, metric: string) => number} getValue - Reads the sort value for a metric.
 * @param {string} defaultMetric - Metric used when the sorter is called without `sortBy`.
 * @param {"asc"|"desc"} [defaultOrder="desc"] - Direction used when called without `sortOrder`.
 * @returns {(items: Iterable<*>, sortBy?: string, sortOrder?: string) => Array<*>}
 *   Function returning a sorted copy; any order other than "desc" sorts ascending.
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
  return function sortItems(items, sortBy = defaultMetric, sortOrder = defaultOrder) {
    const direction = sortOrder === "desc" ? -1 : 1;
    const compare = (a, b) => {
      const left = getValue(a, sortBy);
      const right = getValue(b, sortBy);
      return (left - right) * direction;
    };
    // Sort a copy so the caller's collection is left as it was.
    return Array.from(items).sort(compare);
  };
}
|
|
321
|
+
/**
 * Creates a non-mutating sorter whose direction is looked up per metric.
 *
 * @param {string} defaultMetric - Property sorted on when `sortBy` is omitted.
 * @param {Object<string, "asc"|"desc">} orderByMetric - Direction per metric;
 *   anything other than "desc" (including a missing entry) sorts ascending.
 * @returns {(items: Iterable<object>, sortBy?: string) => object[]}
 */
function createMetricSorter(defaultMetric, orderByMetric) {
  return function sortByMetric(items, sortBy = defaultMetric) {
    const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
    const copy = Array.from(items);
    copy.sort((a, b) => {
      const gap = a[sortBy] - b[sortBy];
      return gap * direction;
    });
    return copy;
  };
}
|
|
327
|
+
/** Number of unrolled rank-update steps generated for the bipartite PageRank SQL. */
const BIPARTITE_PAGERANK_ITERATIONS = 25;
/** Damping factor applied in every bipartite PageRank update step. */
const BIPARTITE_PAGERANK_DAMPING = 0.85;
|
|
329
|
+
/**
 * Stringifies a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {*} v
 * @returns {string}
 */
function str$22(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
332
|
+
/**
 * Reads a list that may arrive either as an array or as JSON text.
 *
 * @param {*} v - An array (returned as-is), a non-empty JSON string, or anything else.
 * @returns {Array} The array, the parsed array, or `[]` when the input is
 *   neither (including JSON that parses to a non-array).
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$16(v) {
  if (Array.isArray(v)) return v;
  const isNonEmptyText = typeof v === "string" && v.length > 0;
  if (!isNonEmptyText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
340
|
+
/**
 * "bipartite-pagerank" analyzer: ranks queries and URLs on the query↔URL
 * impression graph using a fixed number of unrolled rank-update steps.
 *
 * What the SQL does:
 *  - `edges0`: impressions per (query, url) in the date range, above the threshold.
 *  - `edges`: restricted to the top 1000 queries and top 500 URLs by impressions.
 *  - `q_to_u_weights` / `u_to_q_weights`: each edge's share of its source
 *    node's total impressions, in each direction.
 *  - `ranks_0`: uniform seed per side; `ranks_1..ranks_N`: one CTE per step,
 *    each computed from the opposite side of the previous step.
 *  - `deltas`: per-step L1 difference between consecutive rank tables.
 *  - `query_rows` / `url_rows`: top `limit` nodes per side with bridging /
 *    anchoring counts (edges at or above the 0.05 share threshold), degree,
 *    and impressions.
 *  - Every output row also repeats the node counts and the JSON-encoded deltas.
 */
const bipartitePagerankAnalyzer = defineAnalyzer({
  id: "bipartite-pagerank",

  /**
   * @param {{ startDate?: string, endDate?: string, minImpressions?: number, limit?: number }} params
   * @returns {{ sql: string, params: Array, current: { table: string, partitions: * } }}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const limit = params.limit ?? 50;
    const topQueries = 1000;
    const topUrls = 500;
    const bridgingEdgeThreshold = 0.05;
    const anchoringEdgeThreshold = 0.05;
    const iterations = BIPARTITE_PAGERANK_ITERATIONS;
    const d = BIPARTITE_PAGERANK_DAMPING;

    // One CTE per step; step k reads ranks_{k-1} and writes ranks_k.
    const iterCtes = Array.from({ length: iterations }, (_, idx) => {
      const step = idx + 1;
      const prev = idx;
      return `
ranks_${step} AS (
SELECT
'q' AS kind,
e.qid AS id,
(1.0 - ${d}) / (SELECT n FROM query_count)
+ ${d} * SUM(e.w_u_to_q * r.rank) AS rank
FROM u_to_q_weights e
JOIN ranks_${prev} r ON r.kind = 'u' AND r.id = e.uid
GROUP BY e.qid
UNION ALL
SELECT
'u' AS kind,
e.uid AS id,
(1.0 - ${d}) / (SELECT n FROM url_count)
+ ${d} * SUM(e.w_q_to_u * r.rank) AS rank
FROM q_to_u_weights e
JOIN ranks_${prev} r ON r.kind = 'q' AND r.id = e.qid
GROUP BY e.uid
)`;
    });

    // One SELECT per step measuring how much the ranks moved in that step.
    const deltaParts = Array.from({ length: iterations }, (_, idx) => {
      const step = idx + 1;
      const prev = idx;
      return `
SELECT ${step} AS step,
(SELECT COALESCE(SUM(ABS(a.rank - b.rank)), 0.0)
FROM ranks_${step} a
JOIN ranks_${prev} b USING (kind, id)) AS l1`;
    });

    const sql = `
WITH edges0 AS (
SELECT
query AS qid,
url AS uid,
CAST(SUM(impressions) AS DOUBLE) AS impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
-- Top-N caps per side keep the iteration tractable.
query_totals AS (
SELECT qid, SUM(impressions) AS tot
FROM edges0 GROUP BY qid
),
url_totals AS (
SELECT uid, SUM(impressions) AS tot
FROM edges0 GROUP BY uid
),
top_queries AS (
SELECT qid FROM query_totals
ORDER BY tot DESC, qid ASC LIMIT ${Number(topQueries)}
),
top_urls AS (
SELECT uid FROM url_totals
ORDER BY tot DESC, uid ASC LIMIT ${Number(topUrls)}
),
edges AS (
SELECT e.qid, e.uid, e.impressions
FROM edges0 e
JOIN top_queries tq USING (qid)
JOIN top_urls tu USING (uid)
),
query_nodes AS (SELECT DISTINCT qid FROM edges),
url_nodes AS (SELECT DISTINCT uid FROM edges),
query_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM query_nodes),
url_count AS (SELECT GREATEST(COUNT(*), 1) AS n FROM url_nodes),
-- Row-stochastic transition weights in each direction. For q->u the
-- weights out of a query sum to 1; symmetric for u->q.
q_out AS (SELECT qid, SUM(impressions) AS s FROM edges GROUP BY qid),
u_out AS (SELECT uid, SUM(impressions) AS s FROM edges GROUP BY uid),
q_to_u_weights AS (
SELECT e.qid, e.uid,
e.impressions / NULLIF(q.s, 0) AS w_q_to_u
FROM edges e JOIN q_out q USING (qid)
),
u_to_q_weights AS (
SELECT e.qid, e.uid,
e.impressions / NULLIF(u.s, 0) AS w_u_to_q
FROM edges e JOIN u_out u USING (uid)
),
-- Seed: uniform distribution per side. Total mass = 2 (one unit per side).
ranks_0 AS (
SELECT 'q' AS kind, q.qid AS id, 1.0 / (SELECT n FROM query_count) AS rank
FROM query_nodes q
UNION ALL
SELECT 'u' AS kind, u.uid AS id, 1.0 / (SELECT n FROM url_count) AS rank
FROM url_nodes u
),
${iterCtes.join(",\n")},
final_ranks AS (SELECT * FROM ranks_${iterations}),
-- Hub/anchor diagnostics computed from raw edge mass (not rank). A
-- query "bridges" URLs it sends >= ${bridgingEdgeThreshold} of its mass
-- to; a URL "anchors" queries that contribute >= ${anchoringEdgeThreshold}
-- of its incoming mass.
q_bridging AS (
SELECT qid, COUNT(*) AS bridging
FROM q_to_u_weights
WHERE w_q_to_u >= ${bridgingEdgeThreshold}
GROUP BY qid
),
u_anchoring AS (
SELECT uid, COUNT(*) AS anchoring
FROM u_to_q_weights
WHERE w_u_to_q >= ${anchoringEdgeThreshold}
GROUP BY uid
),
q_degree AS (
SELECT qid, COUNT(*) AS degree, SUM(impressions) AS impressions
FROM edges GROUP BY qid
),
u_degree AS (
SELECT uid, COUNT(*) AS degree, SUM(impressions) AS impressions
FROM edges GROUP BY uid
),
deltas AS (
${deltaParts.join("\n UNION ALL\n")}
),
query_rows AS (
SELECT
'query' AS kind, f.id, f.rank,
COALESCE(b.bridging, 0) AS bridging,
0 AS anchoring,
COALESCE(qd.degree, 0) AS degree,
COALESCE(qd.impressions, 0) AS impressions
FROM final_ranks f
LEFT JOIN q_bridging b ON b.qid = f.id
LEFT JOIN q_degree qd ON qd.qid = f.id
WHERE f.kind = 'q'
ORDER BY f.rank DESC
LIMIT ${Number(limit)}
),
url_rows AS (
SELECT
'url' AS kind, f.id, f.rank,
0 AS bridging,
COALESCE(a.anchoring, 0) AS anchoring,
COALESCE(ud.degree, 0) AS degree,
COALESCE(ud.impressions, 0) AS impressions
FROM final_ranks f
LEFT JOIN u_anchoring a ON a.uid = f.id
LEFT JOIN u_degree ud ON ud.uid = f.id
WHERE f.kind = 'u'
ORDER BY f.rank DESC
LIMIT ${Number(limit)}
),
nodes AS (
SELECT * FROM query_rows
UNION ALL
SELECT * FROM url_rows
),
counts AS (
SELECT
(SELECT n FROM query_count) AS q_count,
(SELECT n FROM url_count) AS u_count
),
deltas_json AS (
SELECT to_json(list({ 'step': step, 'l1': l1 } ORDER BY step)) AS dj
FROM deltas
)
SELECT
n.kind,
n.id,
n.rank,
n.bridging,
n.anchoring,
n.degree,
n.impressions,
c.q_count AS queryCount,
c.u_count AS urlCount,
dj.dj AS deltasJson
FROM nodes n
CROSS JOIN counts c
CROSS JOIN deltas_json dj
ORDER BY n.kind, n.rank DESC
`;

    // Placeholder order: date lower bound, date upper bound, HAVING impressions threshold.
    const bindings = [startDate, endDate, minImpressions];
    const partitions = enumeratePartitions(startDate, endDate);
    return {
      sql,
      params: bindings,
      current: { table: "page_keywords", partitions },
    };
  },

  /**
   * Normalizes node rows and lifts the per-query metadata (node counts and
   * step deltas), which the SQL repeats on every row, from the first row.
   *
   * @param {*} rows - Result rows; anything that is not an array is treated as empty.
   * @returns {{ results: object[], meta: object }}
   */
  reduceSql(rows) {
    const source = Array.isArray(rows) ? rows : [];

    const results = source.map((r) => {
      const kind = str$22(r.kind);
      const id = str$22(r.id);
      return {
        kind,
        id,
        rank: num$4(r.rank),
        bridging: num$4(r.bridging),
        anchoring: num$4(r.anchoring),
        degree: num$4(r.degree),
        impressions: num$4(r.impressions),
      };
    });

    const head = source[0] ?? {};
    const queryCount = num$4(head.queryCount);
    const urlCount = num$4(head.urlCount);
    const deltas = parseJsonList$16(head.deltasJson).map((entry) => {
      const step = num$4(entry.step);
      const l1 = num$4(entry.l1);
      return { step, l1 };
    });

    // The last step's L1 movement is reported as the convergence measure.
    let convergenceDelta = 0;
    if (deltas.length > 0) {
      convergenceDelta = deltas[deltas.length - 1].l1;
    }

    return {
      results,
      meta: {
        total: results.length,
        convergenceDelta,
        iterations: BIPARTITE_PAGERANK_ITERATIONS,
        damping: BIPARTITE_PAGERANK_DAMPING,
        queryCount,
        urlCount,
        deltas,
      },
    };
  },
});
|
|
575
|
+
/** Row cap applied by the query-state helpers when the caller gives no limit. */
const DEFAULT_LIMIT$1 = 25000;

/**
 * Builds the query state selecting the `query` and `page` dimensions,
 * filtered to the given period.
 *
 * @param {{ startDate: string, endDate: string }} period
 * @param {number} [limit=DEFAULT_LIMIT$1]
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(query, page);
  const withinPeriod = selection.where(between(date, period.startDate, period.endDate));
  const capped = withinPeriod.limit(limit);
  return capped.getState();
}
|
|
579
|
+
/**
 * Builds the query state selecting the `page` dimension, filtered to the
 * given period.
 *
 * @param {{ startDate: string, endDate: string }} period
 * @param {number} [limit=DEFAULT_LIMIT$1]
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(page);
  const withinPeriod = selection.where(between(date, period.startDate, period.endDate));
  const capped = withinPeriod.limit(limit);
  return capped.getState();
}
|
|
582
|
+
/**
 * Builds the query state selecting the `date` dimension, filtered to the
 * given period.
 *
 * @param {{ startDate: string, endDate: string }} period
 * @param {number} [limit=DEFAULT_LIMIT$1]
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(date);
  const withinPeriod = selection.where(between(date, period.startDate, period.endDate));
  const capped = withinPeriod.limit(limit);
  return capped.getState();
}
|
|
585
|
+
/**
 * Backslash-escapes regular-expression metacharacters so the text can be
 * embedded in a pattern and matched literally.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeRegexAlt(s) {
  const escapeOne = (metachar) => `\\${metachar}`;
  return s.replace(/[.*+?^${}()|[\]\\]/g, escapeOne);
}
|
|
588
|
+
/**
 * Stringifies a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {*} v
 * @returns {string}
 */
function str$21(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
591
|
+
/**
 * Splits keyword rows into brand and non-brand segments and totals each side.
 *
 * A row is "brand" when its lower-cased query contains any lower-cased brand
 * term as a substring. Rows below `minImpressions` are ignored entirely.
 *
 * Edge cases:
 *  - Empty brand terms are ignored; `"".includes("")` is true for every
 *    string, so an empty term would otherwise classify every row as brand.
 *  - A row whose `query` is `null`/`undefined` is treated as having an empty
 *    query (hence non-brand) instead of throwing.
 *
 * @param {Iterable<object>} keywords - Rows with `query`, `clicks`, `impressions`.
 * @param {object} options
 * @param {string[]} options.brandTerms - Brand substrings (case-insensitive).
 * @param {number} [options.minImpressions=10] - Rows below this are skipped.
 * @returns {{
 *   brand: object[],
 *   nonBrand: object[],
 *   summary: {
 *     brandClicks: number, nonBrandClicks: number, brandShare: number,
 *     brandImpressions: number, nonBrandImpressions: number
 *   }
 * }} `brandShare` is brand clicks over all counted clicks, or 0 when there are none.
 */
function analyzeBrandSegmentation(keywords, options) {
  const { brandTerms, minImpressions = 10 } = options;
  const needles = brandTerms
    .map((term) => term.toLowerCase())
    .filter((term) => term.length > 0);

  const brand = [];
  const nonBrand = [];
  let brandClicks = 0;
  let nonBrandClicks = 0;
  let brandImpressions = 0;
  let nonBrandImpressions = 0;

  for (const row of keywords) {
    const impressions = num$4(row.impressions);
    if (impressions < minImpressions) continue;

    // Lower-case once per row rather than once per brand term.
    const haystack = row.query == null ? "" : String(row.query).toLowerCase();
    const clicks = num$4(row.clicks);

    if (needles.some((needle) => haystack.includes(needle))) {
      brand.push(row);
      brandClicks += clicks;
      brandImpressions += impressions;
    } else {
      nonBrand.push(row);
      nonBrandClicks += clicks;
      nonBrandImpressions += impressions;
    }
  }

  const totalClicks = brandClicks + nonBrandClicks;
  return {
    brand,
    nonBrand,
    summary: {
      brandClicks,
      nonBrandClicks,
      brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
      brandImpressions,
      nonBrandImpressions
    }
  };
}
|
|
616
|
+
/**
 * Returns the lower-cased, non-empty brand terms from analyzer params.
 *
 * Empty strings are discarded because they match every query, both as a
 * substring and as a regex alternative.
 *
 * @param {{ brandTerms?: string[] }} params
 * @returns {string[]}
 * @throws {Error} When no usable brand term remains.
 */
function requireBrandTerms(params) {
  const terms = (params.brandTerms ?? [])
    .map((term) => term.toLowerCase())
    .filter((term) => term !== "");
  if (terms.length === 0) throw new Error("Brand analysis requires brandTerms");
  return terms;
}

/**
 * "brand" analyzer: tags each (query, page) pair as brand or non-brand and
 * summarizes clicks and impressions per segment.
 *
 * The SQL variant classifies with `regexp_matches(LOWER(query), ?)` against
 * an alternation of the escaped brand terms; the rows variant delegates to
 * `analyzeBrandSegmentation`. Both variants report `meta.total` and
 * `meta.summary`.
 */
const brandAnalyzer = defineAnalyzer({
  id: "brand",

  /**
   * @param {{ brandTerms: string[], startDate?: string, endDate?: string, minImpressions?: number, limit?: number }} params
   * @returns {{ sql: string, params: Array, current: { table: string, partitions: * } }}
   * @throws {Error} When no usable brand term is supplied.
   */
  buildSql(params) {
    const terms = requireBrandTerms(params);
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const limit = params.limit ?? 1e4;
    // Terms are escaped so they match literally; the pattern is bound as a parameter.
    const regex = `(${terms.map((term) => escapeRegexAlt(term)).join("|")})`;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
)
SELECT
query, page, clicks, impressions, ctr, position,
CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
FROM agg
ORDER BY clicks DESC
LIMIT ${Number(limit)}
`,
      // Placeholder order: date bounds, HAVING threshold, brand regex.
      params: [
        startDate,
        endDate,
        minImpressions,
        regex
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Normalizes SQL rows and totals clicks/impressions per segment.
   *
   * @param {*} rows - Result rows; anything that is not an array is treated as empty.
   * @returns {{ results: object[], meta: { total: number, summary: object } }}
   */
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      query: str$21(r.query),
      page: r.page == null ? void 0 : str$21(r.page),
      clicks: num$4(r.clicks),
      impressions: num$4(r.impressions),
      ctr: num$4(r.ctr),
      position: num$4(r.position),
      segment: str$21(r.segment)
    }));
    let brandClicks = 0;
    let nonBrandClicks = 0;
    let brandImpressions = 0;
    let nonBrandImpressions = 0;
    for (const r of normalized) {
      // Any segment other than "brand" is counted on the non-brand side.
      if (r.segment === "brand") {
        brandClicks += r.clicks;
        brandImpressions += r.impressions;
      } else {
        nonBrandClicks += r.clicks;
        nonBrandImpressions += r.impressions;
      }
    }
    const totalClicks = brandClicks + nonBrandClicks;
    return {
      results: normalized,
      meta: {
        total: normalized.length,
        summary: {
          brandClicks,
          nonBrandClicks,
          brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
          brandImpressions,
          nonBrandImpressions
        }
      }
    };
  },

  /**
   * @param {{ startDate?: string, endDate?: string, limit?: number }} params
   * @returns {{ keywords: * }} Named query states for the rows variant.
   */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },

  /**
   * Segments already-fetched keyword rows in JavaScript.
   *
   * @param {*} rows - Keyword rows; anything that is not an array is treated as empty.
   * @param {{ brandTerms: string[], minImpressions?: number }} params
   * @returns {{ results: object[], meta: { total: number, summary: object } }}
   * @throws {Error} When no usable brand term is supplied.
   */
  reduceRows(rows, params) {
    const terms = requireBrandTerms(params);
    const segmented = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
      brandTerms: terms,
      minImpressions: params.minImpressions
    });
    const results = [
      ...segmented.brand.map((r) => ({ ...r, segment: "brand" })),
      ...segmented.nonBrand.map((r) => ({ ...r, segment: "non-brand" }))
    ];
    return {
      results,
      // `total` matches what the SQL variant reports, so both engines expose the same meta shape.
      meta: { total: results.length, summary: segmented.summary }
    };
  }
});
|
|
715
|
+
/**
 * Sorter for cannibalization results. Recognised metrics are "clicks",
 * "impressions", "positionSpread" and "pageCount"; any other metric yields
 * `undefined` as the sort value. Defaults to "clicks".
 */
const sortRowResults$1 = createSorter((item, metric) => {
  if (metric === "clicks") return item.totalClicks;
  if (metric === "impressions") return item.totalImpressions;
  if (metric === "positionSpread") return item.positionSpread;
  if (metric === "pageCount") return item.pages.length;
  return undefined;
}, "clicks");
|
|
723
|
+
/**
 * Stringifies a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {*} v
 * @returns {string}
 */
function str$20(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
726
|
+
/**
 * Reads a list that may arrive either as an array or as JSON text.
 *
 * @param {*} v - An array (returned as-is), a non-empty JSON string, or anything else.
 * @returns {Array} The array, the parsed array, or `[]` when the input is
 *   neither (including JSON that parses to a non-array).
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$15(v) {
  if (Array.isArray(v)) return v;
  const isNonEmptyText = typeof v === "string" && v.length > 0;
  if (!isNonEmptyText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
734
|
+
/**
 * Finds queries for which several pages rank close to each other.
 *
 * Rows below `minImpressions` are dropped, the rest are grouped by query. A
 * query is reported when it has at least `minPages` pages and the gap between
 * its best and worst page position does not exceed `maxPositionSpread`. Each
 * reported query lists its pages ordered by clicks, highest first.
 *
 * @param {Iterable<object>} rows - Rows with `query`, `page`, `clicks`, `impressions`, `ctr`, `position`.
 * @param {object} [options]
 * @param {number} [options.minImpressions=10]
 * @param {number} [options.maxPositionSpread=10]
 * @param {number} [options.minPages=2]
 * @param {string} [options.sortBy="clicks"]
 * @param {string} [options.sortOrder="desc"]
 * @returns {Array<{ query: *, pages: object[], totalClicks: number, totalImpressions: number, positionSpread: number }>}
 */
function analyzeCannibalization(rows, options = {}) {
  const {
    minImpressions = 10,
    maxPositionSpread = 10,
    minPages = 2,
    sortBy = "clicks",
    sortOrder = "desc",
  } = options;

  // Group qualifying rows by query, keeping only the per-page metrics.
  const pagesByQuery = new Map();
  for (const row of rows) {
    if (row.impressions < minImpressions) continue;
    let bucket = pagesByQuery.get(row.query);
    if (!bucket) {
      bucket = [];
      pagesByQuery.set(row.query, bucket);
    }
    const { page, clicks, impressions, ctr, position } = row;
    bucket.push({ page, clicks, impressions, ctr, position });
  }

  const candidates = [];
  pagesByQuery.forEach((pages, query) => {
    if (pages.length < minPages) return;
    pages.sort((a, b) => b.clicks - a.clicks);

    const positions = pages.map((p) => p.position);
    const worst = Math.max(...positions);
    const best = Math.min(...positions);
    const positionSpread = worst - best;
    if (positionSpread > maxPositionSpread) return;

    let totalClicks = 0;
    let totalImpressions = 0;
    for (const p of pages) {
      totalClicks += p.clicks;
      totalImpressions += p.impressions;
    }
    candidates.push({ query, pages, totalClicks, totalImpressions, positionSpread });
  });

  return sortRowResults$1(candidates, sortBy, sortOrder);
}
|
|
766
|
+
/**
 * Keyword-cannibalization analyzer.
 *
 * SQL path: aggregates the `page_keywords` table per (query, url), keeps
 * queries served by at least two URLs, ranks the URLs by impressions and
 * derives per-query HHI, fragmentation, "stolen clicks" and a 0-100 severity.
 * Rows path: delegates to `analyzeCannibalization`.
 */
const cannibalizationAnalyzer = defineAnalyzer({
  id: "cannibalization",
  /**
   * Build the parametrised query and partition list for the SQL path.
   *
   * @param {object} params - Reads `minImpressions` (default 50) and `limit`
   *   (default 200); the period comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: object}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const minCompetitors = 2;
    // A query must carry twice the per-URL impression floor in total.
    const minQueryImpressions = minImpressions * 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
query_totals AS (
SELECT
query,
SUM(impressions) AS total_impressions,
SUM(clicks) AS total_clicks,
COUNT(*) AS competitor_count
FROM agg
GROUP BY query
HAVING COUNT(*) >= ? AND SUM(impressions) >= ?
),
ranked AS (
SELECT
a.query,
a.url,
a.clicks,
a.impressions,
a.ctr,
a.position,
a.impressions / NULLIF(t.total_impressions, 0) AS share,
ROW_NUMBER() OVER (
PARTITION BY a.query
ORDER BY a.impressions DESC, a.clicks DESC, a.url ASC
) AS rnk
FROM agg a
JOIN query_totals t USING (query)
),
leader AS (
SELECT query, url AS leader_url, ctr AS leader_ctr, position AS leader_position
FROM ranked WHERE rnk = 1
),
events AS (
SELECT
r.query,
any_value(l.leader_url) AS leader_url,
any_value(l.leader_ctr) AS leader_ctr,
any_value(l.leader_position) AS leader_position,
SUM(POWER(r.share * 100.0, 2)) AS hhi,
SUM(CASE
WHEN r.rnk > 1 AND l.leader_ctr > r.ctr
THEN (l.leader_ctr - r.ctr) * r.impressions
ELSE 0.0
END) AS stolen_clicks,
to_json(list({
'url': r.url,
'clicks': r.clicks,
'impressions': r.impressions,
'ctr': r.ctr,
'position': r.position,
'share': r.share,
'rank': r.rnk
} ORDER BY r.rnk)) AS competitors
FROM ranked r
JOIN leader l USING (query)
GROUP BY r.query
)
SELECT
e.query AS keyword,
t.total_impressions AS totalImpressions,
t.total_clicks AS totalClicks,
t.competitor_count AS competitorCount,
e.leader_url AS leaderUrl,
e.leader_ctr AS leaderCtr,
e.leader_position AS leaderPosition,
e.hhi AS hhi,
GREATEST(0.0, 1.0 - e.hhi / 10000.0) AS fragmentation,
e.stolen_clicks AS stolenClicks,
e.competitors AS competitors,
CAST(ROUND(LEAST(100.0,
100.0 * POWER(
GREATEST(1.0 - e.hhi / 10000.0, 0.0)
* LEAST(e.stolen_clicks / GREATEST(t.total_clicks + e.stolen_clicks, 1.0), 1.0)
* LEAST(LOG10(GREATEST(t.total_impressions, 10.0)) / 5.0, 1.0),
1.0 / 3.0
)
)) AS DOUBLE) AS severity
FROM events e
JOIN query_totals t USING (query)
ORDER BY severity DESC, stolenClicks DESC
LIMIT ${Number(limit)}
`,
      // Positional parameters, in the order the `?` placeholders appear.
      params: [
        startDate,
        endDate,
        minImpressions,
        minCompetitors,
        minQueryImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise SQL rows into events and derive a URL competition graph.
   *
   * Nodes accumulate impressions/clicks and the distinct keywords per URL.
   * Edges connect every unordered pair of URLs competing for a keyword,
   * weighted by the smaller of the two impression counts.
   *
   * @param {Array<object>} rows - Result rows of the query from `buildSql`.
   * @returns {{results: Array<object>, meta: object}}
   */
  reduceSql(rows) {
    const events = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$20(r.keyword),
      totalImpressions: num$4(r.totalImpressions),
      totalClicks: num$4(r.totalClicks),
      competitorCount: num$4(r.competitorCount),
      leaderUrl: str$20(r.leaderUrl),
      leaderCtr: num$4(r.leaderCtr),
      leaderPosition: num$4(r.leaderPosition),
      hhi: num$4(r.hhi),
      fragmentation: num$4(r.fragmentation),
      stolenClicks: num$4(r.stolenClicks),
      severity: num$4(r.severity),
      competitors: parseJsonList$15(r.competitors).map((c) => ({
        url: str$20(c.url),
        clicks: num$4(c.clicks),
        impressions: num$4(c.impressions),
        ctr: num$4(c.ctr),
        position: num$4(c.position),
        share: num$4(c.share),
        rank: num$4(c.rank)
      }))
    }));
    const nodeAgg = new Map();
    const edgeAgg = new Map();
    for (const ev of events) {
      for (const c of ev.competitors) {
        const n = nodeAgg.get(c.url) ?? {
          impressions: 0,
          clicks: 0,
          queries: new Set()
        };
        n.impressions += c.impressions;
        n.clicks += c.clicks;
        n.queries.add(ev.keyword);
        nodeAgg.set(c.url, n);
      }
      for (let i = 0; i < ev.competitors.length; i++) {
        for (let j = i + 1; j < ev.competitors.length; j++) {
          const a = ev.competitors[i];
          const b = ev.competitors[j];
          // Order the pair so (a, b) and (b, a) share one edge.
          const [src, tgt] = a.url < b.url ? [a.url, b.url] : [b.url, a.url];
          // Explicit NUL delimiter: plain concatenation would let different
          // URL pairs produce the same key (e.g. "x"+"yz" and "xy"+"z").
          const key = `${src}\u0000${tgt}`;
          const weight = Math.min(a.impressions, b.impressions);
          const edge = edgeAgg.get(key) ?? {
            source: src,
            target: tgt,
            weight: 0,
            queries: 0
          };
          edge.weight += weight;
          edge.queries += 1;
          edgeAgg.set(key, edge);
        }
      }
    }
    const nodes = [...nodeAgg.entries()].map(([url, n]) => ({
      url,
      impressions: n.impressions,
      clicks: n.clicks,
      queryCount: n.queries.size
    }));
    const edges = [...edgeAgg.values()];
    const avgFragmentation = events.length > 0 ? events.reduce((s, e) => s + e.fragmentation, 0) / events.length : 0;
    const totalStolenClicks = events.reduce((s, e) => s + e.stolenClicks, 0);
    return {
      results: events,
      meta: {
        total: events.length,
        totalStolenClicks,
        avgFragmentation,
        graph: {
          nodes,
          edges
        }
      }
    };
  },
  /** Describe the row query for the rows path. */
  buildRows(params) {
    return { rows: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Rows-path reducer.
   *
   * Accepts either a bare row array or an object keyed like `buildRows`
   * (`{ rows: [...] }`) — NOTE(review): which shape the runner passes is not
   * visible here, so both are handled.
   */
  reduceRows(rows, params) {
    const list = Array.isArray(rows) ? rows : Array.isArray(rows?.rows) ? rows.rows : [];
    const results = analyzeCannibalization(list, {
      minImpressions: params.minImpressions,
      maxPositionSpread: params.maxPositionSpread,
      minPages: params.minPages
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
|
|
975
|
+
/**
 * Coerce a value to a number.
 *
 * Numbers pass through untouched, bigints are converted, null/undefined
 * become 0, and anything else goes through `Number()` with non-finite
 * results replaced by 0.
 *
 * @param {*} v
 * @returns {number}
 */
function num$3(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default: {
      if (v == null) return 0;
      const coerced = Number(v);
      return Number.isFinite(coerced) ? coerced : 0;
    }
  }
}
|
|
982
|
+
/**
 * Stringify a value, mapping null and undefined to the empty string.
 *
 * @param {*} v
 * @returns {string}
 */
function str$19(v) {
  if (v == null) return "";
  return String(v);
}
|
|
985
|
+
/**
 * Read a list that may arrive as an array or as JSON text.
 *
 * Arrays are returned as-is. Non-empty strings are parsed with `JSON.parse`
 * (which throws on malformed text) and used only when the result is an
 * array. Every other input yields an empty array.
 *
 * @param {*} v
 * @returns {Array}
 */
function parseJsonList$14(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
993
|
+
/**
 * Change-point analyzer (SQL only).
 *
 * For every (query, url) daily series in `page_keywords` the query scores
 * each candidate split date with a log-likelihood ratio comparing one
 * variance for the whole series against separate variances left and right
 * of the split, then keeps the best split when it exceeds the threshold.
 */
const changePointAnalyzer = defineAnalyzer({
  id: "change-point",
  /**
   * Build the change-point query.
   *
   * @param {object} params - Reads `startDate`, `endDate`, `threshold`
   *   (default 10), `minImpressions` (default 50), `metric` ("clicks",
   *   "impressions", anything else means "position") and `limit` (default 100).
   * @returns {{sql: string, params: Array, current: object}}
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    // Minimum days in a series, and minimum days on each side of a split.
    const minSeriesDays = 21;
    const minSegmentDays = 7;
    const llrThreshold = params.threshold ?? 10;
    const impressionFloor = params.minImpressions ?? 50;
    // Whitelisted before being spliced into the SQL text below.
    const metric = ["clicks", "impressions"].includes(params.metric) ? params.metric : "position";
    const maxResults = params.limit ?? 100;
    let seriesValueSql;
    if (metric === "position") seriesValueSql = METRIC_EXPR.position;
    else seriesValueSql = `CAST(SUM(${metric}) AS DOUBLE)`;
    const sql = `
WITH daily AS (
SELECT
query,
url AS page,
date,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${seriesValueSql} AS value
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url, date
HAVING SUM(impressions) >= 1
),
entity_stats AS (
SELECT query, page,
COUNT(*) AS n_total,
SUM(impressions) AS total_impressions,
SUM(value) AS sum_total,
SUM(value * value) AS sumsq_total
FROM daily
GROUP BY query, page
HAVING COUNT(*) >= ${Number(minSeriesDays)}
AND SUM(impressions) >= ?
),
filtered AS (
SELECT d.*,
e.n_total, e.sum_total, e.sumsq_total, e.total_impressions
FROM daily d
JOIN entity_stats e USING (query, page)
),
cumulated AS (
SELECT *,
COUNT(*) OVER w AS n_left,
SUM(value) OVER w AS sum_left,
SUM(value * value) OVER w AS sumsq_left
FROM filtered
WINDOW w AS (
PARTITION BY query, page
ORDER BY date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)
),
llr_scored AS (
SELECT *,
(n_total - n_left) AS n_right,
(sum_total - sum_left) AS sum_right,
(sumsq_total - sumsq_left) AS sumsq_right,
GREATEST(
(sumsq_left / NULLIF(n_left, 0))
- (sum_left / NULLIF(n_left, 0)) * (sum_left / NULLIF(n_left, 0)),
1e-9
) AS var_left,
GREATEST(
((sumsq_total - sumsq_left) / NULLIF(n_total - n_left, 0))
- ((sum_total - sum_left) / NULLIF(n_total - n_left, 0))
* ((sum_total - sum_left) / NULLIF(n_total - n_left, 0)),
1e-9
) AS var_right,
GREATEST(
(sumsq_total / NULLIF(n_total, 0))
- (sum_total / NULLIF(n_total, 0)) * (sum_total / NULLIF(n_total, 0)),
1e-9
) AS var_single
FROM cumulated
),
llr AS (
SELECT *,
CASE
WHEN n_left >= ${Number(minSegmentDays)} AND (n_total - n_left) >= ${Number(minSegmentDays)}
THEN n_total * LN(var_single)
- n_left * LN(var_left)
- (n_total - n_left) * LN(var_right)
ELSE NULL
END AS llr
FROM llr_scored
),
best AS (
SELECT query, page, n_total, total_impressions,
arg_max(date, llr) AS change_date,
MAX(llr) AS best_llr,
arg_max(sum_left / NULLIF(n_left, 0), llr) AS left_mean,
arg_max((sum_total - sum_left) / NULLIF(n_total - n_left, 0), llr) AS right_mean,
arg_max(sqrt(var_left), llr) AS left_std,
arg_max(sqrt(var_right), llr) AS right_std
FROM llr
WHERE llr IS NOT NULL
GROUP BY query, page, n_total, total_impressions
HAVING MAX(llr) > ${Number(llrThreshold)}
),
series AS (
SELECT query, page,
to_json(list({
'date': strftime(date, '%Y-%m-%d'),
'value': value
} ORDER BY date)) AS seriesJson
FROM daily
GROUP BY query, page
)
SELECT
b.query AS keyword,
b.page,
CAST(b.n_total AS DOUBLE) AS totalDays,
CAST(b.total_impressions AS DOUBLE) AS totalImpressions,
strftime(b.change_date, '%Y-%m-%d') AS changeDate,
b.best_llr AS llr,
b.left_mean AS leftMean,
b.right_mean AS rightMean,
(b.right_mean - b.left_mean) AS delta,
b.left_std AS leftStddev,
b.right_std AS rightStddev,
s.seriesJson
FROM best b
LEFT JOIN series s USING (query, page)
ORDER BY b.best_llr DESC
LIMIT ${Number(maxResults)}
`;
    return {
      sql,
      // Positional parameters, in the order the `?` placeholders appear.
      params: [
        startDate,
        endDate,
        impressionFloor
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise result rows and label each change as improved or worsened.
   *
   * For "position" a negative delta counts as an improvement; for the other
   * metrics a positive delta does. Anything else is labelled "worsened".
   *
   * @param {Array<object>} rows - Result rows of the query from `buildSql`.
   * @param {object} params - Reads `threshold` and `metric`.
   * @returns {{results: Array<object>, meta: object}}
   */
  reduceSql(rows, params) {
    const sourceRows = Array.isArray(rows) ? rows : [];
    const threshold = params.threshold ?? 10;
    const metric = ["clicks", "impressions"].includes(params.metric) ? params.metric : "position";
    const lowerIsBetter = metric === "position";
    const toResult = (r) => {
      const delta = num$3(r.delta);
      const improved = lowerIsBetter ? delta < 0 : delta > 0;
      return {
        keyword: str$19(r.keyword),
        page: str$19(r.page),
        totalDays: num$3(r.totalDays),
        totalImpressions: num$3(r.totalImpressions),
        changeDate: str$19(r.changeDate),
        llr: num$3(r.llr),
        leftMean: num$3(r.leftMean),
        rightMean: num$3(r.rightMean),
        delta,
        leftStddev: num$3(r.leftStddev),
        rightStddev: num$3(r.rightStddev),
        direction: improved ? "improved" : "worsened",
        series: parseJsonList$14(r.seriesJson).map((point) => ({
          date: str$19(point.date),
          value: num$3(point.value)
        }))
      };
    };
    const results = sourceRows.map(toResult);
    // Single pass instead of two filters; direction is always one of the keys.
    const tally = { improved: 0, worsened: 0 };
    results.forEach(({ direction }) => {
      tally[direction] += 1;
    });
    return {
      results,
      meta: {
        total: results.length,
        metric,
        threshold,
        improved: tally.improved,
        worsened: tally.worsened
      }
    };
  }
});
|
|
1175
|
+
// Leading phrases that mark a query's search intent. Order matters: the first
// matching entry wins, both in the JS loop and in the regex alternation.
const INTENT_PREFIXES = [
  "how to",
  "what is",
  "what are",
  "why is",
  "why do",
  "where to",
  "when to",
  "best",
  "top",
  "vs",
  "versus",
  "compare",
  "review",
  "buy",
  "cheap",
  "free",
  "near me"
];
// Regex source spliced into SQL: an intent prefix at the start of the query,
// followed by whitespace or the end of the string. Derived from the list so
// the two cannot drift apart.
const INTENT_PREFIXES_REGEX = `^(${INTENT_PREFIXES.join("|")})(\\s|$)`;
// Splits a keyword into words on runs of whitespace.
const WHITESPACE_RE = /\s+/;
|
|
1196
|
+
/**
 * Convert a value to a string; null and undefined become "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$18(v) {
  const missing = v == null;
  return missing ? "" : String(v);
}
|
|
1199
|
+
/**
 * Return a list from either an array or a JSON-encoded string.
 *
 * Arrays pass through; a non-empty string is `JSON.parse`d (throwing on
 * invalid JSON) and kept only if it decodes to an array; all other values
 * give `[]`.
 *
 * @param {*} v
 * @returns {Array}
 */
function parseJsonList$13(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const value = JSON.parse(v);
  if (Array.isArray(value)) return value;
  return [];
}
|
|
1207
|
+
/**
 * Return the first intent prefix (from `INTENT_PREFIXES`) that the keyword
 * starts with as a whole phrase, or `null` when none matches.
 *
 * The prefix must be followed by whitespace or the end of the keyword. This
 * mirrors the `(\s|$)` boundary of `INTENT_PREFIXES_REGEX`, so "best shoes"
 * matches "best" but "bestiary" does not. A bare `startsWith(prefix)` would
 * accept any word that merely begins with the prefix.
 *
 * @param {string} keyword
 * @returns {string|null} The matched prefix, lower-cased as listed.
 */
function extractIntentPrefix(keyword) {
  const lower = keyword.toLowerCase();
  for (const prefix of INTENT_PREFIXES) {
    if (!lower.startsWith(prefix)) continue;
    const endsHere = lower.length === prefix.length;
    if (endsHere || /\s/.test(lower.charAt(prefix.length))) return prefix;
  }
  return null;
}
|
|
1212
|
+
/**
 * Return the first `wordCount` lower-cased words of a keyword joined by a
 * single space, or `null` when the keyword has no word beyond that prefix.
 *
 * @param {string} keyword
 * @param {number} [wordCount=2] - Number of leading words to keep.
 * @returns {string|null}
 */
function extractWordPrefix(keyword, wordCount = 2) {
  const tokens = [];
  for (const piece of keyword.toLowerCase().split(WHITESPACE_RE)) {
    // Leading/trailing whitespace produces empty pieces; drop them.
    if (piece) tokens.push(piece);
  }
  const tooShort = tokens.length < wordCount + 1;
  return tooShort ? null : tokens.slice(0, wordCount).join(" ");
}
|
|
1217
|
+
/**
 * Group keywords into clusters by intent prefix and/or leading two words.
 *
 * Keywords below `minImpressions` are ignored. With intent clustering on,
 * each keyword that has an intent prefix joins that prefix's cluster; with
 * prefix clustering on, the remaining keywords are grouped by their
 * two-word prefix. Clusters smaller than `minClusterSize` are not reported.
 *
 * `unclustered` lists every qualifying keyword that is not a member of a
 * reported cluster. This includes keywords whose intent cluster was too
 * small to report; previously those appeared in neither output.
 *
 * @param {Array<object>} keywords - Rows exposing `query`, `clicks`,
 *   `impressions` and `position`.
 * @param {object} [options]
 * @param {number} [options.minClusterSize=2]
 * @param {number} [options.minImpressions=10]
 * @param {"intent"|"prefix"|"both"} [options.clusterBy="both"]
 * @returns {{clusters: Array<object>, unclustered: Array<object>}} Clusters
 *   sorted by total clicks, descending.
 */
function analyzeClustering(keywords, options = {}) {
  const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
  const filtered = keywords.filter((k) => num$4(k.impressions) >= minImpressions);
  const clusterMap = new Map();
  // Queries claimed by an intent prefix; they never enter the prefix pass.
  const intentMatched = new Set();
  if (clusterBy === "intent" || clusterBy === "both") {
    for (const kw of filtered) {
      const intent = extractIntentPrefix(kw.query);
      if (!intent) continue;
      const existing = clusterMap.get(intent);
      if (existing) existing.keywords.push(kw);
      else clusterMap.set(intent, {
        type: "intent",
        keywords: [kw]
      });
      intentMatched.add(kw.query);
    }
  }
  if (clusterBy === "prefix" || clusterBy === "both") {
    const prefixMap = new Map();
    for (const kw of filtered) {
      if (intentMatched.has(kw.query)) continue;
      const prefix = extractWordPrefix(kw.query);
      if (!prefix) continue;
      const existing = prefixMap.get(prefix);
      if (existing) existing.push(kw);
      else prefixMap.set(prefix, [kw]);
    }
    for (const [prefix, kws] of prefixMap) {
      if (kws.length >= minClusterSize) clusterMap.set(prefix, {
        type: "prefix",
        keywords: kws
      });
    }
  }
  const clusters = [];
  // Queries that ended up in a reported cluster.
  const placed = new Set();
  for (const [name, data] of clusterMap) {
    if (data.keywords.length < minClusterSize) continue;
    const totalClicks = data.keywords.reduce((sum, k) => sum + num$4(k.clicks), 0);
    const totalImpressions = data.keywords.reduce((sum, k) => sum + num$4(k.impressions), 0);
    const avgPosition = data.keywords.reduce((sum, k) => sum + num$4(k.position), 0) / data.keywords.length;
    clusters.push({
      clusterName: name,
      clusterType: data.type,
      keywords: data.keywords,
      totalClicks,
      totalImpressions,
      avgPosition,
      keywordCount: data.keywords.length
    });
    for (const kw of data.keywords) placed.add(kw.query);
  }
  clusters.sort((a, b) => b.totalClicks - a.totalClicks);
  return {
    clusters,
    unclustered: filtered.filter((kw) => !placed.has(kw.query))
  };
}
|
|
1275
|
+
/**
 * Keyword-clustering analyzer.
 *
 * SQL path: aggregates the `keywords` table per query, tags each query with
 * an intent prefix and/or its two-word prefix, and groups by the first
 * non-null of the two. Rows path: delegates to `analyzeClustering`.
 */
const clusteringAnalyzer = defineAnalyzer({
  id: "clustering",
  /**
   * Build the clustering query.
   *
   * @param {object} params - Reads `minImpressions` (default 10),
   *   `minClusterSize` (default 2) and `clusterBy` (default "both"); the
   *   period comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: object}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const minClusterSize = params.minClusterSize ?? 2;
    const clusterBy = params.clusterBy ?? "both";
    const doIntent = clusterBy === "intent" || clusterBy === "both";
    const doPrefix = clusterBy === "prefix" || clusterBy === "both";
    // A disabled strategy contributes a typed NULL so COALESCE skips it.
    const intentExpr = doIntent ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')` : `CAST(NULL AS VARCHAR)`;
    const prefixExpr = doPrefix ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
ELSE CAST(NULL AS VARCHAR) END` : `CAST(NULL AS VARCHAR)`;
    return {
      sql: `
WITH agg AS (
SELECT
query,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query
HAVING SUM(impressions) >= ?
),
classified AS (
SELECT
query, clicks, impressions, ctr, position,
${intentExpr} AS intent_prefix,
${prefixExpr} AS word_prefix
FROM agg
),
keyed AS (
SELECT
query, clicks, impressions, ctr, position,
COALESCE(intent_prefix, word_prefix) AS cluster_name,
CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
FROM classified
WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
)
SELECT
cluster_name AS clusterName,
any_value(cluster_type) AS clusterType,
CAST(COUNT(*) AS DOUBLE) AS keywordCount,
${METRIC_EXPR.clicks} AS totalClicks,
${METRIC_EXPR.impressions} AS totalImpressions,
AVG(position) AS avgPosition,
to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
FROM keyed
GROUP BY cluster_name
HAVING COUNT(*) >= ?
ORDER BY totalClicks DESC
`,
      // Positional parameters, in the order the `?` placeholders appear.
      params: [
        startDate,
        endDate,
        minImpressions,
        minClusterSize
      ],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise SQL rows into cluster objects.
   *
   * @param {Array<object>} rows - Result rows of the query from `buildSql`.
   * @returns {{results: Array<object>, meta: object}}
   */
  reduceSql(rows) {
    const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
      clusterName: str$18(r.clusterName),
      clusterType: str$18(r.clusterType),
      keywordCount: num$4(r.keywordCount),
      totalClicks: num$4(r.totalClicks),
      totalImpressions: num$4(r.totalImpressions),
      avgPosition: num$4(r.avgPosition),
      keywords: parseJsonList$13(r.keywords).map((k) => ({
        query: str$18(k.query),
        clicks: num$4(k.clicks),
        impressions: num$4(k.impressions),
        ctr: num$4(k.ctr),
        position: num$4(k.position)
      }))
    }));
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalClusters: clusters.length
      }
    };
  },
  /** Describe the row query for the rows path. */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Rows-path reducer.
   *
   * Accepts either a bare row array or an object keyed like `buildRows`
   * (`{ keywords: [...] }`) — NOTE(review): which shape the runner passes is
   * not visible here, so both are handled. `meta` carries `total` so it
   * matches the SQL path.
   */
  reduceRows(rows, params) {
    const list = Array.isArray(rows) ? rows : Array.isArray(rows?.keywords) ? rows.keywords : [];
    const result = analyzeClustering(list, {
      clusterBy: params.clusterBy,
      minClusterSize: params.minClusterSize,
      minImpressions: params.minImpressions
    });
    return {
      results: result.clusters,
      meta: {
        total: result.clusters.length,
        totalClusters: result.clusters.length
      }
    };
  }
});
|
|
1381
|
+
/**
 * String coercion that treats null and undefined as "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$17(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
1384
|
+
/**
 * Accept a list given as an array or as JSON text.
 *
 * An array is returned unchanged. A non-empty string is decoded with
 * `JSON.parse` (errors propagate) and returned if it is an array. Anything
 * else results in an empty array.
 *
 * @param {*} v
 * @returns {Array}
 */
function parseJsonList$12(v) {
  if (Array.isArray(v)) return v;
  if (typeof v === "string" && v.length > 0) {
    const candidate = JSON.parse(v);
    if (Array.isArray(candidate)) return candidate;
  }
  return [];
}
|
|
1392
|
+
/**
 * Gini coefficient of a list of values.
 *
 * Uses the rank-weighted form over the ascending-sorted values:
 * sum((2·rank − n − 1)·value) / (n · sum(values)), with 1-based ranks.
 * Returns 0 for an empty list or when the values sum to 0.
 *
 * @param {number[]} values - Not modified; a sorted copy is used.
 * @returns {number}
 */
function calculateGini(values) {
  const n = values.length;
  if (n === 0) return 0;
  const ascending = [...values].sort((x, y) => x - y);
  const total = ascending.reduce((acc, value) => acc + value, 0);
  if (total === 0) return 0;
  const weighted = ascending.reduce((acc, value, index) => acc + (2 * (index + 1) - n - 1) * value, 0);
  return weighted / (n * total);
}
|
|
1402
|
+
/**
 * Herfindahl-Hirschman index: the sum of squared percentage shares.
 *
 * @param {number[]} shares - Fractions (0.25 means 25 %).
 * @returns {number} 0 for an empty list; 10000 for a single share of 1.
 */
function calculateHHI(shares) {
  let index = 0;
  for (const share of shares) {
    index += (share * 100) ** 2;
  }
  return index;
}
|
|
1405
|
+
/**
 * Measure how concentrated clicks are across a set of items.
 *
 * @param {Array<{key: *, clicks: number}>} items
 * @param {object} [options]
 * @param {number} [options.topN=10] - How many top items to list.
 * @returns {object} Gini coefficient, HHI, share of clicks held by the top
 *   N items, the top N items with their shares, item and click totals, and
 *   a risk level: "high" when HHI > 2500, "medium" when > 1500, else "low".
 */
function analyzeConcentration(items, options = {}) {
  const { topN = 10 } = options;
  if (items.length === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low"
    };
  }
  // Work on a copy ordered by clicks, highest first.
  const byClicks = [...items].sort((left, right) => right.clicks - left.clicks);
  let totalClicks = 0;
  for (const entry of byClicks) totalClicks += entry.clicks;
  const hasClicks = totalClicks > 0;
  const giniCoefficient = calculateGini(byClicks.map((entry) => entry.clicks));
  const hhi = calculateHHI(hasClicks ? byClicks.map((entry) => entry.clicks / totalClicks) : []);
  const topNItems = byClicks.slice(0, topN).map(({ key, clicks }) => ({
    key,
    clicks,
    share: hasClicks ? clicks / totalClicks : 0
  }));
  let topNClicks = 0;
  for (const entry of topNItems) topNClicks += entry.clicks;
  const topNConcentration = hasClicks ? topNClicks / totalClicks : 0;
  const riskLevel = hhi > 2500 ? "high" : hhi > 1500 ? "medium" : "low";
  return {
    giniCoefficient,
    hhi,
    topNConcentration,
    topNItems,
    totalItems: items.length,
    totalClicks,
    riskLevel
  };
}
|
|
1442
|
+
/**
 * Click concentration across pages: keys each row by its `page` and
 * forwards to `analyzeConcentration`.
 *
 * @param {Array<object>} pages - Rows exposing `page` and `clicks`.
 * @param {object} [options] - Passed through unchanged.
 * @returns {object} The `analyzeConcentration` result.
 */
function analyzePageConcentration(pages, options) {
  const items = pages.map(({ page, clicks }) => ({
    key: page,
    clicks: num$4(clicks)
  }));
  return analyzeConcentration(items, options);
}
|
|
1448
|
+
/**
 * Click concentration across keywords: keys each row by its `query` and
 * forwards to `analyzeConcentration`.
 *
 * @param {Array<object>} keywords - Rows exposing `query` and `clicks`.
 * @param {object} [options] - Passed through unchanged.
 * @returns {object} The `analyzeConcentration` result.
 */
function analyzeKeywordConcentration(keywords, options) {
  const items = keywords.map(({ query, clicks }) => ({
    key: query,
    clicks: num$4(clicks)
  }));
  return analyzeConcentration(items, options);
}
|
|
1454
|
+
/**
 * Click-concentration analyzer for pages or keywords.
 *
 * SQL path: computes Gini, HHI and top-N concentration in a single row.
 * Rows path: delegates to `analyzePageConcentration` or
 * `analyzeKeywordConcentration`.
 *
 * `params.dimension` is resolved the same way in every method: "keywords"
 * selects keywords, anything else (including a missing value) selects
 * pages. Previously the SQL path fell back to pages while the rows path
 * fell back to keywords for an unrecognised value.
 */
const concentrationAnalyzer = defineAnalyzer({
  id: "concentration",
  /**
   * Build the concentration query.
   *
   * @param {object} params - Reads `dimension` and `topN` (default 10); the
   *   period comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: object}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    const topN = params.topN ?? 10;
    const table = dim;
    // Only these two fixed column names are ever spliced into the SQL text.
    const keyCol = dim === "keywords" ? "query" : "url";
    return {
      sql: `
WITH items AS (
SELECT
${keyCol} AS key,
${METRIC_EXPR.clicks} AS clicks
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY ${keyCol}
HAVING SUM(clicks) > 0
),
totals AS (
SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
),
ranked AS (
SELECT
i.key, i.clicks,
i.clicks / NULLIF(t.total_clicks, 0) AS share,
ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
t.total_clicks AS tclicks,
t.total_items AS titems
FROM items i, totals t
),
gini_num AS (
SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
),
hhi_calc AS (
SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
),
top_list AS (
SELECT
list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
SUM(clicks) AS top_clicks
FROM ranked WHERE rnk_desc <= ?
)
SELECT
COALESCE(
(SELECT weighted_sum FROM gini_num)
/ NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
0.0
) AS giniCoefficient,
COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
COALESCE(
CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
/ NULLIF((SELECT total_clicks FROM totals), 0),
0.0
) AS topNConcentration,
COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
CASE
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
ELSE 'low'
END AS riskLevel
`,
      // Positional parameters, in the order the `?` placeholders appear.
      params: [
        startDate,
        endDate,
        topN
      ],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise the single summary row produced by the SQL path.
   *
   * @param {Array<object>} rows - Only the first row is read; a missing row
   *   yields zeroed metrics.
   * @param {object} params - Reads `dimension`.
   * @returns {{results: Array<object>, meta: object}}
   */
  reduceSql(rows, params) {
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
    const topRaw = parseJsonList$12(r.topNItems);
    return {
      results: [{
        giniCoefficient: num$4(r.giniCoefficient),
        hhi: num$4(r.hhi),
        topNConcentration: num$4(r.topNConcentration),
        topNItems: topRaw.map((t) => ({
          key: str$17(t.key),
          clicks: num$4(t.clicks),
          share: num$4(t.share)
        })),
        totalItems: num$4(r.totalItems),
        totalClicks: num$4(r.totalClicks),
        riskLevel: str$17(r.riskLevel)
      }],
      meta: {
        total: 1,
        dimension: dim
      }
    };
  },
  /** Describe the row query for the selected dimension. */
  buildRows(params) {
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    const period = periodOf(params);
    const out = {};
    if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
    else out.keywords = keywordsQueryState(period, params.limit);
    return out;
  },
  /**
   * Rows-path reducer.
   *
   * Accepts a bare row array or an object keyed by dimension (the shape
   * `buildRows` returns). Null or otherwise unusable input is treated as an
   * empty list instead of throwing. `meta` carries `total` so it matches
   * the SQL path.
   */
  reduceRows(rows, params) {
    const dim = params.dimension === "keywords" ? "keywords" : "pages";
    const keyed = rows?.[dim];
    const arr = Array.isArray(rows) ? rows : Array.isArray(keyed) ? keyed : [];
    const options = { topN: params.topN };
    const summary = dim === "pages" ? analyzePageConcentration(arr, options) : analyzeKeywordConcentration(arr, options);
    return {
      results: [summary],
      meta: {
        total: 1,
        dimension: dim
      }
    };
  }
});
|
|
1570
|
+
/**
 * Numeric coercion with a zero fallback.
 *
 * A number is returned unchanged and a bigint is converted with `Number`.
 * null/undefined map to 0; any other value is converted with `Number` and
 * replaced by 0 when the result is not finite.
 *
 * @param {*} v
 * @returns {number}
 */
function num$2(v) {
  const kind = typeof v;
  if (kind === "number") return v;
  if (kind === "bigint") return Number(v);
  if (v == null) return 0;
  const parsed = Number(v);
  if (Number.isFinite(parsed)) return parsed;
  return 0;
}
|
|
1577
|
+
/**
 * Turn a value into a string, with null and undefined yielding "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$16(v) {
  return v != null ? String(v) : "";
}
|
|
1580
|
+
/**
 * Resolves the look-back window used by the content-velocity analyzer.
 *
 * `days` defaults to 90 and is clamped to [7, 365]. A value that does not
 * convert to a finite number falls back to the default instead of turning
 * into NaN (which would make the start date an Invalid Date).
 *
 * @param {{ days?: number | string }} params - Analyzer params (also passed to `periodOf`).
 * @returns {{ days: number, startDate: string, endDate: string }}
 */
function contentVelocityWindow(params) {
  const requested = Number(params.days ?? 90);
  const days = Math.min(Math.max(Number.isFinite(requested) ? requested : 90, 7), 365);
  const { endDate } = periodOf(params);
  const start = new Date(endDate);
  start.setUTCDate(start.getUTCDate() - days);
  return {
    days,
    startDate: toIsoDate(start),
    endDate
  };
}
/**
 * "content-velocity": per ISO week, how many distinct queries had impressions
 * and how many of them were seen for the first time inside the window.
 *
 * Note: "first seen" is relative to the queried window only, so queries that
 * already existed before `startDate` are counted as new in their first week
 * inside the window.
 */
const contentVelocityAnalyzer = defineAnalyzer({
  id: "content-velocity",
  /**
   * Builds the weekly new/total keyword query over the `keywords` table.
   * @param {object} params - Analyzer params (`days`, plus whatever `periodOf` reads).
   */
  buildSql(params) {
    const { startDate, endDate } = contentVelocityWindow(params);
    return {
      sql: `
        WITH src AS (
          SELECT query, date
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        first_seen AS (
          SELECT query, MIN(date) AS first_date FROM src GROUP BY query
        ),
        per_week AS (
          SELECT
            strftime(date, '%G-W%V') AS week,
            MIN(date) AS week_start,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS totalKeywords
          FROM src
          GROUP BY week
        ),
        new_per_week AS (
          SELECT
            strftime(first_date, '%G-W%V') AS week,
            CAST(COUNT(*) AS DOUBLE) AS newKeywords
          FROM first_seen
          GROUP BY week
        )
        SELECT
          pw.week AS week,
          COALESCE(npw.newKeywords, 0) AS newKeywords,
          pw.totalKeywords AS totalKeywords
        FROM per_week pw
        LEFT JOIN new_per_week npw ON pw.week = npw.week
        ORDER BY pw.week ASC
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes the weekly rows and derives a trend label.
   *
   * The trend compares the mean of new keywords in the second half of the
   * weeks against the first half; a difference larger than
   * max(1, 15% of the overall weekly mean) is "accelerating" or "decelerating".
   *
   * @param {unknown} rows - Rows returned by the query above.
   * @param {object} params - Same params that were passed to `buildSql`.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { days, startDate, endDate } = contentVelocityWindow(params);
    const weekly = arr.map((r) => ({
      week: str$16(r.week),
      newKeywords: num$2(r.newKeywords),
      totalKeywords: num$2(r.totalKeywords)
    }));
    const sumNew = (weeks) => weeks.reduce((s, w) => s + w.newKeywords, 0);
    const total = sumNew(weekly);
    const avg = weekly.length > 0 ? total / weekly.length : 0;
    const mid = Math.floor(weekly.length / 2);
    const firstAvg = mid > 0 ? sumNew(weekly.slice(0, mid)) / mid : 0;
    const secondCount = weekly.length - mid;
    const secondAvg = secondCount > 0 ? sumNew(weekly.slice(mid)) / secondCount : 0;
    const diff = secondAvg - firstAvg;
    const threshold = Math.max(1, avg * .15);
    return {
      results: weekly,
      meta: {
        summary: {
          totalNewKeywords: total,
          avgPerWeek: avg,
          trend: diff > threshold ? "accelerating" : diff < -threshold ? "decelerating" : "stable"
        },
        days,
        startDate,
        endDate
      }
    };
  }
});
|
|
1661
|
+
/**
 * Coerces a result cell to a finite JS number.
 *
 * `null`/`undefined` and anything that does not convert to a finite number
 * (including numeric `NaN` / `±Infinity`) become 0, so aggregated totals
 * computed from these values stay finite.
 *
 * @param {unknown} v - Raw cell value (number, bigint, numeric string, ...).
 * @returns {number} A finite number.
 */
function num$1(v) {
  if (v == null) return 0;
  const n = typeof v === "number" ? v : Number(v);
  return Number.isFinite(n) ? n : 0;
}
|
|
1668
|
+
/**
 * Converts a result cell to a string; `null` and `undefined` become "".
 *
 * @param {unknown} v
 * @returns {string}
 */
function str$15(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
|
|
1671
|
+
/**
 * Interprets a result cell as a boolean.
 *
 * Only the exact values `true`, `1` and `"true"` count as true; everything
 * else (including `"1"` and other truthy values) is false.
 *
 * @param {unknown} v
 * @returns {boolean}
 */
function bool$2(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
|
|
1674
|
+
/**
 * Reads a list-valued cell that may arrive as an array or as a JSON string.
 *
 * Arrays are returned as-is. Non-empty strings are JSON-parsed (a malformed
 * string throws from `JSON.parse`). Anything that is not, or does not parse
 * to, an array yields an empty array.
 *
 * @param {unknown} v
 * @returns {unknown[]}
 */
function parseJsonList$11(v) {
  const decoded = typeof v === "string" && v.length > 0 ? JSON.parse(v) : v;
  return Array.isArray(decoded) ? decoded : [];
}
|
|
1682
|
+
/**
 * Resolves the z-score threshold for the ctr-anomaly analyzer.
 *
 * The value is interpolated into the SQL text, so it is forced to a finite
 * number here; anything else falls back to the default of 2. This keeps a
 * caller-supplied string from being spliced into the query verbatim.
 *
 * @param {{ threshold?: number | string }} params
 * @returns {number}
 */
function ctrAnomalyZThreshold(params) {
  const z = Number(params.threshold ?? 2);
  return Number.isFinite(z) ? z : 2;
}
/**
 * "ctr-anomaly": flags (query, page) pairs whose daily CTR deviates from its
 * own trailing baseline while the ranking position stays roughly unchanged.
 *
 * Pipeline of the SQL below:
 *  - `daily`    : per (query, url, date) metrics, keeping only days with at
 *                 least `minImpressions` impressions.
 *  - `rolled`   : trailing mean/stddev of CTR and mean position over the
 *                 previous 28 *rows* of the pair (rows, not calendar days,
 *                 because low-impression days were dropped in `daily`).
 *  - `flagged`  : z-score of the day's CTR (0 when fewer than 14 baseline
 *                 rows or zero stddev) and absolute position delta.
 *  - `breaches` : a day is a breach when |z| >= threshold, the position moved
 *                 by at most 1.5 and the baseline has at least 14 rows.
 *  - `per_entity`: aggregates per pair; only pairs with >= 2 downward breach
 *                 days are kept.
 *  - `series`   : the full daily series per pair as JSON.
 */
const ctrAnomalyAnalyzer = defineAnalyzer({
  id: "ctr-anomaly",
  /**
   * @param {object} params - `startDate`, `endDate`, `minImpressions`,
   *   `threshold` and `limit` are read; each has a default.
   */
  buildSql(params) {
    const endDate = params.endDate ?? defaultEndDate();
    const startDate = params.startDate ?? daysAgo(93);
    const minDailyImpressions = params.minImpressions ?? 5;
    const minRollingN = 14;
    const zThreshold = ctrAnomalyZThreshold(params);
    const maxPositionDelta = 1.5;
    const minBreachDays = 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
        WITH daily AS (
          SELECT
            query,
            url AS page,
            date,
            ${METRIC_EXPR.clicks} AS day_clicks,
            ${METRIC_EXPR.impressions} AS day_impressions,
            ${METRIC_EXPR.ctr} AS day_ctr,
            ${METRIC_EXPR.position} AS day_position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
            AND query IS NOT NULL AND query <> ''
            AND url IS NOT NULL AND url <> ''
          GROUP BY query, url, date
          HAVING SUM(impressions) >= ?
        ),
        rolled AS (
          SELECT *,
            AVG(day_ctr) OVER w AS rolling_ctr,
            STDDEV_POP(day_ctr) OVER w AS rolling_stddev,
            AVG(day_position) OVER w AS rolling_position,
            COUNT(*) OVER w AS rolling_n
          FROM daily
          WINDOW w AS (
            PARTITION BY query, page
            ORDER BY date
            ROWS BETWEEN 28 PRECEDING AND 1 PRECEDING
          )
        ),
        flagged AS (
          SELECT *,
            CASE
              WHEN rolling_n >= ${Number(minRollingN)} AND rolling_stddev > 0
                THEN (day_ctr - rolling_ctr) / rolling_stddev
              ELSE 0.0
            END AS z_score,
            CASE
              WHEN rolling_position IS NULL THEN 0.0
              ELSE ABS(day_position - rolling_position)
            END AS position_delta
          FROM rolled
        ),
        breaches AS (
          SELECT *,
            CASE
              WHEN ABS(z_score) >= ${zThreshold}
                AND position_delta <= ${maxPositionDelta}
                AND rolling_n >= ${Number(minRollingN)}
              THEN true ELSE false
            END AS is_breach
          FROM flagged
        ),
        per_entity AS (
          SELECT
            query, page,
            COUNT(*) FILTER (WHERE is_breach AND z_score < 0) AS breach_days_down,
            COUNT(*) FILTER (WHERE is_breach AND z_score > 0) AS breach_days_up,
            SUM(CASE
              WHEN is_breach AND z_score < 0
                THEN (rolling_ctr - day_ctr) * day_impressions
              ELSE 0.0
            END) AS clicks_lost,
            SUM(CASE
              WHEN is_breach AND z_score < 0
                THEN ABS(z_score) * day_impressions
              ELSE 0.0
            END) AS severity_raw,
            MAX(CASE WHEN is_breach THEN ABS(z_score) ELSE 0.0 END) AS max_z,
            AVG(rolling_ctr) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_ctr,
            AVG(rolling_position) FILTER (WHERE rolling_n >= ${Number(minRollingN)}) AS baseline_position,
            SUM(day_impressions) AS total_impressions,
            SUM(day_clicks) AS total_clicks
          FROM breaches
          GROUP BY query, page
          HAVING COUNT(*) FILTER (WHERE is_breach AND z_score < 0) >= ${Number(minBreachDays)}
        ),
        series AS (
          SELECT query, page,
            to_json(list({
              'date': strftime(date, '%Y-%m-%d'),
              'ctr': day_ctr,
              'position': day_position,
              'impressions': day_impressions,
              'rollingCtr': rolling_ctr,
              'rollingStddev': rolling_stddev,
              'z': z_score,
              'breach': is_breach AND z_score < 0
            } ORDER BY date)) AS seriesJson
          FROM breaches
          GROUP BY query, page
        )
        SELECT
          e.query AS keyword,
          e.page,
          CAST(e.breach_days_down AS DOUBLE) AS breachDaysDown,
          CAST(e.breach_days_up AS DOUBLE) AS breachDaysUp,
          CAST(ROUND(e.clicks_lost) AS DOUBLE) AS clicksLost,
          e.severity_raw AS severityRaw,
          e.max_z AS maxZ,
          e.baseline_ctr AS baselineCtr,
          e.baseline_position AS baselinePosition,
          e.total_impressions AS totalImpressions,
          e.total_clicks AS totalClicks,
          s.seriesJson
        FROM per_entity e
        LEFT JOIN series s USING (query, page)
        ORDER BY clicksLost DESC
        LIMIT ${Number(limit)}
      `,
      params: [
        startDate,
        endDate,
        minDailyImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalizes each anomaly row (including its JSON series) and adds totals.
   *
   * @param {unknown} rows - Rows returned by the query above.
   * @param {{ threshold?: number | string }} params
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minRollingN = 14;
    // Report the same threshold that buildSql actually put into the query.
    const zThreshold = ctrAnomalyZThreshold(params);
    const anomalies = arr.map((r) => ({
      keyword: str$15(r.keyword),
      page: str$15(r.page),
      breachDaysDown: num$1(r.breachDaysDown),
      breachDaysUp: num$1(r.breachDaysUp),
      clicksLost: num$1(r.clicksLost),
      severity: num$1(r.severityRaw),
      maxZ: num$1(r.maxZ),
      baselineCtr: num$1(r.baselineCtr),
      baselinePosition: num$1(r.baselinePosition),
      totalImpressions: num$1(r.totalImpressions),
      totalClicks: num$1(r.totalClicks),
      series: parseJsonList$11(r.seriesJson).map((s) => ({
        date: str$15(s.date),
        ctr: num$1(s.ctr),
        position: num$1(s.position),
        impressions: num$1(s.impressions),
        // The first rows of a pair have no preceding window, so these stay null.
        rollingCtr: s.rollingCtr == null ? null : num$1(s.rollingCtr),
        rollingStddev: s.rollingStddev == null ? null : num$1(s.rollingStddev),
        z: num$1(s.z),
        breach: bool$2(s.breach)
      }))
    }));
    const totalClicksLost = anomalies.reduce((s, a) => s + a.clicksLost, 0);
    const totalBreachDays = anomalies.reduce((s, a) => s + a.breachDaysDown, 0);
    return {
      results: anomalies,
      meta: {
        total: anomalies.length,
        totalClicksLost,
        totalBreachDays,
        zThreshold,
        minRollingN
      }
    };
  }
});
|
|
1856
|
+
/**
 * Coerces a result cell to a finite JS number.
 *
 * `null`/`undefined` and anything that does not convert to a finite number
 * (including numeric `NaN` / `±Infinity`) become 0.
 *
 * @param {unknown} v - Raw cell value (number, bigint, numeric string, ...).
 * @returns {number} A finite number.
 */
function num(v) {
  if (v == null) return 0;
  const n = typeof v === "number" ? v : Number(v);
  return Number.isFinite(n) ? n : 0;
}
|
|
1863
|
+
/**
 * Converts a result cell to a string; `null` and `undefined` become "".
 *
 * @param {unknown} v
 * @returns {string}
 */
function str$14(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
|
|
1866
|
+
/**
 * Reads a list-valued cell that may arrive as an array or as a JSON string.
 *
 * Arrays are returned as-is. Non-empty strings are JSON-parsed (a malformed
 * string throws from `JSON.parse`). Anything that is not, or does not parse
 * to, an array yields an empty array.
 *
 * @param {unknown} v
 * @returns {unknown[]}
 */
function parseJsonList$10(v) {
  const decoded = typeof v === "string" && v.length > 0 ? JSON.parse(v) : v;
  return Array.isArray(decoded) ? decoded : [];
}
|
|
1874
|
+
/**
 * "ctr-curve": average CTR per position bucket plus the queries whose CTR
 * deviates most from the average of their position band.
 *
 * The query returns a single row with two JSON columns:
 *  - `curve_json`    : one entry per position bucket ('1', '2', '3', '4-5',
 *                      '6-10', '11-20', '20+'), ordered by position.
 *  - `outliers_json` : up to 50 queries (at least 20 impressions each) with
 *                      the largest absolute CTR difference from their band
 *                      average, largest first.
 *
 * Both lists carry an explicit ORDER BY inside `list(...)` so their order does
 * not depend on how the engine happens to scan the CTEs; the reducer's
 * `slice(0, 25)` relies on the outliers being sorted.
 *
 * NOTE(review): `medianPosition` is computed with AVG, i.e. it is the mean
 * position of the bucket, not a median. The field name is kept for callers.
 */
const ctrCurveAnalyzer = defineAnalyzer({
  id: "ctr-curve",
  /**
   * @param {object} params - Passed to `periodOf` to resolve the date range.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
        WITH src AS (
          SELECT
            query,
            clicks,
            impressions,
            sum_position,
            (sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ? AND impressions > 0
        ),
        curve AS (
          SELECT
            CASE
              WHEN avg_pos <= 1.5 THEN '1'
              WHEN avg_pos <= 2.5 THEN '2'
              WHEN avg_pos <= 3.5 THEN '3'
              WHEN avg_pos <= 5.5 THEN '4-5'
              WHEN avg_pos <= 10.5 THEN '6-10'
              WHEN avg_pos <= 20.5 THEN '11-20'
              ELSE '20+'
            END AS bucket,
            AVG(CAST(clicks AS DOUBLE) / NULLIF(impressions, 0)) AS avgCtr,
            AVG(avg_pos) AS medianPosition,
            CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
            ${METRIC_EXPR.clicks} AS totalClicks,
            ${METRIC_EXPR.impressions} AS totalImpressions
          FROM src
          GROUP BY bucket
        ),
        ks AS (
          SELECT
            query,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position,
            CASE
              WHEN ${METRIC_EXPR.position} <= 3.5 THEN 'top3'
              WHEN ${METRIC_EXPR.position} <= 10.5 THEN 'page1'
              WHEN ${METRIC_EXPR.position} <= 20.5 THEN 'page2'
              ELSE 'deep'
            END AS band
          FROM src
          GROUP BY query
          HAVING SUM(impressions) >= 20
        ),
        band_avg AS (
          SELECT band, AVG(ctr) AS band_avg_ctr FROM ks GROUP BY band
        ),
        outliers AS (
          SELECT
            ks.query, ks.clicks, ks.impressions, ks.ctr, ks.position,
            ba.band_avg_ctr AS expectedCtr,
            ks.ctr - ba.band_avg_ctr AS ctrDiff
          FROM ks JOIN band_avg ba ON ks.band = ba.band
          ORDER BY ABS(ks.ctr - ba.band_avg_ctr) DESC
          LIMIT 50
        )
        SELECT
          (SELECT to_json(list({
            'bucket': bucket,
            'avgCtr': avgCtr,
            'medianPosition': medianPosition,
            'keywordCount': keywordCount,
            'totalClicks': totalClicks,
            'totalImpressions': totalImpressions
          } ORDER BY medianPosition)) FROM curve) AS curve_json,
          (SELECT to_json(list({
            'query': query,
            'clicks': clicks,
            'impressions': impressions,
            'ctr': ctr,
            'position': position,
            'expectedCtr': expectedCtr,
            'ctrDiff': ctrDiff
          } ORDER BY ABS(ctrDiff) DESC)) FROM outliers) AS outliers_json
      `,
      params: [startDate, endDate],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decodes the two JSON columns of the single result row.
   *
   * `results` is the bucket curve; `meta.overperforming` / `meta.underperforming`
   * hold up to 25 outliers each, split by the sign of `ctrDiff`.
   *
   * @param {unknown} rows - Rows returned by the query above.
   * @param {object} params - Passed to `periodOf` to echo the date range.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const row = arr[0] ?? {};
    const curve = parseJsonList$10(row.curve_json).map((r) => ({
      bucket: str$14(r.bucket),
      avgCtr: num(r.avgCtr),
      medianPosition: num(r.medianPosition),
      keywordCount: num(r.keywordCount),
      totalClicks: num(r.totalClicks),
      totalImpressions: num(r.totalImpressions)
    }));
    const outliers = parseJsonList$10(row.outliers_json).map((r) => ({
      query: str$14(r.query),
      clicks: num(r.clicks),
      impressions: num(r.impressions),
      ctr: num(r.ctr),
      position: num(r.position),
      expectedCtr: num(r.expectedCtr),
      ctrDiff: num(r.ctrDiff)
    }));
    return {
      results: curve,
      meta: {
        overperforming: outliers.filter((o) => o.ctrDiff > 0).slice(0, 25),
        underperforming: outliers.filter((o) => o.ctrDiff < 0).slice(0, 25),
        startDate,
        endDate
      }
    };
  }
});
|
|
1996
|
+
/**
 * Converts a result cell to a string; `null` and `undefined` become "".
 *
 * @param {unknown} v
 * @returns {string}
 */
function str$13(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
|
|
1999
|
+
/**
 * Reads a list-valued cell that may arrive as an array or as a JSON string.
 *
 * Arrays are returned as-is. Non-empty strings are JSON-parsed (a malformed
 * string throws from `JSON.parse`). Anything that is not, or does not parse
 * to, an array yields an empty array.
 *
 * @param {unknown} v
 * @returns {unknown[]}
 */
function parseJsonList$9(v) {
  const decoded = typeof v === "string" && v.length > 0 ? JSON.parse(v) : v;
  return Array.isArray(decoded) ? decoded : [];
}
|
|
2007
|
+
/**
 * "dark-traffic": clicks that pages received but that are not attributed to
 * any query.
 *
 * The query reads three tables (`pages` via {{FILES}}, `keywords` via
 * {{FILES_KEYWORDS}}, `page_keywords` via {{FILES_PAGE_KEYWORDS}}) over the
 * same date range and returns one row with three JSON columns:
 *  - `page_totals_json` : total clicks/impressions from the pages table.
 *  - `kw_totals_json`   : total clicks/impressions from the keywords table.
 *  - `pages_json`       : up to 50 pages with the most unattributed clicks
 *                         (page clicks minus page_keywords clicks), largest first.
 */
const darkTrafficAnalyzer = defineAnalyzer({
  id: "dark-traffic",
  /**
   * @param {object} params - Passed to `periodOf` to resolve the date range.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
        WITH page_totals AS (
          SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
        ),
        kw_totals AS (
          SELECT SUM(clicks) AS total_clicks, SUM(impressions) AS total_impressions
          FROM read_parquet({{FILES_KEYWORDS}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
        ),
        per_page AS (
          SELECT url, SUM(clicks) AS page_clicks
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
          HAVING SUM(clicks) > 0
        ),
        per_page_kw AS (
          SELECT url, SUM(clicks) AS attributed_clicks, COUNT(DISTINCT query) AS kw_count
          FROM read_parquet({{FILES_PAGE_KEYWORDS}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
        ),
        page_rows AS (
          SELECT
            p.url AS url,
            CAST(p.page_clicks AS DOUBLE) AS totalClicks,
            CAST(COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS attributedClicks,
            CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE) AS darkClicks,
            CAST(p.page_clicks - COALESCE(k.attributed_clicks, 0) AS DOUBLE)
              / NULLIF(p.page_clicks, 0) AS darkPercent,
            CAST(COALESCE(k.kw_count, 0) AS DOUBLE) AS keywordCount
          FROM per_page p
          LEFT JOIN per_page_kw k ON p.url = k.url
          WHERE p.page_clicks - COALESCE(k.attributed_clicks, 0) > 0
          ORDER BY darkClicks DESC
          LIMIT 50
        )
        SELECT
          (SELECT to_json({
            'totalClicks': CAST(total_clicks AS DOUBLE),
            'totalImpressions': CAST(total_impressions AS DOUBLE)
          }) FROM page_totals) AS page_totals_json,
          (SELECT to_json({
            'attributedClicks': CAST(total_clicks AS DOUBLE),
            'attributedImpressions': CAST(total_impressions AS DOUBLE)
          }) FROM kw_totals) AS kw_totals_json,
          (SELECT to_json(list({
            'url': url,
            'totalClicks': totalClicks,
            'attributedClicks': attributedClicks,
            'darkClicks': darkClicks,
            'darkPercent': darkPercent,
            'keywordCount': keywordCount
          } ORDER BY darkClicks DESC)) FROM page_rows) AS pages_json
      `,
      // One (startDate, endDate) pair per CTE that filters on date, in order:
      // page_totals, kw_totals, per_page, per_page_kw.
      params: [
        startDate,
        endDate,
        startDate,
        endDate,
        startDate,
        endDate,
        startDate,
        endDate
      ],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      },
      extraFiles: {
        KEYWORDS: {
          table: "keywords",
          partitions: enumeratePartitions(startDate, endDate)
        },
        PAGE_KEYWORDS: {
          table: "page_keywords",
          partitions: enumeratePartitions(startDate, endDate)
        }
      }
    };
  },
  /**
   * Decodes the JSON columns and derives the site-wide dark-traffic summary.
   *
   * The summary compares pages-table totals with keywords-table totals, while
   * the per-page rows compare against `page_keywords`; the two are therefore
   * not guaranteed to add up.
   *
   * @param {unknown} rows - Rows returned by the query above.
   * @param {object} params - Passed to `periodOf` to echo the date range.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const { startDate, endDate } = periodOf(params);
    const row = arr[0] ?? {};
    // Totals cells may be JSON text or already-decoded objects. A cell that is
    // missing, empty, or decodes to null is treated as an empty object so the
    // property reads below cannot throw.
    const asObject = (cell) => (typeof cell === "string" && cell.length > 0 ? JSON.parse(cell) : cell) ?? {};
    const pageTotals = asObject(row.page_totals_json);
    const kwTotals = asObject(row.kw_totals_json);
    const totalClicks = num$4(pageTotals.totalClicks);
    const totalImpressions = num$4(pageTotals.totalImpressions);
    const attributedClicks = num$4(kwTotals.attributedClicks);
    const attributedImpressions = num$4(kwTotals.attributedImpressions);
    // Clamp at 0: attributed clicks can exceed page clicks across tables.
    const darkClicks = Math.max(0, totalClicks - attributedClicks);
    const darkPercent = totalClicks > 0 ? darkClicks / totalClicks : 0;
    return {
      results: parseJsonList$9(row.pages_json).map((r) => ({
        url: str$13(r.url),
        totalClicks: num$4(r.totalClicks),
        attributedClicks: num$4(r.attributedClicks),
        darkClicks: num$4(r.darkClicks),
        darkPercent: num$4(r.darkPercent),
        keywordCount: num$4(r.keywordCount)
      })),
      meta: {
        summary: {
          totalClicks,
          attributedClicks,
          darkClicks,
          darkPercent,
          totalImpressions,
          attributedImpressions
        },
        startDate,
        endDate
      }
    };
  }
});
|
|
2131
|
+
/**
 * Validates that `input` looks like a BuilderState and returns it unchanged.
 *
 * The only structural check is that `input` is an object with an array-valued
 * `dimensions` property.
 *
 * @param {unknown} input - Value supplied as `params.q`.
 * @param {string} tool - Analyzer id used as the error-message prefix.
 * @returns {object} The same `input` reference.
 * @throws {Error} When `input` is missing or not shaped like a BuilderState.
 */
function requireBuilderState(input, tool) {
  const looksLikeState = Boolean(input)
    && typeof input === "object"
    && "dimensions" in input
    && Array.isArray(input.dimensions);
  if (looksLikeState) {
    return input;
  }
  throw new Error(`${tool}: params.q is required (BuilderState)`);
}
|
|
2135
|
+
/**
 * Like a required BuilderState check, but `null`/`undefined` is allowed.
 *
 * @param {unknown} input - Value supplied as `params[key]`.
 * @param {string} tool - Analyzer id used as the error-message prefix.
 * @param {string} key - Param name used in the error message.
 * @returns {object | null} `null` when `input` is nullish, otherwise the same `input` reference.
 * @throws {Error} When `input` is present but not an object with an array `dimensions`.
 */
function optionalBuilderState(input, tool, key) {
  if (input === null || input === undefined) {
    return null;
  }
  const looksLikeState = typeof input === "object"
    && "dimensions" in input
    && Array.isArray(input.dimensions);
  if (looksLikeState) {
    return input;
  }
  throw new Error(`${tool}: params.${key} must be a BuilderState`);
}
|
|
2140
|
+
/** Column names whose values are converted with `Number(...)` by `coerceNumericCols`. */
const NUMERIC_METRIC_COLS = [
  "clicks", "impressions", "ctr", "position",
  "prevClicks", "prevImpressions", "prevCtr", "prevPosition",
  "variantCount", "totalCount"
];
/**
 * Returns a shallow copy of `row` in which every known metric column that is
 * present and non-null has been converted with `Number(...)`.
 *
 * Columns that are absent or null/undefined are left untouched; the input row
 * is not mutated.
 *
 * @param {object} row
 * @returns {object}
 */
function coerceNumericCols(row) {
  const copy = { ...row };
  NUMERIC_METRIC_COLS
    .filter((name) => name in copy && copy[name] != null)
    .forEach((name) => {
      copy[name] = Number(copy[name]);
    });
  return copy;
}
|
|
2157
|
+
/**
 * Turns raw data-query rows plus the resolved extra queries into the
 * analyzer response.
 *
 * - With a comparison period (`opts.hasPrev`), the total row count comes from
 *   the `count` extra query (falling back to the number of rows).
 * - Without one, it is read from the `totalCount` column of the first row, and
 *   the per-row total columns and `sum_position` are stripped from every row.
 *
 * @param {object[]} rows - Main query rows.
 * @param {object | undefined} extras - Extra query results keyed by name
 *   (`totals`, `count`, `canonicalExtras` are read here).
 * @param {{ hasPrev: boolean }} opts
 * @returns {{ results: unknown, meta: { totalCount: number, totals: object } }}
 */
function shapeDataQuery(rows, extras, opts) {
  let cleaned;
  let totalCount;
  if (!opts.hasPrev) {
    totalCount = Number(rows[0]?.totalCount ?? 0);
    cleaned = rows.map((raw) => {
      const {
        totalCount: _droppedCount,
        totalClicks: _droppedClicks,
        totalImpressions: _droppedImpressions,
        totalCtr: _droppedCtr,
        totalPosition: _droppedPosition,
        sum_position: _droppedSumPosition,
        ...kept
      } = raw;
      return coerceNumericCols(kept);
    });
  } else {
    cleaned = rows.map(coerceNumericCols);
    const countRow = extras?.count?.[0];
    totalCount = Number(countRow?.total ?? cleaned.length);
  }
  const totalsRow = extras?.totals?.[0] ?? {};
  const totals = Object.fromEntries(
    ["clicks", "impressions", "ctr", "position"].map((metric) => [metric, Number(totalsRow[metric] ?? 0)])
  );
  const extrasResults = extras?.canonicalExtras
    ? [{ key: "canonicalExtras", results: extras.canonicalExtras }]
    : [];
  return {
    results: mergeExtras(cleaned, extrasResults),
    meta: { totalCount, totals }
  };
}
|
|
2191
|
+
/**
 * Builds the execution plan for the "data-query" analyzer.
 *
 * The plan always carries a `totals` extra query plus whatever
 * `buildExtrasQueries` returns. When a comparison state (`params.qc`) is
 * given, the main SQL comes from `resolveComparisonSQL` and a `count` extra
 * query is added; otherwise the main SQL comes from `resolveToSQLOptimized`.
 *
 * @param {{ q: object, qc?: object, comparisonFilter?: unknown }} params
 * @param {{ adapter: { inferTable: Function } }} options - Resolver options.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `params.q` is not a BuilderState, includes the `date`
 *   dimension, or `params.qc` is present but malformed.
 */
function buildDataQueryPlan(params, options) {
  const state = requireBuilderState(params.q, "data-query");
  if (state.dimensions.includes("date")) {
    throw new Error("data-query: date dimension not supported; use data-detail");
  }
  const prev = optionalBuilderState(params.qc, "data-query", "qc");
  const totals = buildTotalsSql(state, options);
  const extras = buildExtrasQueries(state, options);
  const extraQueries = [
    { name: "totals", sql: totals.sql, params: totals.params },
    ...extras.map(({ key, sql, params: extraParams }) => ({ name: key, sql, params: extraParams }))
  ];
  const tableKey = options.adapter.inferTable(state.dimensions);
  if (!prev) {
    const { sql, params: sqlParams } = resolveToSQLOptimized(state, options);
    return {
      tableKey,
      sql,
      params: sqlParams,
      extraQueries,
      shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev: false })
    };
  }
  const comparison = resolveComparisonSQL(state, prev, options, params.comparisonFilter);
  extraQueries.push({ name: "count", sql: comparison.countSql, params: comparison.countParams });
  return {
    tableKey,
    sql: comparison.sql,
    params: comparison.params,
    extraQueries,
    shape: (rows, _params, resolvedExtras) => shapeDataQuery(rows, resolvedExtras, { hasPrev: true })
  };
}
|
|
2231
|
+
/**
 * Builds the execution plan for the "data-detail" analyzer (a query that
 * must include the `date` dimension).
 *
 * The plan carries a `totals` extra query and, when a comparison state
 * (`params.qc`) is given, a `prevTotals` extra query. Its `shape` callback
 * coerces metric columns, pads the series via `padTimeseries` when the filter
 * yields both a start and an end date, and reports totals in `meta`.
 *
 * @param {{ q: object, qc?: object }} params
 * @param {{ adapter: { inferTable: Function } }} options - Resolver options.
 * @returns {{ tableKey: unknown, sql: string, params: unknown[], extraQueries: object[], shape: Function }}
 * @throws {Error} When `params.q` is not a BuilderState, lacks the `date`
 *   dimension, or `params.qc` is present but malformed.
 */
function buildDataDetailPlan(params, options) {
  const state = requireBuilderState(params.q, "data-detail");
  if (!state.dimensions.includes("date")) {
    throw new Error("data-detail: `date` dimension is required");
  }
  const main = resolveToSQL(state, options);
  const totals = buildTotalsSql(state, options);
  const prev = optionalBuilderState(params.qc, "data-detail", "qc");
  const extraQueries = [{ name: "totals", sql: totals.sql, params: totals.params }];
  if (prev) {
    const previousTotals = buildTotalsSql(prev, options);
    extraQueries.push({ name: "prevTotals", sql: previousTotals.sql, params: previousTotals.params });
  }
  const tableKey = options.adapter.inferTable(state.dimensions);
  const range = extractDateRange(state.filter);
  const rangeStart = range.startDate;
  const rangeEnd = range.endDate;
  // Maps one totals row to the four reported metrics, defaulting each to 0.
  const toTotals = (totalsRow) => ({
    clicks: Number(totalsRow.clicks ?? 0),
    impressions: Number(totalsRow.impressions ?? 0),
    ctr: Number(totalsRow.ctr ?? 0),
    position: Number(totalsRow.position ?? 0)
  });
  const shape = (rows, _params, extras) => {
    const coerced = rows.map(coerceNumericCols);
    let daily = coerced;
    if (rangeStart && rangeEnd) {
      daily = padTimeseries(coerced, { startDate: rangeStart, endDate: rangeEnd });
    }
    const meta = { totals: toTotals(extras?.totals?.[0] ?? {}) };
    if (extras?.prevTotals) {
      meta.previousTotals = toTotals(extras.prevTotals[0] ?? {});
    }
    return { results: daily, meta };
  };
  return {
    tableKey,
    sql: main.sql,
    params: main.params,
    extraQueries,
    shape
  };
}
|
|
2286
|
+
/**
 * "data-detail" analyzer: thin wrapper that exposes `buildDataDetailPlan`
 * through the analyzer interface, always using `pgResolverAdapter`.
 */
const dataDetailAnalyzer = defineAnalyzer({
  id: "data-detail",
  /**
   * Returns the plan's SQL, params and extra queries. `partitions` is empty
   * and `requiresAttachedTables` is set; presumably the executor resolves
   * tables itself for this analyzer (confirm against the executor).
   */
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: { table: tableKey, partitions: [] },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  /**
   * Rebuilds the plan to obtain its `shape` callback and applies it to the
   * rows and the extra-query results found on `ctx.extras`.
   */
  reduceSql(rows, params, ctx) {
    const safeRows = Array.isArray(rows) ? rows : [];
    const plan = buildDataDetailPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(safeRows, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
|
|
2310
|
+
/**
 * "data-query" analyzer: thin wrapper that exposes `buildDataQueryPlan`
 * through the analyzer interface, always using `pgResolverAdapter`.
 */
const dataQueryAnalyzer = defineAnalyzer({
  id: "data-query",
  /**
   * Returns the plan's SQL, params and extra queries. `partitions` is empty
   * and `requiresAttachedTables` is set; presumably the executor resolves
   * tables itself for this analyzer (confirm against the executor).
   */
  buildSql(params) {
    const { sql, params: sqlParams, tableKey, extraQueries } = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    return {
      sql,
      params: sqlParams,
      current: { table: tableKey, partitions: [] },
      requiresAttachedTables: true,
      extraQueries
    };
  },
  /**
   * Rebuilds the plan to obtain its `shape` callback and applies it to the
   * rows and the extra-query results found on `ctx.extras`.
   */
  reduceSql(rows, params, ctx) {
    const safeRows = Array.isArray(rows) ? rows : [];
    const plan = buildDataQueryPlan(params, { adapter: pgResolverAdapter });
    const shaped = plan.shape(safeRows, params, ctx.extras);
    return {
      results: shaped.results,
      meta: shaped.meta
    };
  }
});
|
|
2334
|
+
// Sorter used by analyzeDecay for its result list. Built by createMetricSorter
// from "lostClicks" and a metric -> direction map; presumably the first
// argument is the default sort key (confirm against createMetricSorter).
const sortResults$1 = createMetricSorter("lostClicks", {
lostClicks: "desc",
declinePercent: "desc",
currentClicks: "asc"
});
|
|
2339
|
+
/**
 * Converts a result cell to a string; `null` and `undefined` become "".
 *
 * @param {unknown} v
 * @returns {string}
 */
function str$12(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
|
|
2342
|
+
/**
 * Reads a list-valued cell that may arrive as an array or as a JSON string.
 *
 * Arrays are returned as-is. Non-empty strings are JSON-parsed (a malformed
 * string throws from `JSON.parse`). Anything that is not, or does not parse
 * to, an array yields an empty array.
 *
 * @param {unknown} v
 * @returns {unknown[]}
 */
function parseJsonList$8(v) {
  const decoded = typeof v === "string" && v.length > 0 ? JSON.parse(v) : v;
  return Array.isArray(decoded) ? decoded : [];
}
|
|
2350
|
+
/**
 * Finds pages whose clicks dropped between a previous and a current period.
 *
 * Only pages with at least `minPreviousClicks` clicks in the previous period
 * are considered. A page is reported when it lost clicks and the relative
 * decline is at least `threshold`. Pages missing from the current period are
 * treated as having 0 clicks and position 0.
 *
 * @param {{ current: object[], previous: object[] }} input - Rows per period;
 *   `page`, `clicks` and `position` are read from each row.
 * @param {{ minPreviousClicks?: number, threshold?: number, sortBy?: string }} [options]
 *   Defaults: 50, 0.2 and "lostClicks".
 * @returns {unknown} The decayed pages as returned by the metric sorter.
 */
function analyzeDecay(input, options = {}) {
  const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
  const pageOf = (row) => row.page;
  const metricsOf = (row) => {
    return {
      clicks: num$4(row.clicks),
      position: num$4(row.position)
    };
  };
  const hadEnoughClicks = (row) => num$4(row.clicks) >= minPreviousClicks;
  const currentMap = buildPeriodMap(input.current, pageOf, metricsOf);
  const previousMap = buildPeriodMap(input.previous, pageOf, metricsOf, hadEnoughClicks);
  const decayed = [];
  for (const [page, before] of previousMap) {
    const now = currentMap.get(page) || { clicks: 0, position: 0 };
    const lostClicks = before.clicks - now.clicks;
    const declinePercent = before.clicks > 0 ? lostClicks / before.clicks : 0;
    const qualifies = declinePercent >= threshold && lostClicks > 0;
    if (!qualifies) {
      continue;
    }
    decayed.push({
      page,
      currentClicks: now.clicks,
      previousClicks: before.clicks,
      lostClicks,
      declinePercent,
      currentPosition: now.position,
      previousPosition: before.position,
      positionDrop: now.position - before.position
    });
  }
  return sortResults$1(decayed, sortBy);
}
|
|
2381
|
+
/**
 * "decay" analyzer: pages that lost clicks between the previous and the
 * current comparison period.
 *
 * SQL path: one query over the "pages" table for both periods that also
 * attaches a weekly clicks/impressions series per URL.
 * Rows path: fetches page rows per period and delegates to `analyzeDecay`
 * (no weekly series in that path).
 */
const decayAnalyzer = defineAnalyzer({
	id: "decay",
	/**
	 * Builds the SQL plan.
	 * Reads `minPreviousClicks` (default 50), `threshold` (default 0.2) and
	 * `limit` (default 2000) from `params`; periods come from `comparisonOf`.
	 */
	buildSql(params) {
		const periods = comparisonOf(params);
		const cur = periods.current;
		const prev = periods.previous;
		const minPreviousClicks = params.minPreviousClicks ?? 50;
		const threshold = params.threshold ?? .2;
		const limit = params.limit ?? 2e3;
		const sql = `
			WITH cur AS (
				SELECT
					url,
					${METRIC_EXPR.clicks} AS clicks,
					${METRIC_EXPR.position} AS position
				FROM read_parquet({{FILES}}, union_by_name = true)
				WHERE date >= ? AND date <= ?
				GROUP BY url
			),
			prev AS (
				SELECT
					url,
					${METRIC_EXPR.clicks} AS clicks,
					${METRIC_EXPR.position} AS position
				FROM read_parquet({{FILES_PREV}}, union_by_name = true)
				WHERE date >= ? AND date <= ?
				GROUP BY url
				HAVING SUM(clicks) >= ?
			),
			weekly AS (
				SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
					${METRIC_EXPR.clicks} AS clicks,
					${METRIC_EXPR.impressions} AS impressions
				FROM (
					SELECT url, date, clicks, impressions
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					UNION ALL
					SELECT url, date, clicks, impressions
					FROM read_parquet({{FILES_PREV}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
				)
				GROUP BY url, week
			),
			series_by_url AS (
				SELECT url, to_json(list({
					'week': strftime(week, '%Y-%m-%d'),
					'clicks': clicks,
					'impressions': impressions
				} ORDER BY week)) AS seriesJson
				FROM weekly GROUP BY url
			),
			joined AS (
				SELECT
					p.url AS page,
					COALESCE(c.clicks, 0.0) AS currentClicks,
					p.clicks AS previousClicks,
					(p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
					(p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
					COALESCE(c.position, 0.0) AS currentPosition,
					p.position AS previousPosition,
					(COALESCE(c.position, 0.0) - p.position) AS positionDrop,
					s.seriesJson
				FROM prev p
				LEFT JOIN cur c ON p.url = c.url
				LEFT JOIN series_by_url s ON p.url = s.url
			)
			SELECT *
			FROM joined
			WHERE declinePercent >= ? AND lostClicks > 0
			ORDER BY lostClicks DESC
			LIMIT ${Number(limit)}
		`;
		const curRange = [cur.startDate, cur.endDate];
		const prevRange = [prev.startDate, prev.endDate];
		return {
			sql,
			// Positional order follows the `?` placeholders: cur CTE, prev CTE and its
			// HAVING, both halves of the weekly UNION, then the final WHERE.
			params: [
				...curRange,
				...prevRange,
				minPreviousClicks,
				...curRange,
				...prevRange,
				threshold
			],
			current: {
				table: "pages",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "pages",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	/** Converts SQL result rows into typed results; a non-array input yields no results. */
	reduceSql(rows) {
		const list = Array.isArray(rows) ? rows : [];
		const toSeriesPoint = (point) => ({
			week: str$12(point.week),
			clicks: num$4(point.clicks),
			impressions: num$4(point.impressions)
		});
		const toResult = (row) => ({
			page: str$12(row.page),
			currentClicks: num$4(row.currentClicks),
			previousClicks: num$4(row.previousClicks),
			lostClicks: num$4(row.lostClicks),
			declinePercent: num$4(row.declinePercent),
			currentPosition: num$4(row.currentPosition),
			previousPosition: num$4(row.previousPosition),
			positionDrop: num$4(row.positionDrop),
			series: parseJsonList$8(row.seriesJson).map((point) => toSeriesPoint(point))
		});
		return {
			results: list.map((row) => toResult(row)),
			meta: { total: list.length }
		};
	},
	/** Requests page rows for both periods, each with `params.limit`. */
	buildRows(params) {
		const periods = comparisonOf(params);
		return {
			current: pagesQueryState(periods.current, params.limit),
			previous: pagesQueryState(periods.previous, params.limit)
		};
	},
	/**
	 * Runs `analyzeDecay` over the fetched rows. `rows` is expected to be an
	 * object with `current` / `previous` lists; anything else (including an
	 * array) is treated as two empty periods.
	 */
	reduceRows(rows, params) {
		const isPeriodMap = rows && !Array.isArray(rows);
		const byPeriod = isPeriodMap ? rows : { current: [], previous: [] };
		const input = {
			current: byPeriod.current ?? [],
			previous: byPeriod.previous ?? []
		};
		const results = analyzeDecay(input, {
			minPreviousClicks: params.minPreviousClicks,
			threshold: params.threshold
		});
		return {
			results,
			meta: { total: results.length }
		};
	}
});
|
|
2521
|
+
/**
 * Coerces a value to a string, mapping `null`/`undefined` to "".
 * @param {unknown} v - Any value.
 * @returns {string}
 */
function str$11(v) {
	return String(v ?? "");
}
|
|
2524
|
+
/**
 * "device-gap" analyzer: daily desktop vs. mobile metrics and the gap between
 * them (CTR gap = desktop - mobile, position gap = mobile - desktop), plus
 * average gaps and a first-7-days vs. last-7-days trend label.
 */
const deviceGapAnalyzer = defineAnalyzer({
	id: "device-gap",
	/** Builds a per-date, per-device aggregate over the "devices" table. */
	buildSql(params) {
		const period = periodOf(params);
		const sql = `
			SELECT
				date,
				device,
				${METRIC_EXPR.clicks} AS clicks,
				${METRIC_EXPR.impressions} AS impressions,
				${METRIC_EXPR.ctr} AS ctr,
				${METRIC_EXPR.position} AS position
			FROM read_parquet({{FILES}}, union_by_name = true)
			WHERE date >= ? AND date <= ?
			GROUP BY date, device
			ORDER BY date ASC
		`;
		return {
			sql,
			params: [period.startDate, period.endDate],
			current: {
				table: "devices",
				partitions: enumeratePartitions(period.startDate, period.endDate)
			}
		};
	},
	/**
	 * Pivots the device rows into one entry per date and summarizes the gaps.
	 * Only "DESKTOP" and "MOBILE" rows (case-insensitive) are used; a side with
	 * no row on a given date is reported as all zeros.
	 */
	reduceSql(rows, params) {
		const list = Array.isArray(rows) ? rows : [];
		const { startDate, endDate } = periodOf(params);

		// Group desktop/mobile metrics under their date.
		const sidesByDate = new Map();
		for (const row of list) {
			const day = str$11(row.date);
			const device = str$11(row.device).toUpperCase();
			const metrics = {
				clicks: num$4(row.clicks),
				impressions: num$4(row.impressions),
				ctr: num$4(row.ctr),
				position: num$4(row.position)
			};
			const sides = sidesByDate.get(day) ?? {};
			if (device === "DESKTOP") sides.desktop = metrics;
			else if (device === "MOBILE") sides.mobile = metrics;
			sidesByDate.set(day, sides);
		}

		// Shared placeholder for a missing side.
		const zero = {
			clicks: 0,
			impressions: 0,
			ctr: 0,
			position: 0
		};
		const daily = Array.from(sidesByDate).sort((x, y) => x[0].localeCompare(y[0])).map(([date, sides]) => {
			const desktop = sides.desktop ?? zero;
			const mobile = sides.mobile ?? zero;
			return {
				date,
				desktop,
				mobile,
				gaps: {
					ctrGap: desktop.ctr - mobile.ctr,
					positionGap: mobile.position - desktop.position
				}
			};
		});

		// Mean gaps over daily[from, to); zeros for an empty window.
		const meanGaps = (from, to) => {
			const window = daily.slice(from, to);
			if (window.length === 0) return { ctr: 0, pos: 0 };
			let ctrTotal = 0;
			let posTotal = 0;
			for (const day of window) {
				ctrTotal += day.gaps.ctrGap;
				posTotal += day.gaps.positionGap;
			}
			return {
				ctr: ctrTotal / window.length,
				pos: posTotal / window.length
			};
		};
		const first = meanGaps(0, 7);
		const last = meanGaps(Math.max(0, daily.length - 7), daily.length);

		// A shrinking absolute gap is "improving"; changes under 0.005 are "stable".
		const trendOf = (firstVal, lastVal) => {
			const delta = Math.abs(lastVal) - Math.abs(firstVal);
			if (Math.abs(delta) < .005) return "stable";
			if (delta < 0) return "improving";
			return "worsening";
		};

		let ctrGapTotal = 0;
		let positionGapTotal = 0;
		for (const day of daily) {
			ctrGapTotal += day.gaps.ctrGap;
			positionGapTotal += day.gaps.positionGap;
		}
		// Guard against dividing by zero when there are no days.
		const dayCount = Math.max(1, daily.length);
		return {
			results: daily,
			meta: {
				summary: {
					avgCtrGap: ctrGapTotal / dayCount,
					avgPositionGap: positionGapTotal / dayCount,
					ctrGapTrend: trendOf(first.ctr, last.ctr),
					positionGapTrend: trendOf(first.pos, last.pos)
				},
				startDate,
				endDate
			}
		};
	}
});
|
|
2632
|
+
/**
 * Coerces a value to a string.
 * @param {unknown} v - Any value.
 * @returns {string} "" when `v` is `null` or `undefined`, else `String(v)`.
 */
function str$10(v) {
	const missing = v === undefined || v === null;
	return missing ? "" : String(v);
}
|
|
2635
|
+
/**
 * Returns `v` as a list: arrays pass through, non-empty strings are parsed as
 * JSON, and everything else (or a parsed non-array) becomes [].
 * @param {unknown} v - Array, JSON string, or other value.
 * @returns {Array}
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$7(v) {
	if (Array.isArray(v)) return v;
	const isJsonText = typeof v === "string" && v.length > 0;
	if (!isJsonText) return [];
	const value = JSON.parse(v);
	if (Array.isArray(value)) return value;
	return [];
}
|
|
2643
|
+
/**
 * Lower-case words excluded from token clustering in the intent-atlas SQL
 * (they are quoted and placed into a `NOT IN (...)` list).
 */
const INTENT_ATLAS_STOP_WORDS = [
	// articles and forms of "to be"
	"the", "a", "an", "is", "are", "was", "were", "be", "been",
	// prepositions and conjunctions
	"of", "to", "in", "for", "on", "and", "or", "with", "at", "by", "from", "into", "about",
	"as", "so", "than", "then", "that", "this",
	// pronouns and possessives
	"my", "your", "our", "their", "his", "her", "its", "me", "you",
	// question words and auxiliaries
	"what", "how", "why", "when", "where", "who", "which", "do", "does"
];
|
|
2691
|
+
/**
 * "intent-atlas" analyzer: groups search queries into clusters keyed by their
 * two highest-weighted tokens.
 *
 * Pipeline of the SQL (over the "keywords" table):
 *  1. `queries`       - per-query totals, keeping queries with enough impressions.
 *  2. `tokens`        - distinct lower-cased tokens per query (length >= 3,
 *                       not a stop word).
 *  3. `token_weights` - impressions summed per token; tokens under 50 are dropped.
 *  4. `ranked_tokens` - tokens of each query ranked by token weight.
 *  5. `cluster_keys`  - the top two tokens, sorted alphabetically and joined
 *                       with " + "; queries with fewer than two ranked tokens
 *                       get no cluster.
 *  6. final SELECT    - per-cluster totals plus the member keywords as JSON.
 */
const intentAtlasAnalyzer = defineAnalyzer({
	id: "intent-atlas",
	/**
	 * Builds the SQL plan.
	 * @param {object} params - Reads `startDate` (default `daysAgo(90)`),
	 *   `endDate` (default `defaultEndDate()`), `minImpressions` (default 20),
	 *   `minClusterSize` (default 3) and `limit` (default 200).
	 */
	buildSql(params) {
		const endDate = params.endDate ?? defaultEndDate();
		const startDate = params.startDate ?? daysAgo(90);
		const minQueryImpressions = params.minImpressions ?? 20;
		const minClusterSize = params.minClusterSize ?? 3;
		const minTokenImpressions = 50;
		const limit = params.limit ?? 200;
		// The stop words are fixed literals from this module, so quoting them
		// inline is safe; caller-supplied numbers go through Number() below.
		const stopList = INTENT_ATLAS_STOP_WORDS.map((w) => `'${w}'`).join(", ");
		return {
			sql: `
				WITH queries AS (
					SELECT
						query,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
					GROUP BY query
					HAVING SUM(impressions) >= ?
				),
				tokens AS (
					-- DISTINCT: a query that repeats a word must contribute that token
					-- once, otherwise its impressions are double-counted in
					-- token_weights and the same token can take both top-2 ranks.
					SELECT DISTINCT q.query, q.impressions, q.clicks, q.position,
						LOWER(t.token) AS token
					FROM queries q,
						unnest(regexp_split_to_array(LOWER(q.query), '\\s+')) AS t(token)
					WHERE LENGTH(t.token) >= 3
						AND LOWER(t.token) NOT IN (${stopList})
				),
				token_weights AS (
					SELECT token,
						SUM(impressions) AS token_impressions,
						COUNT(DISTINCT query) AS query_count
					FROM tokens
					GROUP BY token
					HAVING SUM(impressions) >= ${Number(minTokenImpressions)}
				),
				ranked_tokens AS (
					SELECT t.query, t.token, tw.token_impressions,
						ROW_NUMBER() OVER (
							PARTITION BY t.query
							ORDER BY tw.token_impressions DESC, t.token ASC
						) AS rnk
					FROM tokens t
					JOIN token_weights tw USING (token)
				),
				cluster_keys AS (
					SELECT query,
						array_to_string(list(token ORDER BY token), ' + ') AS cluster_key
					FROM ranked_tokens
					WHERE rnk <= 2
					GROUP BY query
					HAVING COUNT(*) >= 2
				),
				clustered AS (
					SELECT q.query, q.impressions, q.clicks, q.position, ck.cluster_key
					FROM queries q
					JOIN cluster_keys ck USING (query)
				)
				SELECT
					cluster_key AS clusterKey,
					COUNT(*) AS keywordCount,
					SUM(impressions) AS totalImpressions,
					SUM(clicks) AS totalClicks,
					SUM(clicks) / NULLIF(SUM(impressions), 0) AS ctr,
					AVG(position) AS avgPosition,
					to_json(list({
						'query': query,
						'impressions': impressions,
						'clicks': clicks,
						'position': position
					} ORDER BY impressions DESC)) AS keywords
				FROM clustered
				GROUP BY cluster_key
				HAVING COUNT(*) >= ${Number(minClusterSize)}
				ORDER BY totalImpressions DESC
				LIMIT ${Number(limit)}
			`,
			params: [
				startDate,
				endDate,
				minQueryImpressions
			],
			current: {
				table: "keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Converts SQL rows into typed clusters. Each cluster keeps at most its
	 * first 25 keywords (the SQL orders them by impressions, descending), while
	 * `keywordCount` still reports the full cluster size.
	 * @returns {{results: object[], meta: {total: number, totalImpressions: number, totalKeywords: number}}}
	 */
	reduceSql(rows) {
		const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
			clusterKey: str$10(r.clusterKey),
			keywordCount: num$4(r.keywordCount),
			totalImpressions: num$4(r.totalImpressions),
			totalClicks: num$4(r.totalClicks),
			ctr: num$4(r.ctr),
			avgPosition: num$4(r.avgPosition),
			keywords: parseJsonList$7(r.keywords).slice(0, 25).map((k) => ({
				query: str$10(k.query),
				impressions: num$4(k.impressions),
				clicks: num$4(k.clicks),
				position: num$4(k.position)
			}))
		}));
		const totalImpressions = clusters.reduce((s, c) => s + c.totalImpressions, 0);
		const totalKeywords = clusters.reduce((s, c) => s + c.keywordCount, 0);
		return {
			results: clusters,
			meta: {
				total: clusters.length,
				totalImpressions,
				totalKeywords
			}
		};
	}
});
|
|
2810
|
+
/**
 * Coerces a value to a string; absent values (`null`/`undefined`) give "".
 * @param {unknown} v - Any value.
 * @returns {string}
 */
function str$9(v) {
	if (v !== null && v !== undefined) return String(v);
	return "";
}
|
|
2813
|
+
/**
 * Accepts an array or a JSON-encoded array and returns an array.
 * @param {unknown} v - Array, JSON string, or other value.
 * @returns {Array} `v` itself when it is an array; the parsed array when `v`
 *   is a non-empty JSON string encoding one; otherwise [].
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$6(v) {
	if (Array.isArray(v)) return v;
	if (typeof v === "string" && v.length > 0) {
		const result = JSON.parse(v);
		if (Array.isArray(result)) return result;
	}
	return [];
}
|
|
2821
|
+
/**
 * "keyword-breadth" analyzer: how many distinct queries each page receives
 * impressions for.
 *
 * The SQL (over the "page_keywords" table) returns a single row with four
 * JSON columns:
 *  - `distribution_json`: page counts per keyword-count bucket
 *    ('1', '2-5', '6-15', '16-50', '50+').
 *  - `fragile_json`:   up to 20 pages with <= 2 keywords and >= 5 clicks,
 *                      by clicks descending.
 *  - `authority_json`: up to 20 pages with >= 20 keywords, by keyword count
 *                      descending.
 *  - `stats_json`:     totals and averages across all pages.
 */
const keywordBreadthAnalyzer = defineAnalyzer({
	id: "keyword-breadth",
	/** Builds the SQL plan for the period resolved by `periodOf(params)`. */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		return {
			sql: `
				WITH per_page AS (
					SELECT
						url,
						CAST(COUNT(DISTINCT query) AS DOUBLE) AS keywordCount,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ? AND impressions > 0
					GROUP BY url
				),
				bucketed AS (
					SELECT
						CASE
							WHEN keywordCount = 1 THEN '1'
							WHEN keywordCount BETWEEN 2 AND 5 THEN '2-5'
							WHEN keywordCount BETWEEN 6 AND 15 THEN '6-15'
							WHEN keywordCount BETWEEN 16 AND 50 THEN '16-50'
							ELSE '50+'
						END AS bucket,
						MIN(keywordCount) AS sort_key,
						CAST(COUNT(*) AS DOUBLE) AS pageCount
					FROM per_page
					GROUP BY bucket
				),
				fragile AS (
					SELECT url, keywordCount, clicks, impressions
					FROM per_page
					WHERE keywordCount <= 2 AND clicks >= 5
					ORDER BY clicks DESC
					LIMIT 20
				),
				authority AS (
					SELECT url, keywordCount, clicks, impressions
					FROM per_page
					WHERE keywordCount >= 20
					ORDER BY keywordCount DESC
					LIMIT 20
				),
				stats AS (
					SELECT
						CAST(COUNT(*) AS DOUBLE) AS totalPages,
						CAST(AVG(keywordCount) AS DOUBLE) AS avgKeywordsPerPage,
						CAST(SUM(CASE WHEN keywordCount <= 2 THEN 1 ELSE 0 END) AS DOUBLE) AS fragileCount,
						CAST(SUM(CASE WHEN keywordCount >= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS authorityCount
					FROM per_page
				)
				SELECT
					-- ORDER BY is an aggregate modifier, so it sits inside list(...), not
					-- on the scalar to_json(...) call. The fragile/authority lists repeat
					-- their CTE ordering because aggregation does not preserve row order.
					(SELECT to_json(list({ 'bucket': bucket, 'pageCount': pageCount, 'sortKey': sort_key }
						ORDER BY sort_key ASC)) FROM bucketed) AS distribution_json,
					(SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions }
						ORDER BY clicks DESC)) FROM fragile) AS fragile_json,
					(SELECT to_json(list({ 'url': url, 'keywordCount': keywordCount, 'clicks': clicks, 'impressions': impressions }
						ORDER BY keywordCount DESC)) FROM authority) AS authority_json,
					(SELECT to_json({
						'totalPages': totalPages,
						'avgKeywordsPerPage': avgKeywordsPerPage,
						'fragileCount': fragileCount,
						'authorityCount': authorityCount
					}) FROM stats) AS stats_json
			`,
			params: [startDate, endDate],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Decodes the single result row. Missing rows or columns produce empty lists
	 * and zeroed summary values (via `num$4`).
	 * @returns {{results: {bucket: string, pageCount: number}[], meta: object}}
	 * @throws {SyntaxError} When a JSON column holds a malformed string.
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const { startDate, endDate } = periodOf(params);
		const row = arr[0] ?? {};
		const toPage = (r) => ({
			url: str$9(r.url),
			keywordCount: num$4(r.keywordCount),
			clicks: num$4(r.clicks),
			impressions: num$4(r.impressions)
		});
		// Copy before sorting: the column may already be an array owned by the
		// driver, and Array.prototype.sort mutates in place.
		const distribution = [...parseJsonList$6(row.distribution_json)]
			.sort((a, b) => num$4(a.sortKey) - num$4(b.sortKey))
			.map((r) => ({
				bucket: str$9(r.bucket),
				pageCount: num$4(r.pageCount)
			}));
		const fragile = parseJsonList$6(row.fragile_json).map((r) => toPage(r));
		const authority = parseJsonList$6(row.authority_json).map((r) => toPage(r));
		const stats = typeof row.stats_json === "string" ? JSON.parse(row.stats_json) : row.stats_json ?? {};
		return {
			results: distribution,
			meta: {
				fragilePages: fragile,
				authorityPages: authority,
				summary: {
					totalPages: num$4(stats.totalPages),
					avgKeywordsPerPage: num$4(stats.avgKeywordsPerPage),
					fragileCount: num$4(stats.fragileCount),
					authorityCount: num$4(stats.authorityCount)
				},
				startDate,
				endDate
			}
		};
	}
});
|
|
2930
|
+
/**
 * Coerces a value to a string.
 * @param {unknown} v - Any value.
 * @returns {string} "" for `null`/`undefined`; `String(v)` for everything else.
 */
function str$8(v) {
	switch (v) {
		case null:
		case undefined:
			return "";
		default:
			return String(v);
	}
}
|
|
2933
|
+
/**
 * Coerces an array-or-JSON value into an array.
 * @param {unknown} v - Array, JSON string, or other value.
 * @returns {Array} The input array, the array decoded from a non-empty JSON
 *   string, or [] in every other case.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$5(v) {
	if (Array.isArray(v)) return v;
	const candidate = typeof v === "string" && v.length > 0 ? JSON.parse(v) : null;
	return Array.isArray(candidate) ? candidate : [];
}
|
|
2941
|
+
/**
 * Normalizes rank/impression points and thins out long lists.
 *
 * Lists of up to 80 points are returned in full. Longer lists keep their first
 * 10 points and then only those later points whose rank reaches a threshold
 * that starts at 1.15 and is multiplied by 1.15 after each kept point.
 *
 * @param {Array<object>} points - Items read for `rank`, `impressions`,
 *   `clicks` and `query`.
 * @returns {Array<{rank: number, impressions: number, clicks: number, query: string}>}
 */
function downsampleLogRank(points) {
	const normalized = points.map((point) => ({
		rank: num$4(point.rank),
		impressions: num$4(point.impressions),
		clicks: num$4(point.clicks),
		query: str$8(point.query)
	}));
	if (normalized.length <= 80) return normalized;
	const kept = normalized.slice(0, 10);
	let threshold = 1.15;
	for (let i = 10; i < normalized.length; i++) {
		const point = normalized[i];
		if (point.rank < threshold) continue;
		kept.push(point);
		threshold *= 1.15;
	}
	return kept;
}
|
|
2959
|
+
/**
 * "long-tail" analyzer: for each page, fits a line to ln(impressions) against
 * ln(rank) of its queries and labels the page by the slope.
 *
 * SQL stages (over the "page_keywords" table):
 *  - `page_queries`: per (page, query) totals with at least `minImpressions`.
 *  - `ranked`:       queries ranked per page by impressions (ties by query).
 *  - `log_space`:    ln(rank) and ln(impressions) for rows with impressions > 0.
 *  - `fit`:          regression slope / intercept / r2 and head share, for
 *                    pages with at least 10 queries.
 *  - `scatter`:      the page's points as JSON, ordered by rank.
 * Fingerprint: slope > -0.6 'flat-tail', slope > -1.2 'balanced',
 * otherwise 'head-heavy'.
 */
const longTailAnalyzer = defineAnalyzer({
	id: "long-tail",
	/**
	 * Builds the SQL plan.
	 * @param {object} params - Reads `minImpressions` (default 5) and `limit`
	 *   (default 100); the period comes from `periodOf(params)`.
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minQueries = 10;
		const minQueryImpressions = params.minImpressions ?? 5;
		const limit = params.limit ?? 100;
		return {
			sql: `
				WITH page_queries AS (
					SELECT
						url AS page,
						query,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.clicks} AS clicks
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
						AND url IS NOT NULL AND url <> ''
					GROUP BY url, query
					HAVING SUM(impressions) >= ?
				),
				ranked AS (
					SELECT
						page, query, impressions, clicks,
						ROW_NUMBER() OVER (PARTITION BY page ORDER BY impressions DESC, query ASC) AS rnk
					FROM page_queries
				),
				log_space AS (
					-- LN(0) is undefined, and zero-impression rows can reach this
					-- point when minImpressions is 0. They rank last, so dropping
					-- them leaves the ranks of the remaining rows unchanged.
					SELECT *,
						LN(rnk) AS log_rank,
						LN(impressions) AS log_impr
					FROM ranked
					WHERE impressions > 0
				),
				fit AS (
					SELECT
						page,
						COUNT(*) AS query_count,
						SUM(impressions) AS total_impressions,
						SUM(clicks) AS total_clicks,
						REGR_SLOPE(log_impr, log_rank) AS slope,
						REGR_INTERCEPT(log_impr, log_rank) AS intercept,
						REGR_R2(log_impr, log_rank) AS r2,
						MAX(impressions) AS head_impressions,
						MAX(CASE WHEN rnk = 1 THEN impressions END) / NULLIF(SUM(impressions), 0) AS head_share
					FROM log_space
					GROUP BY page
					HAVING COUNT(*) >= ${Number(minQueries)}
				),
				scatter AS (
					SELECT
						l.page,
						to_json(list({
							'rank': l.rnk,
							'impressions': l.impressions,
							'clicks': l.clicks,
							'query': l.query
						} ORDER BY l.rnk)) AS pointsJson
					FROM log_space l
					JOIN fit f USING (page)
					GROUP BY l.page
				)
				SELECT
					f.page,
					f.query_count AS queryCount,
					f.total_impressions AS totalImpressions,
					f.total_clicks AS totalClicks,
					f.slope AS slope,
					f.intercept AS intercept,
					f.r2 AS r2,
					f.head_impressions AS headImpressions,
					f.head_share AS headShare,
					s.pointsJson AS pointsJson,
					CASE
						WHEN f.slope > -0.6 THEN 'flat-tail'
						WHEN f.slope > -1.2 THEN 'balanced'
						ELSE 'head-heavy'
					END AS fingerprint
				FROM fit f
				LEFT JOIN scatter s USING (page)
				ORDER BY f.total_impressions DESC
				LIMIT ${Number(limit)}
			`,
			params: [
				startDate,
				endDate,
				minQueryImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Converts SQL rows into typed results (scatter points are thinned by
	 * `downsampleLogRank`) and tallies pages per fingerprint.
	 * @returns {{results: object[], meta: {total: number, fingerprints: object, avgSlope: number}}}
	 */
	reduceSql(rows) {
		const results = (Array.isArray(rows) ? rows : []).map((r) => ({
			page: str$8(r.page),
			queryCount: num$4(r.queryCount),
			totalImpressions: num$4(r.totalImpressions),
			totalClicks: num$4(r.totalClicks),
			slope: num$4(r.slope),
			intercept: num$4(r.intercept),
			r2: num$4(r.r2),
			headImpressions: num$4(r.headImpressions),
			headShare: num$4(r.headShare),
			fingerprint: str$8(r.fingerprint),
			points: downsampleLogRank(parseJsonList$5(r.pointsJson))
		}));
		const counts = {
			"flat-tail": 0,
			"balanced": 0,
			"head-heavy": 0
		};
		for (const r of results) {
			// Only tally known labels; `counts[x]++` on a missing or unexpected
			// fingerprint would add a NaN-valued key to the summary.
			if (Object.hasOwn(counts, r.fingerprint)) counts[r.fingerprint]++;
		}
		return {
			results,
			meta: {
				total: results.length,
				fingerprints: counts,
				avgSlope: results.length > 0 ? results.reduce((s, r) => s + r.slope, 0) / results.length : 0
			}
		};
	}
});
|
|
3083
|
+
/**
 * Percentage change from `previous` to `current`.
 * @param {number} current - Value in the recent period.
 * @param {number} previous - Value in the baseline period.
 * @returns {number} `(current - previous) / previous * 100`. With a zero
 *   baseline the result is 100 when `current` is positive and 0 otherwise.
 */
function percentDifference(current, previous) {
	if (previous !== 0) return (current - previous) / previous * 100;
	return current > 0 ? 100 : 0;
}
|
|
3087
|
+
/**
 * Coerces a value to a string, treating `null` and `undefined` as "".
 * @param {unknown} v - Any value.
 * @returns {string}
 */
function str$7(v) {
	const absent = v === null || typeof v === "undefined";
	if (absent) return "";
	return String(v);
}
|
|
3090
|
+
/**
 * Returns a list from a value that may be an array or a JSON string.
 * @param {unknown} v - Array, JSON string, or other value.
 * @returns {Array} Arrays are returned unchanged; a non-empty string is parsed
 *   and returned if it decodes to an array; anything else yields [].
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$4(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string") return [];
	if (v.length === 0) return [];
	const parsedValue = JSON.parse(v);
	return Array.isArray(parsedValue) ? parsedValue : [];
}
|
|
3098
|
+
/**
 * Classifies current-period queries as rising, declining or stable by their
 * click change against a (normalized) baseline period.
 *
 * Only rows of `input.current` with at least `minImpressions` impressions are
 * classified. Baseline clicks/impressions are divided by
 * `input.normalizationFactor` (default 1); a query absent from the baseline is
 * compared against zeros.
 *
 * @param {{current: Iterable<object>, previous: Iterable<object>, normalizationFactor?: number}} input
 *   Rows are read for `query`, `page`, `clicks`, `impressions`, `position`.
 * @param {object} [options]
 * @param {number} [options.changeThreshold=0.2] - Minimum |click change| as a fraction.
 * @param {number} [options.minImpressions=50] - Minimum current impressions.
 * @param {string} [options.sortBy="clicksChange"] - One of "clicks",
 *   "impressions", "clicksChange", "impressionsChange", "positionChange";
 *   any other value sorts like "clicksChange".
 * @returns {{rising: object[], declining: object[], stable: object[]}} Rising and
 *   declining are ordered by `sortBy`; stable by recent clicks, descending.
 */
function analyzeMovers(input, options = {}) {
	const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
	const scale = input.normalizationFactor ?? 1;
	const baselineByQuery = buildPeriodMap(input.previous, (row) => row.query, (row) => ({
		clicks: num$4(row.clicks) / scale,
		impressions: num$4(row.impressions) / scale,
		position: num$4(row.position),
		page: row.page ?? null
	}));

	// First non-empty page seen per query; current rows take precedence.
	const pageByQuery = new Map();
	for (const rows of [input.current, input.previous]) {
		for (const row of rows) {
			if (row.page && !pageByQuery.has(row.query)) pageByQuery.set(row.query, row.page);
		}
	}

	const buckets = { rising: [], declining: [], stable: [] };
	const emptyBaseline = () => ({ clicks: 0, impressions: 0, position: 0, page: null });
	for (const row of input.current) {
		const impressions = num$4(row.impressions);
		const clicks = num$4(row.clicks);
		const position = num$4(row.position);
		if (impressions < minImpressions) continue;
		const baseline = baselineByQuery.get(row.query) || emptyBaseline();
		const clicksChangePercent = percentDifference(clicks, baseline.clicks);
		const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
		const roundedBaselineClicks = Math.round(baseline.clicks);
		const mover = {
			keyword: row.query,
			page: pageByQuery.get(row.query) ?? null,
			recentClicks: clicks,
			recentImpressions: impressions,
			recentPosition: position,
			baselineClicks: roundedBaselineClicks,
			baselineImpressions: Math.round(baseline.impressions),
			baselinePosition: baseline.position,
			clicksChange: clicks - roundedBaselineClicks,
			clicksChangePercent,
			impressionsChangePercent,
			positionChange: position - baseline.position
		};
		const significant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
		if (significant && clicksChangePercent > 0) buckets.rising.push(mover);
		else if (significant && clicksChangePercent < 0) buckets.declining.push(mover);
		else buckets.stable.push(mover);
	}

	const byRecentClicks = (a, b) => b.recentClicks - a.recentClicks;
	const byClicksChange = (a, b) => Math.abs(b.clicksChangePercent) - Math.abs(a.clicksChangePercent);
	const comparators = new Map([
		["clicks", byRecentClicks],
		["impressions", (a, b) => b.recentImpressions - a.recentImpressions],
		["clicksChange", byClicksChange],
		["impressionsChange", (a, b) => Math.abs(b.impressionsChangePercent) - Math.abs(a.impressionsChangePercent)],
		["positionChange", (a, b) => Math.abs(b.positionChange) - Math.abs(a.positionChange)]
	]);
	const compare = comparators.get(sortBy) ?? byClicksChange;
	buckets.rising.sort(compare);
	buckets.declining.sort(compare);
	buckets.stable.sort(byRecentClicks);
	return buckets;
}
|
|
3164
|
+
/**
 * "movers" analyzer: compares each (query, url) pair of the `page_keywords`
 * table between a current and a previous period and labels it `rising`,
 * `declining` or `stable` from the percentage change in clicks.
 *
 * Recognised params (all optional):
 * - `minImpressions`  (default 50)   minimum current-period impressions.
 * - `changeThreshold` (default 0.2)  fraction of click change (0.2 = 20 %)
 *                                    required to leave the `stable` bucket.
 * - `limit`           (default 2000) maximum number of rows fetched.
 *
 * Both reducers return `{ results, meta }`, where `results` holds only the
 * rising and declining entries and `meta` carries the per-bucket counts.
 */
const moversAnalyzer = defineAnalyzer({
	id: "movers",
	/**
	 * Builds the SQL plan. `{{FILES}}` / `{{FILES_PREV}}` are placeholders for
	 * the current / previous parquet file lists (presumably substituted by the
	 * executor from `current` / `previous` - confirm against the engine).
	 */
	buildSql(params) {
		const { current: cur, previous: prev } = comparisonOf(params);
		const minImpressions = params.minImpressions ?? 50;
		const changeThreshold = params.changeThreshold ?? .2;
		// LIMIT is interpolated into the SQL text, so route it through the shared
		// clamp: non-numeric, non-finite or non-positive input falls back to the
		// default instead of emitting `LIMIT NaN` / a negative limit, and the
		// value is capped at MAX_LIMIT like the paginated analyzers.
		const limit = clampLimit(params.limit, 2e3);
		return {
			sql: `
				WITH cur AS (
					SELECT
						query, url,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
				),
				prev AS (
					SELECT
						query, url,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES_PREV}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
				),
				weekly AS (
					SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions
					FROM (
						SELECT query, url, date, clicks, impressions
						FROM read_parquet({{FILES}}, union_by_name = true)
						WHERE date >= ? AND date <= ?
						UNION ALL
						SELECT query, url, date, clicks, impressions
						FROM read_parquet({{FILES_PREV}}, union_by_name = true)
						WHERE date >= ? AND date <= ?
					)
					GROUP BY query, url, week
				),
				series_by_entity AS (
					SELECT query, url, to_json(list({
						'week': strftime(week, '%Y-%m-%d'),
						'clicks': clicks,
						'impressions': impressions
					} ORDER BY week)) AS seriesJson
					FROM weekly GROUP BY query, url
				),
				joined AS (
					SELECT
						c.query AS keyword,
						c.url AS page,
						c.clicks AS recentClicks,
						c.impressions AS recentImpressions,
						c.position AS recentPosition,
						COALESCE(p.clicks, 0.0) AS baselineClicks,
						COALESCE(p.impressions, 0.0) AS baselineImpressions,
						COALESCE(p.position, 0.0) AS baselinePosition,
						(c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
						CASE
							WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
							ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
						END AS clicksChangePercent,
						CASE
							WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
							ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
						END AS impressionsChangePercent,
						(c.position - COALESCE(p.position, 0.0)) AS positionChange,
						s.seriesJson
					FROM cur c
					LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
					LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
					WHERE c.impressions >= ?
				)
				SELECT *,
					CASE
						WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
						WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
						ELSE 'stable'
					END AS direction
				FROM joined
				ORDER BY ABS(clicksChangePercent) DESC
				LIMIT ${limit}
			`,
			// Positional parameters, in the order the `?` markers appear above:
			// cur CTE, prev CTE, the two halves of the weekly UNION, the
			// impressions filter, then the threshold once per CASE branch.
			params: [
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				minImpressions,
				changeThreshold,
				changeThreshold
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	/**
	 * Normalises the SQL rows (string / number coercion, weekly series JSON)
	 * and splits them by the `direction` column computed in SQL.
	 */
	reduceSql(rows) {
		const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
			keyword: str$7(r.keyword),
			page: r.page == null ? null : str$7(r.page),
			recentClicks: num$4(r.recentClicks),
			recentImpressions: num$4(r.recentImpressions),
			recentPosition: num$4(r.recentPosition),
			baselineClicks: Math.round(num$4(r.baselineClicks)),
			baselineImpressions: Math.round(num$4(r.baselineImpressions)),
			baselinePosition: num$4(r.baselinePosition),
			clicksChange: num$4(r.clicksChange),
			clicksChangePercent: num$4(r.clicksChangePercent),
			impressionsChangePercent: num$4(r.impressionsChangePercent),
			positionChange: num$4(r.positionChange),
			direction: str$7(r.direction),
			series: parseJsonList$4(r.seriesJson).map((s) => ({
				week: str$7(s.week),
				clicks: num$4(s.clicks),
				impressions: num$4(s.impressions)
			}))
		}));
		const rising = normalized.filter((r) => r.direction === "rising");
		const declining = normalized.filter((r) => r.direction === "declining");
		const stable = normalized.filter((r) => r.direction === "stable");
		// Stable rows are counted but not returned.
		const combined = [...rising, ...declining];
		return {
			results: combined,
			meta: {
				total: combined.length,
				rising: rising.length,
				declining: declining.length,
				stable: stable.length
			}
		};
	},
	/** Row-mode plan: one keywords query per period, keyed `current` / `previous`. */
	buildRows(params) {
		const { current, previous } = comparisonOf(params);
		return {
			current: keywordsQueryState(current, params.limit),
			previous: keywordsQueryState(previous, params.limit)
		};
	},
	/**
	 * Row-mode reducer: delegates classification to `analyzeMovers` and tags
	 * every returned entry with its direction.
	 */
	reduceRows(rows, params) {
		// Anything that is not a keyed map is treated as "no data".
		const map = rows && !Array.isArray(rows) ? rows : {
			current: [],
			previous: []
		};
		const result = analyzeMovers({
			current: map.current ?? [],
			previous: map.previous ?? []
		}, {
			changeThreshold: params.changeThreshold,
			minImpressions: params.minImpressions
		});
		const rising = result.rising.map((r) => ({
			...r,
			direction: "rising"
		}));
		const declining = result.declining.map((r) => ({
			...r,
			direction: "declining"
		}));
		const combined = [...rising, ...declining];
		return {
			results: combined,
			// Same meta shape as reduceSql so callers can read `total` / `stable`
			// regardless of which execution path produced the result.
			meta: {
				total: combined.length,
				rising: rising.length,
				declining: declining.length,
				stable: (result.stable ?? []).length
			}
		};
	}
});
|
|
3344
|
+
/** Page size used when the caller supplies no usable limit. */
const DEFAULT_LIMIT = 1e3;
/** Upper bound applied to every caller-supplied limit. */
const MAX_LIMIT = 5e4;
/**
 * Normalises a caller-supplied row limit to a positive whole number.
 *
 * @param {*} limit - Requested limit; `null` / `undefined` means "use fallback".
 * @param {number} [fallback=DEFAULT_LIMIT] - Returned as-is when `limit` is
 *   missing, non-numeric, non-finite or smaller than 1.
 * @returns {number} The limit rounded down to an integer and capped at
 *   `MAX_LIMIT`, or `fallback`.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
	// Round down first: the result is interpolated into `LIMIT n` and used as a
	// slice length, so a fractional value such as 10.7 must not leak through
	// (clampOffset already floors its input the same way).
	const n = Math.floor(Number(limit ?? fallback));
	if (!Number.isFinite(n) || n <= 0) return fallback;
	return Math.min(n, MAX_LIMIT);
}
|
|
3351
|
+
/**
 * Normalises a caller-supplied row offset.
 *
 * @param {*} offset - Requested offset; `null` / `undefined` counts as 0.
 * @returns {number} The offset rounded down to an integer, or 0 when the
 *   input is non-numeric, non-finite or negative.
 */
function clampOffset(offset) {
	const requested = Number(offset ?? 0);
	const usable = Number.isFinite(requested) && requested >= 0;
	return usable ? Math.floor(requested) : 0;
}
|
|
3356
|
+
/**
 * Renders the SQL pagination suffix for a `{ limit, offset }` request.
 *
 * @param {{ limit?: *, offset?: * }} input - Raw pagination input; both
 *   fields are normalised via `clampLimit` / `clampOffset`.
 * @returns {string} `LIMIT n`, followed by ` OFFSET m` only when the
 *   normalised offset is greater than zero.
 */
function paginateClause(input) {
	const pageSize = clampLimit(input.limit);
	const skip = clampOffset(input.offset);
	if (skip > 0) return `LIMIT ${pageSize} OFFSET ${skip}`;
	return `LIMIT ${pageSize}`;
}
|
|
3361
|
+
/**
 * Applies `{ limit, offset }` pagination to an in-memory array.
 *
 * @param {Array} rows - Full, already ordered result set (not mutated).
 * @param {{ limit?: *, offset?: * }} input - Raw pagination input. When no
 *   usable limit is given, the fallback is `rows.length`.
 * @returns {Array} A new array containing the requested window.
 */
function paginateInMemory(rows, input) {
	const start = clampOffset(input.offset);
	const size = clampLimit(input.limit, rows.length);
	const end = start + size;
	return rows.slice(start, end);
}
|
|
3366
|
+
/**
 * Validates a requested sort against an allow-list.
 *
 * @param {{ sortBy?: string, sortDir?: string }} input - Requested sort.
 * @param {string[]} allowed - Sort keys the caller may use.
 * @param {{ sortBy: string, sortDir: string }} defaults - Values used for
 *   whichever part of the request is missing or not permitted.
 * @returns {{ sortBy: string, sortDir: string }} The effective sort.
 */
function resolveSort(input, allowed, defaults) {
	const { sortBy: requestedKey, sortDir: requestedDir } = input;
	const keyAccepted = Boolean(requestedKey) && allowed.includes(requestedKey);
	const dirAccepted = requestedDir === "asc" || requestedDir === "desc";
	return {
		sortBy: keyAccepted ? requestedKey : defaults.sortBy,
		sortDir: dirAccepted ? requestedDir : defaults.sortDir
	};
}
|
|
3372
|
+
/** Benchmark click-through rate for each whole search position from 1 to 10. */
const EXPECTED_CTR_BY_POSITION = {
	1: .3,
	2: .15,
	3: .1,
	4: .07,
	5: .05,
	6: .04,
	7: .03,
	8: .025,
	9: .02,
	10: .015
};
/**
 * Looks up the benchmark CTR for an average position.
 *
 * @param {number} position - Average position; clamped into 1..10 and then
 *   rounded to the nearest whole position before the lookup.
 * @returns {number} The benchmark CTR, or 0.01 when the lookup yields nothing
 *   (e.g. `position` is NaN).
 */
function getExpectedCtr(position) {
	const capped = Math.min(position, 10);
	const bucket = Math.round(Math.max(1, capped));
	return EXPECTED_CTR_BY_POSITION[bucket] || .01;
}
|
|
3387
|
+
/**
 * Scores how much room a position leaves for improvement.
 *
 * @param {number} position - Average search position.
 * @returns {number} 0.2 for positions up to 3, 0.1 beyond position 50, and
 *   otherwise a value that peaks at 1 for position 11 and falls off linearly
 *   (reaching 0 at a distance of 15 positions), never below 0.
 */
function calculatePositionScore(position) {
	if (position <= 3) return .2;
	if (position > 50) return .1;
	const falloff = 1 - Math.abs(position - 11) / 15;
	return Math.max(0, falloff);
}
|
|
3393
|
+
/**
 * Maps an impression count onto a 0..1 score on a log scale.
 *
 * @param {number} impressions - Impression count.
 * @returns {number} 0 for counts of zero or less; otherwise `log10(n) / 5`,
 *   capped at 1 (reached at 100 000 impressions).
 */
function calculateImpressionScore(impressions) {
	return impressions <= 0 ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
|
|
3397
|
+
/**
 * Scores how far the actual CTR falls short of the benchmark for a position.
 *
 * @param {number} actualCtr - Observed click-through rate.
 * @param {number} position - Average position used to look up the benchmark.
 * @returns {number} 0 when the actual CTR meets or beats the benchmark;
 *   otherwise the shortfall as a fraction of the benchmark, capped at 1.
 */
function calculateCtrGapScore(actualCtr, position) {
	const benchmark = getExpectedCtr(position);
	if (actualCtr >= benchmark) return 0;
	const shortfall = benchmark - actualCtr;
	const relativeShortfall = shortfall / benchmark;
	return Math.min(relativeShortfall, 1);
}
|
|
3403
|
+
/** Sorter for opportunity results; default key `opportunityScore`, with a default direction per sortable metric. */
const sortResults = createMetricSorter("opportunityScore", {
	opportunityScore: "desc",
	potentialClicks: "desc",
	impressions: "desc",
	position: "asc"
});
/**
 * "opportunity" analyzer: scores each (query, url) pair of `page_keywords`
 * by the geometric mean of three 0..1 factors - position score, impression
 * score and CTR-gap score - scaled to 0..100, and estimates `potentialClicks`
 * as impressions multiplied by the benchmark CTR of a top-3 position.
 *
 * Recognised params (all optional): `minImpressions` (default 100),
 * `limit` (default 1000 on the SQL path) and `offset`.
 *
 * The SQL path and the row path implement the same formulas; keep the CASE
 * tables in the SQL in sync with `EXPECTED_CTR_BY_POSITION` and the
 * `calculate*Score` helpers.
 */
const opportunityAnalyzer = defineAnalyzer({
	id: "opportunity",
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 100;
		// Factor weights (exponents of the weighted geometric mean); all equal.
		const w1 = 1;
		const w2 = 1;
		const w3 = 1;
		const totalW = w1 + w2 + w3;
		const limit = params.limit ?? 1e3;
		return {
			sql: `
				WITH agg AS (
					SELECT
						query AS keyword,
						url AS page,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.ctr} AS ctr,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
					GROUP BY query, url
					HAVING SUM(impressions) >= ?
				),
				scored AS (
					SELECT
						keyword, page, clicks, impressions, ctr, position,
						CASE
							WHEN position <= 3 THEN 0.2
							WHEN position > 50 THEN 0.1
							ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
						END AS positionScore,
						CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
						CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
							WHEN 1 THEN 0.30
							WHEN 2 THEN 0.15
							WHEN 3 THEN 0.10
							WHEN 4 THEN 0.07
							WHEN 5 THEN 0.05
							WHEN 6 THEN 0.04
							WHEN 7 THEN 0.03
							WHEN 8 THEN 0.025
							WHEN 9 THEN 0.02
							WHEN 10 THEN 0.015
							ELSE 0.01
						END AS expectedCtr
					FROM agg
				),
				gapped AS (
					SELECT
						*,
						CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
					FROM scored
				)
				SELECT
					keyword, page, clicks, impressions, ctr, position,
					CAST(ROUND(POWER(
						POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
						1.0 / ${totalW}
					) * 100) AS DOUBLE) AS opportunityScore,
					CAST(ROUND(impressions * (
						CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
							WHEN 1 THEN 0.30
							WHEN 2 THEN 0.15
							WHEN 3 THEN 0.10
							ELSE 0.10
						END
					)) AS DOUBLE) AS potentialClicks,
					positionScore, impressionScore, ctrGapScore
				FROM gapped
				ORDER BY opportunityScore DESC
				${paginateClause({
					limit,
					offset: params.offset
				})}
			`,
			// Order matches the `?` markers: date range, then the HAVING threshold.
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/** Coerces the SQL rows into result objects; scoring already happened in SQL. */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		return {
			results: arr.map((r) => ({
				keyword: r.keyword == null ? "" : String(r.keyword),
				page: r.page == null ? null : String(r.page),
				clicks: num$4(r.clicks),
				impressions: num$4(r.impressions),
				ctr: num$4(r.ctr),
				position: num$4(r.position),
				opportunityScore: num$4(r.opportunityScore),
				potentialClicks: num$4(r.potentialClicks),
				factors: {
					positionScore: num$4(r.positionScore),
					impressionScore: num$4(r.impressionScore),
					ctrGapScore: num$4(r.ctrGapScore)
				}
			})),
			meta: { total: arr.length }
		};
	},
	/** Row-mode plan: a single keywords query registered under the key `keywords`. */
	buildRows(params) {
		return { keywords: keywordsQueryState(periodOf(params), params.limit) };
	},
	/**
	 * Row-mode reducer: filters by `minImpressions`, scores every remaining
	 * row in JS, sorts by opportunity score and paginates in memory.
	 */
	reduceRows(rows, params) {
		// buildRows registers its query under `keywords`, so the rows may arrive
		// either as a bare array or as a map keyed like the plan (the shape
		// moversAnalyzer.reduceRows expects). Accept both instead of silently
		// producing an empty result for the keyed form.
		const keywords = Array.isArray(rows) ? rows : Array.isArray(rows?.keywords) ? rows.keywords : [];
		const minImpressions = params.minImpressions ?? 100;
		const positionWeight = 1;
		const impressionsWeight = 1;
		const ctrGapWeight = 1;
		const sortBy = "opportunityScore";
		const results = [];
		for (const row of keywords) {
			const impressions = num$4(row.impressions);
			const position = num$4(row.position);
			const ctr = num$4(row.ctr);
			const clicks = num$4(row.clicks);
			if (impressions < minImpressions) continue;
			const positionScore = calculatePositionScore(position);
			const impressionScore = calculateImpressionScore(impressions);
			const ctrGapScore = calculateCtrGapScore(ctr, position);
			// Weighted geometric mean: any factor of 0 zeroes the whole score.
			const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
			const opportunityScore = Math.round(geometricMean * 100);
			// Target is the benchmark CTR of position 3, or of the current
			// position when it is already better than 3.
			const targetCtr = getExpectedCtr(Math.min(3, position));
			const potentialClicks = Math.round(impressions * targetCtr);
			results.push({
				keyword: row.query,
				page: row.page ?? null,
				clicks,
				impressions,
				ctr,
				position,
				opportunityScore,
				potentialClicks,
				factors: {
					positionScore,
					impressionScore,
					ctrGapScore
				}
			});
		}
		const sorted = sortResults(results, sortBy);
		const paged = paginateInMemory(sorted, {
			limit: params.limit,
			offset: params.offset
		});
		return {
			results: paged,
			meta: {
				total: sorted.length,
				returned: paged.length
			}
		};
	}
});
|
|
3572
|
+
/**
 * Coerces a value to a string, mapping `null` / `undefined` to "".
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str$6(v) {
	if (v == null) return "";
	return String(v);
}
|
|
3575
|
+
/**
 * "position-distribution" analyzer: for every date in the period, counts the
 * rows of the `keywords` table that fall into each average-position bucket
 * (1-3, 4-10, 11-20, 20+). Rows without impressions are ignored.
 * SQL-only: no row-mode plan is defined.
 */
const positionDistributionAnalyzer = defineAnalyzer({
	id: "position-distribution",
	buildSql(params) {
		const period = periodOf(params);
		const sql = `
				WITH pos AS (
					SELECT
						date,
						(sum_position / NULLIF(impressions, 0) + 1) AS avg_pos
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ? AND impressions > 0
				)
				SELECT
					date,
					CAST(SUM(CASE WHEN avg_pos <= 3 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_1_3,
					CAST(SUM(CASE WHEN avg_pos > 3 AND avg_pos <= 10 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_4_10,
					CAST(SUM(CASE WHEN avg_pos > 10 AND avg_pos <= 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_11_20,
					CAST(SUM(CASE WHEN avg_pos > 20 THEN 1 ELSE 0 END) AS DOUBLE) AS pos_20_plus,
					CAST(COUNT(*) AS DOUBLE) AS total
				FROM pos
				GROUP BY date
				ORDER BY date ASC
			`;
		return {
			sql,
			params: [period.startDate, period.endDate],
			current: {
				table: "keywords",
				partitions: enumeratePartitions(period.startDate, period.endDate)
			}
		};
	},
	/** Coerces each per-date row to numbers and echoes the period in `meta`. */
	reduceSql(rows, params) {
		const list = Array.isArray(rows) ? rows : [];
		const period = periodOf(params);
		const toBucketRow = (r) => ({
			date: str$6(r.date),
			pos_1_3: num$4(r.pos_1_3),
			pos_4_10: num$4(r.pos_4_10),
			pos_11_20: num$4(r.pos_11_20),
			pos_20_plus: num$4(r.pos_20_plus),
			total: num$4(r.total)
		});
		return {
			results: list.map((r) => toBucketRow(r)),
			meta: {
				total: list.length,
				startDate: period.startDate,
				endDate: period.endDate
			}
		};
	}
});
|
|
3626
|
+
/**
 * Coerces a value to a string, mapping `null` / `undefined` to "".
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str$5(v) {
	const missing = v === null || v === void 0;
	return missing ? "" : String(v);
}
|
|
3629
|
+
/**
 * "position-volatility" analyzer: for each page, computes a per-day
 * volatility value (spread of its queries' positions on that day plus the
 * absolute day-over-day shift of its impression-weighted average position)
 * and returns the daily series of the most volatile pages.
 * SQL-only: no row-mode plan is defined.
 *
 * Recognised params (all optional):
 * - `topN`             (default 30) number of pages returned.
 * - `minImpressions`   (default 10) minimum impressions a page needs on a
 *                      day for that day to count.
 * - `minWeeksWithData` (default 7)  minimum number of qualifying rows per
 *                      page. NOTE(review): despite the name, it is compared
 *                      against a count of daily rows - confirm intent.
 */
const positionVolatilityAnalyzer = defineAnalyzer({
	id: "position-volatility",
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		// topN is interpolated into the SQL text; clamp it so non-numeric,
		// non-positive or non-finite input falls back to the default instead
		// of producing `LIMIT NaN` or a negative limit.
		const topN = clampLimit(params.topN, 30);
		const minDayImpressions = params.minImpressions ?? 10;
		const minDays = params.minWeeksWithData ?? 7;
		return {
			sql: `
				WITH query_day AS (
					SELECT
						url AS page,
						query,
						date,
						${METRIC_EXPR.impressions} AS q_impressions,
						${METRIC_EXPR.position} AS q_position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
						AND url IS NOT NULL AND url <> ''
					GROUP BY url, query, date
					HAVING SUM(impressions) >= 1
				),
				daily AS (
					SELECT
						page, date,
						COUNT(*) AS query_count,
						SUM(q_impressions) AS day_impressions,
						SUM(q_position * q_impressions) / NULLIF(SUM(q_impressions), 0) AS avg_position,
						COALESCE(STDDEV_POP(q_position), 0.0) AS pos_stddev,
						MIN(q_position) AS best_position,
						MAX(q_position) AS worst_position
					FROM query_day
					GROUP BY page, date
					HAVING SUM(q_impressions) >= ?
				),
				with_shift AS (
					SELECT *,
						LAG(avg_position) OVER (PARTITION BY page ORDER BY date) AS prev_position,
						COALESCE(
							ABS(avg_position - LAG(avg_position) OVER (PARTITION BY page ORDER BY date)),
							0.0
						) AS dod_shift
					FROM daily
				),
				scored AS (
					SELECT *,
						pos_stddev + dod_shift AS volatility
					FROM with_shift
				),
				top_pages AS (
					SELECT page,
						SUM(day_impressions) AS total_impressions,
						AVG(volatility) AS avg_volatility,
						MAX(volatility) AS peak_volatility,
						COUNT(*) AS days_with_data
					FROM scored
					GROUP BY page
					HAVING COUNT(*) >= ?
					ORDER BY avg_volatility DESC
					LIMIT ${topN}
				)
				SELECT
					s.page,
					strftime(s.date, '%Y-%m-%d') AS date,
					s.query_count AS queryCount,
					s.day_impressions AS dayImpressions,
					s.avg_position AS avgPosition,
					s.pos_stddev AS posStddev,
					s.best_position AS bestPosition,
					s.worst_position AS worstPosition,
					s.dod_shift AS dodShift,
					s.volatility AS volatility,
					t.avg_volatility AS pageAvgVolatility,
					t.peak_volatility AS pagePeakVolatility,
					t.total_impressions AS pageTotalImpressions
				FROM scored s
				JOIN top_pages t USING (page)
				ORDER BY t.avg_volatility DESC, s.date ASC
			`,
			// Order matches the `?` markers: date range, per-day impressions
			// threshold (daily CTE), minimum row count per page (top_pages CTE).
			params: [
				startDate,
				endDate,
				minDayImpressions,
				minDays
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Groups the flat (page, date) rows into one entry per page with a `days`
	 * array, and reports every date seen plus the highest peak volatility.
	 */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		const byPage = /* @__PURE__ */ new Map();
		const allDates = /* @__PURE__ */ new Set();
		for (const r of arr) {
			const page = str$5(r.page);
			const date = str$5(r.date);
			allDates.add(date);
			// Page-level aggregates repeat on every row of a page; take them
			// from the first row encountered.
			const entry = byPage.get(page) ?? {
				page,
				avgVolatility: num$4(r.pageAvgVolatility),
				peakVolatility: num$4(r.pagePeakVolatility),
				totalImpressions: num$4(r.pageTotalImpressions),
				days: []
			};
			entry.days.push({
				date,
				queryCount: num$4(r.queryCount),
				dayImpressions: num$4(r.dayImpressions),
				avgPosition: num$4(r.avgPosition),
				posStddev: num$4(r.posStddev),
				bestPosition: num$4(r.bestPosition),
				worstPosition: num$4(r.worstPosition),
				dodShift: num$4(r.dodShift),
				volatility: num$4(r.volatility)
			});
			byPage.set(page, entry);
		}
		const pages = [...byPage.values()].sort((a, b) => b.avgVolatility - a.avgVolatility);
		const dates = [...allDates].sort();
		const maxVolatility = pages.reduce((m, p) => Math.max(m, p.peakVolatility), 0);
		return {
			results: pages,
			meta: {
				total: pages.length,
				dates,
				maxVolatility
			}
		};
	}
});
|
|
3762
|
+
/**
 * Coerces a value to a string, mapping `null` / `undefined` to "".
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str$4(v) {
	if (v != null) return String(v);
	return "";
}
|
|
3765
|
+
/**
 * Reads a list that may arrive either as an array or as a JSON string.
 *
 * @param {*} v - An array, a JSON-encoded string, or anything else.
 * @returns {Array} The array itself; the parsed value when the string decodes
 *   to an array; otherwise an empty array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$3(v) {
	if (Array.isArray(v)) return v;
	const isJsonText = typeof v === "string" && v.length > 0;
	if (!isJsonText) return [];
	const decoded = JSON.parse(v);
	if (Array.isArray(decoded)) return decoded;
	return [];
}
|
|
3773
|
+
/**
 * Resolves the comparison period for the query-migration analyzer.
 * Uses `params.prevStartDate` / `params.prevEndDate` when both are given;
 * otherwise derives the period of equal length that ends the day before the
 * current period starts.
 *
 * @param {object} params - Analyzer params.
 * @param {{ startDate: string, endDate: string }} cur - Current period.
 * @returns {{ startDate: string, endDate: string }} Previous period.
 */
function queryMigrationPreviousPeriod(params, cur) {
	if (params.prevStartDate != null && params.prevEndDate != null) return {
		startDate: params.prevStartDate,
		endDate: params.prevEndDate
	};
	const curStartMs = new Date(cur.startDate).getTime();
	const span = new Date(cur.endDate).getTime() - curStartMs;
	const endDate = toIsoDate(new Date(curStartMs - MS_PER_DAY));
	const startDate = toIsoDate(new Date(curStartMs - MS_PER_DAY - span));
	return {
		startDate,
		endDate
	};
}
/**
 * "query-migration" analyzer: finds queries that stopped appearing for one
 * page ("lost": present in the previous period, absent in the current one)
 * while the same or a near-identical query started appearing for a different
 * page ("gained"), and aggregates those matches into page-to-page edges.
 * SQL-only: no row-mode plan is defined.
 *
 * Recognised params (all optional): `prevStartDate` / `prevEndDate`
 * (both required to override the derived previous period), `minImpressions`
 * (default 20, applied per (query, url) in each period) and `limit`
 * (default 200 edges).
 */
const queryMigrationAnalyzer = defineAnalyzer({
	id: "query-migration",
	buildSql(params) {
		const cur = periodOf(params);
		const { startDate: prevStart, endDate: prevEnd } = queryMigrationPreviousPeriod(params, cur);
		const minImpressions = params.minImpressions ?? 20;
		// LIMIT is interpolated into the SQL text; clamp it so invalid input
		// falls back to the default instead of producing `LIMIT NaN`.
		const limit = clampLimit(params.limit, 200);
		// Maximum edit distance for a "fuzzy" match; also used as a cheap
		// length-difference prefilter before calling levenshtein().
		const maxLevenshtein = 2;
		return {
			sql: `
				WITH cur AS (
					SELECT query, url AS page,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
						AND url IS NOT NULL AND url <> ''
					GROUP BY query, url
					HAVING SUM(impressions) >= ?
				),
				prev AS (
					SELECT query, url AS page,
						${METRIC_EXPR.impressions} AS impressions,
						${METRIC_EXPR.clicks} AS clicks,
						${METRIC_EXPR.position} AS position
					FROM read_parquet({{FILES_PREV}}, union_by_name = true)
					WHERE date >= ? AND date <= ?
						AND query IS NOT NULL AND query <> ''
						AND url IS NOT NULL AND url <> ''
					GROUP BY query, url
					HAVING SUM(impressions) >= ?
				),
				lost AS (
					SELECT p.page AS source_page, p.query AS source_query, p.impressions AS source_impressions
					FROM prev p
					LEFT JOIN cur c ON p.page = c.page AND p.query = c.query
					WHERE c.query IS NULL
				),
				gained AS (
					SELECT c.page AS target_page, c.query AS target_query, c.impressions AS target_impressions
					FROM cur c
					LEFT JOIN prev p ON p.page = c.page AND p.query = c.query
					WHERE p.query IS NULL
				),
				matched AS (
					SELECT
						l.source_page, l.source_query, l.source_impressions,
						g.target_page, g.target_query, g.target_impressions,
						CASE
							WHEN l.source_query = g.target_query THEN 'exact'
							ELSE 'fuzzy'
						END AS match_type,
						LEAST(l.source_impressions, g.target_impressions) AS absorbed_impressions
					FROM lost l
					JOIN gained g
						ON l.source_page <> g.target_page
						AND ABS(LENGTH(l.source_query) - LENGTH(g.target_query)) <= ${maxLevenshtein}
						AND (
							l.source_query = g.target_query
							OR levenshtein(l.source_query, g.target_query) <= ${maxLevenshtein}
						)
				),
				edges AS (
					SELECT
						source_page, target_page,
						SUM(absorbed_impressions) AS weight,
						COUNT(*) AS query_count,
						SUM(CASE WHEN match_type = 'exact' THEN 1 ELSE 0 END) AS exact_count,
						to_json(list({
							'sourceQuery': source_query,
							'targetQuery': target_query,
							'absorbed': absorbed_impressions,
							'matchType': match_type
						} ORDER BY absorbed_impressions DESC)) AS examplesJson
					FROM matched
					GROUP BY source_page, target_page
				)
				SELECT *
				FROM edges
				ORDER BY weight DESC
				LIMIT ${limit}
			`,
			// Order matches the `?` markers: current range + threshold, then
			// previous range + threshold.
			params: [
				cur.startDate,
				cur.endDate,
				minImpressions,
				prevStart,
				prevEnd,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prevStart, prevEnd)
			}
		};
	},
	/**
	 * Converts edge rows into result objects (at most 8 examples per edge) and
	 * derives per-page incoming / outgoing totals for `meta.nodes`.
	 */
	reduceSql(rows, params) {
		const arr = Array.isArray(rows) ? rows : [];
		const cur = periodOf(params);
		const { startDate: prevStart, endDate: prevEnd } = queryMigrationPreviousPeriod(params, cur);
		const edges = arr.map((r) => ({
			sourcePage: str$4(r.source_page),
			targetPage: str$4(r.target_page),
			weight: num$4(r.weight),
			queryCount: num$4(r.query_count),
			exactCount: num$4(r.exact_count),
			fuzzyCount: num$4(r.query_count) - num$4(r.exact_count),
			examples: parseJsonList$3(r.examplesJson).slice(0, 8).map((e) => ({
				sourceQuery: str$4(e.sourceQuery),
				targetQuery: str$4(e.targetQuery),
				absorbed: num$4(e.absorbed),
				matchType: str$4(e.matchType)
			}))
		}));
		const nodeAgg = /* @__PURE__ */ new Map();
		for (const e of edges) {
			const src = nodeAgg.get(e.sourcePage) ?? {
				url: e.sourcePage,
				outgoing: 0,
				incoming: 0
			};
			src.outgoing += e.weight;
			nodeAgg.set(e.sourcePage, src);
			const tgt = nodeAgg.get(e.targetPage) ?? {
				url: e.targetPage,
				outgoing: 0,
				incoming: 0
			};
			tgt.incoming += e.weight;
			nodeAgg.set(e.targetPage, tgt);
		}
		const nodes = [...nodeAgg.values()];
		const totalAbsorbed = edges.reduce((s, e) => s + e.weight, 0);
		return {
			results: edges,
			meta: {
				total: edges.length,
				totalAbsorbed,
				period: {
					current: cur,
					previous: {
						startDate: prevStart,
						endDate: prevEnd
					}
				},
				nodes
			}
		};
	}
});
|
|
3943
|
+
/**
 * Coerces a value to a string, mapping `null` / `undefined` to "".
 *
 * @param {*} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str$3(v) {
	switch (v) {
		case null:
		case void 0: return "";
		default: return String(v);
	}
}
|
|
3946
|
+
/**
 * Interprets a loosely typed truthy flag.
 *
 * @param {*} v - Any value.
 * @returns {boolean} `true` only for boolean `true`, the number `1` or the
 *   string `"true"`; `false` for everything else.
 */
function bool$1(v) {
	const truthyForms = [true, 1, "true"];
	return truthyForms.includes(v);
}
|
|
3949
|
+
/**
 * Coefficient of variation (population standard deviation divided by the
 * mean), capped at 1.
 *
 * @param {number[]} values - Sample values.
 * @returns {number} 0 for an empty list or a mean of exactly 0; otherwise
 *   `min(stddev / mean, 1)`.
 */
function calculateCV(values) {
	const count = values.length;
	if (count === 0) return 0;
	let total = 0;
	values.forEach((v) => {
		total += v;
	});
	const mean = total / count;
	if (mean === 0) return 0;
	let squaredDeviations = 0;
	values.forEach((v) => {
		squaredDeviations += (v - mean) ** 2;
	});
	const stdDev = Math.sqrt(squaredDeviations / count);
	return Math.min(stdDev / mean, 1);
}
|
|
3956
|
+
/**
 * Detects seasonality in a daily series by comparing monthly totals with the
 * average month.
 *
 * @param {Array<{ date: string, clicks: number, impressions: number }>} dates
 *   Daily rows; the month is taken from the first 7 characters of `date`
 *   (assumes `YYYY-MM-DD` strings - TODO confirm against callers).
 * @param {{ metric?: string }} [options] - `metric` defaults to "clicks";
 *   any other value selects impressions.
 * @returns {{
 *   hasSeasonality: boolean, strength: number, peakMonths: string[],
 *   troughMonths: string[], monthlyBreakdown: object[], insufficientData: boolean
 * }} A month is a peak above 1.5x the average and a trough below 0.5x.
 *   `strength` is the capped coefficient of variation of the monthly totals;
 *   `insufficientData` is true with fewer than 12 distinct months.
 */
function analyzeSeasonality(dates, options = {}) {
	const { metric = "clicks" } = options;
	if (dates.length === 0) {
		return {
			hasSeasonality: false,
			strength: 0,
			peakMonths: [],
			troughMonths: [],
			monthlyBreakdown: [],
			insufficientData: true
		};
	}
	// Sum the chosen metric per "YYYY-MM" key.
	const totalsByMonth = /* @__PURE__ */ new Map();
	for (const row of dates) {
		const monthKey = row.date.substring(0, 7);
		const amount = metric === "clicks" ? row.clicks : row.impressions;
		const runningTotal = totalsByMonth.get(monthKey) || 0;
		totalsByMonth.set(monthKey, runningTotal + amount);
	}
	const months = Array.from(totalsByMonth.keys()).sort();
	const values = months.map((m) => totalsByMonth.get(m) || 0);
	const insufficientData = months.length < 12;
	const totalValue = values.reduce((a, b) => a + b, 0);
	const avgValue = values.length > 0 ? totalValue / values.length : 0;
	const monthlyBreakdown = months.map((month, i) => {
		const value = values[i] ?? 0;
		const vsAverage = avgValue > 0 ? value / avgValue : 0;
		return {
			month,
			value,
			vsAverage,
			isPeak: vsAverage > 1.5,
			isTrough: vsAverage < .5
		};
	});
	// Distinct two-digit month numbers ("01".."12") of the flagged months.
	const monthNumbersWhere = (flag) => {
		const flagged = monthlyBreakdown.filter((m) => m[flag]);
		return [...new Set(flagged.map((m) => m.month.substring(5, 7)))];
	};
	const peakMonths = monthNumbersWhere("isPeak");
	const troughMonths = monthNumbersWhere("isTrough");
	const strength = calculateCV(values);
	const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
	return {
		hasSeasonality,
		strength,
		peakMonths,
		troughMonths,
		monthlyBreakdown,
		insufficientData
	};
}
|
|
4000
|
+
/**
 * "seasonality" analyzer: monthly totals of clicks or impressions compared
 * with the average month.
 *
 * Two execution paths are provided and both return the same shape:
 * `results` is the per-month breakdown and `meta` carries `total`,
 * `hasSeasonality`, `strength`, `peakMonths`, `troughMonths` and
 * `insufficientData`.
 *
 * - SQL path (`buildSql` / `reduceSql`): aggregation, peak/trough flags and
 *   the capped coefficient of variation are computed in the query.
 * - Rows path (`buildRows` / `reduceRows`): daily rows are fetched and
 *   `analyzeSeasonality` computes the same figures in JavaScript.
 */
const seasonalityAnalyzer = defineAnalyzer({
  id: "seasonality",

  /**
   * Build the monthly aggregation query over the "pages" table.
   * `{{FILES}}` is left as a placeholder in the SQL text (presumably
   * substituted by the engine with the partition file list — confirm).
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    // Whitelisted column name: only "impressions" or "clicks" is ever
    // interpolated into the SQL text.
    const metricColumn = params.metric === "impressions" ? "impressions" : "clicks";
    return {
      sql: `
      WITH monthly AS (
        SELECT
          strftime(date, '%Y-%m') AS month,
          CAST(SUM(${metricColumn}) AS DOUBLE) AS value
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY month
      ),
      stats AS (
        SELECT
          AVG(value) AS avg_val,
          COALESCE(STDDEV_POP(value), 0.0) AS std_val,
          CAST(COUNT(*) AS DOUBLE) AS month_count
        FROM monthly
      )
      SELECT
        m.month AS month,
        m.value AS value,
        CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
        (s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
        (s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
        CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
        s.month_count AS monthCount
      FROM monthly m, stats s
      ORDER BY m.month
    `,
      params: [startDate, endDate],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Shape SQL rows into the analyzer result. `strength` and `monthCount` are
   * repeated on every row by the query, so they are read from the first row.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const breakdown = arr.map((r) => ({
      month: str$3(r.month),
      value: num$4(r.value),
      vsAverage: num$4(r.vsAverage),
      isPeak: bool$1(r.isPeak),
      isTrough: bool$1(r.isTrough)
    }));
    const first = arr[0];
    const strength = first ? num$4(first.strength) : 0;
    const monthCount = first ? num$4(first.monthCount) : 0;
    // "YYYY-MM" -> "MM", de-duplicated across years.
    const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
    const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
    const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
    const insufficientData = monthCount < 12;
    return {
      results: breakdown,
      meta: {
        total: breakdown.length,
        hasSeasonality,
        strength,
        peakMonths,
        troughMonths,
        insufficientData
      }
    };
  },

  /** Request daily rows for the analysis period. */
  buildRows(params) {
    return { dates: datesQueryState(periodOf(params), params.limit) };
  },

  /**
   * Rows-path reducer. Returns the same `meta` fields as `reduceSql` so
   * callers do not have to know which path produced the result (previously
   * only `strength` was reported here).
   */
  reduceRows(rows, params) {
    const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
    return {
      results: result.monthlyBreakdown,
      meta: {
        total: result.monthlyBreakdown.length,
        hasSeasonality: result.hasSeasonality,
        strength: result.strength,
        peakMonths: result.peakMonths,
        troughMonths: result.troughMonths,
        insufficientData: result.insufficientData
      }
    };
  }
});
|
|
4078
|
+
/**
 * Stringify a value, mapping `null` and `undefined` to the empty string.
 *
 * @param {unknown} v - Any value.
 * @returns {string} "" for nullish input, otherwise `String(v)`.
 */
function str$2(v) {
  if (v === null || v === undefined) {
    return "";
  }
  return String(v);
}
|
|
4081
|
+
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * @param {unknown} v - Value to interpret.
 * @returns {boolean} `true` only when `v` is `true`, `1`, or "true".
 */
function bool(v) {
  const truthyEncodings = [true, 1, "true"];
  return truthyEncodings.includes(v);
}
|
|
4084
|
+
/**
 * Normalize a list that may arrive either as an array or as JSON text.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} `v` itself when it is an array; the parsed value when `v`
 *   is a non-empty JSON string that decodes to an array; otherwise `[]`.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) return v;
  const isNonEmptyText = typeof v === "string" && v.length > 0;
  if (!isNonEmptyText) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
4092
|
+
/**
 * "stl-decompose" analyzer: per (query, page) decomposition of a daily
 * series into trend, day-of-week seasonal and residual components, computed
 * entirely in SQL over the "page_keywords" table.
 *
 * Pipeline of the query (one CTE per step):
 *   daily             - per query/url/date sums; `observed` is the chosen metric
 *   entity_stats      - keeps pairs with >= 21 daily rows and enough impressions
 *   filtered          - daily rows of the surviving pairs
 *   trended           - centered 7-row moving average (NULL when the frame
 *                       holds fewer than 7 rows, i.e. at the series edges)
 *   detrended         - observed minus trend, plus the day of week
 *   seasonal_raw /
 *   seasonal_centered - mean detrended value per day of week, re-centered on
 *                       the pair's mean
 *   residualed        - observed - trend - seasonal
 *   scored            - anomaly flag: |residual| > 2 population std devs
 *   per_entity/series - per-pair aggregates and the JSON-encoded series
 *
 * NOTE(review): the moving-average frame counts rows, not calendar days, so
 * pairs with missing dates get a frame wider than one week — confirm this is
 * intended.
 */
const stlDecomposeAnalyzer = defineAnalyzer({
  id: "stl-decompose",

  /**
   * Build the decomposition query.
   * Defaults: window = last 93 days up to `defaultEndDate()`,
   * `minImpressions` = 100, `limit` = 100, metric = impressions unless
   * `params.metric === "clicks"`.
   */
  buildSql(params) {
    const windowEnd = params.endDate ?? defaultEndDate();
    const windowStart = params.startDate ?? daysAgo(93);
    const impressionsFloor = params.minImpressions ?? 100;
    const requiredDays = 21;
    // Whitelisted column name; never taken verbatim from params.
    const observedColumn = params.metric === "clicks" ? "clicks" : "impressions";
    const maxRows = params.limit ?? 100;

    const sql = `
      WITH daily AS (
        SELECT
          query,
          url AS page,
          date,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          CAST(SUM(${observedColumn}) AS DOUBLE) AS observed
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
      ),
      entity_stats AS (
        SELECT query, page,
          COUNT(*) AS days,
          SUM(impressions) AS total_impressions
        FROM daily
        GROUP BY query, page
        HAVING COUNT(*) >= ${Number(requiredDays)}
          AND SUM(impressions) >= ?
      ),
      filtered AS (
        SELECT d.*
        FROM daily d
        JOIN entity_stats e USING (query, page)
      ),
      trended AS (
        SELECT *,
          CASE
            WHEN COUNT(*) OVER w = 7
            THEN AVG(observed) OVER w
            ELSE NULL
          END AS trend
        FROM filtered
        WINDOW w AS (
          PARTITION BY query, page
          ORDER BY date
          ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
        )
      ),
      detrended AS (
        SELECT *,
          observed - trend AS detrended,
          dayofweek(date) AS dow
        FROM trended
      ),
      seasonal_raw AS (
        SELECT *,
          AVG(detrended) OVER (PARTITION BY query, page, dow) AS seasonal_dow
        FROM detrended
      ),
      seasonal_centered AS (
        SELECT *,
          seasonal_dow - AVG(seasonal_dow) OVER (PARTITION BY query, page) AS seasonal
        FROM seasonal_raw
      ),
      residualed AS (
        SELECT *,
          CASE
            WHEN trend IS NULL OR seasonal IS NULL THEN NULL
            ELSE observed - trend - seasonal
          END AS residual
        FROM seasonal_centered
      ),
      scored AS (
        SELECT *,
          STDDEV_POP(residual) OVER (PARTITION BY query, page) AS resid_std,
          CASE
            WHEN residual IS NOT NULL
              AND STDDEV_POP(residual) OVER (PARTITION BY query, page) > 0
              AND ABS(residual) > 2.0 * STDDEV_POP(residual) OVER (PARTITION BY query, page)
            THEN true ELSE false
          END AS anomaly
        FROM residualed
      ),
      per_entity AS (
        SELECT query, page,
          COUNT(*) AS days,
          SUM(impressions) AS total_impressions,
          VAR_POP(detrended) AS var_detrended,
          VAR_POP(seasonal) AS var_seasonal,
          VAR_POP(residual) AS var_residual,
          COUNT(*) FILTER (WHERE anomaly) AS residual_anomalies,
          REGR_SLOPE(observed, epoch(date) / 86400.0) AS trend_slope
        FROM scored
        GROUP BY query, page
      ),
      series AS (
        SELECT query, page,
          to_json(list({
            'date': strftime(date, '%Y-%m-%d'),
            'observed': observed,
            'trend': trend,
            'seasonal': seasonal,
            'residual': residual,
            'anomaly': anomaly
          } ORDER BY date)) AS seriesJson
        FROM scored
        GROUP BY query, page
      )
      SELECT
        e.query AS keyword,
        e.page,
        CAST(e.total_impressions AS DOUBLE) AS totalImpressions,
        CAST(e.days AS DOUBLE) AS days,
        CASE
          WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
          ELSE LEAST(e.var_seasonal / NULLIF(e.var_detrended, 0), 1.0)
        END AS seasonalStrength,
        CASE
          WHEN e.var_detrended IS NULL OR e.var_detrended = 0 THEN 0.0
          ELSE GREATEST(0.0, 1.0 - e.var_residual / NULLIF(e.var_detrended, 0))
        END AS trendStrength,
        CAST(e.residual_anomalies AS DOUBLE) AS residualAnomalies,
        COALESCE(e.trend_slope, 0.0) AS trendSlope,
        s.seriesJson
      FROM per_entity e
      LEFT JOIN series s USING (query, page)
      ORDER BY seasonalStrength DESC, ABS(COALESCE(e.trend_slope, 0.0)) DESC
      LIMIT ${Number(maxRows)}
    `;

    return {
      sql,
      // Placeholder order: date lower bound, date upper bound, impressions floor.
      params: [windowStart, windowEnd, impressionsFloor],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(windowStart, windowEnd)
      }
    };
  },

  /**
   * Shape SQL rows into results. `seriesJson` may arrive as JSON text or as
   * an array; trend/seasonal/residual stay `null` where the query produced
   * no value. `meta.avgSeasonalStrength` is the mean over returned rows
   * (0 when there are none).
   */
  reduceSql(rows, params) {
    const toPoint = (point) => ({
      date: str$2(point.date),
      observed: num$4(point.observed),
      trend: point.trend == null ? null : num$4(point.trend),
      seasonal: point.seasonal == null ? null : num$4(point.seasonal),
      residual: point.residual == null ? null : num$4(point.residual),
      anomaly: bool(point.anomaly)
    });
    const toEntity = (row) => ({
      keyword: str$2(row.keyword),
      page: str$2(row.page),
      totalImpressions: num$4(row.totalImpressions),
      days: num$4(row.days),
      seasonalStrength: num$4(row.seasonalStrength),
      trendStrength: num$4(row.trendStrength),
      residualAnomalies: num$4(row.residualAnomalies),
      trendSlope: num$4(row.trendSlope),
      series: parseJsonList$2(row.seriesJson).map((point) => toPoint(point))
    });

    const input = Array.isArray(rows) ? rows : [];
    const metric = params.metric === "clicks" ? "clicks" : "impressions";
    const results = input.map((row) => toEntity(row));

    let strengthSum = 0;
    for (const entity of results) strengthSum += entity.seasonalStrength;

    return {
      results,
      meta: {
        total: results.length,
        metric,
        avgSeasonalStrength: results.length > 0 ? strengthSum / results.length : 0
      }
    };
  }
});
|
|
4268
|
+
/** Rows requested by the rows path when the caller gives no `limit`. */
const DEFAULT_ROW_LIMIT$1 = 25e3;

/**
 * "striking-distance" analyzer: keyword/page pairs inside a position band
 * that already earn impressions but convert poorly.
 *
 * A single `reduce` serves both the SQL and the rows path; all filtering,
 * sorting and pagination happen in JavaScript.
 */
const strikingDistanceAnalyzer = defineAnalyzer({
  id: "striking-distance",

  /**
   * Filter, rank and paginate candidate rows.
   * Defaults: position band 4..20, `minImpressions` 100, `maxCtr` 0.05,
   * `limit` 1000. `potentialClicks` is 15% of impressions, rounded.
   * `meta.total` counts matches before pagination.
   */
  reduce(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const minPosition = params.minPosition ?? 4;
    const maxPosition = params.maxPosition ?? 20;
    const minImpressions = params.minImpressions ?? 100;
    const maxCtr = params.maxCtr ?? .05;
    const limit = params.limit ?? 1e3;

    const candidates = [];
    input.forEach((row) => {
      const position = num$4(row.position);
      const impressions = num$4(row.impressions);
      const ctr = num$4(row.ctr);
      const clicks = num$4(row.clicks);

      const outsideBand = position < minPosition || position > maxPosition;
      if (outsideBand || impressions < minImpressions || ctr > maxCtr) return;

      candidates.push({
        keyword: String(row.query ?? ""),
        page: row.page == null ? null : String(row.page),
        clicks,
        impressions,
        ctr,
        position,
        potentialClicks: Math.round(impressions * .15)
      });
    });

    // Largest opportunity first.
    candidates.sort((left, right) => right.potentialClicks - left.potentialClicks);

    const page = paginateInMemory(candidates, { limit, offset: params.offset });
    return {
      results: page,
      meta: {
        total: candidates.length,
        returned: page.length
      }
    };
  },

  /**
   * Aggregate clicks, impressions, CTR and position per query/url over the
   * period from the "page_keywords" table. No thresholds are applied in
   * SQL; `reduce` does that.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const sql = `
      SELECT
        query,
        url AS page,
        CAST(SUM(clicks) AS DOUBLE) AS clicks,
        CAST(SUM(impressions) AS DOUBLE) AS impressions,
        CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0) AS ctr,
        SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS position
      FROM read_parquet({{FILES}}, union_by_name = true)
      WHERE date >= ? AND date <= ?
      GROUP BY query, url
    `;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Request keyword rows for the period.
   * NOTE(review): `params.limit` is also the result page size used by
   * `reduce`, so a small `limit` caps the rows fetched before filtering and
   * offset pagination — confirm this is intended.
   */
  buildRows(params) {
    const fetchLimit = params.limit ?? DEFAULT_ROW_LIMIT$1;
    return { keywords: keywordsQueryState(periodOf(params), fetchLimit) };
  }
});
|
|
4336
|
+
/**
 * Coerce a value to a string, with nullish input becoming "".
 *
 * @param {unknown} v - Any value.
 * @returns {string} "" for `null`/`undefined`, otherwise `String(v)`.
 */
function str$1(v) {
  const isMissing = v === null || v === undefined;
  return isMissing ? "" : String(v);
}
|
|
4339
|
+
/**
 * Accept a list given as an array or as JSON text.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} The array itself, the decoded JSON array, or `[]` when
 *   `v` is neither (including JSON that decodes to a non-array).
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$1(v) {
  if (Array.isArray(v)) {
    return v;
  }
  if (typeof v !== "string" || v.length === 0) {
    return [];
  }
  const decoded = JSON.parse(v);
  if (Array.isArray(decoded)) {
    return decoded;
  }
  return [];
}
|
|
4347
|
+
/**
 * "survival" analyzer: Kaplan-Meier style survival curves for how long a
 * query/url pair stays in the top 10, per URL cohort and overall.
 *
 * Query outline:
 *   daily         - per query/url/date metrics, keeping days that meet the
 *                   impressions floor
 *   classified    - flags days with position <= 10
 *   transitions   - marks a day as an entry when the previous row of the
 *                   pair was absent or not in the top 10
 *   run_ids       - running count of entries over in-top-10 days = run id
 *   episodes_raw  - entry date, exit date and tenure (days, inclusive) per run
 *   episodes      - adds `censored` (run ends within 2 days of the last
 *                   observed date) and `cohort` (first URL path segment,
 *                   or 'home')
 *   episodes_all  - every episode twice: under its cohort and under '__all__'
 *   events/km/
 *   km_surv       - events and at-risk counts per tenure, then the
 *                   cumulative product of (1 - d/n) via EXP(SUM(LN(...)))
 *   curve_agg /
 *   cohort_stats  - JSON curve and episode count / censoring rate per cohort
 *
 * The `cohort_totals` CTE is defined but not referenced by any later step.
 */
const survivalAnalyzer = defineAnalyzer({
  id: "survival",

  /**
   * Build the survival query over "page_keywords".
   * Defaults: window = last 183 days up to `defaultEndDate()`,
   * `minImpressions` = 5 (applied per query/url/day).
   */
  buildSql(params) {
    const windowEnd = params.endDate ?? defaultEndDate();
    const windowStart = params.startDate ?? daysAgo(183);
    const dailyImpressionsFloor = params.minImpressions ?? 5;

    const sql = `
      WITH daily AS (
        SELECT
          query,
          url,
          date,
          ${METRIC_EXPR.clicks} AS day_clicks,
          ${METRIC_EXPR.impressions} AS day_impressions,
          ${METRIC_EXPR.position} AS day_position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
          AND query IS NOT NULL AND query <> ''
          AND url IS NOT NULL AND url <> ''
        GROUP BY query, url, date
        HAVING SUM(impressions) >= ?
      ),
      classified AS (
        SELECT *,
          (day_position <= 10) AS in_top10
        FROM daily
      ),
      transitions AS (
        SELECT *,
          CASE
            WHEN in_top10 AND (LAG(in_top10) OVER w IS NULL OR NOT LAG(in_top10) OVER w)
            THEN 1 ELSE 0
          END AS is_entry
        FROM classified
        WINDOW w AS (PARTITION BY query, url ORDER BY date)
      ),
      run_ids AS (
        SELECT *,
          SUM(is_entry) OVER (PARTITION BY query, url ORDER BY date) AS run_id
        FROM transitions
        WHERE in_top10
      ),
      window_bounds AS (
        SELECT MIN(date) AS window_start, MAX(date) AS window_end FROM daily
      ),
      episodes_raw AS (
        SELECT
          query, url, run_id,
          MIN(date) AS entry_date,
          MAX(date) AS exit_date,
          DATEDIFF('day', MIN(date), MAX(date)) + 1 AS tenure
        FROM run_ids
        GROUP BY query, url, run_id
      ),
      episodes AS (
        SELECT
          e.query, e.url, e.run_id, e.entry_date, e.exit_date, e.tenure,
          (e.exit_date >= wb.window_end - INTERVAL 2 DAY) AS censored,
          CASE
            WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?(/[^/?#]*)', 1) = '/' OR e.url = '/'
            THEN 'home'
            WHEN regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1) = ''
            THEN 'home'
            ELSE regexp_extract(e.url, '^(?:https?://[^/]+)?/([^/?#]+)', 1)
          END AS cohort
        FROM episodes_raw e
        CROSS JOIN window_bounds wb
      ),
      episodes_all AS (
        SELECT query, url, tenure, censored, cohort FROM episodes
        UNION ALL
        SELECT query, url, tenure, censored, '__all__' AS cohort FROM episodes
      ),
      cohort_totals AS (
        SELECT cohort, COUNT(*) AS n_total
        FROM episodes_all
        GROUP BY cohort
      ),
      events AS (
        SELECT
          cohort,
          tenure,
          COUNT(*) FILTER (WHERE NOT censored) AS d_t,
          COUNT(*) AS n_ending_at_t
        FROM episodes_all
        GROUP BY cohort, tenure
      ),
      km AS (
        SELECT
          e.cohort,
          e.tenure,
          e.d_t,
          e.n_ending_at_t,
          SUM(e.n_ending_at_t) OVER (PARTITION BY e.cohort ORDER BY e.tenure DESC
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS at_risk
        FROM events e
      ),
      km_surv AS (
        SELECT
          cohort, tenure, d_t, at_risk,
          EXP(SUM(LN(GREATEST(1.0 - CAST(d_t AS DOUBLE) / NULLIF(at_risk, 0), 1e-9)))
            OVER (PARTITION BY cohort ORDER BY tenure
              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) AS survival
        FROM km
      ),
      curve_agg AS (
        SELECT
          cohort,
          to_json(list({
            'tenure': tenure,
            'survival': survival,
            'atRisk': at_risk,
            'events': d_t
          } ORDER BY tenure)) AS curveJson
        FROM km_surv
        GROUP BY cohort
      ),
      cohort_stats AS (
        SELECT
          ea.cohort,
          COUNT(*) AS episode_count,
          AVG(CASE WHEN ea.censored THEN 1.0 ELSE 0.0 END) AS censoring_rate
        FROM episodes_all ea
        GROUP BY ea.cohort
      )
      SELECT
        cs.cohort,
        cs.episode_count AS episodeCount,
        cs.censoring_rate AS censoringRate,
        ca.curveJson
      FROM cohort_stats cs
      LEFT JOIN curve_agg ca USING (cohort)
      ORDER BY cs.cohort
    `;

    return {
      sql,
      // Placeholder order: date lower bound, date upper bound, daily impressions floor.
      params: [windowStart, windowEnd, dailyImpressionsFloor],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(windowStart, windowEnd)
      }
    };
  },

  /**
   * Shape SQL rows into per-cohort results and derive the median tenure from
   * each curve. `meta.totalEpisodes` comes from the '__all__' row,
   * `meta.cohortCount` excludes it, and `meta.windowDays` is the inclusive
   * length of the requested window.
   */
  reduceSql(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const windowEnd = params.endDate ?? defaultEndDate();
    const windowStart = params.startDate ?? daysAgo(183);
    const spanMs = new Date(windowEnd).getTime() - new Date(windowStart).getTime();
    const windowDays = Math.round(spanMs / MS_PER_DAY) + 1;

    // Tenure at which survival first reaches 0.5, linearly interpolated
    // between the two surrounding curve points.
    const medianOf = (curve) => {
      let median = 0;
      const crossing = curve.findIndex((point) => point.survival <= .5);
      if (crossing === 0) {
        median = curve[0].tenure;
      } else if (crossing > 0) {
        const before = curve[crossing - 1];
        const at = curve[crossing];
        const drop = before.survival - at.survival;
        const frac = drop > 0 ? (before.survival - .5) / drop : 0;
        median = before.tenure + frac * (at.tenure - before.tenure);
      }
      // Curve never reached 0.5: report the longest observed tenure instead.
      const tail = curve[curve.length - 1];
      if (median === 0 && tail && tail.survival > .5) median = tail.tenure;
      return median;
    };

    const results = input.map((row) => {
      const curve = parseJsonList$1(row.curveJson).map((point) => ({
        tenure: num$4(point.tenure),
        survival: num$4(point.survival),
        atRisk: num$4(point.atRisk),
        events: num$4(point.events)
      }));
      const medianTenure = medianOf(curve);
      return {
        cohort: str$1(row.cohort),
        episodeCount: num$4(row.episodeCount),
        censoringRate: num$4(row.censoringRate),
        medianTenure,
        curve
      };
    });

    const overall = results.find((entry) => entry.cohort === "__all__");
    const namedCohorts = results.filter((entry) => entry.cohort !== "__all__");
    return {
      results,
      meta: {
        totalEpisodes: overall?.episodeCount ?? 0,
        cohortCount: namedCohorts.length,
        windowDays
      }
    };
  }
});
|
|
4539
|
+
/**
 * Coerce a value to a string; `null` and `undefined` become "".
 *
 * @param {unknown} v - Any value.
 * @returns {string} "" for nullish input, otherwise `String(v)`.
 */
function str(v) {
  return String(v ?? "");
}
|
|
4542
|
+
/**
 * Normalize a list supplied either as an array or as JSON text.
 *
 * @param {unknown} v - An array, a JSON string, or anything else.
 * @returns {Array} `v` when it is already an array; the decoded value when
 *   `v` is non-empty JSON text for an array; `[]` in every other case.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList(v) {
  if (Array.isArray(v)) return v;
  if (typeof v === "string" && v.length > 0) {
    const decoded = JSON.parse(v);
    if (Array.isArray(decoded)) return decoded;
  }
  return [];
}
|
|
4550
|
+
/**
 * "trends" analyzer: weekly click trends per page or per keyword.
 *
 * The query buckets rows by week, then per entity computes totals, a
 * linear-regression slope of weekly clicks, the ratio of second-half to
 * first-half clicks (`growthRatio`; 10.0 when clicks appear only in the
 * second half, 1.0 when there are none at all) and a label:
 *   accelerating - growthRatio >= 1.5 and slope > 0
 *   growing      - growthRatio >= 1.1 and slope >= 0
 *   cratering    - growthRatio < 0.5
 *   declining    - growthRatio < 0.9 and slope < 0
 *   steady       - everything else
 * Rows are ordered accelerating, cratering, growing, declining, steady, then
 * by distance of growthRatio from 1 and by total clicks.
 */
const trendsAnalyzer = defineAnalyzer({
  id: "trends",

  /**
   * Build the weekly trend query.
   * Defaults: `weeks` = 28, end date = `defaultEndDate()`, start date =
   * `weeks * 7 - 1` days before the end date, `minImpressions` = 100,
   * `minWeeksWithData` = max(2, floor(weeks / 4)), `limit` = 500. Any
   * `dimension` other than "keywords" means pages.
   */
  buildSql(params) {
    const weekCount = params.weeks ?? 28;
    const windowEnd = params.endDate || defaultEndDate();
    const windowStart = params.startDate
      || toIsoDate(/* @__PURE__ */ new Date(Date.parse(windowEnd) - (weekCount * 7 - 1) * MS_PER_DAY));
    const impressionsFloor = params.minImpressions ?? 100;
    const weeksFloor = params.minWeeksWithData ?? Math.max(2, Math.floor(weekCount / 4));
    const maxRows = params.limit ?? 500;
    const byKeyword = params.dimension === "keywords";
    // Whitelisted identifiers; never taken verbatim from params.
    const entityColumn = byKeyword ? "query" : "url";
    const table = byKeyword ? "keywords" : "pages";

    const sql = `
      WITH bucketed AS (
        SELECT
          ${entityColumn} AS entity,
          date_trunc('week', CAST(date AS DATE)) AS week,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          SUM(sum_position) AS sum_position_sum
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY entity, week
      ),
      with_meta AS (
        SELECT
          entity, week, clicks, impressions, sum_position_sum,
          ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1 AS week_idx,
          COUNT(*) OVER (PARTITION BY entity) AS n_weeks,
          (ROW_NUMBER() OVER (PARTITION BY entity ORDER BY week) - 1)
            < (COUNT(*) OVER (PARTITION BY entity) / 2) AS is_first_half
        FROM bucketed
      ),
      agg AS (
        SELECT
          entity,
          SUM(clicks) AS totalClicks,
          SUM(impressions) AS totalImpressions,
          any_value(n_weeks) AS weeksWithData,
          COALESCE(regr_slope(clicks, CAST(week_idx AS DOUBLE)), 0.0) AS slope,
          SUM(CASE WHEN is_first_half THEN clicks ELSE 0 END) AS firstHalfClicks,
          SUM(CASE WHEN NOT is_first_half THEN clicks ELSE 0 END) AS secondHalfClicks,
          SUM(sum_position_sum) / NULLIF(SUM(impressions), 0) + 1 AS avgPosition,
          to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
        FROM with_meta
        GROUP BY entity
        HAVING SUM(impressions) >= ? AND any_value(n_weeks) >= ?
      ),
      classified AS (
        SELECT
          *,
          CASE
            WHEN firstHalfClicks = 0 AND secondHalfClicks > 0 THEN 10.0
            WHEN firstHalfClicks = 0 THEN 1.0
            ELSE secondHalfClicks / firstHalfClicks
          END AS growthRatio
        FROM agg
      )
      SELECT
        entity,
        totalClicks,
        totalImpressions,
        weeksWithData,
        slope,
        growthRatio,
        avgPosition,
        CASE
          WHEN growthRatio >= 1.5 AND slope > 0 THEN 'accelerating'
          WHEN growthRatio >= 1.1 AND slope >= 0 THEN 'growing'
          WHEN growthRatio < 0.5 THEN 'cratering'
          WHEN growthRatio < 0.9 AND slope < 0 THEN 'declining'
          ELSE 'steady'
        END AS trend,
        seriesJson
      FROM classified
      ORDER BY
        CASE
          WHEN growthRatio >= 1.5 AND slope > 0 THEN 0
          WHEN growthRatio < 0.5 THEN 1
          WHEN growthRatio >= 1.1 AND slope >= 0 THEN 2
          WHEN growthRatio < 0.9 AND slope < 0 THEN 3
          ELSE 4
        END,
        ABS(growthRatio - 1) DESC,
        totalClicks DESC
      LIMIT ${Number(maxRows)}
    `;

    return {
      sql,
      // Placeholder order: date bounds, impressions floor, weeks-with-data floor.
      params: [windowStart, windowEnd, impressionsFloor, weeksFloor],
      current: {
        table,
        partitions: enumeratePartitions(windowStart, windowEnd)
      }
    };
  },

  /**
   * Shape SQL rows into results keyed by `query` (keywords) or `page`
   * (pages) and tally how many entities carry each trend label. The window
   * is re-derived here with the same defaults as `buildSql` so it can be
   * echoed in `meta`.
   */
  reduceSql(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const weekCount = params.weeks ?? 28;
    const windowEnd = params.endDate || defaultEndDate();
    const windowStart = params.startDate
      || toIsoDate(/* @__PURE__ */ new Date(Date.parse(windowEnd) - (weekCount * 7 - 1) * MS_PER_DAY));
    const dimension = params.dimension === "keywords" ? "keywords" : "pages";
    const entityKey = dimension === "keywords" ? "query" : "page";

    const results = input.map((row) => {
      const series = parseJsonList(row.seriesJson).map((point) => ({
        week: str(point.week),
        clicks: num$4(point.clicks),
        impressions: num$4(point.impressions)
      }));
      return {
        [entityKey]: str(row.entity),
        totalClicks: num$4(row.totalClicks),
        totalImpressions: num$4(row.totalImpressions),
        weeksWithData: num$4(row.weeksWithData),
        slope: num$4(row.slope),
        growthRatio: num$4(row.growthRatio),
        avgPosition: num$4(row.avgPosition),
        trend: str(row.trend),
        series
      };
    });

    const counts = {
      accelerating: 0,
      growing: 0,
      steady: 0,
      declining: 0,
      cratering: 0
    };
    results.forEach((entry) => {
      counts[entry.trend] = (counts[entry.trend] ?? 0) + 1;
    });

    return {
      results,
      meta: {
        total: results.length,
        dimension,
        weeks: Number(weekCount),
        startDate: windowStart,
        endDate: windowEnd,
        counts
      }
    };
  }
});
|
|
4698
|
+
/** Rows requested by the rows path when the caller gives no `limit`. */
const DEFAULT_ROW_LIMIT = 25e3;

/** Sorter for rows-path results, keyed on impressions. */
const sortRowResults = createSorter((item) => item.impressions, "impressions");

/**
 * "zero-click" analyzer: well-ranked query/page pairs with many impressions
 * but a low click-through rate.
 *
 * Defaults shared by both paths: `minImpressions` 1000, `maxCtr` 0.03,
 * `maxPosition` 10. Each result carries `missedClicks`: impressions times
 * an expected CTR for the position (0.30 at position <= 1, 0.15 up to 3,
 * 0.08 up to 5, 0.04 beyond), rounded, minus actual clicks, floored at 0.
 *
 * NOTE(review): the two paths still differ in places this edit does not
 * touch: the SQL path keeps `ctr < maxCtr` and one row per query/page,
 * while the rows path keeps `ctr <= maxCtr` and only the best-positioned row
 * per query; the SQL path also defaults `limit` to 1000 while the rows path
 * passes `params.limit` through unchanged. Confirm which is intended.
 */
const zeroClickAnalyzer = defineAnalyzer({
  id: "zero-click",

  /** Build the aggregation + filter query over "page_keywords". */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    const limit = params.limit ?? 1e3;
    return {
      sql: `
      WITH agg AS (
        SELECT
          query,
          url AS page,
          ${METRIC_EXPR.clicks} AS clicks,
          ${METRIC_EXPR.impressions} AS impressions,
          ${METRIC_EXPR.ctr} AS ctr,
          ${METRIC_EXPR.position} AS position
        FROM read_parquet({{FILES}}, union_by_name = true)
        WHERE date >= ? AND date <= ?
        GROUP BY query, url
        HAVING SUM(impressions) >= ?
      )
      SELECT
        query, page, clicks, impressions, ctr, position,
        CAST(GREATEST(0, ROUND(impressions * (
          CASE
            WHEN position <= 1 THEN 0.30
            WHEN position <= 3 THEN 0.15
            WHEN position <= 5 THEN 0.08
            ELSE 0.04
          END
        )) - clicks) AS DOUBLE) AS missedClicks
      FROM agg
      WHERE position <= ? AND ctr < ?
      ORDER BY impressions DESC
      ${paginateClause({
        limit,
        offset: params.offset
      })}
    `,
      // Placeholder order: date bounds, impressions floor, position cap, CTR cap.
      params: [
        startDate,
        endDate,
        minImpressions,
        maxPosition,
        maxCtr
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /** Shape SQL rows into results; thresholds are echoed in `meta`. */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    return {
      results: arr.map((r) => ({
        query: r.query == null ? "" : String(r.query),
        page: r.page == null ? "" : String(r.page),
        clicks: num$4(r.clicks),
        impressions: num$4(r.impressions),
        ctr: num$4(r.ctr),
        position: num$4(r.position),
        missedClicks: num$4(r.missedClicks)
      })),
      meta: {
        total: arr.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  },

  /** Request query/page rows for the period. */
  buildRows(params) {
    const period = periodOf(params);
    const limit = params.limit ?? DEFAULT_ROW_LIMIT;
    return { rows: gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState() };
  },

  /**
   * Rows-path reducer: filter by the thresholds, keep the best-positioned
   * row per query, sort by impressions (descending) and paginate.
   *
   * Each result now includes `missedClicks`, and `meta` echoes the
   * thresholds, so the shape matches what `reduceSql` returns. `meta.total`
   * (matches before pagination) and `meta.returned` are unchanged.
   */
  reduceRows(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;

    // Same position buckets as the CASE expression in buildSql.
    const expectedCtr = (position) => {
      if (position <= 1) return .3;
      if (position <= 3) return .15;
      if (position <= 5) return .08;
      return .04;
    };
    const missedClicksFor = (row) =>
      Math.max(0, Math.round(row.impressions * expectedCtr(row.position)) - row.clicks);

    const queryMap = /* @__PURE__ */ new Map();
    for (const row of arr) {
      if (row.impressions < minImpressions) continue;
      if (row.position > maxPosition) continue;
      if (row.ctr > maxCtr) continue;
      const existing = queryMap.get(row.query);
      if (!existing || row.position < existing.position) queryMap.set(row.query, {
        query: row.query,
        page: row.page,
        clicks: row.clicks,
        impressions: row.impressions,
        ctr: row.ctr,
        position: row.position,
        missedClicks: missedClicksFor(row)
      });
    }
    const results = sortRowResults(Array.from(queryMap.values()), "impressions", "desc");
    const paged = paginateInMemory(results, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: results.length,
        returned: paged.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  }
});
|
|
4816
|
+
/**
 * Error thrown when an analyzer needs capabilities the source lacks.
 *
 * Instances expose `tool` (the analyzer identifier passed in) and
 * `missing` (the list of capability names passed in).
 */
class AnalyzerCapabilityError extends Error {
	/**
	 * @param {string} tool - Analyzer identifier reported in the message.
	 * @param {string[]} missing - Capability names that were not available.
	 */
	constructor(tool, missing) {
		const missingList = missing.join(", ");
		super(`analyzer "${tool}" requires capabilities [${missingList}] not provided by source`);
		this.tool = tool;
		this.missing = missing;
		this.name = "AnalyzerCapabilityError";
	}
}
|
|
4824
|
+
/**
 * Derive the set of capability names a source provides.
 *
 * `executeSql` is reported when the source has a truthy `executeSql`
 * member; every other capability is reported when the matching flag on
 * `source.capabilities` is truthy. Note that the `fileSets` flag is
 * surfaced under the name `partitionedParquet`.
 *
 * @param {object} source - Data source with a `capabilities` object.
 * @returns {Set<string>} Capability names, in detection order.
 */
function sourceCapabilities(source) {
	const detected = /* @__PURE__ */ new Set();
	if (source.executeSql) detected.add("executeSql");

	const flags = source.capabilities;
	// [flag on source.capabilities, capability name reported to analyzers]
	const flagToCapability = [
		["fileSets", "partitionedParquet"],
		["regex", "regex"],
		["windowTotals", "windowTotals"],
		["comparisonJoin", "comparisonJoin"],
		["attachedTables", "attachedTables"]
	];
	for (const [flag, capability] of flagToCapability) {
		if (flags[flag]) detected.add(capability);
	}
	return detected;
}
|
|
4834
|
+
/**
 * Verify that every capability an analyzer requires is available.
 *
 * @param {{ id: string, requires: string[] }} analyzer - Analyzer to check.
 * @param {Set<string>} caps - Capabilities offered by the source.
 * @throws {AnalyzerCapabilityError} Listing each required capability absent from `caps`.
 */
function assertSatisfies(analyzer, caps) {
	const isUnmet = (capability) => !caps.has(capability);
	const unmet = analyzer.requires.filter(isUnmet);
	if (unmet.length === 0) return;
	throw new AnalyzerCapabilityError(analyzer.id, unmet);
}
|
|
4838
|
+
/**
 * Resolve the analyzer named by `params.type` and run it against a source.
 *
 * The SQL variant is preferred when the source reports either the
 * `executeSql` or the `attachedTables` capability. The analyzer's
 * requirements are checked before its plan is built; the plan is then
 * dispatched on `plan.kind` ("rows" versus anything else).
 *
 * @param {object} source - Data source to query.
 * @param {{ type: string }} params - Analyzer parameters; `type` selects the analyzer.
 * @param {{ resolveAnalyzer: Function }} registry - Analyzer registry.
 * @returns {Promise<{ results: unknown, meta: object }>}
 * @throws {AnalyzerCapabilityError} When no analyzer resolves or a requirement is unmet.
 */
async function runAnalyzerFromSource(source, params, registry) {
	const caps = sourceCapabilities(source);
	const analyzerId = params.type;
	const preferSql = caps.has("executeSql") || caps.has("attachedTables");
	const analyzer = registry.resolveAnalyzer(analyzerId, preferSql);
	if (!analyzer) throw new AnalyzerCapabilityError(params.type, ["executeSql"]);

	assertSatisfies(analyzer, caps);

	const plan = analyzer.build(params);
	const runPlan = plan.kind === "rows" ? runRowsPlanAgainstSource : runSqlPlanAgainstSource;
	return runPlan(source, analyzer, plan, params);
}
|
|
4847
|
+
/**
 * Execute a rows plan: fetch every named query concurrently, then reduce.
 *
 * Each entry of `plan.queries` is resolved through
 * `source.queryRows(entry.state)`; the analyzer's reducer receives the
 * results keyed by the same names. `meta.tool` defaults to `params.type`
 * but a `tool` key in the reducer's own meta takes precedence.
 *
 * @param {{ queryRows: Function }} source - Source able to run row queries.
 * @param {{ reduce: Function }} analyzer - Analyzer whose reducer is applied.
 * @param {{ queries: Record<string, { state: unknown }> }} plan - Named queries to run.
 * @param {{ type: string }} params - Analyzer parameters forwarded to the reducer.
 * @returns {Promise<{ results: unknown, meta: object }>}
 */
async function runRowsPlanAgainstSource(source, analyzer, plan, params) {
	const fetchNamed = async ([name, spec]) => {
		const rows = await source.queryRows(spec.state);
		return [name, rows];
	};
	const namedRows = await Promise.all(Object.entries(plan.queries).map(fetchNamed));
	const reduced = analyzer.reduce(Object.fromEntries(namedRows), { params });
	return {
		results: reduced.results,
		meta: {
			tool: params.type,
			...reduced.meta
		}
	};
}
|
|
4860
|
+
/**
 * Map a SQL plan's file sets onto the placeholder names used for execution.
 *
 * `plan.current` is always exposed as `FILES`; `plan.previous`, when
 * truthy, as `FILES_PREV`; and each key of `plan.extraFiles` as
 * `FILES_<key>`.
 *
 * @param {{ current: unknown, previous?: unknown, extraFiles?: Record<string, unknown> }} plan
 * @returns {Record<string, unknown>} File sets keyed by placeholder name.
 */
function fileSetsFor(plan) {
	const sets = { FILES: plan.current };
	if (plan.previous) sets.FILES_PREV = plan.previous;
	if (plan.extraFiles) {
		for (const [suffix, files] of Object.entries(plan.extraFiles)) {
			sets[`FILES_${suffix}`] = files;
		}
	}
	return sets;
}
|
|
4866
|
+
/**
 * Execute a SQL plan against a source and reduce the result.
 *
 * The main statement runs first, followed by each entry of
 * `plan.extraQueries` one after another; their rows are handed to the
 * reducer as `extras`, keyed by query name. File sets are passed to
 * `executeSql` only when `source.capabilities.fileSets` is truthy.
 * `meta` is layered as: `tool`, then `source: "local"` for sources flagged
 * `localSource`, then the reducer's own meta (which wins on key clashes).
 *
 * @param {object} source - Source exposing `executeSql` and `capabilities`.
 * @param {{ id: string, reduce: Function }} analyzer - Analyzer being run.
 * @param {object} plan - SQL plan (`sql`, `params`, optional `extraQueries`, file sets).
 * @param {{ type: string }} params - Analyzer parameters forwarded to the reducer.
 * @returns {Promise<{ results: unknown, meta: object }>}
 * @throws {AnalyzerCapabilityError} When `executeSql` is absent, or the plan
 *   requires attached tables the source does not provide.
 */
async function runSqlPlanAgainstSource(source, analyzer, plan, params) {
	if (!source.executeSql) throw new AnalyzerCapabilityError(analyzer.id, ["executeSql"]);
	if (plan.requiresAttachedTables && !source.capabilities.attachedTables) {
		throw new AnalyzerCapabilityError(analyzer.id, ["attachedTables"]);
	}

	const fileSets = source.capabilities.fileSets ? fileSetsFor(plan) : void 0;
	// A fresh options object is built for every call, as each statement is independent.
	const executionOptions = () => (fileSets ? { fileSets } : void 0);

	const rows = await source.executeSql(plan.sql, plan.params, executionOptions());

	const extras = {};
	if (plan.extraQueries) {
		for (const extra of plan.extraQueries) {
			extras[extra.name] = await source.executeSql(extra.sql, extra.params, executionOptions());
		}
	}

	const reduced = analyzer.reduce(rows, {
		params,
		extras
	});
	const origin = source.capabilities.localSource ? { source: "local" } : {};
	return {
		results: reduced.results,
		meta: {
			tool: params.type,
			...origin,
			...reduced.meta
		}
	};
}
|
|
4890
|
+
/**
 * Create a registry of analyzers indexed by id, each with an optional
 * `rows` variant and an optional `sql` variant.
 *
 * @param {{ rows?: Array<{ id: string }>, sql?: Array<{ id: string }> }} [init]
 *   Analyzers to register under each variant.
 * @returns {{
 *   listAnalyzerIds: () => string[],
 *   getAnalyzerVariants: (id: string) => ({ rows?: object, sql?: object } | undefined),
 *   resolveAnalyzer: (id: string, sourceSupportsSql: boolean) => (object | undefined),
 *   listAnalyzersFor: (sourceSupportsSql: boolean) => object[],
 *   listAnalyzerIdsFor: (source: object) => string[]
 * }}
 */
function createAnalyzerRegistry(init = {}) {
	const variantsById = /* @__PURE__ */ new Map();

	// Record each analyzer under the given variant slot of its id's entry.
	const register = (slot, analyzers) => {
		for (const analyzer of analyzers ?? []) {
			const entry = variantsById.get(analyzer.id) ?? {};
			entry[slot] = analyzer;
			variantsById.set(analyzer.id, entry);
		}
	};
	register("rows", init.rows);
	register("sql", init.sql);

	/** All registered ids, sorted. */
	const listAnalyzerIds = () => Array.from(variantsById.keys()).sort();

	/** The raw variants entry for an id, or undefined when unknown. */
	const getAnalyzerVariants = (id) => variantsById.get(id);

	/** SQL-capable sources get the sql variant, falling back to rows; others get rows only. */
	const resolveAnalyzer = (id, sourceSupportsSql) => {
		const variants = variantsById.get(id);
		if (!variants) return void 0;
		return sourceSupportsSql ? variants.sql ?? variants.rows : variants.rows;
	};

	/** Resolved analyzers, in sorted-id order, skipping ids with no usable variant. */
	const listAnalyzersFor = (sourceSupportsSql) => listAnalyzerIds()
		.map((id) => resolveAnalyzer(id, sourceSupportsSql))
		.filter((analyzer) => Boolean(analyzer));

	/** Ids usable by a source; SQL support means `source.executeSql` is a function. */
	const listAnalyzerIdsFor = (source) => {
		const sourceSupportsSql = typeof source.executeSql === "function";
		return listAnalyzerIds().filter((id) => Boolean(resolveAnalyzer(id, sourceSupportsSql)));
	};

	return {
		listAnalyzerIds,
		getAnalyzerVariants,
		resolveAnalyzer,
		listAnalyzersFor,
		listAnalyzerIdsFor
	};
}
|
|
4932
|
+
/** The `rows` variants of the analyzers listed below, in this fixed order. */
const ROW_ANALYZERS = [
	strikingDistanceAnalyzer,
	opportunityAnalyzer,
	brandAnalyzer,
	concentrationAnalyzer,
	clusteringAnalyzer,
	seasonalityAnalyzer,
	moversAnalyzer,
	decayAnalyzer,
	cannibalizationAnalyzer,
	zeroClickAnalyzer
].map((analyzer) => analyzer.rows);
|
|
4944
|
+
export { AnalyzerCapabilityError, ROW_ANALYZERS, bayesianCtrAnalyzer, bipartitePagerankAnalyzer, brandAnalyzer, cannibalizationAnalyzer, changePointAnalyzer, clampLimit, clampOffset, clusteringAnalyzer, concentrationAnalyzer, contentVelocityAnalyzer, createAnalyzerRegistry, ctrAnomalyAnalyzer, ctrCurveAnalyzer, darkTrafficAnalyzer, dataDetailAnalyzer, dataQueryAnalyzer, datesQueryState, decayAnalyzer, defineAnalyzer, deviceGapAnalyzer, intentAtlasAnalyzer, keywordBreadthAnalyzer, keywordsQueryState, longTailAnalyzer, moversAnalyzer, opportunityAnalyzer, pagesQueryState, paginateClause, paginateInMemory, positionDistributionAnalyzer, positionVolatilityAnalyzer, queryMigrationAnalyzer, resolveSort, runAnalyzerFromSource, seasonalityAnalyzer, stlDecomposeAnalyzer, strikingDistanceAnalyzer, survivalAnalyzer, trendsAnalyzer, zeroClickAnalyzer };
|