@gscdump/analysis 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/analyzer/index.d.mts +893 -0
- package/dist/analyzer/index.mjs +4944 -0
- package/dist/default-registry.d.mts +93 -0
- package/dist/default-registry.mjs +1957 -0
- package/dist/index.d.mts +620 -0
- package/dist/index.mjs +2873 -0
- package/dist/period/index.d.mts +57 -0
- package/dist/period/index.mjs +150 -0
- package/dist/query/index.d.mts +26 -0
- package/dist/query/index.mjs +340 -0
- package/dist/semantic/index.d.mts +70 -0
- package/dist/semantic/index.mjs +391 -0
- package/dist/source/index.d.mts +427 -0
- package/dist/source/index.mjs +1865 -0
- package/package.json +86 -0
|
@@ -0,0 +1,1957 @@
|
|
|
1
|
+
import { SQL_ANALYZERS } from "@gscdump/engine-duckdb-node";
|
|
2
|
+
import { enumeratePartitions } from "@gscdump/engine/planner";
|
|
3
|
+
import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
|
|
4
|
+
import { between, date, gsc, page, query } from "gscdump/query";
|
|
5
|
+
import { daysAgo } from "gscdump";
|
|
6
|
+
/**
 * Creates a registry that groups analyzer variants by analyzer id.
 *
 * Each id maps to an entry object holding an optional `rows` variant and an
 * optional `sql` variant.
 *
 * @param {{ rows?: Array<{ id: string }>, sql?: Array<{ id: string }> }} [init]
 *   Variants to register.
 * @returns {object} The lookup helpers `listAnalyzerIds`, `getAnalyzerVariants`,
 *   `resolveAnalyzer`, `listAnalyzersFor` and `listAnalyzerIdsFor`.
 */
function createAnalyzerRegistry(init = {}) {
	const variantsById = /* @__PURE__ */ new Map();

	// Stores every analyzer of one kind under its id, creating the entry on first sight.
	const register = (kind, analyzers) => {
		for (const analyzer of analyzers ?? []) {
			const slot = variantsById.get(analyzer.id) ?? {};
			slot[kind] = analyzer;
			variantsById.set(analyzer.id, slot);
		}
	};
	// Rows are registered before SQL, so an entry with both lists `rows` first.
	register("rows", init.rows);
	register("sql", init.sql);

	/** All registered ids in default (lexicographic) sort order. */
	const listAnalyzerIds = () => {
		const ids = [...variantsById.keys()];
		ids.sort();
		return ids;
	};

	/** The raw `{ rows?, sql? }` entry for an id, or `undefined` when unknown. */
	const getAnalyzerVariants = (id) => variantsById.get(id);

	/**
	 * Picks the variant to run: the SQL variant (falling back to rows) when the
	 * source supports SQL, otherwise the rows variant only.
	 */
	const resolveAnalyzer = (id, sourceSupportsSql) => {
		const variants = variantsById.get(id);
		if (!variants) return undefined;
		return sourceSupportsSql ? (variants.sql ?? variants.rows) : variants.rows;
	};

	/** Resolved analyzers, in id order, for a source with the given SQL support. */
	const listAnalyzersFor = (sourceSupportsSql) =>
		listAnalyzerIds()
			.map((id) => resolveAnalyzer(id, sourceSupportsSql))
			.filter(Boolean);

	/** Ids that resolve for `source`; SQL support means `source.executeSql` is a function. */
	const listAnalyzerIdsFor = (source) => {
		const supportsSql = typeof source.executeSql === "function";
		return listAnalyzerIds().filter((id) => resolveAnalyzer(id, supportsSql));
	};

	return {
		listAnalyzerIds,
		getAnalyzerVariants,
		resolveAnalyzer,
		listAnalyzersFor,
		listAnalyzerIdsFor
	};
}
|
|
48
|
+
/** Row limit used by the query-state builders when the caller passes none. */
const DEFAULT_LIMIT$1 = 25000;

/**
 * Builds the query state selecting the `query` and `page` dimensions for a period.
 *
 * @param {{ startDate: string, endDate: string }} period Date bounds passed to `between`.
 * @param {number} [limit] Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
	const selection = gsc.select(query, page);
	const inPeriod = between(date, period.startDate, period.endDate);
	return selection.where(inPeriod).limit(limit).getState();
}
|
|
52
|
+
/**
 * Builds the query state selecting only the `page` dimension for a period.
 *
 * @param {{ startDate: string, endDate: string }} period Date bounds passed to `between`.
 * @param {number} [limit] Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
	const selection = gsc.select(page);
	const inPeriod = between(date, period.startDate, period.endDate);
	return selection.where(inPeriod).limit(limit).getState();
}
|
|
55
|
+
/**
 * Builds the query state selecting only the `date` dimension for a period.
 *
 * @param {{ startDate: string, endDate: string }} period Date bounds passed to `between`.
 * @param {number} [limit] Row limit; defaults to `DEFAULT_LIMIT$1`.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
	const selection = gsc.select(date);
	const inPeriod = between(date, period.startDate, period.endDate);
	return selection.where(inPeriod).limit(limit).getState();
}
|
|
58
|
+
/** `requires` list given to an SQL variant when `sqlRequires` is not supplied. */
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Turns one analyzer definition into its `sql` and/or `rows` variant objects.
 *
 * A variant is produced only when its builder (`buildSql` / `buildRows`) is
 * given. The reducer for each variant is the specific one (`reduceSql` /
 * `reduceRows`), falling back to the shared `reduce`.
 *
 * @param {object} opts Definition: `id`, `buildSql`, `buildRows`, `reduce`,
 *   `reduceSql`, `reduceRows`, `sqlRequires`, `rowsRequires`.
 * @returns {{ id: *, sql: object | undefined, rows: object | undefined }}
 * @throws {Error} When a builder is supplied without any reducer for it.
 */
function defineAnalyzer(opts) {
	const { id, buildSql, buildRows } = opts;
	const { sqlRequires = DEFAULT_SQL_REQUIRES, rowsRequires = [] } = opts;
	const sqlReducer = opts.reduceSql ?? opts.reduce;
	const rowsReducer = opts.reduceRows ?? opts.reduce;
	if (buildSql && !sqlReducer) throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
	if (buildRows && !rowsReducer) throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);

	// Reducers receive arrays as-is; a keyed result with exactly one key is
	// unwrapped to that key's value; anything else is passed through untouched.
	const unwrapRows = (rows) => {
		if (Array.isArray(rows)) return rows;
		return pickSingle(rows) ?? rows;
	};

	let sql;
	if (buildSql && sqlReducer) {
		sql = {
			id,
			requires: sqlRequires,
			// Copies the known spec fields into a plan tagged `kind: "sql"`.
			build(params) {
				const {
					sql: text,
					params: bindings,
					current,
					previous,
					extraFiles,
					extraQueries,
					requiresAttachedTables
				} = buildSql(params);
				return {
					kind: "sql",
					sql: text,
					params: bindings,
					current,
					previous,
					extraFiles,
					extraQueries,
					requiresAttachedTables
				};
			},
			// The SQL reducer additionally receives `ctx.extras`.
			reduce(rows, ctx) {
				const { params, extras } = ctx;
				const outcome = sqlReducer(unwrapRows(rows), params, { extras });
				return {
					results: outcome.results,
					meta: outcome.meta
				};
			}
		};
	}

	let rows;
	if (buildRows && rowsReducer) {
		rows = {
			id,
			requires: rowsRequires,
			// Wraps every named query state as `{ state }` in a plan tagged `kind: "rows"`.
			build(params) {
				const wrapped = Object.entries(buildRows(params)).map(([name, state]) => [name, { state }]);
				return {
					kind: "rows",
					queries: Object.fromEntries(wrapped)
				};
			},
			// The rows reducer gets an empty context object.
			reduce(input, ctx) {
				const { params } = ctx;
				const outcome = rowsReducer(unwrapRows(input), params, {});
				return {
					results: outcome.results,
					meta: outcome.meta
				};
			}
		};
	}

	return {
		id,
		sql,
		rows
	};
}
|
|
114
|
+
/**
 * Returns the value of the only own enumerable key of `rows`.
 *
 * @param {object} rows Keyed result object.
 * @returns {*} The single value, or `undefined` when there are zero or several keys.
 */
function pickSingle(rows) {
	const names = Object.keys(rows);
	if (names.length !== 1) return undefined;
	const [only] = names;
	return rows[only];
}
|
|
118
|
+
/**
 * Default end of the analysis window: whatever `daysAgo(3)` returns.
 *
 * @returns {*} The value produced by `daysAgo`.
 */
function defaultEndDate() {
	const offsetDays = 3;
	return daysAgo(offsetDays);
}
|
|
121
|
+
/**
 * Default start of the analysis window: whatever `daysAgo(31)` returns.
 *
 * @returns {*} The value produced by `daysAgo`.
 */
function defaultStartDate() {
	const offsetDays = 31;
	return daysAgo(offsetDays);
}
|
|
124
|
+
/**
 * Resolves the analysis period from params, using the default start/end for
 * any bound that is missing or otherwise falsy (for example an empty string).
 *
 * @param {{ startDate?: string, endDate?: string }} params
 * @returns {{ startDate: *, endDate: * }}
 */
function periodOf(params) {
	const startDate = params.startDate || defaultStartDate();
	const endDate = params.endDate || defaultEndDate();
	return { startDate, endDate };
}
|
|
130
|
+
/**
 * Resolves the current and previous periods for a comparison analysis.
 *
 * @param {object} params Must carry truthy `prevStartDate` and `prevEndDate`;
 *   `type` is only used in the error message.
 * @returns {{ current: object, previous: { startDate: *, endDate: * } }}
 * @throws {Error} When either previous-period bound is missing.
 */
function comparisonOf(params) {
	const { prevStartDate, prevEndDate } = params;
	const hasPrevious = Boolean(prevStartDate) && Boolean(prevEndDate);
	if (!hasPrevious) throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
	const previous = {
		startDate: prevStartDate,
		endDate: prevEndDate
	};
	return {
		current: periodOf(params),
		previous
	};
}
|
|
140
|
+
/**
 * Coerces a value to a JavaScript number.
 *
 * Numbers pass through unchanged, bigints are converted, `null`/`undefined`
 * become 0, and everything else goes through `Number()` (which may yield NaN).
 *
 * @param {*} v
 * @returns {number}
 */
function num(v) {
	switch (typeof v) {
		case "number":
			return v;
		case "bigint":
			return Number(v);
		default:
			return v == null ? 0 : Number(v);
	}
}
|
|
146
|
+
/**
 * Indexes rows into a Map using caller-supplied key and value extractors.
 * A later row with the same key replaces the earlier one.
 *
 * @param {Iterable<*>} rows Rows to index.
 * @param {(row: *) => *} key Computes the map key for a row.
 * @param {(row: *) => *} value Computes the stored value for a row.
 * @param {(row: *) => boolean} [filter] When given, rows it rejects are skipped.
 * @returns {Map<*, *>}
 */
function buildPeriodMap(rows, key, value, filter) {
	const index = /* @__PURE__ */ new Map();
	for (const row of rows) {
		const accepted = !filter || filter(row);
		if (accepted) index.set(key(row), value(row));
	}
	return index;
}
|
|
154
|
+
/**
 * Builds a sort function that orders a copy of `items` by a numeric metric.
 *
 * @param {(item: *, metric: *) => number} getValue Reads the metric value of an item.
 * @param {*} defaultMetric Metric used when the sorter is called without `sortBy`.
 * @param {string} [defaultOrder="desc"] Order used when called without `sortOrder`;
 *   any value other than "desc" sorts ascending.
 * @returns {(items: Iterable<*>, sortBy?: *, sortOrder?: string) => Array<*>}
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
	return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
		const direction = sortOrder === "desc" ? -1 : 1;
		const compare = (left, right) => {
			const difference = getValue(left, sortBy) - getValue(right, sortBy);
			return difference * direction;
		};
		// Sort a copy so the caller's collection keeps its order.
		const copy = [...items];
		copy.sort(compare);
		return copy;
	};
}
|
|
160
|
+
/**
 * Builds a sort function whose direction depends on the metric being sorted.
 *
 * @param {string} defaultMetric Property sorted on when `sortBy` is omitted.
 * @param {Record<string, string>} orderByMetric Per-metric order; a metric
 *   mapped to "desc" sorts descending, anything else ascending.
 * @returns {(items: Iterable<object>, sortBy?: string) => Array<object>}
 */
function createMetricSorter(defaultMetric, orderByMetric) {
	return (items, sortBy = defaultMetric) => {
		const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
		const compare = (left, right) => {
			const difference = left[sortBy] - right[sortBy];
			return difference * direction;
		};
		// Sort a copy so the caller's collection keeps its order.
		const copy = [...items];
		copy.sort(compare);
		return copy;
	};
}
|
|
166
|
+
/**
 * Escapes regular-expression metacharacters so `s` matches literally when
 * embedded in a pattern.
 *
 * @param {string} s Raw text.
 * @returns {string} `s` with each metacharacter prefixed by a backslash.
 */
function escapeRegexAlt(s) {
	const prefixWithBackslash = (metachar) => `\\${metachar}`;
	return s.replace(/[.*+?^${}()|[\]\\]/g, prefixWithBackslash);
}
|
|
169
|
+
/**
 * Converts a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$6(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
|
|
172
|
+
/**
 * Splits keyword rows into brand and non-brand segments and totals their
 * clicks and impressions.
 *
 * A row is "brand" when its lower-cased query contains any lower-cased brand
 * term. Rows below `minImpressions` are dropped from both segments. A row
 * whose `query` is null or undefined is treated as an empty query (so it lands
 * in non-brand) instead of throwing.
 *
 * @param {Iterable<object>} keywords Rows with `query`, `clicks`, `impressions`.
 * @param {{ brandTerms: string[], minImpressions?: number }} options
 * @returns {{ brand: object[], nonBrand: object[], summary: object }} The
 *   original row objects per segment plus `brandClicks`, `nonBrandClicks`,
 *   `brandShare` (0 when there are no clicks), `brandImpressions` and
 *   `nonBrandImpressions`.
 */
function analyzeBrandSegmentation(keywords, options) {
	const { brandTerms, minImpressions = 10 } = options;
	const lowerBrandTerms = brandTerms.map((t) => t.toLowerCase());
	const brand = [];
	const nonBrand = [];
	const summary = {
		brandClicks: 0,
		nonBrandClicks: 0,
		brandShare: 0,
		brandImpressions: 0,
		nonBrandImpressions: 0
	};
	for (const row of keywords) {
		const impressions = num(row.impressions);
		if (impressions < minImpressions) continue;
		// Lower-case once per row rather than once per brand term.
		const lowerQuery = String(row.query ?? "").toLowerCase();
		if (lowerBrandTerms.some((term) => lowerQuery.includes(term))) {
			brand.push(row);
			summary.brandClicks += num(row.clicks);
			summary.brandImpressions += impressions;
		} else {
			nonBrand.push(row);
			summary.nonBrandClicks += num(row.clicks);
			summary.nonBrandImpressions += impressions;
		}
	}
	const totalClicks = summary.brandClicks + summary.nonBrandClicks;
	summary.brandShare = totalClicks > 0 ? summary.brandClicks / totalClicks : 0;
	return {
		brand,
		nonBrand,
		summary
	};
}
|
|
197
|
+
/**
 * "brand" analyzer: labels each query/page row as `brand` or `non-brand` and
 * summarises clicks and impressions per segment.
 *
 * The SQL variant classifies inside the query with a regex built from the
 * brand terms; the rows variant delegates to `analyzeBrandSegmentation`.
 * Both throw when `params.brandTerms` is missing or empty.
 */
const brandAnalyzer = defineAnalyzer({
	id: "brand",
	// Builds the SQL spec; bound params are [startDate, endDate, minImpressions, regex].
	buildSql(params) {
		const terms = params.brandTerms;
		if (!terms?.length) throw new Error("Brand analysis requires brandTerms");
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 10;
		const limit = params.limit ?? 1e4;
		// One alternation group of the escaped, lower-cased terms.
		const alternatives = terms.map((t) => escapeRegexAlt(t.toLowerCase()));
		const regex = `(${alternatives.join("|")})`;
		// `Number(limit)` keeps a non-numeric limit from being spliced into the SQL text.
		const sql = `
WITH agg AS (
SELECT
query,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
)
SELECT
query, page, clicks, impressions, ctr, position,
CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
FROM agg
ORDER BY clicks DESC
LIMIT ${Number(limit)}
`;
		return {
			sql,
			params: [
				startDate,
				endDate,
				minImpressions,
				regex
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	// Normalises SQL rows and totals clicks/impressions per segment.
	reduceSql(rows) {
		const input = Array.isArray(rows) ? rows : [];
		const results = input.map((r) => ({
			query: str$6(r.query),
			page: r.page == null ? void 0 : str$6(r.page),
			clicks: num(r.clicks),
			impressions: num(r.impressions),
			ctr: num(r.ctr),
			position: num(r.position),
			segment: str$6(r.segment)
		}));
		const brandTotals = { clicks: 0, impressions: 0 };
		const otherTotals = { clicks: 0, impressions: 0 };
		for (const row of results) {
			// Anything not labelled exactly "brand" counts as non-brand.
			const bucket = row.segment === "brand" ? brandTotals : otherTotals;
			bucket.clicks += row.clicks;
			bucket.impressions += row.impressions;
		}
		const totalClicks = brandTotals.clicks + otherTotals.clicks;
		return {
			results,
			meta: {
				total: results.length,
				summary: {
					brandClicks: brandTotals.clicks,
					nonBrandClicks: otherTotals.clicks,
					brandShare: totalClicks > 0 ? brandTotals.clicks / totalClicks : 0,
					brandImpressions: brandTotals.impressions,
					nonBrandImpressions: otherTotals.impressions
				}
			}
		};
	},
	// Rows variant fetches query/page rows for the period under the key `keywords`.
	buildRows(params) {
		const period = periodOf(params);
		return { keywords: keywordsQueryState(period, params.limit) };
	},
	// Segments in JS; brand rows are listed before non-brand rows.
	reduceRows(rows, params) {
		if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
		const { brand, nonBrand, summary } = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
			brandTerms: params.brandTerms,
			minImpressions: params.minImpressions
		});
		const tagWith = (segment) => (r) => ({
			...r,
			segment
		});
		return {
			results: [...brand.map(tagWith("brand")), ...nonBrand.map(tagWith("non-brand"))],
			meta: { summary }
		};
	}
});
|
|
296
|
+
/**
 * Sorter for row-based cannibalization results; sorts by "clicks" descending
 * unless told otherwise. Supported metrics: "clicks", "impressions",
 * "positionSpread" and "pageCount". Any other metric yields `undefined`
 * as the sort value.
 */
const sortRowResults$1 = createSorter((item, metric) => {
	if (metric === "clicks") return item.totalClicks;
	if (metric === "impressions") return item.totalImpressions;
	if (metric === "positionSpread") return item.positionSpread;
	if (metric === "pageCount") return item.pages.length;
	return undefined;
}, "clicks");
|
|
304
|
+
/**
 * Converts a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$5(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
|
|
307
|
+
/**
 * Reads a list that may arrive either as an array or as a JSON string.
 *
 * @param {*} v Array, JSON text, or anything else.
 * @returns {Array<*>} The array itself, the parsed JSON when it is an array,
 *   otherwise an empty array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$4(v) {
	if (Array.isArray(v)) return v;
	const isJsonText = typeof v === "string" && v.length > 0;
	if (!isJsonText) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
|
|
315
|
+
/**
 * Finds queries for which several pages rank close to each other.
 *
 * Rows below `minImpressions` are ignored. A query is reported when it has at
 * least `minPages` remaining pages and the gap between its best and worst
 * page position does not exceed `maxPositionSpread`.
 *
 * @param {Iterable<object>} rows Rows with `query`, `page`, `clicks`,
 *   `impressions`, `ctr`, `position`.
 * @param {object} [options] `minImpressions` (10), `maxPositionSpread` (10),
 *   `minPages` (2), `sortBy` ("clicks"), `sortOrder` ("desc").
 * @returns {Array<object>} Entries `{ query, pages, totalClicks,
 *   totalImpressions, positionSpread }`, each `pages` list ordered by clicks
 *   descending, the whole list ordered by `sortRowResults$1`.
 */
function analyzeCannibalization(rows, options = {}) {
	const { minImpressions = 10, maxPositionSpread = 10, minPages = 2, sortBy = "clicks", sortOrder = "desc" } = options;

	// Group the qualifying rows by query.
	const pagesByQuery = /* @__PURE__ */ new Map();
	for (const { query, page, clicks, impressions, ctr, position } of rows) {
		if (impressions < minImpressions) continue;
		const entry = {
			page,
			clicks,
			impressions,
			ctr,
			position
		};
		const bucket = pagesByQuery.get(query);
		if (bucket) bucket.push(entry);
		else pagesByQuery.set(query, [entry]);
	}

	const flagged = [];
	pagesByQuery.forEach((pages, query) => {
		if (pages.length < minPages) return;
		pages.sort((a, b) => b.clicks - a.clicks);
		const positions = pages.map((p) => p.position);
		const positionSpread = Math.max(...positions) - Math.min(...positions);
		if (positionSpread > maxPositionSpread) return;
		let totalClicks = 0;
		let totalImpressions = 0;
		for (const p of pages) {
			totalClicks += p.clicks;
			totalImpressions += p.impressions;
		}
		flagged.push({
			query,
			pages,
			totalClicks,
			totalImpressions,
			positionSpread
		});
	});
	return sortRowResults$1(flagged, sortBy, sortOrder);
}
|
|
347
|
+
/**
 * "cannibalization" analyzer: finds queries served by several URLs.
 *
 * SQL variant: aggregates per (query, url), keeps queries with at least two
 * qualifying URLs, and computes for each query the leading URL (rank 1 by
 * impressions, then clicks, then url), an HHI over impression shares,
 * `stolen_clicks`, and a rounded 0-100 `severity`. Its reducer also builds a
 * graph with one node per URL and one edge per competing URL pair.
 *
 * Rows variant: delegates to `analyzeCannibalization`.
 */
const cannibalizationAnalyzer = defineAnalyzer({
	id: "cannibalization",
	// Bound params are [startDate, endDate, minImpressions, minCompetitors, minQueryImpressions].
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 50;
		const minCompetitors = 2;
		// A query as a whole must reach twice the per-URL impression threshold.
		const minQueryImpressions = minImpressions * 2;
		const limit = params.limit ?? 200;
		return {
			sql: `
WITH agg AS (
SELECT
query,
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
query_totals AS (
SELECT
query,
SUM(impressions) AS total_impressions,
SUM(clicks) AS total_clicks,
COUNT(*) AS competitor_count
FROM agg
GROUP BY query
HAVING COUNT(*) >= ? AND SUM(impressions) >= ?
),
ranked AS (
SELECT
a.query,
a.url,
a.clicks,
a.impressions,
a.ctr,
a.position,
a.impressions / NULLIF(t.total_impressions, 0) AS share,
ROW_NUMBER() OVER (
PARTITION BY a.query
ORDER BY a.impressions DESC, a.clicks DESC, a.url ASC
) AS rnk
FROM agg a
JOIN query_totals t USING (query)
),
leader AS (
SELECT query, url AS leader_url, ctr AS leader_ctr, position AS leader_position
FROM ranked WHERE rnk = 1
),
events AS (
SELECT
r.query,
any_value(l.leader_url) AS leader_url,
any_value(l.leader_ctr) AS leader_ctr,
any_value(l.leader_position) AS leader_position,
SUM(POWER(r.share * 100.0, 2)) AS hhi,
SUM(CASE
WHEN r.rnk > 1 AND l.leader_ctr > r.ctr
THEN (l.leader_ctr - r.ctr) * r.impressions
ELSE 0.0
END) AS stolen_clicks,
to_json(list({
'url': r.url,
'clicks': r.clicks,
'impressions': r.impressions,
'ctr': r.ctr,
'position': r.position,
'share': r.share,
'rank': r.rnk
} ORDER BY r.rnk)) AS competitors
FROM ranked r
JOIN leader l USING (query)
GROUP BY r.query
)
SELECT
e.query AS keyword,
t.total_impressions AS totalImpressions,
t.total_clicks AS totalClicks,
t.competitor_count AS competitorCount,
e.leader_url AS leaderUrl,
e.leader_ctr AS leaderCtr,
e.leader_position AS leaderPosition,
e.hhi AS hhi,
GREATEST(0.0, 1.0 - e.hhi / 10000.0) AS fragmentation,
e.stolen_clicks AS stolenClicks,
e.competitors AS competitors,
CAST(ROUND(LEAST(100.0,
100.0 * POWER(
GREATEST(1.0 - e.hhi / 10000.0, 0.0)
* LEAST(e.stolen_clicks / GREATEST(t.total_clicks + e.stolen_clicks, 1.0), 1.0)
* LEAST(LOG10(GREATEST(t.total_impressions, 10.0)) / 5.0, 1.0),
1.0 / 3.0
)
)) AS DOUBLE) AS severity
FROM events e
JOIN query_totals t USING (query)
ORDER BY severity DESC, stolenClicks DESC
LIMIT ${Number(limit)}
`,
			params: [
				startDate,
				endDate,
				minImpressions,
				minCompetitors,
				minQueryImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	// Normalises the SQL rows and derives the URL competition graph.
	reduceSql(rows) {
		const events = (Array.isArray(rows) ? rows : []).map((r) => ({
			keyword: str$5(r.keyword),
			totalImpressions: num(r.totalImpressions),
			totalClicks: num(r.totalClicks),
			competitorCount: num(r.competitorCount),
			leaderUrl: str$5(r.leaderUrl),
			leaderCtr: num(r.leaderCtr),
			leaderPosition: num(r.leaderPosition),
			hhi: num(r.hhi),
			fragmentation: num(r.fragmentation),
			stolenClicks: num(r.stolenClicks),
			severity: num(r.severity),
			// `competitors` may be a JSON string or an already-parsed array.
			competitors: parseJsonList$4(r.competitors).map((c) => ({
				url: str$5(c.url),
				clicks: num(c.clicks),
				impressions: num(c.impressions),
				ctr: num(c.ctr),
				position: num(c.position),
				share: num(c.share),
				rank: num(c.rank)
			}))
		}));
		const nodeAgg = /* @__PURE__ */ new Map();
		const edgeAgg = /* @__PURE__ */ new Map();
		for (const ev of events) {
			// Node totals: impressions/clicks per URL plus the distinct keywords it competes on.
			for (const c of ev.competitors) {
				const n = nodeAgg.get(c.url) ?? {
					impressions: 0,
					clicks: 0,
					queries: /* @__PURE__ */ new Set()
				};
				n.impressions += c.impressions;
				n.clicks += c.clicks;
				n.queries.add(ev.keyword);
				nodeAgg.set(c.url, n);
			}
			// One undirected edge per unordered URL pair within the keyword.
			for (let i = 0; i < ev.competitors.length; i++) {
				for (let j = i + 1; j < ev.competitors.length; j++) {
					const a = ev.competitors[i];
					const b = ev.competitors[j];
					// Order the endpoints so (a, b) and (b, a) share one edge.
					const [src, tgt] = a.url < b.url ? [a.url, b.url] : [b.url, a.url];
					// JSON-encode the pair: plain concatenation lets distinct pairs
					// collide ("a" + "bc" === "ab" + "c") and merge into one edge.
					const key = JSON.stringify([src, tgt]);
					const weight = Math.min(a.impressions, b.impressions);
					const edge = edgeAgg.get(key) ?? {
						source: src,
						target: tgt,
						weight: 0,
						queries: 0
					};
					edge.weight += weight;
					edge.queries += 1;
					edgeAgg.set(key, edge);
				}
			}
		}
		const nodes = [...nodeAgg.entries()].map(([url, n]) => ({
			url,
			impressions: n.impressions,
			clicks: n.clicks,
			queryCount: n.queries.size
		}));
		const edges = [...edgeAgg.values()];
		const avgFragmentation = events.length > 0 ? events.reduce((s, e) => s + e.fragmentation, 0) / events.length : 0;
		const totalStolenClicks = events.reduce((s, e) => s + e.stolenClicks, 0);
		return {
			results: events,
			meta: {
				total: events.length,
				totalStolenClicks,
				avgFragmentation,
				graph: {
					nodes,
					edges
				}
			}
		};
	},
	// Rows variant fetches query/page rows for the period under the key `rows`.
	buildRows(params) {
		return { rows: keywordsQueryState(periodOf(params), params.limit) };
	},
	// Rows variant: thresholds left undefined fall back to analyzeCannibalization's defaults.
	reduceRows(rows, params) {
		const results = analyzeCannibalization(Array.isArray(rows) ? rows : [], {
			minImpressions: params.minImpressions,
			maxPositionSpread: params.maxPositionSpread,
			minPages: params.minPages
		});
		return {
			results,
			meta: { total: results.length }
		};
	}
});
|
|
556
|
+
/** Leading phrases that mark a keyword as belonging to an intent cluster. */
const INTENT_PREFIXES = [
	"how to",
	"what is",
	"what are",
	"why is",
	"why do",
	"where to",
	"when to",
	"best",
	"top",
	"vs",
	"versus",
	"compare",
	"review",
	"buy",
	"cheap",
	"free",
	"near me"
];
/**
 * Regex source for the SQL path: one of the intent prefixes at the start of
 * the string, followed by whitespace or the end of the string. Built from
 * `INTENT_PREFIXES` so the two cannot drift apart; none of the prefixes
 * contains a regex metacharacter, so no escaping is needed.
 */
const INTENT_PREFIXES_REGEX = `^(${INTENT_PREFIXES.join("|")})(\\s|$)`;
/** Matches one run of whitespace; used to split keywords into words. */
const WHITESPACE_RE = /\s+/;
|
|
577
|
+
/**
 * Converts a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$4(v) {
	return String(v ?? "");
}
|
|
580
|
+
/**
 * Reads a list that may arrive either as an array or as a JSON string.
 *
 * @param {*} v Array, JSON text, or anything else.
 * @returns {Array<*>} The array itself, the parsed JSON when it is an array,
 *   otherwise an empty array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$3(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	const decoded = JSON.parse(v);
	if (Array.isArray(decoded)) return decoded;
	return [];
}
|
|
588
|
+
/**
 * Returns the first intent prefix the keyword starts with, or `null`.
 *
 * The prefix must be a whole leading phrase: it has to be followed by
 * whitespace or be the entire keyword. This matches the `(\s|$)` boundary in
 * `INTENT_PREFIXES_REGEX`. A bare `startsWith(prefix)` would also accept
 * "bestiary" for "best" or "topology" for "top".
 *
 * @param {string} keyword Search query text.
 * @returns {string | null} The matching entry of `INTENT_PREFIXES`, or `null`.
 */
function extractIntentPrefix(keyword) {
	const lower = keyword.toLowerCase();
	for (const prefix of INTENT_PREFIXES) {
		if (!lower.startsWith(prefix)) continue;
		// An empty string here means the keyword is exactly the prefix.
		const next = lower.charAt(prefix.length);
		if (next === "" || /\s/.test(next)) return prefix;
	}
	return null;
}
|
|
593
|
+
/**
 * Returns the first `wordCount` lower-cased words of a keyword joined by a
 * single space, provided the keyword has more words than that.
 *
 * @param {string} keyword Search query text.
 * @param {number} [wordCount=2] Number of leading words to keep.
 * @returns {string | null} The prefix, or `null` when the keyword has
 *   `wordCount` words or fewer.
 */
function extractWordPrefix(keyword, wordCount = 2) {
	const tokens = keyword
		.toLowerCase()
		.split(WHITESPACE_RE)
		.filter((token) => token !== "");
	return tokens.length < wordCount + 1 ? null : tokens.slice(0, wordCount).join(" ");
}
|
|
598
|
+
/**
 * Groups keywords into clusters by intent prefix and/or leading-word prefix.
 *
 * Keywords below `minImpressions` are ignored. With `clusterBy` "intent" or
 * "both", keywords are first grouped by `extractIntentPrefix`. With "prefix"
 * or "both", the keywords that matched no intent prefix are grouped by
 * `extractWordPrefix`. Groups smaller than `minClusterSize` are not returned.
 *
 * `unclustered` lists every filtered keyword that is not part of a returned
 * cluster. This includes keywords whose intent group was too small to be
 * returned; they are not retried as word-prefix candidates.
 *
 * @param {Array<object>} keywords Rows with `query`, `clicks`, `impressions`, `position`.
 * @param {object} [options] `minClusterSize` (2), `minImpressions` (10),
 *   `clusterBy` ("both").
 * @returns {{ clusters: Array<object>, unclustered: Array<object> }} Clusters
 *   are ordered by `totalClicks` descending.
 */
function analyzeClustering(keywords, options = {}) {
	const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
	const filtered = keywords.filter((k) => num(k.impressions) >= minImpressions);
	const clusterMap = /* @__PURE__ */ new Map();
	// Queries claimed by the intent pass; the prefix pass skips them.
	const intentMatched = /* @__PURE__ */ new Set();
	if (clusterBy === "intent" || clusterBy === "both") {
		for (const kw of filtered) {
			const intent = extractIntentPrefix(kw.query);
			if (!intent) continue;
			const existing = clusterMap.get(intent);
			if (existing) existing.keywords.push(kw);
			else clusterMap.set(intent, {
				type: "intent",
				keywords: [kw]
			});
			intentMatched.add(kw.query);
		}
	}
	if (clusterBy === "prefix" || clusterBy === "both") {
		const prefixMap = /* @__PURE__ */ new Map();
		for (const kw of filtered) {
			if (intentMatched.has(kw.query)) continue;
			const prefix = extractWordPrefix(kw.query);
			if (!prefix) continue;
			const existing = prefixMap.get(prefix);
			if (existing) existing.push(kw);
			else prefixMap.set(prefix, [kw]);
		}
		for (const [prefix, kws] of prefixMap) {
			if (kws.length >= minClusterSize) clusterMap.set(prefix, {
				type: "prefix",
				keywords: kws
			});
		}
	}
	const clusters = [];
	// Queries that belong to a cluster that is actually returned.
	const clusteredQueries = /* @__PURE__ */ new Set();
	for (const [name, data] of clusterMap) {
		if (data.keywords.length < minClusterSize) continue;
		const totalClicks = data.keywords.reduce((sum, k) => sum + num(k.clicks), 0);
		const totalImpressions = data.keywords.reduce((sum, k) => sum + num(k.impressions), 0);
		const avgPosition = data.keywords.reduce((sum, k) => sum + num(k.position), 0) / data.keywords.length;
		clusters.push({
			clusterName: name,
			clusterType: data.type,
			keywords: data.keywords,
			totalClicks,
			totalImpressions,
			avgPosition,
			keywordCount: data.keywords.length
		});
		for (const kw of data.keywords) clusteredQueries.add(kw.query);
	}
	clusters.sort((a, b) => b.totalClicks - a.totalClicks);
	return {
		clusters,
		unclustered: filtered.filter((kw) => !clusteredQueries.has(kw.query))
	};
}
|
|
656
|
+
/**
 * "clustering" analyzer: groups queries into clusters named after an intent
 * prefix or after their first two words.
 *
 * The SQL variant classifies inside the query; the rows variant delegates to
 * `analyzeClustering`. `clusterBy` ("intent", "prefix" or "both", default
 * "both") selects which classifications are active.
 */
const clusteringAnalyzer = defineAnalyzer({
	id: "clustering",
	// Bound params are [startDate, endDate, minImpressions, minClusterSize].
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 10;
		const minClusterSize = params.minClusterSize ?? 2;
		const clusterBy = params.clusterBy ?? "both";
		// A disabled classification contributes a typed NULL column instead.
		const nullVarchar = `CAST(NULL AS VARCHAR)`;
		let intentExpr = nullVarchar;
		if (clusterBy === "intent" || clusterBy === "both") {
			intentExpr = `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')`;
		}
		let prefixExpr = nullVarchar;
		if (clusterBy === "prefix" || clusterBy === "both") {
			// First two words, only for queries that split into three or more parts.
			prefixExpr = `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
ELSE CAST(NULL AS VARCHAR) END`;
		}
		const sql = `
WITH agg AS (
SELECT
query,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query
HAVING SUM(impressions) >= ?
),
classified AS (
SELECT
query, clicks, impressions, ctr, position,
${intentExpr} AS intent_prefix,
${prefixExpr} AS word_prefix
FROM agg
),
keyed AS (
SELECT
query, clicks, impressions, ctr, position,
COALESCE(intent_prefix, word_prefix) AS cluster_name,
CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
FROM classified
WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
)
SELECT
cluster_name AS clusterName,
any_value(cluster_type) AS clusterType,
CAST(COUNT(*) AS DOUBLE) AS keywordCount,
${METRIC_EXPR.clicks} AS totalClicks,
${METRIC_EXPR.impressions} AS totalImpressions,
AVG(position) AS avgPosition,
to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
FROM keyed
GROUP BY cluster_name
HAVING COUNT(*) >= ?
ORDER BY totalClicks DESC
`;
		return {
			sql,
			params: [
				startDate,
				endDate,
				minImpressions,
				minClusterSize
			],
			current: {
				table: "keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	// Normalises cluster rows; `keywords` may be JSON text or an array.
	reduceSql(rows) {
		const toKeyword = (k) => ({
			query: str$4(k.query),
			clicks: num(k.clicks),
			impressions: num(k.impressions),
			ctr: num(k.ctr),
			position: num(k.position)
		});
		const toCluster = (r) => ({
			clusterName: str$4(r.clusterName),
			clusterType: str$4(r.clusterType),
			keywordCount: num(r.keywordCount),
			totalClicks: num(r.totalClicks),
			totalImpressions: num(r.totalImpressions),
			avgPosition: num(r.avgPosition),
			keywords: parseJsonList$3(r.keywords).map((k) => toKeyword(k))
		});
		const input = Array.isArray(rows) ? rows : [];
		const clusters = input.map((r) => toCluster(r));
		const count = clusters.length;
		return {
			results: clusters,
			meta: {
				total: count,
				totalClusters: count
			}
		};
	},
	// Rows variant fetches query/page rows for the period under the key `keywords`.
	buildRows(params) {
		const period = periodOf(params);
		return { keywords: keywordsQueryState(period, params.limit) };
	},
	// Rows variant: only the clusters are returned; `unclustered` is not exposed.
	reduceRows(rows, params) {
		const { clusterBy, minClusterSize, minImpressions } = params;
		const { clusters } = analyzeClustering(Array.isArray(rows) ? rows : [], {
			clusterBy,
			minClusterSize,
			minImpressions
		});
		return {
			results: clusters,
			meta: { totalClusters: clusters.length }
		};
	}
});
|
|
762
|
+
/**
 * Converts a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {*} v
 * @returns {string}
 */
function str$3(v) {
	return String(v ?? "");
}
|
|
765
|
+
/**
 * Reads a list that may arrive either as an array or as a JSON string.
 *
 * @param {*} v Array, JSON text, or anything else.
 * @returns {Array<*>} The array itself, the parsed JSON when it is an array,
 *   otherwise an empty array.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList$2(v) {
	if (Array.isArray(v)) return v;
	const isJsonText = typeof v === "string" && v.length > 0;
	if (!isJsonText) return [];
	const decoded = JSON.parse(v);
	return Array.isArray(decoded) ? decoded : [];
}
|
|
773
|
+
/**
 * Computes the Gini coefficient of a list of numbers.
 *
 * Uses the sorted-rank form: sum over i of (2i - n - 1) * x_i, divided by
 * n * sum(x), with i running from 1 over the values in ascending order.
 *
 * @param {number[]} values Input values; the array is not modified.
 * @returns {number} The coefficient, or 0 for an empty list or a zero total.
 */
function calculateGini(values) {
	const count = values.length;
	if (count === 0) return 0;
	const ascending = [...values];
	ascending.sort((a, b) => a - b);
	let total = 0;
	for (const value of ascending) total += value;
	if (total === 0) return 0;
	const weighted = ascending.reduce((acc, value, index) => acc + (2 * (index + 1) - count - 1) * value, 0);
	return weighted / (count * total);
}
|
|
783
|
+
/**
 * Computes a Herfindahl-Hirschman-style index: each share is scaled by 100
 * and squared, and the squares are summed.
 *
 * @param {number[]} shares - Fractional shares (e.g. `0.25` for 25%).
 * @returns {number} Sum of squared percentage shares; `0` for an empty list.
 */
function calculateHHI(shares) {
  let total = 0;
  shares.forEach((share) => {
    total += (share * 100) ** 2;
  });
  return total;
}
|
|
786
|
+
/**
 * Summarizes how concentrated clicks are across a set of keyed items.
 *
 * Items are ranked by clicks (descending); the Gini coefficient is computed
 * from the raw click values, the HHI from each item's share of total clicks,
 * and the top-N slice reports each leader's share.
 *
 * @param {{ key: *, clicks: number }[]} items - Items to analyze (not mutated).
 * @param {{ topN?: number }} [options] - `topN` (default 10) sets how many
 *   leading items are listed and counted toward `topNConcentration`.
 * @returns {{
 *   giniCoefficient: number, hhi: number, topNConcentration: number,
 *   topNItems: { key: *, clicks: number, share: number }[],
 *   totalItems: number, totalClicks: number, riskLevel: string
 * }} All-zero metrics with `riskLevel: "low"` when `items` is empty.
 *   `riskLevel` is "high" above an HHI of 2500, "medium" above 1500.
 */
function analyzeConcentration(items, options = {}) {
  const { topN = 10 } = options;
  if (items.length === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low"
    };
  }

  const ranked = [...items].sort((a, b) => b.clicks - a.clicks);
  let totalClicks = 0;
  for (const entry of ranked) totalClicks += entry.clicks;
  const hasClicks = totalClicks > 0;

  const giniCoefficient = calculateGini(ranked.map((entry) => entry.clicks));
  // With no clicks at all every share would be 0/0, so the HHI is taken over an empty list.
  const hhi = calculateHHI(hasClicks ? ranked.map((entry) => entry.clicks / totalClicks) : []);

  const topNItems = ranked.slice(0, topN).map((entry) => ({
    key: entry.key,
    clicks: entry.clicks,
    share: hasClicks ? entry.clicks / totalClicks : 0
  }));
  let topNClicks = 0;
  for (const leader of topNItems) topNClicks += leader.clicks;
  const topNConcentration = hasClicks ? topNClicks / totalClicks : 0;

  const riskLevel = hhi > 2500 ? "high" : hhi > 1500 ? "medium" : "low";

  return {
    giniCoefficient,
    hhi,
    topNConcentration,
    topNItems,
    totalItems: items.length,
    totalClicks,
    riskLevel
  };
}
|
|
823
|
+
/**
 * Runs the concentration analysis over page rows, keyed by `page`.
 *
 * @param {{ page: *, clicks: * }[]} pages - Page rows; `clicks` is passed through `num`.
 * @param {{ topN?: number }} [options] - Forwarded to `analyzeConcentration`.
 * @returns {object} The result of `analyzeConcentration`.
 */
function analyzePageConcentration(pages, options) {
  const items = pages.map((row) => {
    return { key: row.page, clicks: num(row.clicks) };
  });
  return analyzeConcentration(items, options);
}
|
|
829
|
+
/**
 * Runs the concentration analysis over keyword rows, keyed by `query`.
 *
 * @param {{ query: *, clicks: * }[]} keywords - Keyword rows; `clicks` is passed through `num`.
 * @param {{ topN?: number }} [options] - Forwarded to `analyzeConcentration`.
 * @returns {object} The result of `analyzeConcentration`.
 */
function analyzeKeywordConcentration(keywords, options) {
  const items = keywords.map((row) => {
    return { key: row.query, clicks: num(row.clicks) };
  });
  return analyzeConcentration(items, options);
}
|
|
835
|
+
/**
 * "concentration" analyzer: measures how concentrated clicks are across pages
 * or keywords (Gini coefficient, HHI, top-N share, risk level).
 *
 * `params.dimension` selects the entity: `"keywords"` analyzes keywords and
 * every other value (including a missing one) analyzes pages. All four hooks
 * apply the same rule so the SQL and row paths cannot disagree.
 *
 * NOTE(review): the SQL path drops keys whose summed clicks are 0 (`HAVING`),
 * while the row path hands every fetched row to the in-memory analysis —
 * confirm whether the row source already excludes zero-click entries.
 */
const concentrationAnalyzer = defineAnalyzer({
  id: "concentration",

  /**
   * Builds a single-row query computing all concentration metrics.
   * Bound parameters, in order: start date, end date, top-N cutoff.
   * `{{FILES}}` is a placeholder left in the SQL text (presumably expanded by
   * the engine from `current.partitions` — not visible in this file section).
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const topN = params.topN ?? 10;
    const isKeywords = params.dimension === "keywords";
    const table = isKeywords ? "keywords" : "pages";
    const keyCol = isKeywords ? "query" : "url";
    return {
      sql: `
        WITH items AS (
          SELECT
            ${keyCol} AS key,
            ${METRIC_EXPR.clicks} AS clicks
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY ${keyCol}
          HAVING SUM(clicks) > 0
        ),
        totals AS (
          SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
        ),
        ranked AS (
          SELECT
            i.key, i.clicks,
            i.clicks / NULLIF(t.total_clicks, 0) AS share,
            ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
            ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
            t.total_clicks AS tclicks,
            t.total_items AS titems
          FROM items i, totals t
        ),
        gini_num AS (
          SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
        ),
        hhi_calc AS (
          SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
        ),
        top_list AS (
          SELECT
            list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
            SUM(clicks) AS top_clicks
          FROM ranked WHERE rnk_desc <= ?
        )
        SELECT
          COALESCE(
            (SELECT weighted_sum FROM gini_num)
            / NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
            0.0
          ) AS giniCoefficient,
          COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
          COALESCE(
            CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
            / NULLIF((SELECT total_clicks FROM totals), 0),
            0.0
          ) AS topNConcentration,
          COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
          COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
          COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
          CASE
            WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
            WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
            ELSE 'low'
          END AS riskLevel
      `,
      params: [
        startDate,
        endDate,
        topN
      ],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Normalizes the single SQL result row into the analyzer result shape.
   * A missing row yields zeroed metrics (via `num`/`str$3` on `undefined`).
   */
  reduceSql(rows, params) {
    const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
    const topRaw = parseJsonList$2(r.topNItems);
    return {
      results: [{
        giniCoefficient: num(r.giniCoefficient),
        hhi: num(r.hhi),
        topNConcentration: num(r.topNConcentration),
        topNItems: topRaw.map((t) => ({
          key: str$3(t.key),
          clicks: num(t.clicks),
          share: num(t.share)
        })),
        totalItems: num(r.totalItems),
        totalClicks: num(r.totalClicks),
        riskLevel: str$3(r.riskLevel)
      }],
      meta: {
        total: 1,
        dimension: params.dimension || "pages"
      }
    };
  },

  /**
   * Requests the row set for the selected dimension, keyed `keywords` or `pages`.
   */
  buildRows(params) {
    const period = periodOf(params);
    // Same rule as buildSql: only "keywords" selects keywords, anything else is pages.
    return params.dimension === "keywords"
      ? { keywords: keywordsQueryState(period, params.limit) }
      : { pages: pagesQueryState(period, params.limit) };
  },

  /**
   * Runs the in-memory concentration analysis over fetched rows.
   * Accepts either a bare array or the keyed map produced for `buildRows`;
   * nullish input is treated as no rows.
   */
  reduceRows(rows, params) {
    const isKeywords = params.dimension === "keywords";
    // Read the key buildRows actually wrote, not the raw (possibly unknown) dimension string.
    const arr = Array.isArray(rows) ? rows : rows?.[isKeywords ? "keywords" : "pages"] ?? [];
    const analyze = isKeywords ? analyzeKeywordConcentration : analyzePageConcentration;
    return {
      results: [analyze(arr, { topN: params.topN })],
      meta: {
        total: 1,
        dimension: params.dimension || "pages"
      }
    };
  }
});
|
|
951
|
+
// Sorter used for decay results. The first argument names "lostClicks" and the
// map pairs each metric with a direction; presumably these are the default
// sort key and per-metric sort orders — createMetricSorter is defined outside
// this section, confirm there.
const sortResults$1 = createMetricSorter(
  "lostClicks",
  {
    lostClicks: "desc",
    declinePercent: "desc",
    currentClicks: "asc"
  }
);
|
|
956
|
+
/**
 * Coerces a value to a string, treating `null` and `undefined` as empty.
 *
 * @param {*} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$2(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
959
|
+
/**
 * Normalizes a value that should be a list: arrays are returned untouched,
 * non-empty strings are JSON-decoded, and everything else yields `[]`.
 *
 * @param {*} v - An array, a JSON string, or any other value.
 * @returns {Array} The array itself, the decoded array, or `[]` when the
 *   input is not an array / not a non-empty string / decodes to a non-array.
 * @throws {SyntaxError} Propagated from `JSON.parse` for malformed JSON text.
 */
function parseJsonList$1(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
967
|
+
/**
 * Finds pages whose clicks dropped between a previous and a current period.
 *
 * Only pages with at least `minPreviousClicks` clicks in the previous period
 * are considered. A page is reported when it lost clicks and the relative
 * decline (`lostClicks / previousClicks`) is at least `threshold`. Pages
 * absent from the current period count as 0 clicks at position 0.
 *
 * @param {{ current: object[], previous: object[] }} input - Page rows per period
 *   (each row is read for `page`, `clicks`, `position`).
 * @param {{ minPreviousClicks?: number, threshold?: number, sortBy?: string }} [options]
 *   Defaults: 50 clicks, 0.2 decline, sorted by "lostClicks".
 * @returns {object[]} Decayed pages ordered by `sortResults$1(results, sortBy)`.
 */
function analyzeDecay(input, options = {}) {
  const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;

  const pageOf = (row) => row.page;
  const metricsOf = (row) => ({
    clicks: num(row.clicks),
    position: num(row.position)
  });
  const hadEnoughClicks = (row) => num(row.clicks) >= minPreviousClicks;

  const currentMap = buildPeriodMap(input.current, pageOf, metricsOf);
  const previousMap = buildPeriodMap(input.previous, pageOf, metricsOf, hadEnoughClicks);

  const decayed = [];
  for (const [page, prev] of previousMap) {
    const curr = currentMap.get(page) || { clicks: 0, position: 0 };
    const lostClicks = prev.clicks - curr.clicks;
    const declinePercent = prev.clicks > 0 ? lostClicks / prev.clicks : 0;
    const qualifies = lostClicks > 0 && declinePercent >= threshold;
    if (!qualifies) continue;
    decayed.push({
      page,
      currentClicks: curr.clicks,
      previousClicks: prev.clicks,
      lostClicks,
      declinePercent,
      currentPosition: curr.position,
      previousPosition: prev.position,
      positionDrop: curr.position - prev.position
    });
  }
  return sortResults$1(decayed, sortBy);
}
|
|
998
|
+
/**
 * "decay" analyzer: reports pages that lost a meaningful share of their
 * clicks between the previous and the current comparison period.
 *
 * Defaults (both paths): previous-period floor of 50 clicks, decline
 * threshold of 0.2. The SQL path additionally returns a weekly
 * clicks/impressions series per page and caps the row count.
 */
const decayAnalyzer = defineAnalyzer({
  id: "decay",

  /**
   * Builds the comparison query over the "pages" table.
   * Bound parameters, in order: current range, previous range, previous-click
   * floor, current range and previous range again (weekly series), threshold.
   * `{{FILES}}` / `{{FILES_PREV}}` are placeholders left in the SQL text
   * (presumably expanded by the engine from `current` / `previous`).
   */
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minPreviousClicks = params.minPreviousClicks ?? 50;
    const threshold = params.threshold ?? .2;
    // The limit is interpolated into the SQL text, so sanitize it with the
    // file's shared helper: non-numeric or non-positive values fall back to
    // 2000 instead of producing `LIMIT NaN` / a negative LIMIT.
    const limit = clampLimit(params.limit, 2e3);
    return {
      sql: `
        WITH cur AS (
          SELECT
            url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
        ),
        prev AS (
          SELECT
            url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY url
          HAVING SUM(clicks) >= ?
        ),
        weekly AS (
          SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM (
            SELECT url, date, clicks, impressions
            FROM read_parquet({{FILES}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
            UNION ALL
            SELECT url, date, clicks, impressions
            FROM read_parquet({{FILES_PREV}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
          )
          GROUP BY url, week
        ),
        series_by_url AS (
          SELECT url, to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
          FROM weekly GROUP BY url
        ),
        joined AS (
          SELECT
            p.url AS page,
            COALESCE(c.clicks, 0.0) AS currentClicks,
            p.clicks AS previousClicks,
            (p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
            (p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
            COALESCE(c.position, 0.0) AS currentPosition,
            p.position AS previousPosition,
            (COALESCE(c.position, 0.0) - p.position) AS positionDrop,
            s.seriesJson
          FROM prev p
          LEFT JOIN cur c ON p.url = c.url
          LEFT JOIN series_by_url s ON p.url = s.url
        )
        SELECT *
        FROM joined
        WHERE declinePercent >= ? AND lostClicks > 0
        ORDER BY lostClicks DESC
        LIMIT ${limit}
      `,
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minPreviousClicks,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        threshold
      ],
      current: {
        table: "pages",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "pages",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },

  /**
   * Normalizes SQL result rows; `seriesJson` is decoded into `series`.
   * Non-array input is treated as no rows.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    return {
      results: arr.map((r) => ({
        page: str$2(r.page),
        currentClicks: num(r.currentClicks),
        previousClicks: num(r.previousClicks),
        lostClicks: num(r.lostClicks),
        declinePercent: num(r.declinePercent),
        currentPosition: num(r.currentPosition),
        previousPosition: num(r.previousPosition),
        positionDrop: num(r.positionDrop),
        series: parseJsonList$1(r.seriesJson).map((s) => ({
          week: str$2(s.week),
          clicks: num(s.clicks),
          impressions: num(s.impressions)
        }))
      })),
      meta: { total: arr.length }
    };
  },

  /** Requests page rows for both comparison periods. */
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: pagesQueryState(current, params.limit),
      previous: pagesQueryState(previous, params.limit)
    };
  },

  /**
   * Runs the in-memory decay analysis. Expects a `{ current, previous }` map;
   * an array or falsy `rows` is treated as two empty periods.
   */
  reduceRows(rows, params) {
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const results = analyzeDecay({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      minPreviousClicks: params.minPreviousClicks,
      threshold: params.threshold
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
|
|
1138
|
+
/**
 * Percentage change from `previous` to `current`.
 *
 * @param {number} current - New value.
 * @param {number} previous - Baseline value.
 * @returns {number} `(current - previous) / previous * 100`; when the
 *   baseline is exactly 0 the result is 100 for a positive `current`, else 0.
 */
function percentDifference(current, previous) {
  if (previous !== 0) {
    return (current - previous) / previous * 100;
  }
  return current > 0 ? 100 : 0;
}
|
|
1142
|
+
/**
 * Coerces a value to a string, treating `null` and `undefined` as empty.
 *
 * @param {*} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$1(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
1145
|
+
/**
 * Normalizes a value that should be a list: arrays are returned untouched,
 * non-empty strings are JSON-decoded, and everything else yields `[]`.
 *
 * @param {*} v - An array, a JSON string, or any other value.
 * @returns {Array} The array itself, the decoded array, or `[]` when the
 *   input is not an array / not a non-empty string / decodes to a non-array.
 * @throws {SyntaxError} Propagated from `JSON.parse` for malformed JSON text.
 */
function parseJsonList(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
1153
|
+
/**
 * Classifies keywords as rising, declining, or stable by comparing the
 * current period against a (optionally normalized) previous period.
 *
 * Previous-period clicks and impressions are divided by
 * `input.normalizationFactor` (default 1). Current rows below
 * `minImpressions` are skipped. A keyword is "rising"/"declining" when its
 * click change is positive/negative and `|clicksChangePercent| / 100` is at
 * least `changeThreshold`; otherwise it is "stable".
 *
 * @param {{ current: object[], previous: object[], normalizationFactor?: number }} input
 *   Keyword rows per period (read for `query`, `page`, `clicks`, `impressions`, `position`).
 * @param {{ changeThreshold?: number, minImpressions?: number, sortBy?: string }} [options]
 *   Defaults: 0.2, 50, "clicksChange". Unknown `sortBy` values sort like "clicksChange".
 * @returns {{ rising: object[], declining: object[], stable: object[] }}
 *   `rising`/`declining` use the selected sort; `stable` is ordered by recent clicks, descending.
 */
function analyzeMovers(input, options = {}) {
  const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
  const normFactor = input.normalizationFactor ?? 1;

  const baselineMap = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
    clicks: num(r.clicks) / normFactor,
    impressions: num(r.impressions) / normFactor,
    position: num(r.position),
    page: r.page ?? null
  }));

  // First page seen per query; current-period rows take precedence over previous ones.
  const pageMap = /* @__PURE__ */ new Map();
  for (const periodRows of [input.current, input.previous]) {
    for (const row of periodRows) {
      if (row.page && !pageMap.has(row.query)) pageMap.set(row.query, row.page);
    }
  }

  // Stand-in for keywords with no previous-period entry.
  const noBaseline = {
    clicks: 0,
    impressions: 0,
    position: 0,
    page: null
  };

  const rising = [];
  const declining = [];
  const stable = [];
  for (const row of input.current) {
    const impressions = num(row.impressions);
    const clicks = num(row.clicks);
    const position = num(row.position);
    if (impressions < minImpressions) continue;

    const baseline = baselineMap.get(row.query) || noBaseline;
    const roundedBaselineClicks = Math.round(baseline.clicks);
    const clicksChangePercent = percentDifference(clicks, baseline.clicks);
    const impressionsChangePercent = percentDifference(impressions, baseline.impressions);

    const data = {
      keyword: row.query,
      page: pageMap.get(row.query) ?? null,
      recentClicks: clicks,
      recentImpressions: impressions,
      recentPosition: position,
      baselineClicks: roundedBaselineClicks,
      baselineImpressions: Math.round(baseline.impressions),
      baselinePosition: baseline.position,
      clicksChange: clicks - roundedBaselineClicks,
      clicksChangePercent,
      impressionsChangePercent,
      positionChange: position - baseline.position
    };

    const isSignificant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
    if (isSignificant && clicksChangePercent > 0) rising.push(data);
    else if (isSignificant && clicksChangePercent < 0) declining.push(data);
    else stable.push(data);
  }

  const byValueDesc = (field) => (a, b) => b[field] - a[field];
  const byMagnitudeDesc = (field) => (a, b) => Math.abs(b[field]) - Math.abs(a[field]);
  const comparators = new Map([
    ["clicks", byValueDesc("recentClicks")],
    ["impressions", byValueDesc("recentImpressions")],
    ["clicksChange", byMagnitudeDesc("clicksChangePercent")],
    ["impressionsChange", byMagnitudeDesc("impressionsChangePercent")],
    ["positionChange", byMagnitudeDesc("positionChange")]
  ]);
  const sortFn = comparators.get(sortBy) ?? comparators.get("clicksChange");

  rising.sort(sortFn);
  declining.sort(sortFn);
  stable.sort(byValueDesc("recentClicks"));

  return {
    rising,
    declining,
    stable
  };
}
|
|
1219
|
+
/**
 * "movers" analyzer: classifies keywords as rising or declining between the
 * previous and current comparison period (stable ones are only counted).
 *
 * Defaults (both paths): minimum 50 current impressions, change threshold 0.2.
 * The SQL path compares per (query, url) pair and attaches a weekly series;
 * the row path delegates to `analyzeMovers`, which keys on the query alone.
 */
const moversAnalyzer = defineAnalyzer({
  id: "movers",

  /**
   * Builds the comparison query over the "page_keywords" table.
   * Bound parameters, in order: current range, previous range, current range
   * and previous range again (weekly series), impression floor, and the change
   * threshold twice (rising and declining branches).
   * `{{FILES}}` / `{{FILES_PREV}}` are placeholders left in the SQL text
   * (presumably expanded by the engine from `current` / `previous`).
   */
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const changeThreshold = params.changeThreshold ?? .2;
    // The limit is interpolated into the SQL text, so sanitize it with the
    // file's shared helper: non-numeric or non-positive values fall back to
    // 2000 instead of producing `LIMIT NaN` / a negative LIMIT.
    const limit = clampLimit(params.limit, 2e3);
    return {
      sql: `
        WITH cur AS (
          SELECT
            query, url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
        ),
        prev AS (
          SELECT
            query, url,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES_PREV}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
        ),
        weekly AS (
          SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions
          FROM (
            SELECT query, url, date, clicks, impressions
            FROM read_parquet({{FILES}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
            UNION ALL
            SELECT query, url, date, clicks, impressions
            FROM read_parquet({{FILES_PREV}}, union_by_name = true)
            WHERE date >= ? AND date <= ?
          )
          GROUP BY query, url, week
        ),
        series_by_entity AS (
          SELECT query, url, to_json(list({
            'week': strftime(week, '%Y-%m-%d'),
            'clicks': clicks,
            'impressions': impressions
          } ORDER BY week)) AS seriesJson
          FROM weekly GROUP BY query, url
        ),
        joined AS (
          SELECT
            c.query AS keyword,
            c.url AS page,
            c.clicks AS recentClicks,
            c.impressions AS recentImpressions,
            c.position AS recentPosition,
            COALESCE(p.clicks, 0.0) AS baselineClicks,
            COALESCE(p.impressions, 0.0) AS baselineImpressions,
            COALESCE(p.position, 0.0) AS baselinePosition,
            (c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
            CASE
              WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
              ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
            END AS clicksChangePercent,
            CASE
              WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
              ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
            END AS impressionsChangePercent,
            (c.position - COALESCE(p.position, 0.0)) AS positionChange,
            s.seriesJson
          FROM cur c
          LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
          LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
          WHERE c.impressions >= ?
        )
        SELECT *,
          CASE
            WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
            WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
            ELSE 'stable'
          END AS direction
        FROM joined
        ORDER BY ABS(clicksChangePercent) DESC
        LIMIT ${limit}
      `,
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minImpressions,
        changeThreshold,
        changeThreshold
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "page_keywords",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },

  /**
   * Normalizes SQL result rows and splits them by the SQL-computed `direction`.
   * Results list rising rows first, then declining; stable rows are only counted.
   */
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$1(r.keyword),
      page: r.page == null ? null : str$1(r.page),
      recentClicks: num(r.recentClicks),
      recentImpressions: num(r.recentImpressions),
      recentPosition: num(r.recentPosition),
      baselineClicks: Math.round(num(r.baselineClicks)),
      baselineImpressions: Math.round(num(r.baselineImpressions)),
      baselinePosition: num(r.baselinePosition),
      clicksChange: num(r.clicksChange),
      clicksChangePercent: num(r.clicksChangePercent),
      impressionsChangePercent: num(r.impressionsChangePercent),
      positionChange: num(r.positionChange),
      direction: str$1(r.direction),
      series: parseJsonList(r.seriesJson).map((s) => ({
        week: str$1(s.week),
        clicks: num(s.clicks),
        impressions: num(s.impressions)
      }))
    }));
    const rising = normalized.filter((r) => r.direction === "rising");
    const declining = normalized.filter((r) => r.direction === "declining");
    const stable = normalized.filter((r) => r.direction === "stable");
    const combined = [...rising, ...declining];
    return {
      results: combined,
      meta: {
        total: combined.length,
        rising: rising.length,
        declining: declining.length,
        stable: stable.length
      }
    };
  },

  /** Requests keyword rows for both comparison periods. */
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: keywordsQueryState(current, params.limit),
      previous: keywordsQueryState(previous, params.limit)
    };
  },

  /**
   * Runs the in-memory movers analysis. Expects a `{ current, previous }` map;
   * an array or falsy `rows` is treated as two empty periods. The meta block
   * carries the same counters as the SQL path (`total`, `rising`,
   * `declining`, `stable`).
   */
  reduceRows(rows, params) {
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const result = analyzeMovers({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      changeThreshold: params.changeThreshold,
      minImpressions: params.minImpressions
    });
    const combined = [...result.rising.map((r) => ({
      ...r,
      direction: "rising"
    })), ...result.declining.map((r) => ({
      ...r,
      direction: "declining"
    }))];
    return {
      results: combined,
      meta: {
        total: combined.length,
        rising: result.rising.length,
        declining: result.declining.length,
        stable: result.stable.length
      }
    };
  }
});
|
|
1399
|
+
// Row limit used when the caller supplies none.
const DEFAULT_LIMIT = 1e3;
// Upper bound applied to every caller-supplied limit.
const MAX_LIMIT = 5e4;

/**
 * Sanitizes a caller-supplied row limit.
 *
 * The value is coerced with `Number`, truncated down to a whole number (a
 * row count cannot be fractional, and the result is interpolated into SQL
 * `LIMIT` clauses), and capped at `MAX_LIMIT`. Anything that is not a finite
 * number of at least 1 after truncation yields `fallback` unchanged.
 *
 * @param {*} limit - Requested limit; `null`/`undefined` means "use fallback".
 * @param {number} [fallback=DEFAULT_LIMIT] - Value used for missing/invalid limits.
 * @returns {number} A whole number in `[1, MAX_LIMIT]`, or `fallback`.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
  const n = Math.floor(Number(limit ?? fallback));
  if (!Number.isFinite(n) || n <= 0) return fallback;
  return Math.min(n, MAX_LIMIT);
}
|
|
1406
|
+
/**
 * Sanitizes a caller-supplied row offset.
 *
 * @param {*} offset - Requested offset; `null`/`undefined` count as 0.
 * @returns {number} The offset coerced with `Number` and rounded down, or `0`
 *   when it is negative or not a finite number.
 */
function clampOffset(offset) {
  const n = Number(offset ?? 0);
  return Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
}
|
|
1411
|
+
/**
 * Renders a SQL pagination clause from sanitized limit/offset values.
 *
 * @param {{ limit?: *, offset?: * }} input - Raw pagination request.
 * @returns {string} `LIMIT <n>` or, when the clamped offset is positive,
 *   `LIMIT <n> OFFSET <m>`.
 */
function paginateClause(input) {
  const limit = clampLimit(input.limit);
  const offset = clampOffset(input.offset);
  const limitClause = `LIMIT ${limit}`;
  if (offset > 0) return `${limitClause} OFFSET ${offset}`;
  return limitClause;
}
|
|
1416
|
+
/**
 * Applies limit/offset pagination to an in-memory array.
 *
 * The limit falls back to `rows.length` (rather than the SQL default) when
 * the caller supplies none, and is still subject to `clampLimit`'s cap.
 *
 * @param {Array} rows - Full, already-sorted result list (not mutated).
 * @param {{ limit?: *, offset?: * }} input - Raw pagination request.
 * @returns {Array} The selected slice of `rows`.
 */
function paginateInMemory(rows, input) {
  const pageSize = clampLimit(input.limit, rows.length);
  const start = clampOffset(input.offset);
  const end = start + pageSize;
  return rows.slice(start, end);
}
|
|
1421
|
+
// Expected click-through rate for each whole search position 1 through 10.
const EXPECTED_CTR_BY_POSITION = {
  1: 0.3,
  2: 0.15,
  3: 0.1,
  4: 0.07,
  5: 0.05,
  6: 0.04,
  7: 0.03,
  8: 0.025,
  9: 0.02,
  10: 0.015
};

/**
 * Looks up the expected CTR for a (possibly fractional) position.
 *
 * The position is clamped into [1, 10] and rounded to the nearest whole
 * position before the lookup.
 *
 * @param {number} position - Average search position.
 * @returns {number} The table value, or `0.01` when the lookup finds nothing
 *   (e.g. for a `NaN` position).
 */
function getExpectedCtr(position) {
  const capped = Math.min(position, 10);
  const slot = Math.round(Math.max(1, capped));
  return EXPECTED_CTR_BY_POSITION[slot] || 0.01;
}
|
|
1436
|
+
/**
 * Scores a ranking position for improvement potential.
 *
 * Positions up to 3 score a flat 0.2 and positions beyond 50 a flat 0.1.
 * In between, the score peaks at 1 for position 11 and falls off linearly
 * with distance from 11, reaching 0 at a distance of 15 or more.
 *
 * @param {number} position - Average search position.
 * @returns {number} Score in [0, 1].
 */
function calculatePositionScore(position) {
  if (position <= 3) return 0.2;
  if (position > 50) return 0.1;
  const distanceFromPeak = Math.abs(position - 11);
  const linearScore = 1 - distanceFromPeak / 15;
  return Math.max(0, linearScore);
}
|
|
1442
|
+
/**
 * Scores impression volume on a logarithmic scale.
 *
 * @param {number} impressions - Impression count.
 * @returns {number} `0` for zero or negative counts, otherwise
 *   `log10(impressions) / 5` capped at 1 (so 100,000+ impressions score 1).
 */
function calculateImpressionScore(impressions) {
  const hasNone = impressions <= 0;
  return hasNone ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
|
|
1446
|
+
/**
 * Scores how far the actual CTR falls short of the expected CTR for a position.
 *
 * @param {number} actualCtr - Observed click-through rate (same scale as the
 *   expected-CTR table, i.e. a fraction).
 * @param {number} position - Average search position, passed to `getExpectedCtr`.
 * @returns {number} `0` when the actual CTR meets or beats the expectation,
 *   otherwise the shortfall as a fraction of the expected CTR, capped at 1.
 */
function calculateCtrGapScore(actualCtr, position) {
  const expectedCtr = getExpectedCtr(position);
  const meetsExpectation = actualCtr >= expectedCtr;
  if (meetsExpectation) return 0;
  const shortfall = expectedCtr - actualCtr;
  return Math.min(shortfall / expectedCtr, 1);
}
|
|
1452
|
+
// Sorter used for opportunity results. The first argument names
// "opportunityScore" and the map pairs each metric with a direction;
// presumably these are the default sort key and per-metric sort orders —
// createMetricSorter is defined outside this section, confirm there.
const sortResults = createMetricSorter(
  "opportunityScore",
  {
    opportunityScore: "desc",
    potentialClicks: "desc",
    impressions: "desc",
    position: "asc"
  }
);
|
|
1458
|
+
/**
 * "opportunity" analyzer: ranks keyword/page pairs by an opportunity score,
 * the geometric mean of three equally weighted factors (position score,
 * impression score, CTR-gap score) scaled to 0-100 and rounded.
 *
 * `potentialClicks` estimates clicks at the expected CTR of the current
 * position capped at 3. Rows below `minImpressions` (default 100) are dropped.
 */
const opportunityAnalyzer = defineAnalyzer({
  id: "opportunity",

  /**
   * Builds the scoring query over the "page_keywords" table.
   * Bound parameters, in order: start date, end date, impression floor.
   * Pagination is rendered into the SQL text by `paginateClause` with a
   * default limit of 1000. `{{FILES}}` is a placeholder left in the SQL text
   * (presumably expanded by the engine from `current.partitions`).
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 100;
    // Factor weights are fixed at 1 each; they are interpolated as exponents.
    const weights = { position: 1, impressions: 1, ctrGap: 1 };
    const weightSum = weights.position + weights.impressions + weights.ctrGap;
    const pagination = paginateClause({
      limit: params.limit ?? 1e3,
      offset: params.offset
    });
    return {
      sql: `
        WITH agg AS (
          SELECT
            query AS keyword,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        ),
        scored AS (
          SELECT
            keyword, page, clicks, impressions, ctr, position,
            CASE
              WHEN position <= 3 THEN 0.2
              WHEN position > 50 THEN 0.1
              ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
            END AS positionScore,
            CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
            CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
              WHEN 1 THEN 0.30
              WHEN 2 THEN 0.15
              WHEN 3 THEN 0.10
              WHEN 4 THEN 0.07
              WHEN 5 THEN 0.05
              WHEN 6 THEN 0.04
              WHEN 7 THEN 0.03
              WHEN 8 THEN 0.025
              WHEN 9 THEN 0.02
              WHEN 10 THEN 0.015
              ELSE 0.01
            END AS expectedCtr
          FROM agg
        ),
        gapped AS (
          SELECT
            *,
            CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
          FROM scored
        )
        SELECT
          keyword, page, clicks, impressions, ctr, position,
          CAST(ROUND(POWER(
            POWER(positionScore, ${weights.position}) * POWER(impressionScore, ${weights.impressions}) * POWER(ctrGapScore, ${weights.ctrGap}),
            1.0 / ${weightSum}
          ) * 100) AS DOUBLE) AS opportunityScore,
          CAST(ROUND(impressions * (
            CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
              WHEN 1 THEN 0.30
              WHEN 2 THEN 0.15
              WHEN 3 THEN 0.10
              ELSE 0.10
            END
          )) AS DOUBLE) AS potentialClicks,
          positionScore, impressionScore, ctrGapScore
        FROM gapped
        ORDER BY opportunityScore DESC
        ${pagination}
      `,
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Normalizes SQL result rows, nesting the three factor scores under `factors`.
   * Non-array input is treated as no rows.
   */
  reduceSql(rows) {
    const list = Array.isArray(rows) ? rows : [];
    const results = list.map((r) => {
      const keyword = r.keyword == null ? "" : String(r.keyword);
      const page = r.page == null ? null : String(r.page);
      return {
        keyword,
        page,
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        opportunityScore: num(r.opportunityScore),
        potentialClicks: num(r.potentialClicks),
        factors: {
          positionScore: num(r.positionScore),
          impressionScore: num(r.impressionScore),
          ctrGapScore: num(r.ctrGapScore)
        }
      };
    });
    return {
      results,
      meta: { total: list.length }
    };
  },

  /** Requests keyword rows for the analysis period. */
  buildRows(params) {
    const period = periodOf(params);
    return { keywords: keywordsQueryState(period, params.limit) };
  },

  /**
   * Scores keyword rows in memory, sorts by opportunity score, and paginates.
   * Only a bare array of rows is consumed; any other `rows` value is treated
   * as empty. `meta.total` counts rows before pagination.
   */
  reduceRows(rows, params) {
    const keywords = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 100;
    // Factor weights are fixed at 1 each, mirroring the SQL path.
    const weights = { position: 1, impressions: 1, ctrGap: 1 };
    const weightSum = weights.position + weights.impressions + weights.ctrGap;

    const scoredRows = [];
    for (const row of keywords) {
      const impressions = num(row.impressions);
      const position = num(row.position);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);
      if (impressions < minImpressions) continue;

      const factors = {
        positionScore: calculatePositionScore(position),
        impressionScore: calculateImpressionScore(impressions),
        ctrGapScore: calculateCtrGapScore(ctr, position)
      };
      const weightedProduct = factors.positionScore ** weights.position
        * factors.impressionScore ** weights.impressions
        * factors.ctrGapScore ** weights.ctrGap;
      const geometricMean = weightedProduct ** (1 / weightSum);
      // Clicks the row would earn at the expected CTR of position 3 or better.
      const targetCtr = getExpectedCtr(Math.min(3, position));

      scoredRows.push({
        keyword: row.query,
        page: row.page ?? null,
        clicks,
        impressions,
        ctr,
        position,
        opportunityScore: Math.round(geometricMean * 100),
        potentialClicks: Math.round(impressions * targetCtr),
        factors
      });
    }

    const sorted = sortResults(scoredRows, "opportunityScore");
    const paged = paginateInMemory(sorted, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: sorted.length,
        returned: paged.length
      }
    };
  }
});
|
|
1621
|
+
/**
 * Convert a value to its string form.
 *
 * @param {*} v - Any value.
 * @returns {string} `""` for `null`/`undefined`, otherwise `String(v)`.
 */
function str(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
1624
|
+
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * Only the exact values `true`, `1` and `"true"` count as true; every other
 * value (including `"1"`, `"TRUE"` and other truthy values) yields false.
 *
 * @param {*} v - Value to interpret.
 * @returns {boolean}
 */
function bool(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
|
|
1627
|
+
/**
 * Coefficient of variation (population standard deviation divided by the
 * mean) of a list of numbers, capped at 1.
 *
 * @param {number[]} values - Samples to measure.
 * @returns {number} 0 for an empty list or a zero mean; otherwise
 *   `min(stddev / mean, 1)`.
 */
function calculateCV(values) {
  const count = values.length;
  if (count === 0) return 0;

  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  const mean = total / count;
  // A zero mean would make the ratio undefined; report "no variation".
  if (mean === 0) return 0;

  let squaredDeviations = 0;
  values.forEach((value) => {
    squaredDeviations += (value - mean) ** 2;
  });
  const stdDev = Math.sqrt(squaredDeviations / count);
  return Math.min(stdDev / mean, 1);
}
|
|
1634
|
+
/**
 * Aggregate daily rows into calendar months and flag seasonal peaks/troughs.
 *
 * Each row's `date` is bucketed by its first seven characters (`YYYY-MM`)
 * and the chosen metric is summed per bucket. A month is a peak when its
 * total exceeds 1.5x the monthly average and a trough when it falls below
 * 0.5x. `strength` is the value returned by `calculateCV` for the monthly
 * totals.
 *
 * @param {Array<{date: string, clicks: number, impressions: number}>} dates
 *   Daily rows.
 * @param {{metric?: string}} [options] - `metric` defaults to `"clicks"`;
 *   any other value selects `impressions`.
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[],
 *   troughMonths: string[], monthlyBreakdown: object[],
 *   insufficientData: boolean}} `insufficientData` is true when fewer than
 *   12 distinct months are present.
 */
function analyzeSeasonality(dates, options = {}) {
  const { metric = "clicks" } = options;

  if (dates.length === 0) {
    return {
      hasSeasonality: false,
      strength: 0,
      peakMonths: [],
      troughMonths: [],
      monthlyBreakdown: [],
      insufficientData: true
    };
  }

  // Sum the selected metric per YYYY-MM bucket.
  const useClicks = metric === "clicks";
  const totalsByMonth = /* @__PURE__ */ new Map();
  for (const entry of dates) {
    const monthKey = entry.date.substring(0, 7);
    const amount = useClicks ? entry.clicks : entry.impressions;
    totalsByMonth.set(monthKey, (totalsByMonth.get(monthKey) || 0) + amount);
  }

  const months = [...totalsByMonth.keys()].sort();
  const values = months.map((monthKey) => totalsByMonth.get(monthKey) || 0);
  const insufficientData = months.length < 12;

  let totalValue = 0;
  values.forEach((monthTotal) => {
    totalValue += monthTotal;
  });
  const avgValue = values.length > 0 ? totalValue / values.length : 0;

  const monthlyBreakdown = months.map((month, index) => {
    const value = values[index] ?? 0;
    const vsAverage = avgValue > 0 ? value / avgValue : 0;
    return {
      month,
      value,
      vsAverage,
      isPeak: vsAverage > 1.5,
      isTrough: vsAverage < .5
    };
  });

  // Collect the distinct two-digit month numbers of the flagged entries.
  const monthNumbersWhere = (flag) => {
    const seen = /* @__PURE__ */ new Set();
    for (const item of monthlyBreakdown) {
      if (item[flag]) seen.add(item.month.substring(5, 7));
    }
    return [...seen];
  };
  const peakMonths = monthNumbersWhere("isPeak");
  const troughMonths = monthNumbersWhere("isTrough");
  const strength = calculateCV(values);

  return {
    hasSeasonality: peakMonths.length > 0 || troughMonths.length > 0 || strength > .3,
    strength,
    peakMonths,
    troughMonths,
    monthlyBreakdown,
    insufficientData
  };
}
|
|
1678
|
+
/**
 * "seasonality" analyzer: monthly totals of clicks or impressions with
 * peak/trough flags and an overall strength figure.
 *
 * Two execution paths are defined:
 *  - SQL (`buildSql` / `reduceSql`): monthly aggregation and statistics are
 *    computed in the query over the "pages" table partitions.
 *  - Rows (`buildRows` / `reduceRows`): daily rows are aggregated in memory
 *    by `analyzeSeasonality`.
 *
 * Both reducers return `{ results, meta }` where `results` is the monthly
 * breakdown and `meta` carries `total`, `hasSeasonality`, `strength`,
 * `peakMonths`, `troughMonths` and `insufficientData`.
 */
const seasonalityAnalyzer = defineAnalyzer({
  id: "seasonality",

  /**
   * Build the monthly aggregation query for the requested period.
   * `params.metric === "impressions"` selects impressions; anything else
   * selects clicks (the column name is chosen from these two literals only,
   * never interpolated from caller input).
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
WITH monthly AS (
SELECT
strftime(date, '%Y-%m') AS month,
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY month
),
stats AS (
SELECT
AVG(value) AS avg_val,
COALESCE(STDDEV_POP(value), 0.0) AS std_val,
CAST(COUNT(*) AS DOUBLE) AS month_count
FROM monthly
)
SELECT
m.month AS month,
m.value AS value,
CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
(s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
(s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
s.month_count AS monthCount
FROM monthly m, stats s
ORDER BY m.month
`,
      params: [startDate, endDate],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Shape SQL result rows into the analyzer output. `strength` and
   * `monthCount` are repeated on every row by the query, so they are read
   * from the first row only.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const breakdown = arr.map((r) => ({
      month: str(r.month),
      value: num(r.value),
      vsAverage: num(r.vsAverage),
      isPeak: bool(r.isPeak),
      isTrough: bool(r.isTrough)
    }));
    const first = arr[0];
    const strength = first ? num(first.strength) : 0;
    const monthCount = first ? num(first.monthCount) : 0;
    const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
    const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
    const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
    const insufficientData = monthCount < 12;
    return {
      results: breakdown,
      meta: {
        total: breakdown.length,
        hasSeasonality,
        strength,
        peakMonths,
        troughMonths,
        insufficientData
      }
    };
  },

  /** Describe the daily-rows query used by the in-memory path. */
  buildRows(params) {
    return { dates: datesQueryState(periodOf(params), params.limit) };
  },

  /**
   * Aggregate daily rows in memory. The full summary computed by
   * `analyzeSeasonality` is surfaced in `meta` so this path reports the same
   * fields as `reduceSql` (previously only `strength` was returned and the
   * remaining fields were discarded).
   */
  reduceRows(rows, params) {
    const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
    return {
      results: result.monthlyBreakdown,
      meta: {
        total: result.monthlyBreakdown.length,
        hasSeasonality: result.hasSeasonality,
        strength: result.strength,
        peakMonths: result.peakMonths,
        troughMonths: result.troughMonths,
        insufficientData: result.insufficientData
      }
    };
  }
});
|
|
1756
|
+
// Row cap passed to the keywords query when the caller gives no limit.
const DEFAULT_ROW_LIMIT$1 = 25e3;

/**
 * "striking-distance" analyzer: keyword/page pairs whose position, impressions
 * and CTR fall inside the configured window, ranked by potential clicks.
 *
 * A single `reduce` handles the output of both `buildSql` and `buildRows`;
 * all threshold filtering, sorting and pagination happen there.
 */
const strikingDistanceAnalyzer = defineAnalyzer({
  id: "striking-distance",

  /**
   * Filter, rank and paginate keyword rows.
   *
   * Defaults: position 4..20, at least 100 impressions, CTR at most 0.05,
   * page size 1000. `potentialClicks` is 15% of impressions, rounded.
   */
  reduce(rows, params) {
    const input = Array.isArray(rows) ? rows : [];
    const minPosition = params.minPosition ?? 4;
    const maxPosition = params.maxPosition ?? 20;
    const minImpressions = params.minImpressions ?? 100;
    const maxCtr = params.maxCtr ?? .05;
    const limit = params.limit ?? 1e3;

    const matches = input.flatMap((row) => {
      const position = num(row.position);
      const impressions = num(row.impressions);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);

      const outsideWindow = position < minPosition || position > maxPosition;
      if (outsideWindow || impressions < minImpressions || ctr > maxCtr) return [];

      return [{
        keyword: String(row.query ?? ""),
        page: row.page == null ? null : String(row.page),
        clicks,
        impressions,
        ctr,
        position,
        potentialClicks: Math.round(impressions * .15)
      }];
    });

    // Highest potential first.
    matches.sort((left, right) => right.potentialClicks - left.potentialClicks);

    const paged = paginateInMemory(matches, { limit, offset: params.offset });
    return {
      results: paged,
      meta: {
        total: matches.length,
        returned: paged.length
      }
    };
  },

  /**
   * Aggregate metrics per (query, url) over the period from the
   * "page_keywords" partitions. No thresholds are applied in SQL.
   */
  buildSql(params) {
    const period = periodOf(params);
    const { startDate, endDate } = period;
    const sql = `
SELECT
query,
url AS page,
CAST(SUM(clicks) AS DOUBLE) AS clicks,
CAST(SUM(impressions) AS DOUBLE) AS impressions,
CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0) AS ctr,
SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
`;
    return {
      sql,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /** Describe the keywords query used by the in-memory path. */
  buildRows(params) {
    const rowLimit = params.limit ?? DEFAULT_ROW_LIMIT$1;
    return { keywords: keywordsQueryState(periodOf(params), rowLimit) };
  }
});
|
|
1824
|
+
// Row cap passed to the rows query when the caller gives no limit.
const DEFAULT_ROW_LIMIT = 25e3;
// Sorter over result items keyed on impressions.
const sortRowResults = createSorter((item) => item.impressions, "impressions");

/**
 * "zero-click" analyzer: queries with many impressions and a good position
 * but a low click-through rate.
 *
 * Defaults: at least 1000 impressions, CTR threshold 0.03, position at most
 * 10. Both reducers return `{ results, meta }`; every result carries
 * `missedClicks` and `meta` echoes the thresholds that were applied.
 */
const zeroClickAnalyzer = defineAnalyzer({
  id: "zero-click",

  /**
   * Build the aggregation query over the "page_keywords" partitions.
   * Thresholds are bound as parameters; `missedClicks` is the shortfall
   * against a position-tiered expected CTR, floored at 0.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    const limit = params.limit ?? 1e3;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
)
SELECT
query, page, clicks, impressions, ctr, position,
CAST(GREATEST(0, ROUND(impressions * (
CASE
WHEN position <= 1 THEN 0.30
WHEN position <= 3 THEN 0.15
WHEN position <= 5 THEN 0.08
ELSE 0.04
END
)) - clicks) AS DOUBLE) AS missedClicks
FROM agg
WHERE position <= ? AND ctr < ?
ORDER BY impressions DESC
${paginateClause({
        limit,
        offset: params.offset
      })}
`,
      params: [
        startDate,
        endDate,
        minImpressions,
        maxPosition,
        maxCtr
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /** Shape SQL result rows; filtering and paging already happened in SQL. */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    return {
      results: arr.map((r) => ({
        query: r.query == null ? "" : String(r.query),
        page: r.page == null ? "" : String(r.page),
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        missedClicks: num(r.missedClicks)
      })),
      meta: {
        total: arr.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  },

  /** Describe the (query, page) rows query used by the in-memory path. */
  buildRows(params) {
    const period = periodOf(params);
    const limit = params.limit ?? DEFAULT_ROW_LIMIT;
    return { rows: gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState() };
  },

  /**
   * Filter rows in memory, keep the best-positioned row per query, then sort
   * by impressions (descending) and paginate.
   *
   * Metrics are coerced with `num` before comparison, and `missedClicks` is
   * computed with the same expected-CTR tiers as the SQL path so both paths
   * produce the same result fields.
   *
   * NOTE(review): this path keeps one row per query whereas the SQL path
   * groups by (query, url) — confirm whether that difference is intended.
   */
  reduceRows(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;

    // Expected CTR tiers; mirrors the CASE expression in buildSql.
    const expectedCtr = (position) => {
      if (position <= 1) return .3;
      if (position <= 3) return .15;
      if (position <= 5) return .08;
      return .04;
    };

    const queryMap = /* @__PURE__ */ new Map();
    for (const row of arr) {
      const clicks = num(row.clicks);
      const impressions = num(row.impressions);
      const ctr = num(row.ctr);
      const position = num(row.position);
      if (impressions < minImpressions) continue;
      if (position > maxPosition) continue;
      if (ctr > maxCtr) continue;
      const existing = queryMap.get(row.query);
      if (!existing || position < existing.position) {
        queryMap.set(row.query, {
          query: row.query,
          page: row.page,
          clicks,
          impressions,
          ctr,
          position,
          missedClicks: Math.max(0, Math.round(impressions * expectedCtr(position)) - clicks)
        });
      }
    }

    const results = sortRowResults(Array.from(queryMap.values()), "impressions", "desc");
    const paged = paginateInMemory(results, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: results.length,
        returned: paged.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  }
});
|
|
1942
|
+
/**
 * Registry combining the row-based variant of every analyzer listed below
 * with the SQL analyzers imported as `SQL_ANALYZERS`.
 */
const defaultAnalyzerRegistry = createAnalyzerRegistry({
  rows: [
    strikingDistanceAnalyzer,
    opportunityAnalyzer,
    brandAnalyzer,
    concentrationAnalyzer,
    clusteringAnalyzer,
    seasonalityAnalyzer,
    moversAnalyzer,
    decayAnalyzer,
    cannibalizationAnalyzer,
    zeroClickAnalyzer
  ].map((analyzer) => analyzer.rows),
  sql: SQL_ANALYZERS
});
|
|
1957
|
+
export { defaultAnalyzerRegistry };
|