@gscdump/analysis 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/analyzer/index.d.mts +893 -0
- package/dist/analyzer/index.mjs +4944 -0
- package/dist/default-registry.d.mts +93 -0
- package/dist/default-registry.mjs +1957 -0
- package/dist/index.d.mts +620 -0
- package/dist/index.mjs +2873 -0
- package/dist/period/index.d.mts +57 -0
- package/dist/period/index.mjs +150 -0
- package/dist/query/index.d.mts +26 -0
- package/dist/query/index.mjs +340 -0
- package/dist/semantic/index.d.mts +70 -0
- package/dist/semantic/index.mjs +391 -0
- package/dist/source/index.d.mts +427 -0
- package/dist/source/index.mjs +1865 -0
- package/package.json +86 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,2873 @@
|
|
|
1
|
+
import { enumeratePartitions } from "@gscdump/engine/planner";
|
|
2
|
+
import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
|
|
3
|
+
import { between, date, extractMetricFilters, extractSpecialOperatorFilters, gsc, page, query } from "gscdump/query";
|
|
4
|
+
import { MS_PER_DAY, daysAgo, toIsoDate } from "gscdump";
|
|
5
|
+
import { createEngine as createBrowserQuerySource } from "@gscdump/engine-wasm";
|
|
6
|
+
import { assertDimensionsSupported, getDimensionFilters, getFilterDimensions, isSqlQuerySource, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, metricValue } from "@gscdump/engine/resolver";
|
|
7
|
+
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
8
|
+
import { createEngine as createSqliteQuerySource } from "@gscdump/engine-sqlite";
|
|
9
|
+
/**
 * Clamp a number into the closed interval [0, 1].
 *
 * @param {number} value - Number to clamp.
 * @returns {number} 0 when below 0, 1 when above 1, otherwise `value` unchanged.
 */
function clamp01(value) {
  // Comparisons (not Math.min/max) so NaN and -0 pass through untouched.
  return value < 0 ? 0 : value > 1 ? 1 : value;
}
|
|
14
|
+
/**
 * Clamp a number into the closed interval [min, max].
 *
 * @param {number} value - Number to clamp.
 * @param {number} min - Lower bound (checked first).
 * @param {number} max - Upper bound.
 * @returns {number} `min`, `max`, or `value` itself when already inside the range.
 */
function clamp(value, min, max) {
  // The lower bound is tested first, so it wins if the bounds are inverted.
  return value < min ? min : value > max ? max : value;
}
|
|
19
|
+
/**
 * Percentage change from `previous` to `current`.
 *
 * @param {number} current - Value in the current period.
 * @param {number} previous - Value in the baseline period.
 * @returns {number} Relative change in percent. With a zero baseline the result
 *   is 100 when `current` is positive and 0 otherwise.
 */
function percentDifference(current, previous) {
  if (previous !== 0) {
    const delta = current - previous;
    return delta / previous * 100;
  }
  // Division by zero is undefined; report growth from nothing as +100%.
  return current > 0 ? 100 : 0;
}
|
|
23
|
+
/**
 * Thrown when an analyzer needs capabilities the query source does not offer.
 *
 * Exposes `tool` (the analyzer id or type) and `missing` (the list of
 * capability names that were not satisfied).
 */
class AnalyzerCapabilityError extends Error {
  /**
   * @param {string} tool - Analyzer id/type that could not run.
   * @param {string[]} missing - Capability names absent from the source.
   */
  constructor(tool, missing) {
    const wanted = missing.join(", ");
    super(`analyzer "${tool}" requires capabilities [${wanted}] not provided by source`);
    this.tool = tool;
    this.missing = missing;
    this.name = "AnalyzerCapabilityError";
  }
}
|
|
31
|
+
/**
 * Derive the set of analyzer capability names a query source satisfies.
 *
 * @param {object} source - Query source exposing `capabilities` flags and,
 *   optionally, an `executeSql` member.
 * @returns {Set<string>} Capability names, in a fixed insertion order.
 */
function sourceCapabilities(source) {
  const caps = /* @__PURE__ */ new Set();
  if (source.executeSql) caps.add("executeSql");
  // Source flag -> analyzer capability name. Only `fileSets` is renamed.
  const flagToCapability = [
    ["fileSets", "partitionedParquet"],
    ["regex", "regex"],
    ["windowTotals", "windowTotals"],
    ["comparisonJoin", "comparisonJoin"],
    ["attachedTables", "attachedTables"]
  ];
  for (const [flag, capability] of flagToCapability) {
    if (source.capabilities[flag]) caps.add(capability);
  }
  return caps;
}
|
|
41
|
+
/**
 * Verify that every capability an analyzer requires is present.
 *
 * @param {{ id: string, requires: string[] }} analyzer - Analyzer variant to check.
 * @param {Set<string>} caps - Capabilities offered by the source.
 * @throws {AnalyzerCapabilityError} Listing each required capability that is absent.
 */
function assertSatisfies(analyzer, caps) {
  const missing = [];
  for (const capability of analyzer.requires) {
    if (!caps.has(capability)) missing.push(capability);
  }
  if (missing.length === 0) return;
  throw new AnalyzerCapabilityError(analyzer.id, missing);
}
|
|
45
|
+
/**
 * Resolve the analyzer for `params.type`, check its requirements against the
 * source, build its plan and run it.
 *
 * @param {object} source - Query source to run against.
 * @param {object} params - Analysis parameters; `params.type` selects the analyzer.
 * @param {object} registry - Registry exposing `resolveAnalyzer(id, sourceSupportsSql)`.
 * @returns {Promise<{ results: unknown, meta: object }>} Analyzer output.
 * @throws {AnalyzerCapabilityError} When no analyzer variant resolves or a
 *   required capability is missing.
 */
async function runAnalyzerFromSource(source, params, registry) {
  const caps = sourceCapabilities(source);
  const sqlCapable = caps.has("executeSql") || caps.has("attachedTables");
  const analyzer = registry.resolveAnalyzer(params.type, sqlCapable);
  if (!analyzer) throw new AnalyzerCapabilityError(params.type, ["executeSql"]);
  assertSatisfies(analyzer, caps);
  const plan = analyzer.build(params);
  // Any plan that is not a "rows" plan is executed as SQL.
  const execute = plan.kind === "rows" ? runRowsPlanAgainstSource : runSqlPlanAgainstSource;
  return execute(source, analyzer, plan, params);
}
|
|
54
|
+
/**
 * Execute a "rows" plan: run every named query through `source.queryRows`
 * concurrently, then hand the keyed row sets to the analyzer's reducer.
 *
 * @param {object} source - Query source exposing `queryRows(state)`.
 * @param {object} analyzer - Analyzer variant exposing `reduce(rowMap, ctx)`.
 * @param {{ queries: Record<string, { state: unknown }> }} plan - Rows plan.
 * @param {object} params - Analysis parameters; `params.type` is echoed into `meta.tool`.
 * @returns {Promise<{ results: unknown, meta: object }>} Reducer output with `tool` added to meta.
 */
async function runRowsPlanAgainstSource(source, analyzer, plan, params) {
  const pending = Object.entries(plan.queries).map(async ([name, spec]) => {
    const rows = await source.queryRows(spec.state);
    return [name, rows];
  });
  const rowMap = Object.fromEntries(await Promise.all(pending));
  const reduced = analyzer.reduce(rowMap, { params });
  return {
    results: reduced.results,
    // Reducer meta is spread last, so it may override `tool`.
    meta: {
      tool: params.type,
      ...reduced.meta
    }
  };
}
|
|
67
|
+
/**
 * Map a SQL plan's file sets onto named keys.
 *
 * @param {{ current: unknown, previous?: unknown, extraFiles?: Record<string, unknown> }} plan
 * @returns {Record<string, unknown>} `FILES` for the current set, `FILES_PREV`
 *   for the previous set when present, and `FILES_<key>` for each extra set.
 */
function fileSetsFor(plan) {
  const { current, previous, extraFiles } = plan;
  const fileSets = { FILES: current };
  if (previous) fileSets.FILES_PREV = previous;
  if (extraFiles) {
    Object.entries(extraFiles).forEach(([key, files]) => {
      fileSets[`FILES_${key}`] = files;
    });
  }
  return fileSets;
}
|
|
73
|
+
/**
 * Execute a SQL plan: run the main statement, then each extra query in order,
 * and pass everything to the analyzer's reducer.
 *
 * @param {object} source - Query source exposing `executeSql` and `capabilities`.
 * @param {object} analyzer - Analyzer variant exposing `id` and `reduce(rows, ctx)`.
 * @param {object} plan - SQL plan (`sql`, `params`, optional `extraQueries`,
 *   `requiresAttachedTables`, and the file-set fields read by `fileSetsFor`).
 * @param {object} params - Analysis parameters; `params.type` is echoed into `meta.tool`.
 * @returns {Promise<{ results: unknown, meta: object }>} Reducer output plus
 *   `tool` and, for local sources, `source: "local"`.
 * @throws {AnalyzerCapabilityError} When the source cannot execute SQL or lacks
 *   attached tables the plan requires.
 */
async function runSqlPlanAgainstSource(source, analyzer, plan, params) {
  if (!source.executeSql) throw new AnalyzerCapabilityError(analyzer.id, ["executeSql"]);
  if (plan.requiresAttachedTables && !source.capabilities.attachedTables) {
    throw new AnalyzerCapabilityError(analyzer.id, ["attachedTables"]);
  }
  const fileSets = source.capabilities.fileSets ? fileSetsFor(plan) : void 0;
  // A fresh options object per call; omitted when the source has no file sets.
  const execOptions = () => fileSets ? { fileSets } : void 0;
  const rows = await source.executeSql(plan.sql, plan.params, execOptions());
  const extras = {};
  if (plan.extraQueries) {
    // Extra queries run one after another, keyed by their name.
    for (const extra of plan.extraQueries) {
      extras[extra.name] = await source.executeSql(extra.sql, extra.params, execOptions());
    }
  }
  const reduced = analyzer.reduce(rows, { params, extras });
  const sourceMeta = source.capabilities.localSource ? { source: "local" } : {};
  return {
    results: reduced.results,
    meta: {
      tool: params.type,
      ...sourceMeta,
      ...reduced.meta
    }
  };
}
|
|
97
|
+
/**
 * Public entry point for running a single analysis against a query source.
 * Delegates straight to `runAnalyzerFromSource`.
 *
 * @param {object} source - Query source to run against.
 * @param {object} params - Analysis parameters; `params.type` selects the analyzer.
 * @param {object} registry - Analyzer registry.
 * @returns {Promise<{ results: unknown, meta: object }>} Analyzer output.
 */
async function analyzeFromSource(source, params, registry) {
  const outcome = await runAnalyzerFromSource(source, params, registry);
  return outcome;
}
|
|
100
|
+
// Analyzer ids consulted by default when building the action-priority list.
const DEFAULT_SOURCES = ["striking-distance", "opportunity", "cannibalization", "ctr-anomaly", "change-point"];

// Effort bucket assigned to actions produced by each source analyzer.
const EFFORT_BY_SOURCE = {
  "striking-distance": "low",
  "opportunity": "low",
  "cannibalization": "medium",
  "ctr-anomaly": "high",
  "change-point": "high"
};

// Priority-score multiplier per effort bucket: lower effort scores higher.
const EFFORT_MULTIPLIER = { low: 1.3, medium: 1, high: 0.7 };

// Ordering of effort buckets; the smaller rank wins when actions are merged.
const EFFORT_RANK = { low: 0, medium: 1, high: 2 };
|
|
124
|
+
/**
 * Build the case-insensitive identity key for a keyword/page pair.
 *
 * @param {string} keyword - Search query.
 * @param {string} page - Page URL.
 * @returns {string} Lower-cased `keyword|page`.
 */
function idKey(keyword, page) {
  return [keyword, page].map((part) => part.toLowerCase()).join("|");
}
|
|
127
|
+
/**
 * Shorten a string to at most `n` characters, marking the cut with "...".
 *
 * The previous implementation kept `n - 1` characters and then appended the
 * three-character marker, so truncated output was `n + 2` characters long and
 * overshot the requested width. The marker is now counted against the budget.
 *
 * @param {string} s - Text to shorten.
 * @param {number} n - Maximum length of the returned string.
 * @returns {string} `s` unchanged when it already fits; otherwise a prefix of
 *   `s` followed by "..." with total length `n`. When `n` is too small to hold
 *   the marker, a bare prefix of length `n` is returned.
 */
function truncate(s, n) {
  if (s.length <= n) return s;
  const marker = "...";
  // No room for the marker itself: fall back to a plain prefix.
  if (n <= marker.length) return s.slice(0, Math.max(0, n));
  return `${s.slice(0, n - marker.length)}${marker}`;
}
|
|
130
|
+
/**
 * Turn a source-specific spec into the common priority-action shape.
 *
 * @param {object} spec - Fields `source`, `keyword`, `page`, `title`, `why`,
 *   `severity`, `impressions`, `impact` and the raw `data` row.
 * @returns {object} Action with an id derived from keyword/page, a single-entry
 *   `sources` list, the effort bucket looked up from the source, a zero
 *   `priorityScore`, and the raw row stored under `data[source]`.
 */
function buildAction(spec) {
  const { source, keyword, page, title, severity, impressions, impact, why, data } = spec;
  return {
    id: idKey(keyword, page),
    title,
    keyword,
    page,
    sources: [source],
    severity,
    impressions,
    impact,
    effort: EFFORT_BY_SOURCE[source],
    why,
    // Filled in later by scorePriorityActions.
    priorityScore: 0,
    data: { [source]: data }
  };
}
|
|
146
|
+
/**
 * Convert striking-distance rows into priority actions.
 *
 * Rows without a page or without positive `potentialClicks` are dropped.
 *
 * @param {Iterable<object>} rows - Striking-distance analyzer results.
 * @returns {object[]} One action per retained row.
 */
function fromStrikingDistance(rows) {
  const actions = [];
  for (const row of rows) {
    if (row.page == null) continue;
    const impact = Math.max(0, row.potentialClicks);
    if (impact <= 0) continue;
    // Position 4 or better scores 1, position 20 or worse scores 0.
    const positionScore = clamp01((20 - row.position) / 16);
    // Impressions saturate at 5000.
    const impressionScore = Math.min(1, row.impressions / 5e3);
    const title = `Push "${truncate(row.keyword, 40)}" onto page 1`;
    const why = `Ranks #${row.position.toFixed(1)} with ${Math.round(row.impressions)} impressions; small gains unlock page-1 clicks.`;
    // Geometric mean of the two scores, scaled to 0..100.
    const severity = Math.round(100 * Math.sqrt(positionScore * impressionScore));
    actions.push(buildAction({
      source: "striking-distance",
      keyword: row.keyword,
      page: row.page,
      title,
      why,
      severity,
      impressions: row.impressions,
      impact,
      data: row
    }));
  }
  return actions;
}
|
|
168
|
+
/**
 * Convert opportunity rows into priority actions.
 *
 * Rows without a page or without positive `potentialClicks` are dropped.
 *
 * @param {Iterable<object>} rows - Opportunity analyzer results.
 * @returns {object[]} One action per retained row; severity is the rounded
 *   opportunity score.
 */
function fromOpportunity(rows) {
  const actions = [];
  for (const row of rows) {
    if (row.page == null) continue;
    const impact = Math.max(0, row.potentialClicks);
    if (impact <= 0) continue;
    const title = `Improve on-page for "${truncate(row.keyword, 40)}"`;
    const why = `Opportunity score ${Math.round(row.opportunityScore)}; CTR ${(row.ctr * 100).toFixed(1)}% vs expected at pos ${row.position.toFixed(1)}.`;
    actions.push(buildAction({
      source: "opportunity",
      keyword: row.keyword,
      page: row.page,
      title,
      why,
      severity: Math.round(row.opportunityScore),
      impressions: row.impressions,
      impact,
      data: row
    }));
  }
  return actions;
}
|
|
188
|
+
/**
 * Convert cannibalization events into priority actions.
 *
 * Events with severity below 30 are dropped. The action targets the event's
 * leader URL.
 *
 * @param {Iterable<object>} events - Cannibalization analyzer results.
 * @returns {object[]} One action per retained event.
 */
function fromCannibalization(events) {
  const actions = [];
  for (const event of events) {
    if (event.severity < 30) continue;
    const title = `Consolidate cannibalization on "${truncate(event.keyword, 36)}"`;
    const why = `${event.competitorCount} URLs split ${Math.round(event.totalImpressions)} impressions; leader loses ~${Math.round(event.stolenClicks)} clicks to siblings.`;
    actions.push(buildAction({
      source: "cannibalization",
      keyword: event.keyword,
      page: event.leaderUrl,
      title,
      why,
      severity: Math.round(event.severity),
      impressions: event.totalImpressions,
      impact: Math.max(0, event.stolenClicks),
      data: event
    }));
  }
  return actions;
}
|
|
206
|
+
/**
 * Convert CTR-anomaly rows into priority actions.
 *
 * Raw severities are rescaled to 0..100 relative to the largest severity in
 * the batch. Rows without positive `clicksLost` are dropped.
 *
 * @param {Iterable<object>} rows - CTR-anomaly analyzer results.
 * @returns {object[]} One action per retained row.
 */
function fromCtrAnomaly(rows) {
  // First pass: largest raw severity over all rows, including dropped ones.
  let peakSeverity = 0;
  for (const row of rows) {
    if (row.severity > peakSeverity) peakSeverity = row.severity;
  }
  const actions = [];
  for (const row of rows) {
    const impact = Math.max(0, row.clicksLost);
    if (impact <= 0) continue;
    const title = `Lift CTR on "${truncate(row.keyword, 36)}"`;
    const why = `CTR collapsed ${row.breachDaysDown} days at flat position; ~${Math.round(row.clicksLost)} clicks lost vs baseline ${(row.baselineCtr * 100).toFixed(1)}%.`;
    const severity = peakSeverity > 0 ? Math.round(row.severity / peakSeverity * 100) : 0;
    actions.push(buildAction({
      source: "ctr-anomaly",
      keyword: row.keyword,
      page: row.page,
      title,
      why,
      severity,
      impressions: row.totalImpressions,
      impact,
      data: row
    }));
  }
  return actions;
}
|
|
227
|
+
/**
 * Convert change-point rows into priority actions.
 *
 * Only rows whose direction is "worsened" are kept. Impact is the absolute
 * mean shift multiplied by half the observed days (at least one day).
 *
 * @param {Iterable<object>} rows - Change-point analyzer results.
 * @returns {object[]} One action per retained row.
 */
function fromChangePoint(rows) {
  const actions = [];
  for (const row of rows) {
    if (row.direction !== "worsened") continue;
    const affectedDays = Math.max(1, row.totalDays / 2);
    const impact = Math.abs(row.leftMean - row.rightMean) * affectedDays;
    if (impact <= 0) continue;
    const title = `Diagnose drop on "${truncate(row.keyword, 34)}"`;
    const why = `Significant regression around ${row.changeDate} (${row.leftMean.toFixed(1)} -> ${row.rightMean.toFixed(1)}, LLR ${row.llr.toFixed(0)}).`;
    // log10 of the LLR (floored at 10), where log10 = 3 maps to 100; clamped to 0..100.
    const severity = clamp(Math.round(Math.log10(Math.max(10, row.llr)) / 3 * 100), 0, 100);
    actions.push(buildAction({
      source: "change-point",
      keyword: row.keyword,
      page: row.page,
      title,
      why,
      severity,
      impressions: row.totalImpressions,
      impact,
      data: row
    }));
  }
  return actions;
}
|
|
248
|
+
/**
 * Route an analyzer result to the converter matching its source id.
 *
 * @param {string} source - Analyzer id that produced `result`.
 * @param {{ results: Iterable<object> }} result - Analyzer output.
 * @returns {object[]} Normalized priority actions. Any id other than the four
 *   named cases is handled by the change-point converter.
 */
function normalizePriorityActions(source, result) {
  const rows = result.results;
  switch (source) {
    case "striking-distance": return fromStrikingDistance(rows);
    case "opportunity": return fromOpportunity(rows);
    case "cannibalization": return fromCannibalization(rows);
    case "ctr-anomaly": return fromCtrAnomaly(rows);
    default: return fromChangePoint(rows);
  }
}
|
|
256
|
+
/**
 * Collapse actions that share an id into a single action.
 *
 * On a collision the sources are unioned, severity and impressions take the
 * maximum, impact is summed, the lower-effort bucket wins, and the title/why
 * come from whichever action has the strictly higher severity (the earlier one
 * on ties). Keyword, page and id stay those of the first action seen.
 *
 * @param {Iterable<object>} all - Actions from every source.
 * @returns {object[]} Merged actions in first-seen order; inputs are not mutated.
 */
function mergePriorityActions(all) {
  const merged = /* @__PURE__ */ new Map();
  const combine = (prior, incoming) => {
    const sources = [...new Set([...prior.sources, ...incoming.sources])];
    const incomingWins = incoming.severity > prior.severity;
    const effort = EFFORT_RANK[incoming.effort] < EFFORT_RANK[prior.effort] ? incoming.effort : prior.effort;
    return {
      id: prior.id,
      title: incomingWins ? incoming.title : prior.title,
      keyword: prior.keyword,
      page: prior.page,
      sources,
      severity: Math.max(prior.severity, incoming.severity),
      impressions: Math.max(prior.impressions, incoming.impressions),
      impact: prior.impact + incoming.impact,
      why: incomingWins ? incoming.why : prior.why,
      effort,
      priorityScore: 0,
      data: { ...prior.data, ...incoming.data }
    };
  };
  for (const action of all) {
    const prior = merged.get(action.id);
    if (prior == null) {
      // First sighting: store a shallow copy with its own sources/data containers.
      merged.set(action.id, { ...action, sources: [...action.sources], data: { ...action.data } });
    } else {
      merged.set(action.id, combine(prior, action));
    }
  }
  return Array.from(merged.values());
}
|
|
291
|
+
/**
 * Compute `priorityScore` for each action and sort by it, highest first.
 *
 * The score is `impact * (1 + severity / 100) * effortMultiplier`. The array
 * and its elements are modified in place.
 *
 * @param {object[]} actions - Actions to score.
 * @returns {object[]} The same array instance, now sorted.
 */
function scorePriorityActions(actions) {
  actions.forEach((action) => {
    const effortFactor = EFFORT_MULTIPLIER[action.effort];
    action.priorityScore = action.impact * (1 + action.severity / 100) * effortFactor;
  });
  return actions.sort((left, right) => right.priorityScore - left.priorityScore);
}
|
|
299
|
+
/**
 * Run several analyzers concurrently and fold their findings into one ranked
 * list of actions.
 *
 * Fix over the previous version: the analyzer call now sits inside the
 * try/catch. Before, a synchronous throw from `analyzer.analyze` happened
 * before the `.then/.catch` chain existed, so it bypassed `continueOnError`
 * and left that source's status stuck at "running".
 *
 * @param {{ analyze: (params: object) => Promise<object> | object }} analyzer
 *   Object whose `analyze` runs one analysis and yields `{ results, meta }`.
 * @param {object} [options]
 * @param {string[]} [options.sources] - Analyzer ids to run (defaults to DEFAULT_SOURCES).
 * @param {number} [options.limit=40] - Maximum number of actions returned.
 * @param {boolean} [options.continueOnError=true] - When false, the first
 *   failing source rejects the whole call.
 * @param {Record<string, object>} [options.paramsBySource] - Extra params per source id.
 * @param {(state: object) => void} [options.onSourceStatus] - Called on every
 *   status change of a source (pending, running, done, skipped, error).
 * @returns {Promise<{ actions: object[], totalSignals: number, sources: object[] }>}
 *   Ranked actions, the count of pre-merge signals, and the final state of each source.
 */
async function analyzeActionPriority(analyzer, options = {}) {
  const { sources = DEFAULT_SOURCES, limit = 40, continueOnError = true, paramsBySource = {}, onSourceStatus } = options;
  const states = /* @__PURE__ */ new Map();
  const pendingState = (source) => ({
    source,
    status: "pending",
    count: 0
  });
  for (const source of sources) {
    const state = pendingState(source);
    states.set(source, state);
    onSourceStatus?.(state);
  }
  // Merge a patch into the source's state and notify the listener.
  const update = (source, patch) => {
    const next = {
      ...states.get(source) ?? pendingState(source),
      ...patch
    };
    states.set(source, next);
    onSourceStatus?.(next);
  };
  const runOne = async (source) => {
    update(source, {
      status: "running",
      error: void 0
    });
    const params = {
      type: source,
      ...paramsBySource[source] ?? {}
    };
    try {
      // The call is inside the try so synchronous throws are handled like rejections.
      const result = await analyzer.analyze(params);
      const normalized = normalizePriorityActions(source, result);
      update(source, {
        status: normalized.length === 0 ? "skipped" : "done",
        count: normalized.length
      });
      return normalized;
    } catch (error) {
      update(source, {
        status: "error",
        count: 0,
        error: error instanceof Error ? error.message : String(error)
      });
      if (!continueOnError) throw error;
      return [];
    }
  };
  const all = (await Promise.all(sources.map(runOne))).flat();
  return {
    actions: scorePriorityActions(mergePriorityActions(all)).slice(0, limit),
    totalSignals: all.length,
    sources: sources.map((source) => states.get(source) ?? pendingState(source))
  };
}
|
|
360
|
+
/**
 * Convenience wrapper: run the action-priority analysis with every
 * sub-analysis executed against one query source via `analyzeFromSource`.
 *
 * @param {object} source - Query source to run against.
 * @param {object} registry - Analyzer registry.
 * @param {object} [options] - Passed through to `analyzeActionPriority`.
 * @returns {Promise<object>} Result of `analyzeActionPriority`.
 */
async function analyzeActionPriorityFromSource(source, registry, options = {}) {
  const sourceBackedAnalyzer = {
    analyze: (params) => analyzeFromSource(source, params, registry)
  };
  return analyzeActionPriority(sourceBackedAnalyzer, options);
}
|
|
363
|
+
/**
 * Build a registry that stores, per analyzer id, an optional "rows" variant
 * and an optional "sql" variant.
 *
 * @param {{ rows?: object[], sql?: object[] }} [init] - Variants to register;
 *   each must carry an `id`.
 * @returns {object} Lookup helpers:
 *   - `listAnalyzerIds()` — all ids, sorted.
 *   - `getAnalyzerVariants(id)` — the `{ rows?, sql? }` entry, or undefined.
 *   - `resolveAnalyzer(id, sourceSupportsSql)` — the SQL variant (falling back
 *     to rows) for SQL-capable sources, otherwise the rows variant.
 *   - `listAnalyzersFor(sourceSupportsSql)` — resolved variants, sorted by id.
 *   - `listAnalyzerIdsFor(source)` — ids that resolve for a source, where SQL
 *     support means `source.executeSql` is a function.
 */
function createAnalyzerRegistry(init = {}) {
  const variantsById = /* @__PURE__ */ new Map();
  const register = (kind, analyzers) => {
    for (const analyzer of analyzers ?? []) {
      const slot = variantsById.get(analyzer.id) ?? {};
      slot[kind] = analyzer;
      variantsById.set(analyzer.id, slot);
    }
  };
  // Rows first, then SQL.
  register("rows", init.rows);
  register("sql", init.sql);

  const listAnalyzerIds = () => Array.from(variantsById.keys()).sort();
  const getAnalyzerVariants = (id) => variantsById.get(id);
  const resolveAnalyzer = (id, sourceSupportsSql) => {
    const variants = variantsById.get(id);
    if (!variants) return void 0;
    return sourceSupportsSql ? variants.sql ?? variants.rows : variants.rows;
  };
  const listAnalyzersFor = (sourceSupportsSql) => {
    const resolved = [];
    for (const id of listAnalyzerIds()) {
      const analyzer = resolveAnalyzer(id, sourceSupportsSql);
      if (analyzer) resolved.push(analyzer);
    }
    return resolved;
  };
  const listAnalyzerIdsFor = (source) => {
    const sourceSupportsSql = typeof source.executeSql === "function";
    return listAnalyzerIds().filter((id) => Boolean(resolveAnalyzer(id, sourceSupportsSql)));
  };
  return {
    listAnalyzerIds,
    getAnalyzerVariants,
    resolveAnalyzer,
    listAnalyzersFor,
    listAnalyzerIdsFor
  };
}
|
|
405
|
+
// Row cap applied to the query-state builders when the caller gives no limit.
const DEFAULT_LIMIT$1 = 25e3;

/**
 * Query state selecting the query and page dimensions within a period.
 *
 * @param {{ startDate: string, endDate: string }} period - Date range bounds.
 * @param {number} [limit] - Row cap (defaults to DEFAULT_LIMIT$1).
 * @returns {unknown} State produced by the query builder's `getState()`.
 */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(query, page);
  const withinPeriod = between(date, period.startDate, period.endDate);
  return selection.where(withinPeriod).limit(limit).getState();
}
|
|
409
|
+
/**
 * Query state selecting the page dimension within a period.
 *
 * @param {{ startDate: string, endDate: string }} period - Date range bounds.
 * @param {number} [limit] - Row cap (defaults to DEFAULT_LIMIT$1).
 * @returns {unknown} State produced by the query builder's `getState()`.
 */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(page);
  const withinPeriod = between(date, period.startDate, period.endDate);
  return selection.where(withinPeriod).limit(limit).getState();
}
|
|
412
|
+
/**
 * Query state selecting the date dimension within a period.
 *
 * @param {{ startDate: string, endDate: string }} period - Date range bounds.
 * @param {number} [limit] - Row cap (defaults to DEFAULT_LIMIT$1).
 * @returns {unknown} State produced by the query builder's `getState()`.
 */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(date);
  const withinPeriod = between(date, period.startDate, period.endDate);
  return selection.where(withinPeriod).limit(limit).getState();
}
|
|
415
|
+
// Capabilities a SQL analyzer variant requires unless it declares its own list.
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Define an analyzer with an optional SQL variant and an optional rows variant.
 *
 * @param {object} opts
 * @param {string} opts.id - Analyzer id shared by both variants.
 * @param {Function} [opts.reduce] - Reducer used by any variant lacking its own.
 * @param {Function} [opts.reduceSql] - Reducer for the SQL variant.
 * @param {Function} [opts.reduceRows] - Reducer for the rows variant.
 * @param {Function} [opts.buildSql] - Builds the SQL spec from params; enables the SQL variant.
 * @param {Function} [opts.buildRows] - Builds named query states from params; enables the rows variant.
 * @param {string[]} [opts.sqlRequires] - Capabilities required by the SQL variant.
 * @param {string[]} [opts.rowsRequires] - Capabilities required by the rows variant.
 * @returns {{ id: string, sql: object | undefined, rows: object | undefined }}
 * @throws {Error} When a builder is supplied without a matching reducer.
 */
function defineAnalyzer(opts) {
  const {
    id,
    reduce,
    reduceSql,
    reduceRows,
    buildSql,
    buildRows,
    sqlRequires = DEFAULT_SQL_REQUIRES,
    rowsRequires = []
  } = opts;
  const sqlReducer = reduceSql ?? reduce;
  const rowsReducer = reduceRows ?? reduce;
  if (buildSql && !sqlReducer) throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
  if (buildRows && !rowsReducer) throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);

  // Reducers get a plain array when possible: arrays pass through, and a keyed
  // row map with exactly one entry is unwrapped to that entry's value.
  const runReducer = (reducer, rows, params, ctx) => {
    const input = Array.isArray(rows) ? rows : pickSingle(rows) ?? rows;
    return reducer(input, params, ctx);
  };

  let sqlVariant = void 0;
  if (buildSql && sqlReducer) {
    sqlVariant = {
      id,
      requires: sqlRequires,
      build(params) {
        const spec = buildSql(params);
        const { sql, current, previous, extraFiles, extraQueries, requiresAttachedTables } = spec;
        return {
          kind: "sql",
          sql,
          params: spec.params,
          current,
          previous,
          extraFiles,
          extraQueries,
          requiresAttachedTables
        };
      },
      reduce(rows, ctx) {
        const reduced = runReducer(sqlReducer, rows, ctx.params, { extras: ctx.extras });
        return { results: reduced.results, meta: reduced.meta };
      }
    };
  }

  let rowsVariant = void 0;
  if (buildRows && rowsReducer) {
    rowsVariant = {
      id,
      requires: rowsRequires,
      build(params) {
        const queries = {};
        for (const [name, state] of Object.entries(buildRows(params))) {
          queries[name] = { state };
        }
        return { kind: "rows", queries };
      },
      reduce(rows, ctx) {
        const reduced = runReducer(rowsReducer, rows, ctx.params, {});
        return { results: reduced.results, meta: reduced.meta };
      }
    };
  }

  return { id, sql: sqlVariant, rows: rowsVariant };
}
|
|
471
|
+
/**
 * Return the sole value of an object that has exactly one own enumerable key.
 *
 * @param {object} rows - Keyed collection.
 * @returns {unknown} The single value, or undefined when the object has zero
 *   or several keys.
 */
function pickSingle(rows) {
  const keys = Object.keys(rows);
  if (keys.length !== 1) return void 0;
  const [onlyKey] = keys;
  return rows[onlyKey];
}
|
|
475
|
+
/**
 * Default end of the analysis period: three days ago.
 * NOTE(review): the 3-day offset presumably allows for reporting lag — confirm.
 *
 * @returns {unknown} Whatever `daysAgo(3)` returns.
 */
function defaultEndDate() {
  const offsetDays = 3;
  return daysAgo(offsetDays);
}
|
|
478
|
+
/**
 * Default start of the analysis period: 31 days ago.
 *
 * @returns {unknown} Whatever `daysAgo(31)` returns.
 */
function defaultStartDate() {
  const offsetDays = 31;
  return daysAgo(offsetDays);
}
|
|
481
|
+
/**
 * Extract the analysis period from params, filling gaps with the defaults.
 *
 * @param {{ startDate?: string, endDate?: string }} params - Analysis parameters.
 * @returns {{ startDate: unknown, endDate: unknown }} Period bounds; any falsy
 *   bound (including an empty string) is replaced by its default.
 */
function periodOf(params) {
  const startDate = params.startDate || defaultStartDate();
  const endDate = params.endDate || defaultEndDate();
  return { startDate, endDate };
}
|
|
487
|
+
/**
 * Extract the current and previous periods needed by comparison analyses.
 *
 * @param {object} params - Analysis parameters; needs `prevStartDate` and
 *   `prevEndDate`, and `type` is used in the error message.
 * @returns {{ current: object, previous: { startDate: string, endDate: string } }}
 * @throws {Error} When either previous-period bound is missing.
 */
function comparisonOf(params) {
  const { prevStartDate, prevEndDate } = params;
  if (!prevStartDate || !prevEndDate) {
    throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
  }
  const current = periodOf(params);
  const previous = { startDate: prevStartDate, endDate: prevEndDate };
  return { current, previous };
}
|
|
497
|
+
/**
 * Parse a `YYYY-MM-DD` string as midnight UTC.
 *
 * @param {string} s - Calendar date.
 * @returns {Date} Date at 00:00:00Z of that day (an invalid Date when `s`
 *   cannot be parsed).
 */
function parseIso(s) {
  const utcMidnight = `${s}T00:00:00Z`;
  return /* @__PURE__ */ new Date(utcMidnight);
}
|
|
500
|
+
/**
 * Return a new Date shifted by a number of whole days.
 *
 * @param {Date} d - Base date (not mutated).
 * @param {number} n - Days to add; negative values move backwards.
 * @returns {Date} Shifted date.
 */
function addDays(d, n) {
  const shiftedMs = d.getTime() + n * MS_PER_DAY;
  return new Date(shiftedMs);
}
|
|
503
|
+
/**
 * Count the days in an inclusive `YYYY-MM-DD` range.
 *
 * @param {string} start - First day.
 * @param {string} end - Last day.
 * @returns {number} Number of days including both endpoints (1 when equal).
 */
function daysBetween(start, end) {
  const startMs = parseIso(start).getTime();
  const endMs = parseIso(end).getTime();
  // +1 because both endpoints belong to the range.
  return Math.round((endMs - startMs) / MS_PER_DAY) + 1;
}
|
|
506
|
+
/**
 * Resolve a named date-window preset into concrete start/end dates, with an
 * optional comparison window.
 *
 * Changes from the previous version:
 * - An unknown or missing `preset` now throws a descriptive error. Previously
 *   the switch had no default, so `start`/`end` stayed undefined and the
 *   function returned `{ start: undefined, end: undefined, days: NaN }`.
 * - The six rolling "last-Nd" presets are driven by one table instead of six
 *   copy-pasted cases.
 *
 * @param {object} opts
 * @param {string} opts.preset - One of "last-7d", "last-28d", "last-30d",
 *   "last-90d", "last-180d", "last-365d", "mtd", "ytd", "custom".
 * @param {string} [opts.anchor] - `YYYY-MM-DD` end date for non-custom presets;
 *   defaults to the current time.
 * @param {string} [opts.start] - Start date, required for "custom".
 * @param {string} [opts.end] - End date, required for "custom".
 * @param {"none"|"prev-period"|"yoy"} [opts.comparison="none"] - Comparison mode.
 * @returns {{ start: string, end: string, days: number, comparison?: { start: string, end: string } }}
 * @throws {Error} For an unknown preset, or "custom" without both start and end.
 */
function resolveWindow(opts) {
  // Inclusive length, in days, of each rolling preset.
  const rollingPresetDays = /* @__PURE__ */ new Map([
    ["last-7d", 7],
    ["last-28d", 28],
    ["last-30d", 30],
    ["last-90d", 90],
    ["last-180d", 180],
    ["last-365d", 365]
  ]);
  const anchor = opts.anchor ? parseIso(opts.anchor) : /* @__PURE__ */ new Date();
  const anchorIso = toIsoDate(anchor);
  let start;
  let end;
  const rollingDays = rollingPresetDays.get(opts.preset);
  if (rollingDays !== void 0) {
    end = anchorIso;
    // The window is inclusive of the anchor day, hence N - 1 days back.
    start = toIsoDate(addDays(anchor, -(rollingDays - 1)));
  } else {
    switch (opts.preset) {
      case "mtd":
        end = anchorIso;
        start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), anchor.getUTCMonth(), 1)));
        break;
      case "ytd":
        end = anchorIso;
        start = toIsoDate(new Date(Date.UTC(anchor.getUTCFullYear(), 0, 1)));
        break;
      case "custom":
        if (!opts.start || !opts.end) throw new Error("resolveWindow: preset=custom requires start and end");
        start = opts.start;
        end = opts.end;
        break;
      default:
        throw new Error(`resolveWindow: unknown preset "${String(opts.preset)}"`);
    }
  }
  const days = daysBetween(start, end);
  const result = {
    start,
    end,
    days
  };
  const mode = opts.comparison ?? "none";
  if (mode === "prev-period") {
    // Same-length window ending the day before the current one starts.
    const prevEnd = toIsoDate(addDays(parseIso(start), -1));
    result.comparison = {
      start: toIsoDate(addDays(parseIso(prevEnd), -(days - 1))),
      end: prevEnd
    };
  } else if (mode === "yoy") {
    // Year-over-year is a fixed 365-day shift of both bounds.
    const prevEnd = toIsoDate(addDays(parseIso(end), -365));
    result.comparison = {
      start: toIsoDate(addDays(parseIso(start), -365)),
      end: prevEnd
    };
  }
  return result;
}
|
|
572
|
+
/**
 * Convert a resolved window into the `{ startDate, endDate }` period shape.
 *
 * @param {{ start: string, end: string }} w - Resolved window.
 * @returns {{ startDate: string, endDate: string }}
 */
function windowToPeriod(w) {
  const { start: startDate, end: endDate } = w;
  return { startDate, endDate };
}
|
|
578
|
+
/**
 * Convert a resolved window with a comparison into current/previous periods.
 *
 * @param {{ start: string, end: string, comparison?: { start: string, end: string } }} w
 * @returns {{ current: object, previous: object } | undefined} Both periods,
 *   or undefined when the window has no comparison.
 */
function windowToComparisonPeriod(w) {
  const { comparison } = w;
  if (!comparison) return void 0;
  const current = { startDate: w.start, endDate: w.end };
  const previous = { startDate: comparison.start, endDate: comparison.end };
  return { current, previous };
}
|
|
591
|
+
// Metric values used for days that have no row.
const DEFAULT_FILL = { clicks: 0, impressions: 0, ctr: 0, position: 0 };

/**
 * Produce a gap-free daily series over an inclusive date range.
 *
 * Existing rows are emitted grouped by day in calendar order; days without
 * rows get one synthetic row built from `fill`. Rows dated outside the range
 * are omitted.
 *
 * @param {Iterable<object>} rows - Source rows carrying a date field.
 * @param {object} options
 * @param {string} options.startDate - First day (`YYYY-MM-DD`).
 * @param {string} options.endDate - Last day (`YYYY-MM-DD`).
 * @param {string} [options.dateKey="date"] - Name of the date field.
 * @param {object} [options.fill] - Values for synthetic rows (defaults to zeros).
 * @returns {object[]} Padded rows.
 * @throws {Error} When either bound is not a parseable date.
 */
function padTimeseries(rows, options) {
  const { startDate, endDate } = options;
  const dateKey = options.dateKey ?? "date";
  const fill = options.fill ?? DEFAULT_FILL;

  // Group rows by their stringified date, preserving input order within a day.
  const rowsByDay = /* @__PURE__ */ new Map();
  for (const row of rows) {
    const day = String(row[dateKey]);
    const group = rowsByDay.get(day);
    if (group) group.push(row);
    else rowsByDay.set(day, [row]);
  }

  const padded = [];
  const firstMs = (/* @__PURE__ */ new Date(`${startDate}T00:00:00Z`)).getTime();
  const lastMs = (/* @__PURE__ */ new Date(`${endDate}T00:00:00Z`)).getTime();
  if (Number.isNaN(firstMs) || Number.isNaN(lastMs)) {
    throw new Error(`padTimeseries: invalid date range ${startDate}..${endDate}`);
  }

  let cursorMs = firstMs;
  while (cursorMs <= lastMs) {
    const day = toIsoDate(new Date(cursorMs));
    const group = rowsByDay.get(day);
    if (group) padded.push(...group);
    else padded.push({ ...fill, [dateKey]: day });
    cursorMs += MS_PER_DAY;
  }
  return padded;
}
|
|
623
|
+
/**
 * Coerce a loosely typed value to a JavaScript number.
 *
 * @param {unknown} v - Number, bigint, null/undefined, or anything `Number()` accepts.
 * @returns {number} Numbers pass through, bigints are converted, null and
 *   undefined become 0, everything else goes through `Number()` (may be NaN).
 */
function num(v) {
  switch (typeof v) {
    case "number":
      return v;
    case "bigint":
      return Number(v);
    default:
      return v == null ? 0 : Number(v);
  }
}
|
|
629
|
+
/**
 * Index rows into a Map using caller-supplied key and value extractors.
 *
 * @param {Iterable<object>} rows - Rows to index.
 * @param {(row: object) => unknown} key - Produces the map key.
 * @param {(row: object) => unknown} value - Produces the stored value.
 * @param {(row: object) => boolean} [filter] - When given, rows for which it
 *   returns a falsy value are left out.
 * @returns {Map<unknown, unknown>} Map in which later rows overwrite earlier
 *   rows sharing a key.
 */
function buildPeriodMap(rows, key, value, filter) {
  const index = /* @__PURE__ */ new Map();
  for (const row of rows) {
    const accepted = !filter || filter(row);
    if (accepted) index.set(key(row), value(row));
  }
  return index;
}
|
|
637
|
+
/**
 * Create a non-mutating sorter that orders items by a numeric metric.
 *
 * @param {(item: object, metric: string) => number} getValue - Reads a metric from an item.
 * @param {string} defaultMetric - Metric used when the caller passes none.
 * @param {"asc"|"desc"} [defaultOrder="desc"] - Order used when the caller passes none.
 * @returns {(items: Iterable<object>, sortBy?: string, sortOrder?: string) => object[]}
 *   Sorter returning a sorted copy; any order other than "desc" sorts ascending.
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
  return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
    const direction = sortOrder === "desc" ? -1 : 1;
    const sorted = [...items];
    sorted.sort((left, right) => {
      const difference = getValue(left, sortBy) - getValue(right, sortBy);
      return difference * direction;
    });
    return sorted;
  };
}
|
|
643
|
+
/**
 * Create a non-mutating sorter whose direction depends on the chosen metric.
 *
 * @param {string} defaultMetric - Property used when the caller passes none.
 * @param {Record<string, "asc"|"desc">} orderByMetric - Direction per metric;
 *   metrics not mapped to "desc" sort ascending.
 * @returns {(items: Iterable<object>, sortBy?: string) => object[]} Sorter
 *   returning a sorted copy ordered by the numeric property `sortBy`.
 */
function createMetricSorter(defaultMetric, orderByMetric) {
  return (items, sortBy = defaultMetric) => {
    const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
    const sorted = [...items];
    sorted.sort((left, right) => {
      const difference = left[sortBy] - right[sortBy];
      return difference * direction;
    });
    return sorted;
  };
}
|
|
649
|
+
/**
 * Escape regular-expression metacharacters so a string matches literally.
 *
 * @param {string} s - Raw text.
 * @returns {string} Text with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegexAlt(s) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, (character) => `\\${character}`);
}
|
|
652
|
+
/**
 * Stringify a value, mapping null and undefined to the empty string.
 *
 * @param {unknown} v - Any value.
 * @returns {string} `""` for null/undefined, otherwise `String(v)`.
 */
function str$6(v) {
  if (v == null) return "";
  return String(v);
}
|
|
655
|
+
/**
 * Split keyword rows into brand and non-brand segments and total each side.
 *
 * A row is "brand" when its lower-cased query contains any lower-cased brand
 * term. Rows below the impression threshold are ignored entirely.
 *
 * @param {Iterable<object>} keywords - Rows with `query`, `clicks`, `impressions`.
 * @param {object} options
 * @param {string[]} options.brandTerms - Terms identifying branded queries.
 * @param {number} [options.minImpressions=10] - Minimum impressions for a row to count.
 * @returns {{ brand: object[], nonBrand: object[], summary: object }} Segmented
 *   rows plus click/impression totals and the brand share of clicks (0 when
 *   there are no clicks).
 */
function analyzeBrandSegmentation(keywords, options) {
  const { brandTerms, minImpressions = 10 } = options;
  const needles = brandTerms.map((term) => term.toLowerCase());
  const isBranded = (row) => needles.some((needle) => row.query.toLowerCase().includes(needle));
  const sumOf = (rows, field) => rows.reduce((total, row) => total + num(row[field]), 0);

  const brand = [];
  const nonBrand = [];
  for (const row of keywords) {
    if (num(row.impressions) < minImpressions) continue;
    (isBranded(row) ? brand : nonBrand).push(row);
  }

  const brandClicks = sumOf(brand, "clicks");
  const nonBrandClicks = sumOf(nonBrand, "clicks");
  const totalClicks = brandClicks + nonBrandClicks;
  return {
    brand,
    nonBrand,
    summary: {
      brandClicks,
      nonBrandClicks,
      brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
      brandImpressions: sumOf(brand, "impressions"),
      nonBrandImpressions: sumOf(nonBrand, "impressions")
    }
  };
}
|
|
680
|
+
/**
 * Guard shared by every brand-analyzer entry point.
 *
 * @param {{ brandTerms?: string[] }} params - Analysis parameters.
 * @throws {Error} When `brandTerms` is missing or empty.
 */
function requireBrandTerms(params) {
  if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
}

/**
 * Brand vs. non-brand segmentation analyzer.
 *
 * SQL variant: aggregates per query/url inside the date range, keeps groups
 * meeting the impression threshold, and tags each row via a case-insensitive
 * regex alternation of the escaped brand terms.
 * Rows variant: fetches keyword rows and segments them in JavaScript.
 *
 * Changes from the previous version:
 * - `buildRows` validates `brandTerms` up front, as `buildSql` already did, so
 *   a misconfigured call fails before the query runs instead of afterwards in
 *   `reduceRows`.
 * - Rows-path `meta` now carries `total`, matching the SQL path.
 */
const brandAnalyzer = defineAnalyzer({
  id: "brand",
  buildSql(params) {
    requireBrandTerms(params);
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const limit = params.limit ?? 1e4;
    // Terms are regex-escaped and lower-cased; the SQL lower-cases the query to match.
    const regex = `(${params.brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
    return {
      // LIMIT is interpolated (coerced through Number); everything else is bound.
      sql: `
WITH agg AS (
SELECT
query,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
)
SELECT
query, page, clicks, impressions, ctr, position,
CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
FROM agg
ORDER BY clicks DESC
LIMIT ${Number(limit)}
`,
      params: [
        startDate,
        endDate,
        minImpressions,
        regex
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      query: str$6(r.query),
      page: r.page == null ? void 0 : str$6(r.page),
      clicks: num(r.clicks),
      impressions: num(r.impressions),
      ctr: num(r.ctr),
      position: num(r.position),
      segment: str$6(r.segment)
    }));
    let brandClicks = 0;
    let nonBrandClicks = 0;
    let brandImpressions = 0;
    let nonBrandImpressions = 0;
    for (const r of normalized) {
      if (r.segment === "brand") {
        brandClicks += r.clicks;
        brandImpressions += r.impressions;
      } else {
        nonBrandClicks += r.clicks;
        nonBrandImpressions += r.impressions;
      }
    }
    const totalClicks = brandClicks + nonBrandClicks;
    return {
      results: normalized,
      meta: {
        total: normalized.length,
        summary: {
          brandClicks,
          nonBrandClicks,
          brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
          brandImpressions,
          nonBrandImpressions
        }
      }
    };
  },
  buildRows(params) {
    // Fail before issuing the query, mirroring buildSql.
    requireBrandTerms(params);
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  reduceRows(rows, params) {
    // Kept here too: the reducer can be invoked without going through buildRows.
    requireBrandTerms(params);
    const result = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
      brandTerms: params.brandTerms,
      minImpressions: params.minImpressions
    });
    const results = [
      ...result.brand.map((r) => ({ ...r, segment: "brand" })),
      ...result.nonBrand.map((r) => ({ ...r, segment: "non-brand" }))
    ];
    return {
      results,
      meta: {
        total: results.length,
        summary: result.summary
      }
    };
  }
});
|
|
779
|
+
/**
 * Sorter for cannibalization results. The accessor maps a metric name to the
 * value read from a result item; "clicks" is passed as the second argument to
 * createSorter (presumably the default metric — confirm against createSorter).
 * Unknown metric names yield `undefined`.
 */
const sortRowResults$1 = createSorter((item, metric) => {
  if (metric === "clicks") return item.totalClicks;
  if (metric === "impressions") return item.totalImpressions;
  if (metric === "positionSpread") return item.positionSpread;
  if (metric === "pageCount") return item.pages.length;
}, "clicks");
|
|
787
|
+
/**
 * Coerce a raw cell value to a string.
 * @param {unknown} v - Any value.
 * @returns {string} "" for null/undefined, otherwise `String(v)`.
 */
function str$5(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
790
|
+
/**
 * Normalise a value that is either already an array or a JSON-encoded array.
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {Array} The array itself, the decoded array, or [] for every other
 *   input (including JSON that decodes to a non-array).
 * @throws {SyntaxError} If a non-empty string is not valid JSON.
 */
function parseJsonList$4(v) {
  if (Array.isArray(v)) return v;
  const isNonEmptyString = typeof v === "string" && v.length > 0;
  if (!isNonEmptyString) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
798
|
+
/**
 * Find queries for which several pages rank close to each other.
 *
 * Rows below `minImpressions` are ignored. A query is reported when it has at
 * least `minPages` remaining pages and the gap between its best and worst
 * position does not exceed `maxPositionSpread`.
 *
 * @param {Array<{query: string, page: string, clicks: number, impressions: number, ctr: number, position: number}>} rows
 * @param {object} [options]
 * @param {number} [options.minImpressions=10]
 * @param {number} [options.maxPositionSpread=10]
 * @param {number} [options.minPages=2]
 * @param {string} [options.sortBy="clicks"]
 * @param {string} [options.sortOrder="desc"]
 * @returns Whatever `sortRowResults$1` returns for the collected results.
 */
function analyzeCannibalization(rows, options = {}) {
  const {
    minImpressions = 10,
    maxPositionSpread = 10,
    minPages = 2,
    sortBy = "clicks",
    sortOrder = "desc"
  } = options;

  // Group the qualifying rows by query, keeping first-seen query order.
  const pagesByQuery = /* @__PURE__ */ new Map();
  for (const { query, page, clicks, impressions, ctr, position } of rows) {
    if (impressions < minImpressions) continue;
    let bucket = pagesByQuery.get(query);
    if (!bucket) {
      bucket = [];
      pagesByQuery.set(query, bucket);
    }
    bucket.push({ page, clicks, impressions, ctr, position });
  }

  const found = [];
  pagesByQuery.forEach((pages, query) => {
    if (pages.length < minPages) return;
    pages.sort((left, right) => right.clicks - left.clicks);

    let best = Infinity;
    let worst = -Infinity;
    let totalClicks = 0;
    let totalImpressions = 0;
    for (const entry of pages) {
      best = Math.min(best, entry.position);
      worst = Math.max(worst, entry.position);
      totalClicks += entry.clicks;
      totalImpressions += entry.impressions;
    }
    const positionSpread = worst - best;
    if (positionSpread > maxPositionSpread) return;

    found.push({ query, pages, totalClicks, totalImpressions, positionSpread });
  });
  return sortRowResults$1(found, sortBy, sortOrder);
}
|
|
830
|
+
/**
 * Keyword-cannibalization analyzer.
 *
 * SQL path: `buildSql` produces one query over the "page_keywords" table that
 * emits one row per competing query (leader page, HHI, fragmentation,
 * stolen-click estimate, severity, JSON list of competitors); `reduceSql`
 * decodes those rows and derives a page co-competition graph.
 * Row path: `buildRows` / `reduceRows` delegate to `analyzeCannibalization`.
 */
const cannibalizationAnalyzer = defineAnalyzer({
  id: "cannibalization",
  /**
   * Build the SQL plan.
   * @param {object} params - Reads `minImpressions` (default 50) and `limit`
   *   (default 200); the date range comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const minCompetitors = 2;
    // A query as a whole must reach twice the per-page impression floor.
    const minQueryImpressions = (params.minImpressions ?? 50) * 2;
    const limit = params.limit ?? 200;
    return {
      sql: `
WITH agg AS (
SELECT
query,
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
AND query IS NOT NULL AND query <> ''
AND url IS NOT NULL AND url <> ''
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
query_totals AS (
SELECT
query,
SUM(impressions) AS total_impressions,
SUM(clicks) AS total_clicks,
COUNT(*) AS competitor_count
FROM agg
GROUP BY query
HAVING COUNT(*) >= ? AND SUM(impressions) >= ?
),
ranked AS (
SELECT
a.query,
a.url,
a.clicks,
a.impressions,
a.ctr,
a.position,
a.impressions / NULLIF(t.total_impressions, 0) AS share,
ROW_NUMBER() OVER (
PARTITION BY a.query
ORDER BY a.impressions DESC, a.clicks DESC, a.url ASC
) AS rnk
FROM agg a
JOIN query_totals t USING (query)
),
leader AS (
SELECT query, url AS leader_url, ctr AS leader_ctr, position AS leader_position
FROM ranked WHERE rnk = 1
),
events AS (
SELECT
r.query,
any_value(l.leader_url) AS leader_url,
any_value(l.leader_ctr) AS leader_ctr,
any_value(l.leader_position) AS leader_position,
SUM(POWER(r.share * 100.0, 2)) AS hhi,
SUM(CASE
WHEN r.rnk > 1 AND l.leader_ctr > r.ctr
THEN (l.leader_ctr - r.ctr) * r.impressions
ELSE 0.0
END) AS stolen_clicks,
to_json(list({
'url': r.url,
'clicks': r.clicks,
'impressions': r.impressions,
'ctr': r.ctr,
'position': r.position,
'share': r.share,
'rank': r.rnk
} ORDER BY r.rnk)) AS competitors
FROM ranked r
JOIN leader l USING (query)
GROUP BY r.query
)
SELECT
e.query AS keyword,
t.total_impressions AS totalImpressions,
t.total_clicks AS totalClicks,
t.competitor_count AS competitorCount,
e.leader_url AS leaderUrl,
e.leader_ctr AS leaderCtr,
e.leader_position AS leaderPosition,
e.hhi AS hhi,
GREATEST(0.0, 1.0 - e.hhi / 10000.0) AS fragmentation,
e.stolen_clicks AS stolenClicks,
e.competitors AS competitors,
CAST(ROUND(LEAST(100.0,
100.0 * POWER(
GREATEST(1.0 - e.hhi / 10000.0, 0.0)
* LEAST(e.stolen_clicks / GREATEST(t.total_clicks + e.stolen_clicks, 1.0), 1.0)
* LEAST(LOG10(GREATEST(t.total_impressions, 10.0)) / 5.0, 1.0),
1.0 / 3.0
)
)) AS DOUBLE) AS severity
FROM events e
JOIN query_totals t USING (query)
ORDER BY severity DESC, stolenClicks DESC
LIMIT ${Number(limit)}
`,
      // Positional bindings, in the order the `?` placeholders appear above:
      // date range (agg), per-page floor (agg HAVING), then the two
      // query_totals HAVING thresholds.
      params: [
        startDate,
        endDate,
        minImpressions,
        minCompetitors,
        minQueryImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decode SQL rows into events and build the competition graph.
   * @param {Array<object>} rows - Rows produced by the `buildSql` query;
   *   anything that is not an array is treated as empty.
   * @returns {{results: Array<object>, meta: {total: number, totalStolenClicks: number, avgFragmentation: number, graph: {nodes: Array<object>, edges: Array<object>}}}}
   */
  reduceSql(rows) {
    const events = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$5(r.keyword),
      totalImpressions: num(r.totalImpressions),
      totalClicks: num(r.totalClicks),
      competitorCount: num(r.competitorCount),
      leaderUrl: str$5(r.leaderUrl),
      leaderCtr: num(r.leaderCtr),
      leaderPosition: num(r.leaderPosition),
      hhi: num(r.hhi),
      fragmentation: num(r.fragmentation),
      stolenClicks: num(r.stolenClicks),
      severity: num(r.severity),
      competitors: parseJsonList$4(r.competitors).map((c) => ({
        url: str$5(c.url),
        clicks: num(c.clicks),
        impressions: num(c.impressions),
        ctr: num(c.ctr),
        position: num(c.position),
        share: num(c.share),
        rank: num(c.rank)
      }))
    }));
    const nodeAgg = /* @__PURE__ */ new Map();
    const edgeAgg = /* @__PURE__ */ new Map();
    for (const ev of events) {
      // Node totals: one node per URL, accumulated across every query it competes on.
      for (const c of ev.competitors) {
        const n = nodeAgg.get(c.url) ?? {
          impressions: 0,
          clicks: 0,
          queries: /* @__PURE__ */ new Set()
        };
        n.impressions += c.impressions;
        n.clicks += c.clicks;
        n.queries.add(ev.keyword);
        nodeAgg.set(c.url, n);
      }
      // Edges: one undirected edge per unordered pair of URLs sharing a query.
      for (let i = 0; i < ev.competitors.length; i++) {
        for (let j = i + 1; j < ev.competitors.length; j++) {
          const a = ev.competitors[i];
          const b = ev.competitors[j];
          const [src, tgt] = a.url < b.url ? [a.url, b.url] : [b.url, a.url];
          // A separator is required: plain concatenation maps ("a","bc") and
          // ("ab","c") to the same key and would merge unrelated edges.
          const key = `${src}\u0000${tgt}`;
          const weight = Math.min(a.impressions, b.impressions);
          const edge = edgeAgg.get(key) ?? {
            source: src,
            target: tgt,
            weight: 0,
            queries: 0
          };
          edge.weight += weight;
          edge.queries += 1;
          edgeAgg.set(key, edge);
        }
      }
    }
    const nodes = [...nodeAgg.entries()].map(([url, n]) => ({
      url,
      impressions: n.impressions,
      clicks: n.clicks,
      queryCount: n.queries.size
    }));
    const edges = [...edgeAgg.values()];
    const avgFragmentation = events.length > 0 ? events.reduce((s, e) => s + e.fragmentation, 0) / events.length : 0;
    const totalStolenClicks = events.reduce((s, e) => s + e.stolenClicks, 0);
    return {
      results: events,
      meta: {
        total: events.length,
        totalStolenClicks,
        avgFragmentation,
        graph: {
          nodes,
          edges
        }
      }
    };
  },
  /** Row-path plan: a single keywords query state for the analysis period. */
  buildRows(params) {
    return { rows: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Row-path reducer; non-array input is treated as empty.
   * @returns {{results: *, meta: {total: number}}}
   */
  reduceRows(rows, params) {
    const results = analyzeCannibalization(Array.isArray(rows) ? rows : [], {
      minImpressions: params.minImpressions,
      maxPositionSpread: params.maxPositionSpread,
      minPages: params.minPages
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
|
|
1039
|
+
// Search-intent markers recognised at the start of a query, in match-priority order.
const INTENT_PREFIXES = [
  "how to",
  "what is",
  "what are",
  "why is",
  "why do",
  "where to",
  "when to",
  "best",
  "top",
  "vs",
  "versus",
  "compare",
  "review",
  "buy",
  "cheap",
  "free",
  "near me"
];
// Regex source equivalent of the list above: an intent prefix anchored at the
// start of the string and followed by whitespace or end-of-string. None of the
// prefixes contains a regex metacharacter, so they are joined unescaped.
const INTENT_PREFIXES_REGEX = `^(${INTENT_PREFIXES.join("|")})(\\s|$)`;
// Splits a string on runs of whitespace.
const WHITESPACE_RE$1 = /\s+/;
|
|
1060
|
+
/**
 * Stringify a raw value, mapping null and undefined to the empty string.
 * @param {unknown} v
 * @returns {string}
 */
function str$4(v) {
  const missing = v === null || v === undefined;
  return missing ? "" : String(v);
}
|
|
1063
|
+
/**
 * Return `v` as an array: arrays pass through untouched, non-empty strings are
 * JSON-decoded (a non-array payload yields []), everything else yields [].
 * @param {unknown} v
 * @returns {Array}
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$3(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const payload = JSON.parse(v);
  if (Array.isArray(payload)) return payload;
  return [];
}
|
|
1071
|
+
/**
 * Return the first intent prefix (from INTENT_PREFIXES, in list order) that
 * the keyword starts with as a whole phrase, or null when none matches.
 *
 * The prefix must be followed by whitespace or by the end of the keyword,
 * which is the same boundary INTENT_PREFIXES_REGEX expresses as `(\s|$)`.
 * A bare `startsWith(prefix)` would also accept "bestiary", "topology" or
 * "vscode" and disagree with the SQL classification.
 *
 * @param {string} keyword - Search query; compared case-insensitively.
 * @returns {string|null} The matched prefix, or null.
 */
function extractIntentPrefix(keyword) {
  const lower = keyword.toLowerCase();
  for (const prefix of INTENT_PREFIXES) {
    if (!lower.startsWith(prefix)) continue;
    // `undefined` here means the keyword is exactly the prefix.
    const next = lower[prefix.length];
    if (next === undefined || /\s/.test(next)) return prefix;
  }
  return null;
}
|
|
1076
|
+
/**
 * Take the first `wordCount` lower-cased words of a keyword as a cluster key.
 * @param {string} keyword - Search query.
 * @param {number} [wordCount=2] - Number of leading words to keep.
 * @returns {string|null} The words joined by single spaces, or null when the
 *   keyword has no word beyond the prefix itself.
 */
function extractWordPrefix(keyword, wordCount = 2) {
  const tokens = keyword
    .toLowerCase()
    .split(WHITESPACE_RE$1)
    .filter((token) => token.length > 0);
  const hasTrailingWord = !(tokens.length < wordCount + 1);
  return hasTrailingWord ? tokens.slice(0, wordCount).join(" ") : null;
}
|
|
1081
|
+
/**
 * Group keywords into clusters by intent prefix and/or leading-word prefix.
 *
 * Keywords below `minImpressions` are dropped first. In "intent"/"both" mode
 * every keyword with an intent prefix joins that intent's cluster. In
 * "prefix"/"both" mode the keywords not yet claimed are grouped by
 * `extractWordPrefix`, and groups of at least `minClusterSize` become clusters.
 * Clusters smaller than `minClusterSize` are not reported.
 *
 * @param {Array<{query: string, clicks: *, impressions: *, position: *}>} keywords
 * @param {object} [options]
 * @param {number} [options.minClusterSize=2]
 * @param {number} [options.minImpressions=10]
 * @param {"intent"|"prefix"|"both"} [options.clusterBy="both"]
 * @returns {{clusters: Array<object>, unclustered: Array<object>}} Clusters
 *   sorted by total clicks (descending) and the keywords no step claimed.
 */
function analyzeClustering(keywords, options = {}) {
  const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
  const useIntent = clusterBy === "intent" || clusterBy === "both";
  const usePrefix = clusterBy === "prefix" || clusterBy === "both";

  const eligible = keywords.filter((kw) => num(kw.impressions) >= minImpressions);
  const groups = /* @__PURE__ */ new Map();
  const claimed = /* @__PURE__ */ new Set();
  const isUnclaimed = (kw) => !claimed.has(kw.query);

  if (useIntent) {
    for (const kw of eligible) {
      const intent = extractIntentPrefix(kw.query);
      if (!intent) continue;
      const group = groups.get(intent);
      if (group) {
        group.keywords.push(kw);
      } else {
        groups.set(intent, { type: "intent", keywords: [kw] });
      }
      // Claimed even if the intent cluster later turns out to be too small.
      claimed.add(kw.query);
    }
  }

  if (usePrefix) {
    const byPrefix = /* @__PURE__ */ new Map();
    for (const kw of eligible.filter(isUnclaimed)) {
      const prefix = extractWordPrefix(kw.query);
      if (!prefix) continue;
      const members = byPrefix.get(prefix);
      if (members) members.push(kw);
      else byPrefix.set(prefix, [kw]);
    }
    byPrefix.forEach((members, prefix) => {
      if (members.length < minClusterSize) return;
      groups.set(prefix, { type: "prefix", keywords: members });
      for (const kw of members) claimed.add(kw.query);
    });
  }

  const clusters = [];
  groups.forEach(({ type, keywords: members }, name) => {
    if (members.length < minClusterSize) return;
    let totalClicks = 0;
    let totalImpressions = 0;
    let positionSum = 0;
    for (const kw of members) {
      totalClicks += num(kw.clicks);
      totalImpressions += num(kw.impressions);
      positionSum += num(kw.position);
    }
    clusters.push({
      clusterName: name,
      clusterType: type,
      keywords: members,
      totalClicks,
      totalImpressions,
      avgPosition: positionSum / members.length,
      keywordCount: members.length
    });
  });
  clusters.sort((left, right) => right.totalClicks - left.totalClicks);

  return {
    clusters,
    unclustered: eligible.filter(isUnclaimed)
  };
}
|
|
1139
|
+
/**
 * Keyword-clustering analyzer.
 *
 * SQL path: `buildSql` classifies each aggregated query by intent prefix
 * (INTENT_PREFIXES_REGEX) and/or its first two words, then groups by the
 * resulting cluster name; `reduceSql` decodes the rows.
 * Row path: `buildRows` / `reduceRows` delegate to `analyzeClustering`.
 * Both reducers return `meta.total` and `meta.totalClusters`.
 */
const clusteringAnalyzer = defineAnalyzer({
  id: "clustering",
  /**
   * Build the SQL plan.
   * @param {object} params - Reads `minImpressions` (default 10),
   *   `minClusterSize` (default 2) and `clusterBy` (default "both"); the date
   *   range comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const minClusterSize = params.minClusterSize ?? 2;
    const clusterBy = params.clusterBy ?? "both";
    const doIntent = clusterBy === "intent" || clusterBy === "both";
    const doPrefix = clusterBy === "prefix" || clusterBy === "both";
    // A disabled mode contributes a typed NULL so the COALESCE below still works.
    const intentExpr = doIntent ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')` : `CAST(NULL AS VARCHAR)`;
    // Word prefix = first two words, only for queries of three or more words.
    const prefixExpr = doPrefix ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
ELSE CAST(NULL AS VARCHAR) END` : `CAST(NULL AS VARCHAR)`;
    return {
      sql: `
WITH agg AS (
SELECT
query,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query
HAVING SUM(impressions) >= ?
),
classified AS (
SELECT
query, clicks, impressions, ctr, position,
${intentExpr} AS intent_prefix,
${prefixExpr} AS word_prefix
FROM agg
),
keyed AS (
SELECT
query, clicks, impressions, ctr, position,
COALESCE(intent_prefix, word_prefix) AS cluster_name,
CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
FROM classified
WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
)
SELECT
cluster_name AS clusterName,
any_value(cluster_type) AS clusterType,
CAST(COUNT(*) AS DOUBLE) AS keywordCount,
${METRIC_EXPR.clicks} AS totalClicks,
${METRIC_EXPR.impressions} AS totalImpressions,
AVG(position) AS avgPosition,
to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
FROM keyed
GROUP BY cluster_name
HAVING COUNT(*) >= ?
ORDER BY totalClicks DESC
`,
      // Positional bindings: date range, impression floor, minimum cluster size.
      params: [
        startDate,
        endDate,
        minImpressions,
        minClusterSize
      ],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decode SQL rows into cluster objects; non-array input is treated as empty.
   * @returns {{results: Array<object>, meta: {total: number, totalClusters: number}}}
   */
  reduceSql(rows) {
    const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
      clusterName: str$4(r.clusterName),
      clusterType: str$4(r.clusterType),
      keywordCount: num(r.keywordCount),
      totalClicks: num(r.totalClicks),
      totalImpressions: num(r.totalImpressions),
      avgPosition: num(r.avgPosition),
      keywords: parseJsonList$3(r.keywords).map((k) => ({
        query: str$4(k.query),
        clicks: num(k.clicks),
        impressions: num(k.impressions),
        ctr: num(k.ctr),
        position: num(k.position)
      }))
    }));
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalClusters: clusters.length
      }
    };
  },
  /** Row-path plan: a single keywords query state for the analysis period. */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Row-path reducer; non-array input is treated as empty.
   * @returns {{results: Array<object>, meta: {total: number, totalClusters: number}}}
   */
  reduceRows(rows, params) {
    const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
      clusterBy: params.clusterBy,
      minClusterSize: params.minClusterSize,
      minImpressions: params.minImpressions
    });
    return {
      results: result.clusters,
      // `total` mirrors reduceSql so consumers can read the same meta shape on both paths.
      meta: {
        total: result.clusters.length,
        totalClusters: result.clusters.length
      }
    };
  }
});
|
|
1245
|
+
/**
 * Convert a value to a string; null and undefined become "".
 * @param {unknown} v
 * @returns {string}
 */
function str$3(v) {
  if (v === undefined || v === null) {
    return "";
  }
  return String(v);
}
|
|
1248
|
+
/**
 * Coerce a JSON-list column to an array.
 * @param {unknown} v - An array (returned as is), a JSON string, or other.
 * @returns {Array} [] unless `v` is an array or decodes to one.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) return v;
  const decodable = typeof v === "string" && v.length > 0;
  const decoded = decodable ? JSON.parse(v) : null;
  return Array.isArray(decoded) ? decoded : [];
}
|
|
1256
|
+
/**
 * Gini coefficient of a list of numbers, computed from the rank-weighted sum
 * of the ascending-sorted values: sum((2*rank - n - 1) * value) / (n * sum).
 * @param {number[]} values - Input values; not modified.
 * @returns {number} 0 for an empty list or when the values sum to 0.
 */
function calculateGini(values) {
  const count = values.length;
  if (count === 0) return 0;

  const ascending = Array.from(values).sort((left, right) => left - right);
  let total = 0;
  for (const value of ascending) total += value;
  if (total === 0) return 0;

  let weighted = 0;
  ascending.forEach((value, index) => {
    // `index + 1` is the 1-based rank of the value in ascending order.
    weighted += (2 * (index + 1) - count - 1) * value;
  });
  return weighted / (count * total);
}
|
|
1266
|
+
/**
 * Herfindahl-Hirschman index: the sum of squared percentage shares.
 * @param {number[]} shares - Fractions (0.25 means 25%).
 * @returns {number} 0 for an empty list; 10000 for a single share of 1.
 */
function calculateHHI(shares) {
  let index = 0;
  for (const share of shares) {
    index = index + (share * 100) ** 2;
  }
  return index;
}
|
|
1269
|
+
/**
 * Measure how concentrated clicks are across a set of keyed items.
 *
 * @param {Array<{key: *, clicks: number}>} items - Not modified.
 * @param {object} [options]
 * @param {number} [options.topN=10] - Size of the reported head of the list.
 * @returns {{giniCoefficient: number, hhi: number, topNConcentration: number, topNItems: Array<{key: *, clicks: number, share: number}>, totalItems: number, totalClicks: number, riskLevel: "low"|"medium"|"high"}}
 *   `riskLevel` is "high" above an HHI of 2500, "medium" above 1500, else "low".
 *   An empty input yields all-zero metrics with "low" risk.
 */
function analyzeConcentration(items, options = {}) {
  const { topN = 10 } = options;
  if (items.length === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low"
    };
  }

  const byClicksDesc = [...items].sort((left, right) => right.clicks - left.clicks);
  let totalClicks = 0;
  for (const entry of byClicksDesc) totalClicks += entry.clicks;
  const hasClicks = totalClicks > 0;

  const giniCoefficient = calculateGini(byClicksDesc.map((entry) => entry.clicks));
  // With no clicks at all there are no shares, so the HHI is 0.
  const hhi = calculateHHI(hasClicks ? byClicksDesc.map((entry) => entry.clicks / totalClicks) : []);

  const topNItems = byClicksDesc.slice(0, topN).map(({ key, clicks }) => ({
    key,
    clicks,
    share: hasClicks ? clicks / totalClicks : 0
  }));
  let topNClicks = 0;
  for (const entry of topNItems) topNClicks += entry.clicks;
  const topNConcentration = hasClicks ? topNClicks / totalClicks : 0;

  const riskLevel = hhi > 2500 ? "high" : hhi > 1500 ? "medium" : "low";

  return {
    giniCoefficient,
    hhi,
    topNConcentration,
    topNItems,
    totalItems: items.length,
    totalClicks,
    riskLevel
  };
}
|
|
1306
|
+
/**
 * Click concentration across pages: adapts page rows to `{ key, clicks }`
 * items (clicks passed through `num`) and delegates to analyzeConcentration.
 * @param {Array<{page: *, clicks: *}>} pages
 * @param {object} [options] - Forwarded unchanged to analyzeConcentration.
 * @returns The analyzeConcentration result.
 */
function analyzePageConcentration(pages, options) {
  const items = pages.map(({ page, clicks }) => ({ key: page, clicks: num(clicks) }));
  return analyzeConcentration(items, options);
}
|
|
1312
|
+
/**
 * Click concentration across keywords: adapts keyword rows to
 * `{ key, clicks }` items (clicks passed through `num`) and delegates to
 * analyzeConcentration.
 * @param {Array<{query: *, clicks: *}>} keywords
 * @param {object} [options] - Forwarded unchanged to analyzeConcentration.
 * @returns The analyzeConcentration result.
 */
function analyzeKeywordConcentration(keywords, options) {
  const items = keywords.map(({ query, clicks }) => ({ key: query, clicks: num(clicks) }));
  return analyzeConcentration(items, options);
}
|
|
1318
|
+
/**
 * Traffic-concentration analyzer (Gini, HHI, top-N share) over pages or
 * keywords, selected by `params.dimension` ("pages" when absent or falsy).
 *
 * SQL path: `buildSql` computes every metric in a single-row query and
 * `reduceSql` decodes that row. Row path: `buildRows` / `reduceRows` delegate
 * to analyzePageConcentration / analyzeKeywordConcentration.
 * Both reducers return exactly one result and `meta.total === 1`.
 */
const concentrationAnalyzer = defineAnalyzer({
  id: "concentration",
  /**
   * Build the SQL plan.
   * @param {object} params - Reads `dimension` and `topN` (default 10); the
   *   date range comes from `periodOf(params)`.
   * @returns {{sql: string, params: Array, current: {table: string, partitions: *}}}
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const dim = params.dimension || "pages";
    const topN = params.topN ?? 10;
    // Any dimension other than "keywords" reads the pages table keyed by url.
    const table = dim === "keywords" ? "keywords" : "pages";
    const keyCol = dim === "keywords" ? "query" : "url";
    return {
      sql: `
WITH items AS (
SELECT
${keyCol} AS key,
${METRIC_EXPR.clicks} AS clicks
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY ${keyCol}
HAVING SUM(clicks) > 0
),
totals AS (
SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
),
ranked AS (
SELECT
i.key, i.clicks,
i.clicks / NULLIF(t.total_clicks, 0) AS share,
ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
t.total_clicks AS tclicks,
t.total_items AS titems
FROM items i, totals t
),
gini_num AS (
SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
),
hhi_calc AS (
SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
),
top_list AS (
SELECT
list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
SUM(clicks) AS top_clicks
FROM ranked WHERE rnk_desc <= ?
)
SELECT
COALESCE(
(SELECT weighted_sum FROM gini_num)
/ NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
0.0
) AS giniCoefficient,
COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
COALESCE(
CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
/ NULLIF((SELECT total_clicks FROM totals), 0),
0.0
) AS topNConcentration,
COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
CASE
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
ELSE 'low'
END AS riskLevel
`,
      // Positional bindings: date range (items), then the top-N rank cutoff.
      params: [
        startDate,
        endDate,
        topN
      ],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Decode the single SQL result row; a missing row decodes as an empty object.
   * @returns {{results: Array<object>, meta: {total: number, dimension: string}}}
   */
  reduceSql(rows, params) {
    const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
    const topRaw = parseJsonList$2(r.topNItems);
    return {
      results: [{
        giniCoefficient: num(r.giniCoefficient),
        hhi: num(r.hhi),
        topNConcentration: num(r.topNConcentration),
        topNItems: topRaw.map((t) => ({
          key: str$3(t.key),
          clicks: num(t.clicks),
          share: num(t.share)
        })),
        totalItems: num(r.totalItems),
        totalClicks: num(r.totalClicks),
        riskLevel: str$3(r.riskLevel)
      }],
      meta: {
        total: 1,
        dimension: params.dimension || "pages"
      }
    };
  },
  /** Row-path plan: one query state, keyed `pages` or `keywords` by dimension. */
  buildRows(params) {
    const dim = params.dimension || "pages";
    const period = periodOf(params);
    const out = {};
    if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
    else out.keywords = keywordsQueryState(period, params.limit);
    return out;
  },
  /**
   * Row-path reducer.
   * @param {Array<object>|object|null|undefined} rows - Either the row array
   *   itself or an object keyed by dimension; null/undefined is treated as
   *   empty instead of throwing on the property lookup.
   * @returns {{results: Array<object>, meta: {total: number, dimension: string}}}
   */
  reduceRows(rows, params) {
    const dim = params.dimension || "pages";
    const arr = Array.isArray(rows) ? rows : rows?.[dim] ?? [];
    const options = { topN: params.topN };
    const summary = dim === "pages" ? analyzePageConcentration(arr, options) : analyzeKeywordConcentration(arr, options);
    return {
      results: [summary],
      // `total` mirrors reduceSql so both paths expose the same meta shape.
      meta: {
        total: 1,
        dimension: dim
      }
    };
  }
});
|
|
1434
|
+
// Sorter for decay results, built by createMetricSorter. "lostClicks" is the
// first argument (presumably the default metric — confirm against
// createMetricSorter); the object pairs each sortable metric with a direction
// string ("desc" or "asc").
const sortResults$2 = createMetricSorter("lostClicks", {
  lostClicks: "desc",
  declinePercent: "desc",
  currentClicks: "asc"
});
|
|
1439
|
+
/**
 * String form of a raw value, with null/undefined collapsed to "".
 * @param {unknown} v
 * @returns {string}
 */
function str$2(v) {
  const present = v !== null && v !== undefined;
  return present ? String(v) : "";
}
|
|
1442
|
+
/**
 * Read a list that may arrive as an array or as a JSON string.
 * @param {unknown} v
 * @returns {Array} The array, the decoded array, or [] for anything else.
 * @throws {SyntaxError} When a non-empty string is not valid JSON.
 */
function parseJsonList$1(v) {
  if (Array.isArray(v)) {
    return v;
  }
  if (typeof v === "string" && v.length > 0) {
    const decoded = JSON.parse(v);
    if (Array.isArray(decoded)) return decoded;
  }
  return [];
}
|
|
1450
|
+
/**
 * Find pages whose clicks declined between two periods.
 *
 * Only pages with at least `minPreviousClicks` in the previous period are
 * considered. A page missing from the current period counts as 0 clicks at
 * position 0. A page is reported when it lost clicks and the relative decline
 * is at least `threshold`.
 *
 * @param {{current: Array<object>, previous: Array<object>}} input - Page rows
 *   (`page`, `clicks`, `position`) for each period.
 * @param {object} [options]
 * @param {number} [options.minPreviousClicks=50]
 * @param {number} [options.threshold=0.2] - Minimum decline as a fraction.
 * @param {string} [options.sortBy="lostClicks"]
 * @returns Whatever `sortResults$2` returns for the collected results.
 */
function analyzeDecay(input, options = {}) {
  const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
  const pageOf = (row) => row.page;
  const snapshotOf = (row) => ({
    clicks: num(row.clicks),
    position: num(row.position)
  });
  const currentByPage = buildPeriodMap(input.current, pageOf, snapshotOf);
  const previousByPage = buildPeriodMap(input.previous, pageOf, snapshotOf, (row) => num(row.clicks) >= minPreviousClicks);

  const declining = [];
  previousByPage.forEach((before, page) => {
    const now = currentByPage.get(page) || { clicks: 0, position: 0 };
    const lostClicks = before.clicks - now.clicks;
    const declinePercent = before.clicks > 0 ? lostClicks / before.clicks : 0;
    if (!(declinePercent >= threshold && lostClicks > 0)) return;
    declining.push({
      page,
      currentClicks: now.clicks,
      previousClicks: before.clicks,
      lostClicks,
      declinePercent,
      currentPosition: now.position,
      previousPosition: before.position,
      positionDrop: now.position - before.position
    });
  });
  return sortResults$2(declining, sortBy);
}
|
|
1481
|
+
// Content-decay analyzer: reports pages that lost clicks between the previous
// and the current comparison period (periods come from comparisonOf(params)).
// SQL path: buildSql + reduceSql; row path: buildRows + reduceRows, which
// delegates to analyzeDecay. Only the SQL path adds a weekly `series` per page.
const decayAnalyzer = defineAnalyzer({
  id: "decay",
  // Builds the SQL plan. Reads params.minPreviousClicks (default 50),
  // params.threshold (default 0.2) and params.limit (default 2000).
  // NOTE(review): {{FILES}} / {{FILES_PREV}} are presumably replaced by the
  // engine with the files for `current` / `previous` below — confirm.
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minPreviousClicks = params.minPreviousClicks ?? 50;
    const threshold = params.threshold ?? .2;
    const limit = params.limit ?? 2e3;
    return {
      sql: `
WITH cur AS (
SELECT
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY url
),
prev AS (
SELECT
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY url
HAVING SUM(clicks) >= ?
),
weekly AS (
SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions
FROM (
SELECT url, date, clicks, impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
UNION ALL
SELECT url, date, clicks, impressions
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
)
GROUP BY url, week
),
series_by_url AS (
SELECT url, to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM weekly GROUP BY url
),
joined AS (
SELECT
p.url AS page,
COALESCE(c.clicks, 0.0) AS currentClicks,
p.clicks AS previousClicks,
(p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
(p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
COALESCE(c.position, 0.0) AS currentPosition,
p.position AS previousPosition,
(COALESCE(c.position, 0.0) - p.position) AS positionDrop,
s.seriesJson
FROM prev p
LEFT JOIN cur c ON p.url = c.url
LEFT JOIN series_by_url s ON p.url = s.url
)
SELECT *
FROM joined
WHERE declinePercent >= ? AND lostClicks > 0
ORDER BY lostClicks DESC
LIMIT ${Number(limit)}
`,
      // Positional bindings; the order must match the `?` placeholders above:
      //   cur CTE:    current start, current end
      //   prev CTE:   previous start, previous end, minPreviousClicks
      //   weekly CTE: current start/end, then previous start/end
      //   final WHERE: threshold
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minPreviousClicks,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        threshold
      ],
      current: {
        table: "pages",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "pages",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },
  // Decodes SQL rows into result objects; non-array input is treated as empty.
  // `seriesJson` is parsed into the per-week `series` list.
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    return {
      results: arr.map((r) => ({
        page: str$2(r.page),
        currentClicks: num(r.currentClicks),
        previousClicks: num(r.previousClicks),
        lostClicks: num(r.lostClicks),
        declinePercent: num(r.declinePercent),
        currentPosition: num(r.currentPosition),
        previousPosition: num(r.previousPosition),
        positionDrop: num(r.positionDrop),
        series: parseJsonList$1(r.seriesJson).map((s) => ({
          week: str$2(s.week),
          clicks: num(s.clicks),
          impressions: num(s.impressions)
        }))
      })),
      meta: { total: arr.length }
    };
  },
  // Row-path plan: one pages query state per comparison period.
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: pagesQueryState(current, params.limit),
      previous: pagesQueryState(previous, params.limit)
    };
  },
  // Row-path reducer. Expects an object with `current` and `previous` row
  // lists; a falsy value or an array is replaced by two empty lists.
  reduceRows(rows, params) {
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const results = analyzeDecay({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      minPreviousClicks: params.minPreviousClicks,
      threshold: params.threshold
    });
    return {
      results,
      meta: { total: results.length }
    };
  }
});
|
|
1621
|
+
/**
 * Coerce a value to a string, mapping `null`/`undefined` to the empty string.
 *
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str$1(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
1624
|
+
/**
 * Normalise a value that should hold a list into an array.
 *
 * Arrays are returned untouched (same reference). Non-empty strings are parsed
 * as JSON and kept only when the parsed value is an array. Everything else
 * yields an empty array.
 *
 * @param {unknown} v - Array, JSON string, or anything else.
 * @returns {Array} The list, or `[]` when none could be derived.
 * @throws {SyntaxError} When `v` is a non-empty string that is not valid JSON.
 */
function parseJsonList(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v.length === 0) return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
|
|
1632
|
+
/**
 * Split the current period's keywords into rising, declining and stable
 * buckets by comparing their clicks with the previous period.
 *
 * @param {object} input
 * @param {Array} input.current - Rows for the current period (`query`, `page`, `clicks`, `impressions`, `position`).
 * @param {Array} input.previous - Rows for the previous period, same fields.
 * @param {number} [input.normalizationFactor=1] - Divisor applied to previous-period clicks and impressions.
 * @param {object} [options]
 * @param {number} [options.changeThreshold=0.2] - Minimum |clicks change| (as a fraction) to count as rising/declining.
 * @param {number} [options.minImpressions=50] - Current-period rows below this are ignored.
 * @param {string} [options.sortBy="clicksChange"] - Sort key for the rising/declining buckets.
 * @returns {{rising: Array, declining: Array, stable: Array}} Buckets of per-keyword comparison records.
 */
function analyzeMovers(input, options = {}) {
  const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
  const normFactor = input.normalizationFactor ?? 1;
  const baselineMap = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
    clicks: num(r.clicks) / normFactor,
    impressions: num(r.impressions) / normFactor,
    position: num(r.position),
    page: r.page ?? null
  }));

  // First non-empty page seen per query; current-period rows take precedence.
  const pageMap = /* @__PURE__ */ new Map();
  for (const periodRows of [input.current, input.previous]) {
    for (const row of periodRows) {
      if (row.page && !pageMap.has(row.query)) pageMap.set(row.query, row.page);
    }
  }

  const buckets = {
    rising: [],
    declining: [],
    stable: []
  };
  for (const row of input.current) {
    const impressions = num(row.impressions);
    const clicks = num(row.clicks);
    const position = num(row.position);
    if (impressions < minImpressions) continue;
    // Keywords with no previous-period entry are compared against an all-zero baseline.
    const baseline = baselineMap.get(row.query) || {
      clicks: 0,
      impressions: 0,
      position: 0,
      page: null
    };
    const clicksChangePercent = percentDifference(clicks, baseline.clicks);
    const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
    const roundedBaselineClicks = Math.round(baseline.clicks);
    const data = {
      keyword: row.query,
      page: pageMap.get(row.query) ?? null,
      recentClicks: clicks,
      recentImpressions: impressions,
      recentPosition: position,
      baselineClicks: roundedBaselineClicks,
      baselineImpressions: Math.round(baseline.impressions),
      baselinePosition: baseline.position,
      clicksChange: clicks - roundedBaselineClicks,
      clicksChangePercent,
      impressionsChangePercent,
      positionChange: position - baseline.position
    };
    const crossesThreshold = Math.abs(clicksChangePercent / 100) >= changeThreshold;
    let bucket = "stable";
    if (crossesThreshold && clicksChangePercent > 0) bucket = "rising";
    else if (crossesThreshold && clicksChangePercent < 0) bucket = "declining";
    buckets[bucket].push(data);
  }

  // Descending comparators; the "change" keys rank by absolute magnitude.
  const byMagnitude = (pick) => (a, b) => Math.abs(pick(b)) - Math.abs(pick(a));
  const byClicksChange = byMagnitude((m) => m.clicksChangePercent);
  const comparators = /* @__PURE__ */ new Map([
    ["clicks", (a, b) => b.recentClicks - a.recentClicks],
    ["impressions", (a, b) => b.recentImpressions - a.recentImpressions],
    ["clicksChange", byClicksChange],
    ["impressionsChange", byMagnitude((m) => m.impressionsChangePercent)],
    ["positionChange", byMagnitude((m) => m.positionChange)]
  ]);
  // Unknown sort keys fall back to the clicks-change ordering.
  const compare = comparators.get(sortBy) ?? byClicksChange;
  buckets.rising.sort(compare);
  buckets.declining.sort(compare);
  buckets.stable.sort((a, b) => b.recentClicks - a.recentClicks);
  return {
    rising: buckets.rising,
    declining: buckets.declining,
    stable: buckets.stable
  };
}
|
|
1698
|
+
/**
 * "movers" analyzer: compares keyword performance between the current and the
 * previous comparison period and labels each keyword rising/declining/stable.
 *
 * Two execution paths are provided: a SQL path (`buildSql` + `reduceSql`) and a
 * row path (`buildRows` + `reduceRows`). Both return `{ results, meta }` where
 * `results` holds only rising and declining entries and `meta` carries
 * `total`, `rising`, `declining` and `stable` counts.
 */
const moversAnalyzer = defineAnalyzer({
  id: "movers",
  /**
   * Build the SQL query over the `page_keywords` table for both periods.
   *
   * @param {object} params - Analyzer params; reads `minImpressions` (default 50),
   *   `changeThreshold` (default 0.2) and `limit` (default 2000).
   * @returns {object} `{ sql, params, current, previous }` descriptor.
   */
  buildSql(params) {
    const { current: cur, previous: prev } = comparisonOf(params);
    const minImpressions = params.minImpressions ?? 50;
    const changeThreshold = params.changeThreshold ?? .2;
    const limit = params.limit ?? 2e3;
    return {
      // NOTE(review): {{FILES}} / {{FILES_PREV}} are presumably replaced by the engine
      // with the parquet files of `current` / `previous` — confirm.
      sql: `
WITH cur AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
prev AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
weekly AS (
SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions
FROM (
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
UNION ALL
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
)
GROUP BY query, url, week
),
series_by_entity AS (
SELECT query, url, to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM weekly GROUP BY query, url
),
joined AS (
SELECT
c.query AS keyword,
c.url AS page,
c.clicks AS recentClicks,
c.impressions AS recentImpressions,
c.position AS recentPosition,
COALESCE(p.clicks, 0.0) AS baselineClicks,
COALESCE(p.impressions, 0.0) AS baselineImpressions,
COALESCE(p.position, 0.0) AS baselinePosition,
(c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
CASE
WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
END AS clicksChangePercent,
CASE
WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
END AS impressionsChangePercent,
(c.position - COALESCE(p.position, 0.0)) AS positionChange,
s.seriesJson
FROM cur c
LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
WHERE c.impressions >= ?
)
SELECT *,
CASE
WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
ELSE 'stable'
END AS direction
FROM joined
ORDER BY ABS(clicksChangePercent) DESC
LIMIT ${Number(limit)}
`,
      // Positional bindings, in placeholder order: cur, prev, weekly(cur), weekly(prev),
      // impressions floor, then the threshold once per direction branch.
      params: [
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        cur.startDate,
        cur.endDate,
        prev.startDate,
        prev.endDate,
        minImpressions,
        changeThreshold,
        changeThreshold
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(cur.startDate, cur.endDate)
      },
      previous: {
        table: "page_keywords",
        partitions: enumeratePartitions(prev.startDate, prev.endDate)
      }
    };
  },
  /**
   * Normalise SQL result rows and keep only rising/declining entries.
   *
   * @param {unknown} rows - Rows returned by the query built in `buildSql`; non-arrays are treated as empty.
   * @returns {{results: Array, meta: {total: number, rising: number, declining: number, stable: number}}}
   */
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      keyword: str$1(r.keyword),
      page: r.page == null ? null : str$1(r.page),
      recentClicks: num(r.recentClicks),
      recentImpressions: num(r.recentImpressions),
      recentPosition: num(r.recentPosition),
      baselineClicks: Math.round(num(r.baselineClicks)),
      baselineImpressions: Math.round(num(r.baselineImpressions)),
      baselinePosition: num(r.baselinePosition),
      clicksChange: num(r.clicksChange),
      clicksChangePercent: num(r.clicksChangePercent),
      impressionsChangePercent: num(r.impressionsChangePercent),
      positionChange: num(r.positionChange),
      direction: str$1(r.direction),
      series: parseJsonList(r.seriesJson).map((s) => ({
        week: str$1(s.week),
        clicks: num(s.clicks),
        impressions: num(s.impressions)
      }))
    }));
    const rising = normalized.filter((r) => r.direction === "rising");
    const declining = normalized.filter((r) => r.direction === "declining");
    const stable = normalized.filter((r) => r.direction === "stable");
    const combined = [...rising, ...declining];
    return {
      results: combined,
      meta: {
        total: combined.length,
        rising: rising.length,
        declining: declining.length,
        stable: stable.length
      }
    };
  },
  /**
   * Build the row-based query states for both comparison periods.
   *
   * @param {object} params - Analyzer params; `limit` is forwarded to both states.
   * @returns {{current: object, previous: object}}
   */
  buildRows(params) {
    const { current, previous } = comparisonOf(params);
    return {
      current: keywordsQueryState(current, params.limit),
      previous: keywordsQueryState(previous, params.limit)
    };
  },
  /**
   * Classify movers from row-path results via `analyzeMovers`.
   *
   * @param {unknown} rows - `{ current, previous }` map of row arrays; anything else is treated as empty.
   * @param {object} params - Analyzer params; reads `changeThreshold` and `minImpressions`.
   * @returns {{results: Array, meta: {total: number, rising: number, declining: number, stable: number}}}
   */
  reduceRows(rows, params) {
    const map = rows && !Array.isArray(rows) ? rows : {
      current: [],
      previous: []
    };
    const result = analyzeMovers({
      current: map.current ?? [],
      previous: map.previous ?? []
    }, {
      changeThreshold: params.changeThreshold,
      minImpressions: params.minImpressions
    });
    const results = [...result.rising.map((r) => ({
      ...r,
      direction: "rising"
    })), ...result.declining.map((r) => ({
      ...r,
      direction: "declining"
    }))];
    return {
      results,
      // Same meta keys as reduceSql so callers need not care which path ran.
      meta: {
        total: results.length,
        rising: result.rising.length,
        declining: result.declining.length,
        stable: result.stable.length
      }
    };
  }
});
|
|
1878
|
+
/** Page size used when the caller supplies no (or an invalid) limit. */
const DEFAULT_LIMIT = 1e3;
/** Upper bound applied to any requested page size. */
const MAX_LIMIT = 5e4;
/**
 * Sanitise a requested page size.
 *
 * The request is truncated to a whole number first, so the value can be
 * interpolated into a SQL `LIMIT` clause without producing a fractional limit
 * (this mirrors the flooring done by `clampOffset`). Requests that are not
 * finite, or that truncate to zero or less, yield `fallback`; valid requests
 * are capped at `MAX_LIMIT`.
 *
 * @param {unknown} limit - Requested limit; `null`/`undefined` means "use fallback".
 * @param {number} [fallback=DEFAULT_LIMIT] - Value returned for missing/invalid requests.
 * @returns {number} A whole number in `[1, MAX_LIMIT]`, or `fallback` as given.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
  const requested = Math.floor(Number(limit ?? fallback));
  if (!Number.isFinite(requested) || requested <= 0) return fallback;
  return Math.min(requested, MAX_LIMIT);
}
|
|
1885
|
+
/**
 * Sanitise a requested row offset.
 *
 * @param {unknown} offset - Requested offset; `null`/`undefined` means 0.
 * @returns {number} The offset rounded down to a whole number, or 0 when the
 *   request is negative or not a finite number.
 */
function clampOffset(offset) {
  const requested = Number(offset ?? 0);
  const usable = Number.isFinite(requested) && !(requested < 0);
  return usable ? Math.floor(requested) : 0;
}
|
|
1890
|
+
/**
 * Render a SQL pagination suffix from a `{ limit, offset }` request.
 *
 * @param {{limit?: unknown, offset?: unknown}} input - Requested page; both values are sanitised.
 * @returns {string} `LIMIT n`, followed by ` OFFSET m` only when the offset is positive.
 */
function paginateClause(input) {
  const limitPart = `LIMIT ${clampLimit(input.limit)}`;
  const skip = clampOffset(input.offset);
  if (skip > 0) return `${limitPart} OFFSET ${skip}`;
  return limitPart;
}
|
|
1895
|
+
/**
 * Apply `{ limit, offset }` pagination to an in-memory array.
 *
 * When no valid limit is given, the page size falls back to `rows.length`.
 *
 * @param {Array} rows - Full result list (not mutated).
 * @param {{limit?: unknown, offset?: unknown}} input - Requested page.
 * @returns {Array} A new array holding the requested page.
 */
function paginateInMemory(rows, input) {
  const pageSize = clampLimit(input.limit, rows.length);
  const start = clampOffset(input.offset);
  const end = start + pageSize;
  return rows.slice(start, end);
}
|
|
1900
|
+
/** Expected click-through rate for each whole-number position from 1 to 10. */
const EXPECTED_CTR_BY_POSITION = {
  1: 0.3,
  2: 0.15,
  3: 0.1,
  4: 0.07,
  5: 0.05,
  6: 0.04,
  7: 0.03,
  8: 0.025,
  9: 0.02,
  10: 0.015
};
/**
 * Look up the expected CTR for a (possibly fractional) position.
 *
 * The position is bounded to the range 1–10 and rounded to the nearest whole
 * number before the table lookup.
 *
 * @param {number} position - Average position.
 * @returns {number} The table value, or 0.01 when the lookup yields nothing
 *   (e.g. `position` is `NaN`).
 */
function getExpectedCtr(position) {
  const bounded = Math.max(1, Math.min(position, 10));
  const expected = EXPECTED_CTR_BY_POSITION[Math.round(bounded)];
  return expected || 0.01;
}
|
|
1915
|
+
/**
 * Score a position for the opportunity calculation.
 *
 * Positions up to 3 score a flat 0.2 and positions beyond 50 a flat 0.1.
 * In between, the score peaks at 1 for position 11 and falls off linearly,
 * reaching 0 at a distance of 15 positions from 11.
 *
 * @param {number} position - Average position.
 * @returns {number} Score between 0 and 1.
 */
function calculatePositionScore(position) {
  if (position <= 3) return 0.2;
  if (position > 50) return 0.1;
  const falloff = Math.abs(position - 11) / 15;
  return Math.max(0, 1 - falloff);
}
|
|
1921
|
+
/**
 * Score an impression count on a logarithmic scale.
 *
 * @param {number} impressions - Impression count.
 * @returns {number} 0 for non-positive counts, otherwise `log10(impressions) / 5`
 *   capped at 1 (so 100,000 impressions or more scores 1).
 */
function calculateImpressionScore(impressions) {
  return impressions <= 0 ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
|
|
1925
|
+
/**
 * Score how far the actual CTR falls short of the CTR expected at a position.
 *
 * @param {number} actualCtr - Observed click-through rate.
 * @param {number} position - Average position, used to look up the expected CTR.
 * @returns {number} 0 when the actual CTR meets or beats the expectation,
 *   otherwise the shortfall as a fraction of the expected CTR, capped at 1.
 */
function calculateCtrGapScore(actualCtr, position) {
  const expectedCtr = getExpectedCtr(position);
  const meetsExpectation = actualCtr >= expectedCtr;
  return meetsExpectation ? 0 : Math.min((expectedCtr - actualCtr) / expectedCtr, 1);
}
|
|
1931
|
+
// Sorter applied to opportunity results (called as `sortResults$1(results, sortBy)`).
// NOTE(review): the first argument is presumably the default sort key and the map the
// sort direction per sortable metric — confirm against createMetricSorter.
const sortResults$1 = createMetricSorter("opportunityScore", {
  opportunityScore: "desc",
  potentialClicks: "desc",
  impressions: "desc",
  position: "asc"
});
|
|
1937
|
+
/**
 * "opportunity" analyzer: scores keyword/page pairs by combining a position
 * score, an impression score and a CTR-gap score into a 0–100 opportunity
 * score (geometric mean of the three, equally weighted).
 *
 * Two execution paths are provided: a SQL path (`buildSql` + `reduceSql`) and a
 * row path (`buildRows` + `reduceRows`). Both return `{ results, meta }` with
 * `meta.total` and `meta.returned`.
 */
const opportunityAnalyzer = defineAnalyzer({
  id: "opportunity",
  /**
   * Build the scoring query over the `page_keywords` table.
   *
   * @param {object} params - Analyzer params; reads `minImpressions` (default 100),
   *   `limit` (default 1000) and `offset`.
   * @returns {object} `{ sql, params, current }` descriptor.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 100;
    // Exponents of the three factors in the weighted geometric mean.
    const w1 = 1;
    const w2 = 1;
    const w3 = 1;
    const totalW = w1 + w2 + w3;
    const limit = params.limit ?? 1e3;
    return {
      // The CASE tables below repeat the values of EXPECTED_CTR_BY_POSITION and the
      // formulas of calculatePositionScore / calculateImpressionScore / calculateCtrGapScore.
      sql: `
WITH agg AS (
SELECT
query AS keyword,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
scored AS (
SELECT
keyword, page, clicks, impressions, ctr, position,
CASE
WHEN position <= 3 THEN 0.2
WHEN position > 50 THEN 0.1
ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
END AS positionScore,
CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
WHEN 4 THEN 0.07
WHEN 5 THEN 0.05
WHEN 6 THEN 0.04
WHEN 7 THEN 0.03
WHEN 8 THEN 0.025
WHEN 9 THEN 0.02
WHEN 10 THEN 0.015
ELSE 0.01
END AS expectedCtr
FROM agg
),
gapped AS (
SELECT
*,
CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
FROM scored
)
SELECT
keyword, page, clicks, impressions, ctr, position,
CAST(ROUND(POWER(
POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
1.0 / ${totalW}
) * 100) AS DOUBLE) AS opportunityScore,
CAST(ROUND(impressions * (
CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
ELSE 0.10
END
)) AS DOUBLE) AS potentialClicks,
positionScore, impressionScore, ctrGapScore
FROM gapped
ORDER BY opportunityScore DESC
${paginateClause({
        limit,
        offset: params.offset
      })}
`,
      params: [
        startDate,
        endDate,
        minImpressions
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Normalise SQL result rows into opportunity records.
   *
   * @param {unknown} rows - Rows returned by the query built in `buildSql`; non-arrays are treated as empty.
   * @returns {{results: Array, meta: {total: number, returned: number}}} Both counts
   *   equal the number of rows received, since paging already happened in SQL.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    return {
      results: arr.map((r) => ({
        keyword: r.keyword == null ? "" : String(r.keyword),
        page: r.page == null ? null : String(r.page),
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        opportunityScore: num(r.opportunityScore),
        potentialClicks: num(r.potentialClicks),
        factors: {
          positionScore: num(r.positionScore),
          impressionScore: num(r.impressionScore),
          ctrGapScore: num(r.ctrGapScore)
        }
      })),
      // `returned` mirrors the key reported by reduceRows.
      meta: {
        total: arr.length,
        returned: arr.length
      }
    };
  },
  /**
   * Build the row-based keyword query state for the analysis period.
   *
   * @param {object} params - Analyzer params; `limit` is forwarded to the query state.
   * @returns {{keywords: object}}
   */
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  /**
   * Score, sort and page keyword rows in memory.
   *
   * @param {unknown} rows - Keyword rows; non-arrays are treated as empty.
   * @param {object} params - Analyzer params; reads `minImpressions` (default 100), `limit` and `offset`.
   * @returns {{results: Array, meta: {total: number, returned: number}}} `total` counts all
   *   scored rows, `returned` the rows on the requested page.
   */
  reduceRows(rows, params) {
    const keywords = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 100;
    const positionWeight = 1;
    const impressionsWeight = 1;
    const ctrGapWeight = 1;
    const sortBy = "opportunityScore";
    const results = [];
    for (const row of keywords) {
      const impressions = num(row.impressions);
      const position = num(row.position);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);
      if (impressions < minImpressions) continue;
      const positionScore = calculatePositionScore(position);
      const impressionScore = calculateImpressionScore(impressions);
      const ctrGapScore = calculateCtrGapScore(ctr, position);
      // Weighted geometric mean: any factor of 0 zeroes the whole score.
      const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
      const opportunityScore = Math.round(geometricMean * 100);
      // Potential clicks use the expected CTR of the current position, bounded to position 3 or better.
      const targetCtr = getExpectedCtr(Math.min(3, position));
      const potentialClicks = Math.round(impressions * targetCtr);
      results.push({
        keyword: row.query,
        page: row.page ?? null,
        clicks,
        impressions,
        ctr,
        position,
        opportunityScore,
        potentialClicks,
        factors: {
          positionScore,
          impressionScore,
          ctrGapScore
        }
      });
    }
    const sorted = sortResults$1(results, sortBy);
    const paged = paginateInMemory(sorted, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: sorted.length,
        returned: paged.length
      }
    };
  }
});
|
|
2100
|
+
/**
 * Coerce a value to a string, mapping `null`/`undefined` to the empty string.
 *
 * @param {unknown} v - Value to stringify.
 * @returns {string} `""` for nullish input, otherwise `String(v)`.
 */
function str(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
|
|
2103
|
+
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * @param {unknown} v - Value to interpret.
 * @returns {boolean} `true` only for `true`, the number `1`, or the string `"true"`.
 */
function bool(v) {
  switch (v) {
    case true:
    case 1:
    case "true":
      return true;
    default:
      return false;
  }
}
|
|
2106
|
+
/**
 * Coefficient of variation (population standard deviation divided by the
 * mean) of a list of numbers, capped at 1.
 *
 * @param {number[]} values - Samples.
 * @returns {number} 0 for an empty list or a zero mean, otherwise `min(stddev / mean, 1)`.
 */
function calculateCV(values) {
  const count = values.length;
  if (count === 0) return 0;
  const mean = values.reduce((acc, v) => acc + v, 0) / count;
  if (mean === 0) return 0;
  const squaredDeviations = values.reduce((acc, v) => acc + (v - mean) ** 2, 0);
  const stdDev = Math.sqrt(squaredDeviations / count);
  return Math.min(stdDev / mean, 1);
}
|
|
2113
|
+
/**
 * Aggregate daily rows into calendar months and flag seasonal peaks and troughs.
 *
 * A month is a peak when its value exceeds 1.5× the monthly average and a
 * trough when it is below 0.5× the average. When the average is 0 (no traffic
 * at all) no month is flagged, matching the `avg_val > 0` guard used by the
 * SQL implementation of this analysis.
 *
 * @param {Array<{date: string, clicks: number, impressions: number}>} dates - Daily rows;
 *   the first seven characters of `date` are used as the month key (`YYYY-MM`).
 * @param {object} [options]
 * @param {string} [options.metric="clicks"] - `"impressions"` aggregates impressions;
 *   any other value aggregates clicks (same rule as the SQL implementation).
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[], troughMonths: string[], monthlyBreakdown: Array, insufficientData: boolean}}
 *   `peakMonths`/`troughMonths` hold distinct two-digit month numbers; `insufficientData`
 *   is true when fewer than 12 distinct months are present.
 */
function analyzeSeasonality(dates, options = {}) {
  const { metric = "clicks" } = options;
  if (dates.length === 0) return {
    hasSeasonality: false,
    strength: 0,
    peakMonths: [],
    troughMonths: [],
    monthlyBreakdown: [],
    insufficientData: true
  };
  // Only an explicit "impressions" switches the metric; unknown values mean clicks.
  const useImpressions = metric === "impressions";
  const monthlyMap = /* @__PURE__ */ new Map();
  for (const row of dates) {
    const month = row.date.substring(0, 7);
    const value = useImpressions ? row.impressions : row.clicks;
    monthlyMap.set(month, (monthlyMap.get(month) || 0) + value);
  }
  const months = Array.from(monthlyMap.keys()).sort();
  const values = months.map((m) => monthlyMap.get(m) || 0);
  const insufficientData = months.length < 12;
  const totalValue = values.reduce((a, b) => a + b, 0);
  const avgValue = values.length > 0 ? totalValue / values.length : 0;
  const hasBaseline = avgValue > 0;
  const monthlyBreakdown = months.map((month, i) => {
    const value = values[i] ?? 0;
    const vsAverage = hasBaseline ? value / avgValue : 0;
    return {
      month,
      value,
      vsAverage,
      isPeak: vsAverage > 1.5,
      // Without a positive average the ratio is a placeholder 0, not a real trough.
      isTrough: hasBaseline && vsAverage < .5
    };
  });
  const peakMonths = [...new Set(monthlyBreakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
  const troughMonths = [...new Set(monthlyBreakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
  const strength = calculateCV(values);
  return {
    hasSeasonality: peakMonths.length > 0 || troughMonths.length > 0 || strength > .3,
    strength,
    peakMonths,
    troughMonths,
    monthlyBreakdown,
    insufficientData
  };
}
|
|
2157
|
+
/**
 * "seasonality" analyzer: aggregates a metric per calendar month and flags
 * months that sit well above (peak) or below (trough) the monthly average.
 *
 * Two execution paths are provided: a SQL path (`buildSql` + `reduceSql`) and a
 * row path (`buildRows` + `reduceRows`). Both return the monthly breakdown as
 * `results` and the same summary keys in `meta`.
 */
const seasonalityAnalyzer = defineAnalyzer({
  id: "seasonality",
  /**
   * Build the monthly aggregation query over the `pages` table.
   *
   * @param {object} params - Analyzer params; `metric === "impressions"` sums impressions,
   *   any other value sums clicks.
   * @returns {object} `{ sql, params, current }` descriptor.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      // The interpolated column name is one of two fixed literals, never caller text.
      sql: `
WITH monthly AS (
SELECT
strftime(date, '%Y-%m') AS month,
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY month
),
stats AS (
SELECT
AVG(value) AS avg_val,
COALESCE(STDDEV_POP(value), 0.0) AS std_val,
CAST(COUNT(*) AS DOUBLE) AS month_count
FROM monthly
)
SELECT
m.month AS month,
m.value AS value,
CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
(s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
(s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
s.month_count AS monthCount
FROM monthly m, stats s
ORDER BY m.month
`,
      params: [startDate, endDate],
      current: {
        table: "pages",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Turn SQL result rows into the monthly breakdown plus summary meta.
   *
   * @param {unknown} rows - Rows returned by the query built in `buildSql`; non-arrays are treated as empty.
   * @returns {{results: Array, meta: object}} `meta` holds `total`, `hasSeasonality`,
   *   `strength`, `peakMonths`, `troughMonths` and `insufficientData`.
   */
  reduceSql(rows) {
    const arr = Array.isArray(rows) ? rows : [];
    const breakdown = arr.map((r) => ({
      month: str(r.month),
      value: num(r.value),
      vsAverage: num(r.vsAverage),
      isPeak: bool(r.isPeak),
      isTrough: bool(r.isTrough)
    }));
    // `strength` and `monthCount` are repeated on every row; read them from the first.
    const first = arr[0];
    const strength = first ? num(first.strength) : 0;
    const monthCount = first ? num(first.monthCount) : 0;
    const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
    const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
    const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
    const insufficientData = monthCount < 12;
    return {
      results: breakdown,
      meta: {
        total: breakdown.length,
        hasSeasonality,
        strength,
        peakMonths,
        troughMonths,
        insufficientData
      }
    };
  },
  /**
   * Build the row-based dates query state for the analysis period.
   *
   * @param {object} params - Analyzer params; `limit` is forwarded to the query state.
   * @returns {{dates: object}}
   */
  buildRows(params) {
    return { dates: datesQueryState(periodOf(params), params.limit) };
  },
  /**
   * Run `analyzeSeasonality` over row-path results.
   *
   * @param {unknown} rows - Daily rows; non-arrays are treated as empty.
   * @param {object} params - Analyzer params; `metric` is forwarded.
   * @returns {{results: Array, meta: object}} Same `meta` keys as `reduceSql`.
   */
  reduceRows(rows, params) {
    const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
    return {
      results: result.monthlyBreakdown,
      // Expose the full summary, not just `strength`, so both paths agree.
      meta: {
        total: result.monthlyBreakdown.length,
        hasSeasonality: result.hasSeasonality,
        strength: result.strength,
        peakMonths: result.peakMonths,
        troughMonths: result.troughMonths,
        insufficientData: result.insufficientData
      }
    };
  }
});
|
|
2235
|
+
/** Row cap for the row-based keyword query when the caller gives no limit. */
const DEFAULT_ROW_LIMIT$1 = 25e3;
/**
 * "striking-distance" analyzer: keyword/page pairs whose position lies inside
 * a configurable window, with enough impressions and a CTR at or below a
 * ceiling. A single `reduce` does the filtering, sorting and paging.
 */
const strikingDistanceAnalyzer = defineAnalyzer({
  id: "striking-distance",
  /**
   * Filter, rank and page keyword rows.
   *
   * @param {unknown} rows - Keyword rows; non-arrays are treated as empty.
   * @param {object} params - Reads `minPosition` (4), `maxPosition` (20), `minImpressions` (100),
   *   `maxCtr` (0.05), `limit` (1000) and `offset`.
   * @returns {{results: Array, meta: {total: number, returned: number}}} Results are ordered by
   *   `potentialClicks` descending; `total` counts all matches, `returned` the page size.
   */
  reduce(rows, params) {
    const source = Array.isArray(rows) ? rows : [];
    const minPosition = params.minPosition ?? 4;
    const maxPosition = params.maxPosition ?? 20;
    const minImpressions = params.minImpressions ?? 100;
    const maxCtr = params.maxCtr ?? .05;
    const limit = params.limit ?? 1e3;
    const results = source.flatMap((row) => {
      const position = num(row.position);
      const impressions = num(row.impressions);
      const ctr = num(row.ctr);
      const clicks = num(row.clicks);
      const outsideWindow = position < minPosition || position > maxPosition;
      if (outsideWindow || impressions < minImpressions || ctr > maxCtr) return [];
      return [{
        keyword: String(row.query ?? ""),
        page: row.page == null ? null : String(row.page),
        clicks,
        impressions,
        ctr,
        position,
        // Estimated as 15% of the row's impressions.
        potentialClicks: Math.round(impressions * .15)
      }];
    });
    results.sort((a, b) => b.potentialClicks - a.potentialClicks);
    const paged = paginateInMemory(results, {
      limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: results.length,
        returned: paged.length
      }
    };
  },
  /**
   * Build the aggregation query over the `page_keywords` table; filtering
   * happens afterwards in `reduce`.
   *
   * @param {object} params - Analyzer params (only the period is used here).
   * @returns {object} `{ sql, params, current }` descriptor.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    return {
      sql: `
SELECT
query,
url AS page,
CAST(SUM(clicks) AS DOUBLE) AS clicks,
CAST(SUM(impressions) AS DOUBLE) AS impressions,
CAST(SUM(clicks) AS DOUBLE) / NULLIF(SUM(impressions), 0) AS ctr,
SUM(sum_position) / NULLIF(SUM(impressions), 0) + 1 AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
`,
      params: [startDate, endDate],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },
  /**
   * Build the row-based keyword query state for the analysis period.
   *
   * @param {object} params - Analyzer params; `limit` defaults to `DEFAULT_ROW_LIMIT$1`.
   * @returns {{keywords: object}}
   */
  buildRows(params) {
    const rowLimit = params.limit ?? DEFAULT_ROW_LIMIT$1;
    return { keywords: keywordsQueryState(periodOf(params), rowLimit) };
  }
});
|
|
2303
|
+
// Row cap used by the zero-click analyzer's row-based query when the caller gives no limit.
const DEFAULT_ROW_LIMIT = 25e3;
// Sorter keyed on each item's `impressions` (called as `sortRowResults(items, "impressions", "desc")`).
// NOTE(review): the second argument is presumably the sorter's default key name — confirm against createSorter.
const sortRowResults = createSorter((item) => item.impressions, "impressions");
|
|
2305
|
+
/**
 * "zero-click" analyzer: query/page combinations with many impressions and a
 * good average position that still convert few impressions into clicks.
 *
 * Thresholds (shared by both execution paths):
 *   - minImpressions (default 1000): minimum impressions in the period
 *   - maxPosition    (default 10):   inclusive upper bound on position
 *   - maxCtr         (default 0.03): inclusive upper bound on CTR
 *
 * Two execution paths are defined:
 *   - buildSql / reduceSql   aggregate per (query, url) in SQL and also emit
 *     `missedClicks`.
 *   - buildRows / reduceRows filter pre-aggregated rows in memory and keep one
 *     row per query (the best-ranked one). This path does not compute
 *     `missedClicks`.
 */
const zeroClickAnalyzer = defineAnalyzer({
  id: "zero-click",

  /**
   * Build the SQL plan. `params` positions line up with the `?` placeholders:
   * startDate, endDate, minImpressions (HAVING), maxPosition, maxCtr.
   */
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    const limit = params.limit ?? 1e3;
    return {
      // missedClicks = max(0, round(impressions * expectedCtr) - clicks), where
      // expectedCtr is bucketed by position: <=1 -> 0.30, <=3 -> 0.15,
      // <=5 -> 0.08, otherwise 0.04.
      // The CTR filter is inclusive (`ctr <= ?`) so that a row sitting exactly
      // on maxCtr is treated the same way as in reduceRows below.
      sql: `
        WITH agg AS (
          SELECT
            query,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        )
        SELECT
          query, page, clicks, impressions, ctr, position,
          CAST(GREATEST(0, ROUND(impressions * (
            CASE
              WHEN position <= 1 THEN 0.30
              WHEN position <= 3 THEN 0.15
              WHEN position <= 5 THEN 0.08
              ELSE 0.04
            END
          )) - clicks) AS DOUBLE) AS missedClicks
        FROM agg
        WHERE position <= ? AND ctr <= ?
        ORDER BY impressions DESC
        ${paginateClause({
          limit,
          offset: params.offset
        })}
      `,
      params: [
        startDate,
        endDate,
        minImpressions,
        maxPosition,
        maxCtr
      ],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate)
      }
    };
  },

  /**
   * Normalise SQL result rows: null query/page become "", metrics go through
   * `num`. `meta.total` is the number of rows received (i.e. the page that the
   * SQL returned), and the effective thresholds are echoed back.
   */
  reduceSql(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    return {
      results: arr.map((r) => ({
        query: r.query == null ? "" : String(r.query),
        page: r.page == null ? "" : String(r.page),
        clicks: num(r.clicks),
        impressions: num(r.impressions),
        ctr: num(r.ctr),
        position: num(r.position),
        missedClicks: num(r.missedClicks)
      })),
      meta: {
        total: arr.length,
        minImpressions,
        maxCtr,
        maxPosition
      }
    };
  },

  /** Row-path request: query x page rows for the period, capped at `limit`. */
  buildRows(params) {
    const period = periodOf(params);
    const limit = params.limit ?? DEFAULT_ROW_LIMIT;
    return { rows: gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState() };
  },

  /**
   * Row-path reduction: drop rows outside the thresholds, keep the
   * best-positioned row per query, sort by impressions (desc) and paginate.
   */
  reduceRows(rows, params) {
    const arr = Array.isArray(rows) ? rows : [];
    const minImpressions = params.minImpressions ?? 1e3;
    const maxCtr = params.maxCtr ?? .03;
    const maxPosition = params.maxPosition ?? 10;
    const queryMap = /* @__PURE__ */ new Map();
    for (const row of arr) {
      if (row.impressions < minImpressions) continue;
      if (row.position > maxPosition) continue;
      if (row.ctr > maxCtr) continue;
      const existing = queryMap.get(row.query);
      // A lower position number is a better ranking; it wins the per-query slot.
      if (!existing || row.position < existing.position) queryMap.set(row.query, {
        query: row.query,
        page: row.page,
        clicks: row.clicks,
        impressions: row.impressions,
        ctr: row.ctr,
        position: row.position
      });
    }
    const results = sortRowResults(Array.from(queryMap.values()), "impressions", "desc");
    const paged = paginateInMemory(results, {
      limit: params.limit,
      offset: params.offset
    });
    return {
      results: paged,
      meta: {
        total: results.length,
        returned: paged.length
      }
    };
  }
});
|
|
2421
|
+
/**
 * The `.rows` variant of every built-in analyzer, in registration order.
 */
const ROW_ANALYZERS = [
  strikingDistanceAnalyzer,
  opportunityAnalyzer,
  brandAnalyzer,
  concentrationAnalyzer,
  clusteringAnalyzer,
  seasonalityAnalyzer,
  moversAnalyzer,
  decayAnalyzer,
  cannibalizationAnalyzer,
  zeroClickAnalyzer
].map((analyzer) => analyzer.rows);
|
|
2433
|
+
/**
 * Token -> canonical token map used when normalising queries.
 * Declared as [canonical, aliases] groups and flattened into a plain lookup
 * object. An empty canonical string marks filler words; normalizeQuery drops
 * tokens that map to "".
 */
const SYNONYMS = Object.fromEntries([
  ["validator", ["checker", "tester", "verifier"]],
  ["validate", ["verify", "check", "test", "checking", "testing"]],
  ["generator", ["creator", "builder", "maker"]],
  ["generate", ["create", "build", "make"]],
  ["search", ["lookup", "finder", "find"]],
  ["", ["online", "free"]]
].flatMap(([canonical, aliases]) => aliases.map((alias) => [alias, canonical])));
|
|
2454
|
+
/**
 * Tokens that depluralize() must return unchanged even though they end in "s".
 * Grouped loosely by kind for readability; the grouping has no runtime meaning
 * and the insertion order matches the original flat list.
 */
const NO_STRIP_S = new Set([
  // short words and acronyms
  "css", "js", "ts", "os", "as", "is", "us", "has", "was", "its", "this", "yes", "no",
  "bus", "gas", "dns", "rss", "sms", "gps", "aws", "sas", "cms", "ios",
  // words ending in "ss"
  "less", "loss", "miss", "pass", "class", "access", "process", "express", "address",
  "cross", "press", "stress", "progress", "success", "business", "wordpress",
  // words ending in "us"
  "status", "radius", "nexus", "focus", "bonus", "campus", "census", "corpus", "nucleus",
  "stimulus", "terminus", "versus", "virus", "surplus", "cactus",
  // words ending in "is"
  "analysis", "basis", "thesis", "crisis", "axis", "genesis", "synopsis", "diagnosis",
  "emphasis", "hypothesis", "synthesis", "parenthesis", "redis", "apis",
  // words ending in "os"
  "chaos", "demos", "logos", "photos", "videos",
  // "*js" names
  "nuxtjs", "nextjs", "nodejs", "reactjs", "vuejs", "angularjs", "expressjs", "nestjs",
  "threejs", "alpinejs", "solidjs", "sveltejs", "dejs", "bunjs", "denojs",
  // miscellaneous
  "canvas", "atlas", "alias", "bias", "perhaps", "whereas", "kubernetes", "sass",
  "postgres", "always", "across",
  // adjectives ending in "ous"
  "previous", "various", "serious", "famous", "anonymous", "continuous", "dangerous",
  "generous", "obvious", "numerous", "curious", "nervous", "conscious"
]);
|
|
2568
|
+
/**
 * Heuristically reduce an English plural token to its singular form.
 *
 * Rules, first match wins:
 *   1. tokens of 3 characters or fewer, and tokens in NO_STRIP_S, are returned as-is
 *   2. "...ies" (length > 4)        -> "...y"     (queries -> query)
 *   3. "...sses"                    -> drop "es"  (classes -> class)
 *   4. "...ses" (length > 4)        -> drop "s"   (databases -> database)
 *   5. "...shes/ches/xes/zes"       -> drop "es"  (boxes -> box)
 *   6. "...s" but not "...ss"       -> drop "s"   (keywords -> keyword)
 *
 * Rule 3 exists because rule 4 alone turned "classes" into "classe", which
 * never matches the singular "class" (protected by NO_STRIP_S / the "ss" guard).
 *
 * @param {string} token lower-cased token
 * @returns {string} singularised token
 */
function depluralize(token) {
  if (token.length <= 3) return token;
  if (NO_STRIP_S.has(token)) return token;
  if (token.endsWith("ies") && token.length > 4) return `${token.slice(0, -3)}y`;
  if (token.endsWith("sses")) return token.slice(0, -2);
  if (token.endsWith("ses") && token.length > 4) return token.slice(0, -1);
  if (token.endsWith("shes") || token.endsWith("ches") || token.endsWith("xes") || token.endsWith("zes")) return token.slice(0, -2);
  if (token.endsWith("s") && !token.endsWith("ss")) return token.slice(0, -1);
  return token;
}
|
|
2577
|
+
// Runs of the punctuation characters - _ / . @ # : + ; normalizeQuery replaces each run with a single space.
const SEPARATOR_RE = /[-_/.@#:+]+/g;
|
|
2578
|
+
// Runs of whitespace; normalizeQuery collapses each run to one space before splitting into tokens.
const WHITESPACE_RE = /\s+/g;
|
|
2579
|
+
/**
 * Canonicalise a search query so that trivially different phrasings compare
 * equal.
 *
 * Pipeline: lower-case -> separators to spaces -> collapse whitespace ->
 * split into tokens -> map through SYNONYMS (a token mapped to "" is dropped)
 * -> depluralize -> sort alphabetically -> join with single spaces.
 *
 * @param {string} query raw query text
 * @returns {string} normalised, order-independent key ("" for blank input)
 */
function normalizeQuery(query) {
  const hasOwn = Object.prototype.hasOwnProperty;
  return query
    .toLowerCase()
    .replace(SEPARATOR_RE, " ")
    .replace(WHITESPACE_RE, " ")
    .trim()
    .split(" ")
    .filter(Boolean)
    // Only own keys count as synonyms. A bare SYNONYMS[token] lookup also sees
    // inherited Object.prototype members, so the token "constructor" used to
    // resolve to the Object function and end up stringified in the result.
    .map((token) => (hasOwn.call(SYNONYMS, token) ? SYNONYMS[token] ?? token : token))
    .filter(Boolean)
    .map(depluralize)
    .sort()
    .join(" ");
}
|
|
2582
|
+
/**
 * Whether `dim` is one of the four metric column names
 * (clicks, impressions, ctr, position). Passed to getFilterDimensions by the
 * engine query source.
 */
function isMetricDimension$2(dim) {
  switch (dim) {
    case "clicks":
    case "impressions":
    case "ctr":
    case "position":
      return true;
    default:
      return false;
  }
}
|
|
2590
|
+
/**
 * Capability flags exposed as `capabilities` by the engine-backed query source.
 * NOTE(review): the consumers of these flags live outside this section; the
 * grouping comments below are descriptive only.
 */
const ENGINE_SOURCE_CAPABILITIES = {
  // enabled
  regex: true,
  // disabled
  multiDataset: false,
  comparisonJoin: false,
  windowTotals: false,
  // enabled
  fileSets: true,
  localSource: true
};
|
|
2598
|
+
/**
 * Wrap an engine instance (plus its context) as a query source named "engine".
 *
 * @param {{ engine: object, ctx: unknown }} options engine to delegate to and
 *   the context value forwarded on every call
 * @returns {{ name: string, capabilities: object, queryRows: Function, executeSql: Function }}
 */
function createEngineQuerySource(options) {
  const engine = options.engine;
  const ctx = options.ctx;

  /**
   * Run a builder state through the engine and return its rows.
   * Rejects when a dimension is unsupported (assertDimensionsSupported) or
   * when `queryCanonical` is requested as a dimension or filter dimension.
   */
  async function queryRows(state) {
    const filterDims = getFilterDimensions(state.filter, isMetricDimension$2);
    assertDimensionsSupported([...state.dimensions, ...filterDims], "stored", "engine query source");
    const usesCanonical = [state.dimensions, filterDims].some((dims) => dims.includes("queryCanonical"));
    if (usesCanonical) {
      throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
    }
    const result = await engine.query(ctx, state);
    return result.rows;
  }

  /**
   * Execute raw SQL through engine.runSQL. `opts.fileSets.FILES` is mandatory;
   * its `table` is forwarded as the target table. Missing `params` become [].
   */
  async function executeSql(sql, params, opts) {
    const fileSets = opts?.fileSets;
    if (!fileSets?.FILES) {
      throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
    }
    const request = {
      ctx,
      table: fileSets.FILES.table,
      fileSets,
      sql,
      params: params ?? []
    };
    const result = await engine.runSQL(request);
    return result.rows;
  }

  return {
    name: "engine",
    capabilities: ENGINE_SOURCE_CAPABILITIES,
    queryRows,
    executeSql
  };
}
|
|
2623
|
+
/**
 * Convenience entry point: wrap `deps.engine` + `ctx` in an engine query
 * source and hand it to runAnalyzerFromSource together with `params` and
 * `registry`.
 */
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
  const source = createEngineQuerySource({ engine: deps.engine, ctx });
  return runAnalyzerFromSource(source, params, registry);
}
|
|
2629
|
+
/**
 * Drain an (async) iterable of row batches into one flat array.
 *
 * @param {AsyncIterable<Iterable<unknown>> | Iterable<Iterable<unknown>>} gen
 *   source yielding batches of rows
 * @returns {Promise<unknown[]>} all rows, in arrival order
 */
async function collectRows(gen) {
  const out = [];
  for await (const batch of gen) {
    // Append row by row: `out.push(...batch)` passes every row as a separate
    // call argument and throws a RangeError once a batch is large enough to
    // exceed the engine's argument/stack limit.
    for (const row of batch) out.push(row);
  }
  return out;
}
|
|
2634
|
+
/** Names of the metric columns, as opposed to dimensions. */
const METRIC_NAMES = ["clicks", "impressions", "ctr", "position"];
|
|
2640
|
+
/** Whether `dim` appears in METRIC_NAMES (i.e. names a metric, not a dimension). */
function isMetricDimension$1(dim) {
  const index = METRIC_NAMES.indexOf(dim);
  return index !== -1;
}
|
|
2643
|
+
/**
 * Apply a builder state's filters, ordering and pagination to rows in memory.
 *
 * Steps:
 *   1. keep rows matching every dimension filter and every metric filter, and,
 *      if any special filter has operator "topLevel", only top-level pages;
 *   2. sort by `state.orderBy` (default: clicks, descending); the "date"
 *      column is compared as a string, every other column via metricValue;
 *   3. slice by `state.startRow` / `state.rowLimit` (no limit when rowLimit
 *      is null or undefined).
 *
 * The input array is not mutated.
 */
function applyBuilderStatePostProcessing(rows, state) {
  const dimensionFilters = getDimensionFilters(state.filter, isMetricDimension$1);
  const metricFilters = extractMetricFilters(state.filter);
  const specialFilters = extractSpecialOperatorFilters(state.filter);
  const topLevelOnly = specialFilters.some((filter) => filter.operator === "topLevel");

  const kept = rows.filter((row) => {
    const passesDimensions = dimensionFilters.every((filter) => matchesDimensionFilter(row, filter));
    if (!passesDimensions) return false;
    const passesMetrics = metricFilters.every((filter) => matchesMetricFilter(row, filter));
    if (!passesMetrics) return false;
    return !(topLevelOnly && !matchesTopLevelPage(row));
  });

  const sortColumn = state.orderBy?.column ?? "clicks";
  const ascending = (state.orderBy?.dir ?? "desc") === "asc";
  const sortKey = (row) => (sortColumn === "date" ? String(row.date ?? "") : metricValue(row, sortColumn));

  // `kept` is already a fresh array from filter(), so sorting it in place
  // leaves the caller's `rows` untouched.
  kept.sort((a, b) => {
    const left = sortKey(a);
    const right = sortKey(b);
    if (left === right) return 0;
    if (ascending) return left < right ? -1 : 1;
    return left > right ? -1 : 1;
  });

  const offset = Math.max(0, Number(state.startRow ?? 0));
  const limit = Math.max(0, Number((state.rowLimit ?? kept.length) || 0));
  return kept.slice(offset, offset + limit);
}
|
|
2665
|
+
/**
 * Capability flags of the live GSC API query source. The same object is
 * passed to buildLogicalPlan by that source's queryRows.
 */
const GSC_API_CAPABILITIES = {
  // enabled
  regex: true,
  // disabled
  multiDataset: false,
  comparisonJoin: false,
  windowTotals: false
};
|
|
2671
|
+
/**
 * Whether `dim` is a metric name (clicks, impressions, ctr or position).
 * Passed to getFilterDimensions by the GSC API query source.
 */
function isMetricDimension(dim) {
  return dim === "clicks" || dim === "impressions" || dim === "ctr" || dim === "position";
}
|
|
2679
|
+
/**
 * Wrap an already-built state in the minimal builder shape: an object whose
 * getState() returns that same state.
 */
function builderFromState(state) {
  const builder = {
    getState() {
      return state;
    }
  };
  return builder;
}
|
|
2682
|
+
/**
 * Query source named "gsc-api", backed by an API client.
 *
 * queryRows(state):
 *   1. calls buildLogicalPlan(state, GSC_API_CAPABILITIES) and discards the
 *      result (presumably for its validation side effect; confirm against
 *      buildLogicalPlan);
 *   2. asserts every selected and filtered dimension is supported in "api" mode;
 *   3. collects all batches from client.query(siteUrl, builder);
 *   4. re-applies the state's filters, ordering and pagination in memory.
 *
 * @param {{ client: { query: Function }, siteUrl: string }} options
 */
function createGscApiQuerySource(options) {
  const client = options.client;
  const siteUrl = options.siteUrl;

  async function queryRows(state) {
    buildLogicalPlan(state, GSC_API_CAPABILITIES);
    const filterDims = getFilterDimensions(state.filter, isMetricDimension);
    assertDimensionsSupported([...state.dimensions, ...filterDims], "api", "gsc-api query source");
    const batches = client.query(siteUrl, builderFromState(state));
    const fetched = await collectRows(batches);
    return applyBuilderStatePostProcessing(fetched, state);
  }

  return {
    name: "gsc-api",
    capabilities: GSC_API_CAPABILITIES,
    queryRows
  };
}
|
|
2695
|
+
/**
 * Capabilities used by createInMemoryQuerySource when the caller supplies
 * none: every flag is enabled.
 */
const IN_MEMORY_DEFAULT_CAPABILITIES = { regex: true, multiDataset: true, comparisonJoin: true, windowTotals: true };
|
|
2701
|
+
/**
 * Query source named "memory" that delegates to a caller-supplied
 * `options.queryRows(state)` (sync or async).
 *
 * @param {{ queryRows: Function, capabilities?: object }} options
 *   `capabilities` falls back to IN_MEMORY_DEFAULT_CAPABILITIES when nullish.
 */
function createInMemoryQuerySource(options) {
  const capabilities = options.capabilities ?? IN_MEMORY_DEFAULT_CAPABILITIES;
  const source = {
    name: "memory",
    capabilities,
    async queryRows(state) {
      // options.queryRows is looked up on every call, not captured up front.
      const rows = await options.queryRows(state);
      return rows;
    }
  };
  return source;
}
|
|
2710
|
+
// Sorter used by analyzeStrikingDistance. The accessor reads the requested metric straight off each result item.
// NOTE(review): "potentialClicks" is presumably createSorter's default sort key; confirm against createSorter.
const sortResults = createSorter((item, metric) => item[metric], "potentialClicks");
|
|
2711
|
+
/**
 * Find "striking distance" keywords: rows ranking inside a position window
 * with enough impressions and a CTR at or below a ceiling.
 *
 * Options (defaults): minPosition 4, maxPosition 20 (both inclusive),
 * minImpressions 100, maxCtr 0.05 (inclusive), sortBy "potentialClicks",
 * sortOrder "desc".
 *
 * Each result carries `potentialClicks = round(impressions * 0.15)`.
 *
 * @param {Iterable<object>} keywords rows with query/page/clicks/impressions/ctr/position
 * @param {object} [options]
 * @returns sorted result list as produced by sortResults
 */
function analyzeStrikingDistance(keywords, options = {}) {
  const {
    minPosition = 4,
    maxPosition = 20,
    minImpressions = 100,
    maxCtr = .05,
    sortBy = "potentialClicks",
    sortOrder = "desc"
  } = options;

  const candidates = [];
  for (const row of keywords) {
    const position = num(row.position);
    const impressions = num(row.impressions);
    const ctr = num(row.ctr);
    const clicks = num(row.clicks);

    const outsideWindow = position < minPosition || position > maxPosition;
    const tooFewImpressions = impressions < minImpressions;
    const ctrAboveCeiling = ctr > maxCtr;
    if (outsideWindow || tooFewImpressions || ctrAboveCeiling) continue;

    candidates.push({
      keyword: row.query,
      page: row.page ?? null,
      clicks,
      impressions,
      ctr,
      position,
      potentialClicks: Math.round(impressions * .15)
    });
  }
  return sortResults(candidates, sortBy, sortOrder);
}
|
|
2735
|
+
/** Wrap a builder state as `{ state }`, the shape isTypedQuery recognises. */
function typedQuery(state) {
  const wrapper = { state };
  return wrapper;
}
|
|
2738
|
+
/**
 * Whether `value` is a typed-query wrapper, i.e. has a `state` property (own
 * or inherited). Throws a TypeError for non-object input, as the `in`
 * operator does.
 */
function isTypedQuery(value) {
  return Reflect.has(value, "state");
}
|
|
2741
|
+
/**
 * Run a query against `source`. Accepts either a typed-query wrapper
 * (`{ state }`) or a bare builder state.
 */
async function queryRows(source, query) {
  let state = query;
  if (isTypedQuery(query)) {
    state = query.state;
  }
  const rows = await source.queryRows(state);
  return rows;
}
|
|
2745
|
+
/**
 * Fetch the rows for a current and a previous query concurrently.
 * @returns {Promise<{ current: unknown[], previous: unknown[] }>}
 */
async function queryComparisonRows(source, current, previous) {
  const pending = [queryRows(source, current), queryRows(source, previous)];
  const settled = await Promise.all(pending);
  return { current: settled[0], previous: settled[1] };
}
|
|
2752
|
+
/** Typed query wrapping the state built by keywordsQueryState(period, limit). */
function keywordQuery(period, limit) {
  const state = keywordsQueryState(period, limit);
  return typedQuery(state);
}
|
|
2755
|
+
/** Typed query wrapping the state built by pagesQueryState(period, limit). */
function pageQuery(period, limit) {
  const state = pagesQueryState(period, limit);
  return typedQuery(state);
}
|
|
2758
|
+
/** Typed query wrapping the state built by datesQueryState(period, limit). */
function dateQuery(period, limit) {
  const state = datesQueryState(period, limit);
  return typedQuery(state);
}
|
|
2761
|
+
/**
 * Identity helper for declaring a portable analyzer: the definition object is
 * returned exactly as given (same reference, no copying or validation).
 */
function definePortableAnalyzer(definition) {
  const analyzer = definition;
  return analyzer;
}
|
|
2764
|
+
/**
 * Execute a portable analyzer against a query source.
 *
 * Asks the definition which queries it needs for `input` (with `limit`,
 * default 25000), runs them all concurrently through queryRows, then calls
 * `definition.run` with an object mapping each query key to its rows.
 */
async function runPortableAnalyzer(source, definition, input, options, limit = 25e3) {
  const requiredQueries = definition.requiredQueries(input, limit);
  const pending = Object.entries(requiredQueries).map(async (entry) => {
    const key = entry[0];
    const rows = await queryRows(source, entry[1]);
    return [key, rows];
  });
  const resolved = await Promise.all(pending);
  const rowsByKey = Object.fromEntries(resolved);
  return definition.run(rowsByKey, options);
}
|
|
2770
|
+
/**
 * Portable analyzer definitions, keyed by analysis name.
 *
 * Each definition has:
 *   - requiredQueries(input, limit): the typed queries to run, keyed by name
 *   - run(rows, options): the analysis over the fetched rows (same keys)
 *
 * Single-period analyzers take a period as `input`; `decay` and `movers`
 * take `{ current, previous }` periods.
 */
const PORTABLE_ANALYZERS = {
  strikingDistance: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { keywords: keywordQuery(period, limit) };
    },
    run(rows, options) {
      return analyzeStrikingDistance(rows.keywords, options);
    }
  }),
  opportunity: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { keywords: keywordQuery(period, limit) };
    },
    // Reuses the row-based reducer of the registered opportunity analyzer;
    // only `minImpressions` is forwarded from the caller's options.
    run(rows, options) {
      const params = {
        type: "opportunity",
        minImpressions: options?.minImpressions
      };
      const reduced = opportunityAnalyzer.rows.reduce(rows.keywords, { params });
      return reduced.results;
    }
  }),
  brandSegmentation: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { keywords: keywordQuery(period, limit) };
    },
    run(rows, options) {
      return analyzeBrandSegmentation(rows.keywords, options);
    }
  }),
  pageConcentration: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { pages: pageQuery(period, limit) };
    },
    run(rows, options) {
      return analyzePageConcentration(rows.pages, options);
    }
  }),
  keywordConcentration: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { keywords: keywordQuery(period, limit) };
    },
    run(rows, options) {
      return analyzeKeywordConcentration(rows.keywords, options);
    }
  }),
  clustering: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { keywords: keywordQuery(period, limit) };
    },
    run(rows, options) {
      return analyzeClustering(rows.keywords, options);
    }
  }),
  seasonality: definePortableAnalyzer({
    requiredQueries(period, limit) {
      return { dates: dateQuery(period, limit) };
    },
    run(rows, options) {
      return analyzeSeasonality(rows.dates, options);
    }
  }),
  decay: definePortableAnalyzer({
    requiredQueries(periods, limit) {
      return {
        current: pageQuery(periods.current, limit),
        previous: pageQuery(periods.previous, limit)
      };
    },
    run(rows, options) {
      return analyzeDecay({ current: rows.current, previous: rows.previous }, options);
    }
  }),
  movers: definePortableAnalyzer({
    requiredQueries(periods, limit) {
      return {
        current: keywordQuery(periods.current, limit),
        previous: keywordQuery(periods.previous, limit)
      };
    },
    run(rows, options) {
      return analyzeMovers({ current: rows.current, previous: rows.previous }, options);
    }
  })
};
|
|
2826
|
+
/**
 * Fetch keyword, page and date rows for one period concurrently.
 *
 * @param source query source
 * @param period period passed to each query builder
 * @param {{ limit?: number }} [options] row limit per query (default 25000)
 * @returns {Promise<{ keywords: unknown[], pages: unknown[], dates: unknown[] }>}
 */
async function queryAnalyticsFromSource(source, period, options = {}) {
  const limit = options.limit ?? 25e3;
  const builders = [keywordQuery, pageQuery, dateQuery];
  const pending = builders.map((build) => queryRows(source, build(period, limit)));
  const [keywords, pages, dates] = await Promise.all(pending);
  return { keywords, pages, dates };
}
|
|
2839
|
+
/**
 * Fetch the full analytics bundle (keywords, pages, dates) for both the
 * current and the previous period, concurrently.
 *
 * @param source query source
 * @param {{ current: unknown, previous: unknown }} periods
 * @param {object} [options] forwarded to queryAnalyticsFromSource
 */
async function queryComparisonFromSource(source, periods, options = {}) {
  const currentPending = queryAnalyticsFromSource(source, periods.current, options);
  const previousPending = queryAnalyticsFromSource(source, periods.previous, options);
  const bundles = await Promise.all([currentPending, previousPending]);
  return { current: bundles[0], previous: bundles[1] };
}
|
|
2846
|
+
/** Run the portable striking-distance analyzer for `period` against `source`. */
async function analyzeStrikingDistanceFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.strikingDistance;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2849
|
+
/** Run the portable opportunity analyzer for `period` against `source`. */
async function analyzeOpportunityFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.opportunity;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2852
|
+
/** Run the portable brand-segmentation analyzer for `period` against `source`. */
async function analyzeBrandSegmentationFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.brandSegmentation;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2855
|
+
/** Run the portable page-concentration analyzer for `period` against `source`. */
async function analyzePageConcentrationFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.pageConcentration;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2858
|
+
/** Run the portable keyword-concentration analyzer for `period` against `source`. */
async function analyzeKeywordConcentrationFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.keywordConcentration;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2861
|
+
/** Run the portable clustering analyzer for `period` against `source`. */
async function analyzeClusteringFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.clustering;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2864
|
+
/** Run the portable seasonality analyzer for `period` against `source`. */
async function analyzeSeasonalityFromSource(source, period, options) {
  const definition = PORTABLE_ANALYZERS.seasonality;
  return runPortableAnalyzer(source, definition, period, options);
}
|
|
2867
|
+
/** Run the portable decay analyzer for `{ current, previous }` periods against `source`. */
async function analyzeDecayFromSource(source, periods, options) {
  const definition = PORTABLE_ANALYZERS.decay;
  return runPortableAnalyzer(source, definition, periods, options);
}
|
|
2870
|
+
/** Run the portable movers analyzer for `{ current, previous }` periods against `source`. */
async function analyzeMoversFromSource(source, periods, options) {
  const definition = PORTABLE_ANALYZERS.movers;
  return runPortableAnalyzer(source, definition, periods, options);
}
|
|
2873
|
+
export { AnalyzerCapabilityError, ROW_ANALYZERS, analyzeActionPriority, analyzeActionPriorityFromSource, analyzeBrandSegmentation, analyzeBrandSegmentationFromSource, analyzeCannibalization, analyzeClustering, analyzeClusteringFromSource, analyzeConcentration, analyzeDecay, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentration, analyzeKeywordConcentrationFromSource, analyzeMovers, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentration, analyzePageConcentrationFromSource, analyzeSeasonality, analyzeSeasonalityFromSource, analyzeStrikingDistance, analyzeStrikingDistanceFromSource, comparisonOf, createAnalyzerRegistry, createBrowserQuerySource, createEngineQuerySource, createGscApiQuerySource, createInMemoryQuerySource, createSorter, createSqliteQuerySource, isSqlQuerySource, mergePriorityActions, normalizePriorityActions, normalizeQuery, num, padTimeseries, periodOf, queryAnalyticsFromSource, queryComparisonFromSource, queryComparisonRows, queryRows, resolveWindow, runAnalyzerFromSource, runAnalyzerWithEngine, scorePriorityActions, windowToComparisonPeriod, windowToPeriod };
|