@gscdump/analysis 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -8
- package/dist/analyzer/index.d.mts +1 -3
- package/dist/analyzer/index.mjs +88 -86
- package/dist/default-registry.d.mts +4 -4
- package/dist/default-registry.mjs +3230 -3232
- package/dist/index.d.mts +52 -77
- package/dist/index.mjs +2870 -3014
- package/dist/query/index.d.mts +17 -5
- package/dist/query/index.mjs +36 -36
- package/dist/report/index.d.mts +1 -1
- package/dist/report/index.mjs +1 -4
- package/dist/semantic/index.d.mts +1 -1
- package/dist/source/index.d.mts +10 -245
- package/dist/source/index.mjs +12 -1525
- package/package.json +4 -14
- package/dist/rollups.d.mts +0 -163
- package/dist/rollups.mjs +0 -346
- package/dist/routing/index.d.mts +0 -23
- package/dist/routing/index.mjs +0 -53
package/dist/source/index.mjs
CHANGED
|
@@ -1,28 +1,20 @@
|
|
|
1
|
-
import { AnalyzerCapabilityError, defineAnalyzer, runAnalyzerFromSource } from "@gscdump/engine/analyzer";
|
|
2
1
|
import { canProxyToGsc } from "@gscdump/engine-gsc-api";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
return runAnalyzerFromSource(source, params, registry);
|
|
2
|
+
import { extractDateRange } from "gscdump/query";
|
|
3
|
+
function shouldRouteToLive(state, site) {
|
|
4
|
+
if (!canProxyToGsc(state)) return false;
|
|
5
|
+
const { startDate, endDate } = extractDateRange(state.filter);
|
|
6
|
+
if (!startDate || !endDate) return false;
|
|
7
|
+
if (!site.oldestDateSynced || !site.newestDateSynced) return true;
|
|
8
|
+
return startDate < site.oldestDateSynced || endDate > site.newestDateSynced;
|
|
11
9
|
}
|
|
12
10
|
function createCompositeSource(opts) {
|
|
13
11
|
const { engine, live, site } = opts;
|
|
14
|
-
function rangeCovered(state) {
|
|
15
|
-
const { startDate, endDate } = extractDateRange(state.filter);
|
|
16
|
-
return !!(startDate && endDate && site.oldestDateSynced && site.newestDateSynced && startDate >= site.oldestDateSynced && endDate <= site.newestDateSynced);
|
|
17
|
-
}
|
|
18
12
|
return {
|
|
13
|
+
...engine,
|
|
19
14
|
name: "composite-engine-live",
|
|
20
|
-
capabilities: engine.capabilities,
|
|
21
15
|
async queryRows(state) {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
},
|
|
25
|
-
executeSql: engine.executeSql
|
|
16
|
+
return shouldRouteToLive(state, site) ? live.queryRows(state) : engine.queryRows(state);
|
|
17
|
+
}
|
|
26
18
|
};
|
|
27
19
|
}
|
|
28
20
|
const IN_MEMORY_DEFAULT_CAPABILITIES = {
|
|
@@ -34,1516 +26,11 @@ const IN_MEMORY_DEFAULT_CAPABILITIES = {
|
|
|
34
26
|
function createInMemoryQuerySource(options) {
|
|
35
27
|
return {
|
|
36
28
|
name: "memory",
|
|
29
|
+
kind: "in-memory",
|
|
37
30
|
capabilities: options.capabilities ?? IN_MEMORY_DEFAULT_CAPABILITIES,
|
|
38
31
|
async queryRows(state) {
|
|
39
32
|
return await options.queryRows(state);
|
|
40
33
|
}
|
|
41
34
|
};
|
|
42
35
|
}
|
|
43
|
-
|
|
44
|
-
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
45
|
-
return gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
|
|
46
|
-
}
|
|
47
|
-
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
48
|
-
return gsc.select(page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
|
|
49
|
-
}
|
|
50
|
-
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
|
|
51
|
-
return gsc.select(date).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
|
|
52
|
-
}
|
|
53
|
-
function escapeRegexAlt(s) {
|
|
54
|
-
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
55
|
-
}
|
|
56
|
-
function str$5(v) {
|
|
57
|
-
return v == null ? "" : String(v);
|
|
58
|
-
}
|
|
59
|
-
function analyzeBrandSegmentation(keywords, options) {
|
|
60
|
-
const { brandTerms, minImpressions = 10 } = options;
|
|
61
|
-
const lowerBrandTerms = brandTerms.map((t) => t.toLowerCase());
|
|
62
|
-
const brand = [];
|
|
63
|
-
const nonBrand = [];
|
|
64
|
-
for (const row of keywords) {
|
|
65
|
-
if (num(row.impressions) < minImpressions) continue;
|
|
66
|
-
if (lowerBrandTerms.some((term) => row.query.toLowerCase().includes(term))) brand.push(row);
|
|
67
|
-
else nonBrand.push(row);
|
|
68
|
-
}
|
|
69
|
-
const brandClicks = brand.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
70
|
-
const nonBrandClicks = nonBrand.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
71
|
-
const totalClicks = brandClicks + nonBrandClicks;
|
|
72
|
-
return {
|
|
73
|
-
brand,
|
|
74
|
-
nonBrand,
|
|
75
|
-
summary: {
|
|
76
|
-
brandClicks,
|
|
77
|
-
nonBrandClicks,
|
|
78
|
-
brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
|
|
79
|
-
brandImpressions: brand.reduce((sum, k) => sum + num(k.impressions), 0),
|
|
80
|
-
nonBrandImpressions: nonBrand.reduce((sum, k) => sum + num(k.impressions), 0)
|
|
81
|
-
}
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
defineAnalyzer({
|
|
85
|
-
id: "brand",
|
|
86
|
-
buildSql(params) {
|
|
87
|
-
if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
|
|
88
|
-
const { startDate, endDate } = periodOf(params);
|
|
89
|
-
const minImpressions = params.minImpressions ?? 10;
|
|
90
|
-
const limit = params.limit ?? 1e4;
|
|
91
|
-
const regex = `(${params.brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
|
|
92
|
-
return {
|
|
93
|
-
sql: `
|
|
94
|
-
WITH agg AS (
|
|
95
|
-
SELECT
|
|
96
|
-
query,
|
|
97
|
-
url AS page,
|
|
98
|
-
${METRIC_EXPR.clicks} AS clicks,
|
|
99
|
-
${METRIC_EXPR.impressions} AS impressions,
|
|
100
|
-
${METRIC_EXPR.ctr} AS ctr,
|
|
101
|
-
${METRIC_EXPR.position} AS position
|
|
102
|
-
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
103
|
-
WHERE date >= ? AND date <= ?
|
|
104
|
-
GROUP BY query, url
|
|
105
|
-
HAVING SUM(impressions) >= ?
|
|
106
|
-
)
|
|
107
|
-
SELECT
|
|
108
|
-
query, page, clicks, impressions, ctr, position,
|
|
109
|
-
CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
|
|
110
|
-
FROM agg
|
|
111
|
-
ORDER BY clicks DESC
|
|
112
|
-
LIMIT ${Number(limit)}
|
|
113
|
-
`,
|
|
114
|
-
params: [
|
|
115
|
-
startDate,
|
|
116
|
-
endDate,
|
|
117
|
-
minImpressions,
|
|
118
|
-
regex
|
|
119
|
-
],
|
|
120
|
-
current: {
|
|
121
|
-
table: "page_keywords",
|
|
122
|
-
partitions: enumeratePartitions(startDate, endDate)
|
|
123
|
-
}
|
|
124
|
-
};
|
|
125
|
-
},
|
|
126
|
-
reduceSql(rows) {
|
|
127
|
-
const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
128
|
-
query: str$5(r.query),
|
|
129
|
-
page: r.page == null ? void 0 : str$5(r.page),
|
|
130
|
-
clicks: num(r.clicks),
|
|
131
|
-
impressions: num(r.impressions),
|
|
132
|
-
ctr: num(r.ctr),
|
|
133
|
-
position: num(r.position),
|
|
134
|
-
segment: str$5(r.segment)
|
|
135
|
-
}));
|
|
136
|
-
let brandClicks = 0;
|
|
137
|
-
let nonBrandClicks = 0;
|
|
138
|
-
let brandImpressions = 0;
|
|
139
|
-
let nonBrandImpressions = 0;
|
|
140
|
-
for (const r of normalized) if (r.segment === "brand") {
|
|
141
|
-
brandClicks += r.clicks;
|
|
142
|
-
brandImpressions += r.impressions;
|
|
143
|
-
} else {
|
|
144
|
-
nonBrandClicks += r.clicks;
|
|
145
|
-
nonBrandImpressions += r.impressions;
|
|
146
|
-
}
|
|
147
|
-
const totalClicks = brandClicks + nonBrandClicks;
|
|
148
|
-
return {
|
|
149
|
-
results: normalized,
|
|
150
|
-
meta: {
|
|
151
|
-
total: normalized.length,
|
|
152
|
-
summary: {
|
|
153
|
-
brandClicks,
|
|
154
|
-
nonBrandClicks,
|
|
155
|
-
brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
|
|
156
|
-
brandImpressions,
|
|
157
|
-
nonBrandImpressions
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
};
|
|
161
|
-
},
|
|
162
|
-
buildRows(params) {
|
|
163
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit) };
|
|
164
|
-
},
|
|
165
|
-
reduceRows(rows, params) {
|
|
166
|
-
if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
|
|
167
|
-
const result = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
|
|
168
|
-
brandTerms: params.brandTerms,
|
|
169
|
-
minImpressions: params.minImpressions
|
|
170
|
-
});
|
|
171
|
-
return {
|
|
172
|
-
results: [...result.brand.map((r) => ({
|
|
173
|
-
...r,
|
|
174
|
-
segment: "brand"
|
|
175
|
-
})), ...result.nonBrand.map((r) => ({
|
|
176
|
-
...r,
|
|
177
|
-
segment: "non-brand"
|
|
178
|
-
}))],
|
|
179
|
-
meta: { summary: result.summary }
|
|
180
|
-
};
|
|
181
|
-
}
|
|
182
|
-
});
|
|
183
|
-
const INTENT_PREFIXES_REGEX = "^(how to|what is|what are|why is|why do|where to|when to|best|top|vs|versus|compare|review|buy|cheap|free|near me)(\\s|$)";
|
|
184
|
-
const INTENT_PREFIXES = [
|
|
185
|
-
"how to",
|
|
186
|
-
"what is",
|
|
187
|
-
"what are",
|
|
188
|
-
"why is",
|
|
189
|
-
"why do",
|
|
190
|
-
"where to",
|
|
191
|
-
"when to",
|
|
192
|
-
"best",
|
|
193
|
-
"top",
|
|
194
|
-
"vs",
|
|
195
|
-
"versus",
|
|
196
|
-
"compare",
|
|
197
|
-
"review",
|
|
198
|
-
"buy",
|
|
199
|
-
"cheap",
|
|
200
|
-
"free",
|
|
201
|
-
"near me"
|
|
202
|
-
];
|
|
203
|
-
const WHITESPACE_RE = /\s+/;
|
|
204
|
-
function str$4(v) {
|
|
205
|
-
return v == null ? "" : String(v);
|
|
206
|
-
}
|
|
207
|
-
function parseJsonList$3(v) {
|
|
208
|
-
if (Array.isArray(v)) return v;
|
|
209
|
-
if (typeof v === "string" && v.length > 0) {
|
|
210
|
-
const parsed = JSON.parse(v);
|
|
211
|
-
return Array.isArray(parsed) ? parsed : [];
|
|
212
|
-
}
|
|
213
|
-
return [];
|
|
214
|
-
}
|
|
215
|
-
function extractIntentPrefix(keyword) {
|
|
216
|
-
const lower = keyword.toLowerCase();
|
|
217
|
-
for (const prefix of INTENT_PREFIXES) if (lower.startsWith(`${prefix} `) || lower.startsWith(prefix)) return prefix;
|
|
218
|
-
return null;
|
|
219
|
-
}
|
|
220
|
-
function extractWordPrefix(keyword, wordCount = 2) {
|
|
221
|
-
const words = keyword.toLowerCase().split(WHITESPACE_RE).filter(Boolean);
|
|
222
|
-
if (words.length < wordCount + 1) return null;
|
|
223
|
-
return words.slice(0, wordCount).join(" ");
|
|
224
|
-
}
|
|
225
|
-
function analyzeClustering(keywords, options = {}) {
|
|
226
|
-
const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
|
|
227
|
-
const filtered = keywords.filter((k) => num(k.impressions) >= minImpressions);
|
|
228
|
-
const clusterMap = /* @__PURE__ */ new Map();
|
|
229
|
-
const clusteredKeywords = /* @__PURE__ */ new Set();
|
|
230
|
-
if (clusterBy === "intent" || clusterBy === "both") for (const kw of filtered) {
|
|
231
|
-
const intent = extractIntentPrefix(kw.query);
|
|
232
|
-
if (intent) {
|
|
233
|
-
const existing = clusterMap.get(intent);
|
|
234
|
-
if (existing) existing.keywords.push(kw);
|
|
235
|
-
else clusterMap.set(intent, {
|
|
236
|
-
type: "intent",
|
|
237
|
-
keywords: [kw]
|
|
238
|
-
});
|
|
239
|
-
clusteredKeywords.add(kw.query);
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
|
-
if (clusterBy === "prefix" || clusterBy === "both") {
|
|
243
|
-
const unclustered = filtered.filter((kw) => !clusteredKeywords.has(kw.query));
|
|
244
|
-
const prefixMap = /* @__PURE__ */ new Map();
|
|
245
|
-
for (const kw of unclustered) {
|
|
246
|
-
const prefix = extractWordPrefix(kw.query);
|
|
247
|
-
if (prefix) {
|
|
248
|
-
const existing = prefixMap.get(prefix);
|
|
249
|
-
if (existing) existing.push(kw);
|
|
250
|
-
else prefixMap.set(prefix, [kw]);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
for (const [prefix, kws] of prefixMap) if (kws.length >= minClusterSize) {
|
|
254
|
-
clusterMap.set(prefix, {
|
|
255
|
-
type: "prefix",
|
|
256
|
-
keywords: kws
|
|
257
|
-
});
|
|
258
|
-
kws.forEach((kw) => clusteredKeywords.add(kw.query));
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
const clusters = [];
|
|
262
|
-
for (const [name, data] of clusterMap) {
|
|
263
|
-
if (data.keywords.length < minClusterSize) continue;
|
|
264
|
-
const totalClicks = data.keywords.reduce((sum, k) => sum + num(k.clicks), 0);
|
|
265
|
-
const totalImpressions = data.keywords.reduce((sum, k) => sum + num(k.impressions), 0);
|
|
266
|
-
const avgPosition = data.keywords.reduce((sum, k) => sum + num(k.position), 0) / data.keywords.length;
|
|
267
|
-
clusters.push({
|
|
268
|
-
clusterName: name,
|
|
269
|
-
clusterType: data.type,
|
|
270
|
-
keywords: data.keywords,
|
|
271
|
-
totalClicks,
|
|
272
|
-
totalImpressions,
|
|
273
|
-
avgPosition,
|
|
274
|
-
keywordCount: data.keywords.length
|
|
275
|
-
});
|
|
276
|
-
}
|
|
277
|
-
clusters.sort((a, b) => b.totalClicks - a.totalClicks);
|
|
278
|
-
return {
|
|
279
|
-
clusters,
|
|
280
|
-
unclustered: filtered.filter((kw) => !clusteredKeywords.has(kw.query))
|
|
281
|
-
};
|
|
282
|
-
}
|
|
283
|
-
defineAnalyzer({
|
|
284
|
-
id: "clustering",
|
|
285
|
-
buildSql(params) {
|
|
286
|
-
const { startDate, endDate } = periodOf(params);
|
|
287
|
-
const minImpressions = params.minImpressions ?? 10;
|
|
288
|
-
const minClusterSize = params.minClusterSize ?? 2;
|
|
289
|
-
const clusterBy = params.clusterBy ?? "both";
|
|
290
|
-
const doIntent = clusterBy === "intent" || clusterBy === "both";
|
|
291
|
-
const doPrefix = clusterBy === "prefix" || clusterBy === "both";
|
|
292
|
-
const intentExpr = doIntent ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')` : `CAST(NULL AS VARCHAR)`;
|
|
293
|
-
const prefixExpr = doPrefix ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
|
|
294
|
-
THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
|
|
295
|
-
ELSE CAST(NULL AS VARCHAR) END` : `CAST(NULL AS VARCHAR)`;
|
|
296
|
-
return {
|
|
297
|
-
sql: `
|
|
298
|
-
WITH agg AS (
|
|
299
|
-
SELECT
|
|
300
|
-
query,
|
|
301
|
-
${METRIC_EXPR.clicks} AS clicks,
|
|
302
|
-
${METRIC_EXPR.impressions} AS impressions,
|
|
303
|
-
${METRIC_EXPR.ctr} AS ctr,
|
|
304
|
-
${METRIC_EXPR.position} AS position
|
|
305
|
-
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
306
|
-
WHERE date >= ? AND date <= ?
|
|
307
|
-
GROUP BY query
|
|
308
|
-
HAVING SUM(impressions) >= ?
|
|
309
|
-
),
|
|
310
|
-
classified AS (
|
|
311
|
-
SELECT
|
|
312
|
-
query, clicks, impressions, ctr, position,
|
|
313
|
-
${intentExpr} AS intent_prefix,
|
|
314
|
-
${prefixExpr} AS word_prefix
|
|
315
|
-
FROM agg
|
|
316
|
-
),
|
|
317
|
-
keyed AS (
|
|
318
|
-
SELECT
|
|
319
|
-
query, clicks, impressions, ctr, position,
|
|
320
|
-
COALESCE(intent_prefix, word_prefix) AS cluster_name,
|
|
321
|
-
CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
|
|
322
|
-
FROM classified
|
|
323
|
-
WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
|
|
324
|
-
)
|
|
325
|
-
SELECT
|
|
326
|
-
cluster_name AS clusterName,
|
|
327
|
-
any_value(cluster_type) AS clusterType,
|
|
328
|
-
CAST(COUNT(*) AS DOUBLE) AS keywordCount,
|
|
329
|
-
${METRIC_EXPR.clicks} AS totalClicks,
|
|
330
|
-
${METRIC_EXPR.impressions} AS totalImpressions,
|
|
331
|
-
AVG(position) AS avgPosition,
|
|
332
|
-
to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
|
|
333
|
-
FROM keyed
|
|
334
|
-
GROUP BY cluster_name
|
|
335
|
-
HAVING COUNT(*) >= ?
|
|
336
|
-
ORDER BY totalClicks DESC
|
|
337
|
-
`,
|
|
338
|
-
params: [
|
|
339
|
-
startDate,
|
|
340
|
-
endDate,
|
|
341
|
-
minImpressions,
|
|
342
|
-
minClusterSize
|
|
343
|
-
],
|
|
344
|
-
current: {
|
|
345
|
-
table: "keywords",
|
|
346
|
-
partitions: enumeratePartitions(startDate, endDate)
|
|
347
|
-
}
|
|
348
|
-
};
|
|
349
|
-
},
|
|
350
|
-
reduceSql(rows) {
|
|
351
|
-
const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
|
|
352
|
-
clusterName: str$4(r.clusterName),
|
|
353
|
-
clusterType: str$4(r.clusterType),
|
|
354
|
-
keywordCount: num(r.keywordCount),
|
|
355
|
-
totalClicks: num(r.totalClicks),
|
|
356
|
-
totalImpressions: num(r.totalImpressions),
|
|
357
|
-
avgPosition: num(r.avgPosition),
|
|
358
|
-
keywords: parseJsonList$3(r.keywords).map((k) => ({
|
|
359
|
-
query: str$4(k.query),
|
|
360
|
-
clicks: num(k.clicks),
|
|
361
|
-
impressions: num(k.impressions),
|
|
362
|
-
ctr: num(k.ctr),
|
|
363
|
-
position: num(k.position)
|
|
364
|
-
}))
|
|
365
|
-
}));
|
|
366
|
-
return {
|
|
367
|
-
results: clusters,
|
|
368
|
-
meta: {
|
|
369
|
-
total: clusters.length,
|
|
370
|
-
totalClusters: clusters.length
|
|
371
|
-
}
|
|
372
|
-
};
|
|
373
|
-
},
|
|
374
|
-
buildRows(params) {
|
|
375
|
-
return { keywords: keywordsQueryState(periodOf(params), params.limit) };
|
|
376
|
-
},
|
|
377
|
-
reduceRows(rows, params) {
|
|
378
|
-
const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
|
|
379
|
-
clusterBy: params.clusterBy,
|
|
380
|
-
minClusterSize: params.minClusterSize,
|
|
381
|
-
minImpressions: params.minImpressions
|
|
382
|
-
});
|
|
383
|
-
return {
|
|
384
|
-
results: result.clusters,
|
|
385
|
-
meta: { totalClusters: result.clusters.length }
|
|
386
|
-
};
|
|
387
|
-
}
|
|
388
|
-
});
|
|
389
|
-
function str$3(v) {
|
|
390
|
-
return v == null ? "" : String(v);
|
|
391
|
-
}
|
|
392
|
-
function parseJsonList$2(v) {
|
|
393
|
-
if (Array.isArray(v)) return v;
|
|
394
|
-
if (typeof v === "string" && v.length > 0) {
|
|
395
|
-
const parsed = JSON.parse(v);
|
|
396
|
-
return Array.isArray(parsed) ? parsed : [];
|
|
397
|
-
}
|
|
398
|
-
return [];
|
|
399
|
-
}
|
|
400
|
-
function calculateGini(values) {
|
|
401
|
-
if (values.length === 0) return 0;
|
|
402
|
-
const sorted = [...values].sort((a, b) => a - b);
|
|
403
|
-
const n = sorted.length;
|
|
404
|
-
const sum = sorted.reduce((a, b) => a + b, 0);
|
|
405
|
-
if (sum === 0) return 0;
|
|
406
|
-
let weightedSum = 0;
|
|
407
|
-
for (let i = 0; i < n; i++) weightedSum += (2 * (i + 1) - n - 1) * sorted[i];
|
|
408
|
-
return weightedSum / (n * sum);
|
|
409
|
-
}
|
|
410
|
-
function calculateHHI(shares) {
|
|
411
|
-
return shares.reduce((sum, share) => sum + (share * 100) ** 2, 0);
|
|
412
|
-
}
|
|
413
|
-
function analyzeConcentration(items, options = {}) {
|
|
414
|
-
const { topN = 10 } = options;
|
|
415
|
-
if (items.length === 0) return {
|
|
416
|
-
giniCoefficient: 0,
|
|
417
|
-
hhi: 0,
|
|
418
|
-
topNConcentration: 0,
|
|
419
|
-
topNItems: [],
|
|
420
|
-
totalItems: 0,
|
|
421
|
-
totalClicks: 0,
|
|
422
|
-
riskLevel: "low"
|
|
423
|
-
};
|
|
424
|
-
const sorted = [...items].sort((a, b) => b.clicks - a.clicks);
|
|
425
|
-
const totalClicks = sorted.reduce((sum, item) => sum + item.clicks, 0);
|
|
426
|
-
const clickValues = sorted.map((i) => i.clicks);
|
|
427
|
-
const shares = totalClicks > 0 ? sorted.map((i) => i.clicks / totalClicks) : [];
|
|
428
|
-
const giniCoefficient = calculateGini(clickValues);
|
|
429
|
-
const hhi = calculateHHI(shares);
|
|
430
|
-
const topNItems = sorted.slice(0, topN).map((item) => ({
|
|
431
|
-
key: item.key,
|
|
432
|
-
clicks: item.clicks,
|
|
433
|
-
share: totalClicks > 0 ? item.clicks / totalClicks : 0
|
|
434
|
-
}));
|
|
435
|
-
const topNClicks = topNItems.reduce((sum, item) => sum + item.clicks, 0);
|
|
436
|
-
const topNConcentration = totalClicks > 0 ? topNClicks / totalClicks : 0;
|
|
437
|
-
let riskLevel = "low";
|
|
438
|
-
if (hhi > 2500) riskLevel = "high";
|
|
439
|
-
else if (hhi > 1500) riskLevel = "medium";
|
|
440
|
-
return {
|
|
441
|
-
giniCoefficient,
|
|
442
|
-
hhi,
|
|
443
|
-
topNConcentration,
|
|
444
|
-
topNItems,
|
|
445
|
-
totalItems: items.length,
|
|
446
|
-
totalClicks,
|
|
447
|
-
riskLevel
|
|
448
|
-
};
|
|
449
|
-
}
|
|
450
|
-
function analyzePageConcentration(pages, options) {
|
|
451
|
-
return analyzeConcentration(pages.map((p) => ({
|
|
452
|
-
key: p.page,
|
|
453
|
-
clicks: num(p.clicks)
|
|
454
|
-
})), options);
|
|
455
|
-
}
|
|
456
|
-
function analyzeKeywordConcentration(keywords, options) {
|
|
457
|
-
return analyzeConcentration(keywords.map((k) => ({
|
|
458
|
-
key: k.query,
|
|
459
|
-
clicks: num(k.clicks)
|
|
460
|
-
})), options);
|
|
461
|
-
}
|
|
462
|
-
defineAnalyzer({
|
|
463
|
-
id: "concentration",
|
|
464
|
-
buildSql(params) {
|
|
465
|
-
const { startDate, endDate } = periodOf(params);
|
|
466
|
-
const dim = params.dimension || "pages";
|
|
467
|
-
const topN = params.topN ?? 10;
|
|
468
|
-
const table = dim === "keywords" ? "keywords" : "pages";
|
|
469
|
-
const keyCol = dim === "keywords" ? "query" : "url";
|
|
470
|
-
return {
|
|
471
|
-
sql: `
|
|
472
|
-
WITH items AS (
|
|
473
|
-
SELECT
|
|
474
|
-
${keyCol} AS key,
|
|
475
|
-
${METRIC_EXPR.clicks} AS clicks
|
|
476
|
-
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
477
|
-
WHERE date >= ? AND date <= ?
|
|
478
|
-
GROUP BY ${keyCol}
|
|
479
|
-
HAVING SUM(clicks) > 0
|
|
480
|
-
),
|
|
481
|
-
totals AS (
|
|
482
|
-
SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
|
|
483
|
-
),
|
|
484
|
-
ranked AS (
|
|
485
|
-
SELECT
|
|
486
|
-
i.key, i.clicks,
|
|
487
|
-
i.clicks / NULLIF(t.total_clicks, 0) AS share,
|
|
488
|
-
ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
|
|
489
|
-
ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
|
|
490
|
-
t.total_clicks AS tclicks,
|
|
491
|
-
t.total_items AS titems
|
|
492
|
-
FROM items i, totals t
|
|
493
|
-
),
|
|
494
|
-
gini_num AS (
|
|
495
|
-
SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
|
|
496
|
-
),
|
|
497
|
-
hhi_calc AS (
|
|
498
|
-
SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
|
|
499
|
-
),
|
|
500
|
-
top_list AS (
|
|
501
|
-
SELECT
|
|
502
|
-
list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
|
|
503
|
-
SUM(clicks) AS top_clicks
|
|
504
|
-
FROM ranked WHERE rnk_desc <= ?
|
|
505
|
-
)
|
|
506
|
-
SELECT
|
|
507
|
-
COALESCE(
|
|
508
|
-
(SELECT weighted_sum FROM gini_num)
|
|
509
|
-
/ NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
|
|
510
|
-
0.0
|
|
511
|
-
) AS giniCoefficient,
|
|
512
|
-
COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
|
|
513
|
-
COALESCE(
|
|
514
|
-
CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
|
|
515
|
-
/ NULLIF((SELECT total_clicks FROM totals), 0),
|
|
516
|
-
0.0
|
|
517
|
-
) AS topNConcentration,
|
|
518
|
-
COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
|
|
519
|
-
COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
|
|
520
|
-
COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
|
|
521
|
-
CASE
|
|
522
|
-
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
|
|
523
|
-
WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
|
|
524
|
-
ELSE 'low'
|
|
525
|
-
END AS riskLevel
|
|
526
|
-
`,
|
|
527
|
-
params: [
|
|
528
|
-
startDate,
|
|
529
|
-
endDate,
|
|
530
|
-
topN
|
|
531
|
-
],
|
|
532
|
-
current: {
|
|
533
|
-
table,
|
|
534
|
-
partitions: enumeratePartitions(startDate, endDate)
|
|
535
|
-
}
|
|
536
|
-
};
|
|
537
|
-
},
|
|
538
|
-
reduceSql(rows, params) {
|
|
539
|
-
const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
|
|
540
|
-
const topRaw = parseJsonList$2(r.topNItems);
|
|
541
|
-
return {
|
|
542
|
-
results: [{
|
|
543
|
-
giniCoefficient: num(r.giniCoefficient),
|
|
544
|
-
hhi: num(r.hhi),
|
|
545
|
-
topNConcentration: num(r.topNConcentration),
|
|
546
|
-
topNItems: topRaw.map((t) => ({
|
|
547
|
-
key: str$3(t.key),
|
|
548
|
-
clicks: num(t.clicks),
|
|
549
|
-
share: num(t.share)
|
|
550
|
-
})),
|
|
551
|
-
totalItems: num(r.totalItems),
|
|
552
|
-
totalClicks: num(r.totalClicks),
|
|
553
|
-
riskLevel: str$3(r.riskLevel)
|
|
554
|
-
}],
|
|
555
|
-
meta: {
|
|
556
|
-
total: 1,
|
|
557
|
-
dimension: params.dimension || "pages"
|
|
558
|
-
}
|
|
559
|
-
};
|
|
560
|
-
},
|
|
561
|
-
buildRows(params) {
|
|
562
|
-
const dim = params.dimension || "pages";
|
|
563
|
-
const period = periodOf(params);
|
|
564
|
-
const out = {};
|
|
565
|
-
if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
|
|
566
|
-
else out.keywords = keywordsQueryState(period, params.limit);
|
|
567
|
-
return out;
|
|
568
|
-
},
|
|
569
|
-
reduceRows(rows, params) {
|
|
570
|
-
const dim = params.dimension || "pages";
|
|
571
|
-
const arr = Array.isArray(rows) ? rows : rows[dim] ?? [];
|
|
572
|
-
return {
|
|
573
|
-
results: [dim === "pages" ? analyzePageConcentration(arr, { topN: params.topN }) : analyzeKeywordConcentration(arr, { topN: params.topN })],
|
|
574
|
-
meta: { dimension: dim }
|
|
575
|
-
};
|
|
576
|
-
}
|
|
577
|
-
});
|
|
578
|
-
function buildPeriodMap(rows, key, value, filter) {
|
|
579
|
-
const out = /* @__PURE__ */ new Map();
|
|
580
|
-
for (const row of rows) {
|
|
581
|
-
if (filter && !filter(row)) continue;
|
|
582
|
-
out.set(key(row), value(row));
|
|
583
|
-
}
|
|
584
|
-
return out;
|
|
585
|
-
}
|
|
586
|
-
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
|
|
587
|
-
return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
|
|
588
|
-
const mult = sortOrder === "desc" ? -1 : 1;
|
|
589
|
-
return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
|
|
590
|
-
};
|
|
591
|
-
}
|
|
592
|
-
function createMetricSorter(defaultMetric, orderByMetric) {
|
|
593
|
-
return (items, sortBy = defaultMetric) => {
|
|
594
|
-
const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
|
|
595
|
-
return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
|
|
596
|
-
};
|
|
597
|
-
}
|
|
598
|
-
const sortResults$2 = createMetricSorter("lostClicks", {
|
|
599
|
-
lostClicks: "desc",
|
|
600
|
-
declinePercent: "desc",
|
|
601
|
-
currentClicks: "asc"
|
|
602
|
-
});
|
|
603
|
-
function str$2(v) {
|
|
604
|
-
return v == null ? "" : String(v);
|
|
605
|
-
}
|
|
606
|
-
function parseJsonList$1(v) {
|
|
607
|
-
if (Array.isArray(v)) return v;
|
|
608
|
-
if (typeof v === "string" && v.length > 0) {
|
|
609
|
-
const parsed = JSON.parse(v);
|
|
610
|
-
return Array.isArray(parsed) ? parsed : [];
|
|
611
|
-
}
|
|
612
|
-
return [];
|
|
613
|
-
}
|
|
614
|
-
function analyzeDecay(input, options = {}) {
|
|
615
|
-
const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
|
|
616
|
-
const currentMap = buildPeriodMap(input.current, (r) => r.page, (r) => ({
|
|
617
|
-
clicks: num(r.clicks),
|
|
618
|
-
position: num(r.position)
|
|
619
|
-
}));
|
|
620
|
-
const previousMap = buildPeriodMap(input.previous, (r) => r.page, (r) => ({
|
|
621
|
-
clicks: num(r.clicks),
|
|
622
|
-
position: num(r.position)
|
|
623
|
-
}), (r) => num(r.clicks) >= minPreviousClicks);
|
|
624
|
-
const results = [];
|
|
625
|
-
for (const [page, prev] of previousMap) {
|
|
626
|
-
const curr = currentMap.get(page) || {
|
|
627
|
-
clicks: 0,
|
|
628
|
-
position: 0
|
|
629
|
-
};
|
|
630
|
-
const lostClicks = prev.clicks - curr.clicks;
|
|
631
|
-
const declinePercent = prev.clicks > 0 ? lostClicks / prev.clicks : 0;
|
|
632
|
-
if (declinePercent >= threshold && lostClicks > 0) results.push({
|
|
633
|
-
page,
|
|
634
|
-
currentClicks: curr.clicks,
|
|
635
|
-
previousClicks: prev.clicks,
|
|
636
|
-
lostClicks,
|
|
637
|
-
declinePercent,
|
|
638
|
-
currentPosition: curr.position,
|
|
639
|
-
previousPosition: prev.position,
|
|
640
|
-
positionDrop: curr.position - prev.position
|
|
641
|
-
});
|
|
642
|
-
}
|
|
643
|
-
return sortResults$2(results, sortBy);
|
|
644
|
-
}
|
|
645
|
-
defineAnalyzer({
|
|
646
|
-
id: "decay",
|
|
647
|
-
buildSql(params) {
|
|
648
|
-
const { current: cur, previous: prev } = comparisonOf(params);
|
|
649
|
-
const minPreviousClicks = params.minPreviousClicks ?? 50;
|
|
650
|
-
const threshold = params.threshold ?? .2;
|
|
651
|
-
const limit = params.limit ?? 2e3;
|
|
652
|
-
return {
|
|
653
|
-
sql: `
|
|
654
|
-
WITH cur AS (
|
|
655
|
-
SELECT
|
|
656
|
-
url,
|
|
657
|
-
${METRIC_EXPR.clicks} AS clicks,
|
|
658
|
-
${METRIC_EXPR.position} AS position
|
|
659
|
-
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
660
|
-
WHERE date >= ? AND date <= ?
|
|
661
|
-
GROUP BY url
|
|
662
|
-
),
|
|
663
|
-
prev AS (
|
|
664
|
-
SELECT
|
|
665
|
-
url,
|
|
666
|
-
${METRIC_EXPR.clicks} AS clicks,
|
|
667
|
-
${METRIC_EXPR.position} AS position
|
|
668
|
-
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
|
|
669
|
-
WHERE date >= ? AND date <= ?
|
|
670
|
-
GROUP BY url
|
|
671
|
-
HAVING SUM(clicks) >= ?
|
|
672
|
-
),
|
|
673
|
-
weekly AS (
|
|
674
|
-
SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
|
|
675
|
-
${METRIC_EXPR.clicks} AS clicks,
|
|
676
|
-
${METRIC_EXPR.impressions} AS impressions
|
|
677
|
-
FROM (
|
|
678
|
-
SELECT url, date, clicks, impressions
|
|
679
|
-
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
680
|
-
WHERE date >= ? AND date <= ?
|
|
681
|
-
UNION ALL
|
|
682
|
-
SELECT url, date, clicks, impressions
|
|
683
|
-
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
|
|
684
|
-
WHERE date >= ? AND date <= ?
|
|
685
|
-
)
|
|
686
|
-
GROUP BY url, week
|
|
687
|
-
),
|
|
688
|
-
series_by_url AS (
|
|
689
|
-
SELECT url, to_json(list({
|
|
690
|
-
'week': strftime(week, '%Y-%m-%d'),
|
|
691
|
-
'clicks': clicks,
|
|
692
|
-
'impressions': impressions
|
|
693
|
-
} ORDER BY week)) AS seriesJson
|
|
694
|
-
FROM weekly GROUP BY url
|
|
695
|
-
),
|
|
696
|
-
joined AS (
|
|
697
|
-
SELECT
|
|
698
|
-
p.url AS page,
|
|
699
|
-
COALESCE(c.clicks, 0.0) AS currentClicks,
|
|
700
|
-
p.clicks AS previousClicks,
|
|
701
|
-
(p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
|
|
702
|
-
(p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
|
|
703
|
-
COALESCE(c.position, 0.0) AS currentPosition,
|
|
704
|
-
p.position AS previousPosition,
|
|
705
|
-
(COALESCE(c.position, 0.0) - p.position) AS positionDrop,
|
|
706
|
-
s.seriesJson
|
|
707
|
-
FROM prev p
|
|
708
|
-
LEFT JOIN cur c ON p.url = c.url
|
|
709
|
-
LEFT JOIN series_by_url s ON p.url = s.url
|
|
710
|
-
)
|
|
711
|
-
SELECT *
|
|
712
|
-
FROM joined
|
|
713
|
-
WHERE declinePercent >= ? AND lostClicks > 0
|
|
714
|
-
ORDER BY lostClicks DESC
|
|
715
|
-
LIMIT ${Number(limit)}
|
|
716
|
-
`,
|
|
717
|
-
params: [
|
|
718
|
-
cur.startDate,
|
|
719
|
-
cur.endDate,
|
|
720
|
-
prev.startDate,
|
|
721
|
-
prev.endDate,
|
|
722
|
-
minPreviousClicks,
|
|
723
|
-
cur.startDate,
|
|
724
|
-
cur.endDate,
|
|
725
|
-
prev.startDate,
|
|
726
|
-
prev.endDate,
|
|
727
|
-
threshold
|
|
728
|
-
],
|
|
729
|
-
current: {
|
|
730
|
-
table: "pages",
|
|
731
|
-
partitions: enumeratePartitions(cur.startDate, cur.endDate)
|
|
732
|
-
},
|
|
733
|
-
previous: {
|
|
734
|
-
table: "pages",
|
|
735
|
-
partitions: enumeratePartitions(prev.startDate, prev.endDate)
|
|
736
|
-
}
|
|
737
|
-
};
|
|
738
|
-
},
|
|
739
|
-
reduceSql(rows) {
|
|
740
|
-
const arr = Array.isArray(rows) ? rows : [];
|
|
741
|
-
return {
|
|
742
|
-
results: arr.map((r) => ({
|
|
743
|
-
page: str$2(r.page),
|
|
744
|
-
currentClicks: num(r.currentClicks),
|
|
745
|
-
previousClicks: num(r.previousClicks),
|
|
746
|
-
lostClicks: num(r.lostClicks),
|
|
747
|
-
declinePercent: num(r.declinePercent),
|
|
748
|
-
currentPosition: num(r.currentPosition),
|
|
749
|
-
previousPosition: num(r.previousPosition),
|
|
750
|
-
positionDrop: num(r.positionDrop),
|
|
751
|
-
series: parseJsonList$1(r.seriesJson).map((s) => ({
|
|
752
|
-
week: str$2(s.week),
|
|
753
|
-
clicks: num(s.clicks),
|
|
754
|
-
impressions: num(s.impressions)
|
|
755
|
-
}))
|
|
756
|
-
})),
|
|
757
|
-
meta: { total: arr.length }
|
|
758
|
-
};
|
|
759
|
-
},
|
|
760
|
-
buildRows(params) {
|
|
761
|
-
const { current, previous } = comparisonOf(params);
|
|
762
|
-
return {
|
|
763
|
-
current: pagesQueryState(current, params.limit),
|
|
764
|
-
previous: pagesQueryState(previous, params.limit)
|
|
765
|
-
};
|
|
766
|
-
},
|
|
767
|
-
reduceRows(rows, params) {
|
|
768
|
-
const map = rows && !Array.isArray(rows) ? rows : {
|
|
769
|
-
current: [],
|
|
770
|
-
previous: []
|
|
771
|
-
};
|
|
772
|
-
const results = analyzeDecay({
|
|
773
|
-
current: map.current ?? [],
|
|
774
|
-
previous: map.previous ?? []
|
|
775
|
-
}, {
|
|
776
|
-
minPreviousClicks: params.minPreviousClicks,
|
|
777
|
-
threshold: params.threshold
|
|
778
|
-
});
|
|
779
|
-
return {
|
|
780
|
-
results,
|
|
781
|
-
meta: { total: results.length }
|
|
782
|
-
};
|
|
783
|
-
}
|
|
784
|
-
});
|
|
785
|
-
/**
 * Relative change of `current` against `previous`, expressed in percent.
 *
 * A zero baseline cannot be divided by, so it is reported as 100 when the
 * current value is positive and as 0 otherwise.
 *
 * @param {number} current - Value for the recent period.
 * @param {number} previous - Value for the baseline period.
 * @returns {number} Percentage change (50 means +50%).
 */
function percentDifference(current, previous) {
	if (previous !== 0) {
		const delta = current - previous;
		return delta / previous * 100;
	}
	return current > 0 ? 100 : 0;
}
|
|
789
|
-
/**
 * Coerces a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {unknown} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str$1(v) {
	if (v == null) return "";
	return String(v);
}
|
|
792
|
-
/**
 * Normalises a value that should hold a list into an array.
 *
 * Accepts either an array (returned as-is) or a JSON string encoding an
 * array. Anything else yields an empty array: non-strings, empty strings,
 * JSON that decodes to a non-array, and JSON that does not parse at all.
 * A malformed string is treated as "no list" rather than thrown, so one bad
 * cell cannot abort the reduction of an entire result set.
 *
 * @param {unknown} v - Array or JSON-encoded array.
 * @returns {Array} The decoded list, or `[]` when none can be obtained.
 */
function parseJsonList(v) {
	if (Array.isArray(v)) return v;
	if (typeof v !== "string" || v.length === 0) return [];
	try {
		const parsed = JSON.parse(v);
		return Array.isArray(parsed) ? parsed : [];
	} catch {
		// JSON.parse throws SyntaxError on malformed input; degrade to an empty list.
		return [];
	}
}
|
|
800
|
-
/**
 * Splits current-period keyword rows into rising, declining and stable
 * buckets by comparing their clicks with the previous period.
 *
 * Baseline clicks and impressions are divided by `input.normalizationFactor`
 * (default 1) before comparison. Rows below `minImpressions` are dropped.
 * A row is rising or declining when the absolute click change, as a
 * fraction, reaches `changeThreshold`; everything else is stable.
 *
 * @param {{current: Iterable<object>, previous: Iterable<object>, normalizationFactor?: number}} input
 * @param {{changeThreshold?: number, minImpressions?: number, sortBy?: string}} [options]
 * @returns {{rising: object[], declining: object[], stable: object[]}}
 */
function analyzeMovers(input, options = {}) {
	const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
	const normFactor = input.normalizationFactor ?? 1;
	const toBaseline = (r) => ({
		clicks: num(r.clicks) / normFactor,
		impressions: num(r.impressions) / normFactor,
		position: num(r.position),
		page: r.page ?? null
	});
	const baselineMap = buildPeriodMap(input.previous, (r) => r.query, toBaseline);

	// First page seen per query wins; current rows take precedence over previous ones.
	const pageMap = /* @__PURE__ */ new Map();
	for (const rows of [input.current, input.previous]) {
		for (const row of rows) {
			if (row.page && !pageMap.has(row.query)) pageMap.set(row.query, row.page);
		}
	}

	// Stand-in for queries that have no row in the previous period.
	const noBaseline = {
		clicks: 0,
		impressions: 0,
		position: 0,
		page: null
	};
	const buckets = {
		rising: [],
		declining: [],
		stable: []
	};
	for (const row of input.current) {
		const impressions = num(row.impressions);
		const clicks = num(row.clicks);
		const position = num(row.position);
		if (impressions < minImpressions) continue;
		const baseline = baselineMap.get(row.query) || noBaseline;
		const roundedBaselineClicks = Math.round(baseline.clicks);
		const clicksChangePercent = percentDifference(clicks, baseline.clicks);
		const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
		const data = {
			keyword: row.query,
			page: pageMap.get(row.query) ?? null,
			recentClicks: clicks,
			recentImpressions: impressions,
			recentPosition: position,
			baselineClicks: roundedBaselineClicks,
			baselineImpressions: Math.round(baseline.impressions),
			baselinePosition: baseline.position,
			clicksChange: clicks - roundedBaselineClicks,
			clicksChangePercent,
			impressionsChangePercent,
			positionChange: position - baseline.position
		};
		const significant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
		if (significant && clicksChangePercent > 0) buckets.rising.push(data);
		else if (significant && clicksChangePercent < 0) buckets.declining.push(data);
		else buckets.stable.push(data);
	}

	// Comparators keyed by `sortBy`; unknown keys fall back to click-change magnitude.
	const byClicksChange = (a, b) => Math.abs(b.clicksChangePercent) - Math.abs(a.clicksChangePercent);
	const comparators = new Map([
		["clicks", (a, b) => b.recentClicks - a.recentClicks],
		["impressions", (a, b) => b.recentImpressions - a.recentImpressions],
		["clicksChange", byClicksChange],
		["impressionsChange", (a, b) => Math.abs(b.impressionsChangePercent) - Math.abs(a.impressionsChangePercent)],
		["positionChange", (a, b) => Math.abs(b.positionChange) - Math.abs(a.positionChange)]
	]);
	const sortFn = comparators.get(sortBy) ?? byClicksChange;
	buckets.rising.sort(sortFn);
	buckets.declining.sort(sortFn);
	// Stable rows are always ordered by recent clicks, regardless of `sortBy`.
	buckets.stable.sort((a, b) => b.recentClicks - a.recentClicks);
	return {
		rising: buckets.rising,
		declining: buckets.declining,
		stable: buckets.stable
	};
}
|
|
866
|
-
/**
 * "movers" analyzer: keywords whose clicks changed between two periods.
 *
 * Two execution paths are defined: a SQL path (`buildSql` / `reduceSql`)
 * that reads the `page_keywords` parquet partitions, and a row path
 * (`buildRows` / `reduceRows`) that classifies pre-fetched keyword rows with
 * `analyzeMovers`. Both return rising and declining rows in `results` and
 * report the same `meta` keys (`total`, `rising`, `declining`, `stable`).
 */
defineAnalyzer({
	id: "movers",
	/**
	 * Builds the comparison query.
	 *
	 * Placeholder order in `params` follows the SQL text: date bounds for the
	 * `cur` and `prev` CTEs, the same bounds again for the `weekly` CTE, then
	 * the impression floor and the change threshold (bound twice, once per
	 * CASE branch).
	 */
	buildSql(params) {
		const { current: cur, previous: prev } = comparisonOf(params);
		const minImpressions = params.minImpressions ?? 50;
		const changeThreshold = params.changeThreshold ?? .2;
		const limit = params.limit ?? 2e3;
		return {
			sql: `
WITH cur AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
prev AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
weekly AS (
SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions
FROM (
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
UNION ALL
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
)
GROUP BY query, url, week
),
series_by_entity AS (
SELECT query, url, to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM weekly GROUP BY query, url
),
joined AS (
SELECT
c.query AS keyword,
c.url AS page,
c.clicks AS recentClicks,
c.impressions AS recentImpressions,
c.position AS recentPosition,
COALESCE(p.clicks, 0.0) AS baselineClicks,
COALESCE(p.impressions, 0.0) AS baselineImpressions,
COALESCE(p.position, 0.0) AS baselinePosition,
(c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
CASE
WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
END AS clicksChangePercent,
CASE
WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
END AS impressionsChangePercent,
(c.position - COALESCE(p.position, 0.0)) AS positionChange,
s.seriesJson
FROM cur c
LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
WHERE c.impressions >= ?
)
SELECT *,
CASE
WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
ELSE 'stable'
END AS direction
FROM joined
ORDER BY ABS(clicksChangePercent) DESC
LIMIT ${Number(limit)}
`,
			params: [
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				minImpressions,
				changeThreshold,
				changeThreshold
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	/**
	 * Normalises SQL result rows and groups them by the `direction` column.
	 * `results` holds rising rows followed by declining rows; stable rows are
	 * only counted in `meta.stable`.
	 */
	reduceSql(rows) {
		const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
			keyword: str$1(r.keyword),
			page: r.page == null ? null : str$1(r.page),
			recentClicks: num(r.recentClicks),
			recentImpressions: num(r.recentImpressions),
			recentPosition: num(r.recentPosition),
			baselineClicks: Math.round(num(r.baselineClicks)),
			baselineImpressions: Math.round(num(r.baselineImpressions)),
			baselinePosition: num(r.baselinePosition),
			clicksChange: num(r.clicksChange),
			clicksChangePercent: num(r.clicksChangePercent),
			impressionsChangePercent: num(r.impressionsChangePercent),
			positionChange: num(r.positionChange),
			direction: str$1(r.direction),
			series: parseJsonList(r.seriesJson).map((s) => ({
				week: str$1(s.week),
				clicks: num(s.clicks),
				impressions: num(s.impressions)
			}))
		}));
		const rising = normalized.filter((r) => r.direction === "rising");
		const declining = normalized.filter((r) => r.direction === "declining");
		const stable = normalized.filter((r) => r.direction === "stable");
		const combined = [...rising, ...declining];
		return {
			results: combined,
			meta: {
				total: combined.length,
				rising: rising.length,
				declining: declining.length,
				stable: stable.length
			}
		};
	},
	/** Declares the two keyword row sets (current and previous period) the row path needs. */
	buildRows(params) {
		const { current, previous } = comparisonOf(params);
		return {
			current: keywordsQueryState(current, params.limit),
			previous: keywordsQueryState(previous, params.limit)
		};
	},
	/**
	 * Classifies pre-fetched rows with `analyzeMovers`.
	 * `rows` is expected to be a `{ current, previous }` map; anything else
	 * (including an array) is treated as two empty row sets.
	 */
	reduceRows(rows, params) {
		const map = rows && !Array.isArray(rows) ? rows : {
			current: [],
			previous: []
		};
		const result = analyzeMovers({
			current: map.current ?? [],
			previous: map.previous ?? []
		}, {
			changeThreshold: params.changeThreshold,
			minImpressions: params.minImpressions
		});
		const results = [...result.rising.map((r) => ({
			...r,
			direction: "rising"
		})), ...result.declining.map((r) => ({
			...r,
			direction: "declining"
		}))];
		return {
			results,
			// Same keys as the SQL path so consumers see one meta shape per analyzer.
			meta: {
				total: results.length,
				rising: result.rising.length,
				declining: result.declining.length,
				stable: result.stable.length
			}
		};
	}
});
|
|
1046
|
-
/** Row limit applied when the caller supplies none (or an invalid one). */
const DEFAULT_LIMIT = 1e3;
/** Upper bound for any requested row limit. */
const MAX_LIMIT = 5e4;
/**
 * Sanitises a requested row limit.
 *
 * The value is coerced to a number and floored, so the result is always a
 * whole number that can be interpolated into a `LIMIT` clause or used with
 * `Array.prototype.slice`, matching how `clampOffset` floors offsets.
 * Missing, non-finite, or non-positive values (including fractions below 1)
 * yield `fallback`; anything larger than `MAX_LIMIT` is capped.
 *
 * @param {unknown} limit - Requested limit; `null`/`undefined` means "use fallback".
 * @param {number} [fallback=DEFAULT_LIMIT] - Value returned for invalid input.
 * @returns {number} A whole limit in `1..MAX_LIMIT`, or `fallback`.
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
	const n = Math.floor(Number(limit ?? fallback));
	if (!Number.isFinite(n) || n <= 0) return fallback;
	return Math.min(n, MAX_LIMIT);
}
|
|
1053
|
-
/**
 * Sanitises a requested row offset.
 *
 * @param {unknown} offset - Requested offset; `null`/`undefined` means 0.
 * @returns {number} The offset floored to a whole number, or 0 when the
 *   input is negative or not a finite number.
 */
function clampOffset(offset) {
	const n = Number(offset ?? 0);
	return Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
}
|
|
1058
|
-
/**
 * Renders a SQL pagination clause from `{ limit, offset }`.
 *
 * Both values go through `clampLimit` / `clampOffset` first. The `OFFSET`
 * part is emitted only when the clamped offset is greater than zero.
 *
 * @param {{limit?: unknown, offset?: unknown}} input
 * @returns {string} `LIMIT n` or `LIMIT n OFFSET m`.
 */
function paginateClause(input) {
	const rowLimit = clampLimit(input.limit);
	const skip = clampOffset(input.offset);
	const clause = `LIMIT ${rowLimit}`;
	if (skip > 0) return `${clause} OFFSET ${skip}`;
	return clause;
}
|
|
1063
|
-
/**
 * Applies `{ limit, offset }` pagination to an array already in memory.
 *
 * When no usable limit is given, the limit falls back to `rows.length`
 * (still subject to the cap inside `clampLimit`).
 *
 * @param {Array} rows - Full, already sorted result set.
 * @param {{limit?: unknown, offset?: unknown}} input
 * @returns {Array} A new array holding the requested page.
 */
function paginateInMemory(rows, input) {
	const start = clampOffset(input.offset);
	const size = clampLimit(input.limit, rows.length);
	const end = start + size;
	return rows.slice(start, end);
}
|
|
1068
|
-
/** Expected click-through rate (as a fraction) for whole positions 1 to 10. */
const EXPECTED_CTR_BY_POSITION = {
	1: .3,
	2: .15,
	3: .1,
	4: .07,
	5: .05,
	6: .04,
	7: .03,
	8: .025,
	9: .02,
	10: .015
};
/**
 * Looks up the expected CTR for a position.
 *
 * The position is bounded to 1..10 and rounded to the nearest whole slot
 * before the lookup; if the table has no entry for the slot, .01 is returned.
 *
 * @param {number} position - Position, possibly fractional.
 * @returns {number} Expected CTR as a fraction.
 */
function getExpectedCtr(position) {
	const capped = Math.min(position, 10);
	const slot = Math.round(Math.max(1, capped));
	return EXPECTED_CTR_BY_POSITION[slot] || .01;
}
|
|
1083
|
-
/**
 * Scores a position by how close it is to position 11.
 *
 * Positions up to 3 score a flat .2 and positions beyond 50 a flat .1.
 * In between, the score is 1 at position 11 and drops linearly by 1/15 per
 * position of distance, never going below 0.
 *
 * @param {number} position - Position, possibly fractional.
 * @returns {number} Score in the range 0..1.
 */
function calculatePositionScore(position) {
	if (position <= 3) return .2;
	if (position > 50) return .1;
	const penalty = Math.abs(position - 11) / 15;
	return Math.max(0, 1 - penalty);
}
|
|
1089
|
-
/**
 * Scores impression volume on a log scale.
 *
 * @param {number} impressions - Impression count.
 * @returns {number} 0 for non-positive input, otherwise `log10(impressions) / 5`
 *   capped at 1 (so 100000 impressions or more score 1).
 */
function calculateImpressionScore(impressions) {
	return impressions <= 0 ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
|
|
1093
|
-
/**
 * Scores how far the actual CTR falls short of the CTR expected at a position.
 *
 * @param {number} actualCtr - Observed CTR as a fraction.
 * @param {number} position - Position passed to `getExpectedCtr`.
 * @returns {number} 0 when the actual CTR meets or beats the expectation,
 *   otherwise the shortfall relative to the expectation, capped at 1.
 */
function calculateCtrGapScore(actualCtr, position) {
	const expectedCtr = getExpectedCtr(position);
	if (actualCtr >= expectedCtr) return 0;
	const shortfall = expectedCtr - actualCtr;
	const relative = shortfall / expectedCtr;
	return Math.min(relative, 1);
}
|
|
1099
|
-
// Direction associated with each sortable opportunity metric.
// NOTE(review): presumably the per-metric default sort order; confirm against createMetricSorter.
const OPPORTUNITY_SORT_DIRECTIONS = {
	opportunityScore: "desc",
	potentialClicks: "desc",
	impressions: "desc",
	position: "asc"
};
/** Sorter for opportunity results, created with "opportunityScore" as its first argument. */
const sortResults$1 = createMetricSorter("opportunityScore", OPPORTUNITY_SORT_DIRECTIONS);
|
|
1105
|
-
/**
 * "opportunity" analyzer: ranks keyword/page pairs by an opportunity score.
 *
 * The score is the geometric mean of three factors (position, impression
 * volume, CTR gap), each weighted 1, scaled to 0..100 and rounded. The SQL
 * path computes the factors in the query; the row path computes them with
 * the `calculate*Score` helpers.
 */
const opportunityAnalyzer = defineAnalyzer({
	id: "opportunity",
	/**
	 * Builds the scoring query over the `page_keywords` partitions.
	 * Placeholder order: start date, end date, impression floor.
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 100;
		const w1 = 1;
		const w2 = 1;
		const w3 = 1;
		const totalW = w1 + w2 + w3;
		const limit = params.limit ?? 1e3;
		// The score is an integer-rounded value, so ties are common. The trailing
		// ORDER BY keys make the order total (keyword + page is unique per group),
		// which keeps LIMIT/OFFSET pages stable from one query to the next.
		return {
			sql: `
WITH agg AS (
SELECT
query AS keyword,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
scored AS (
SELECT
keyword, page, clicks, impressions, ctr, position,
CASE
WHEN position <= 3 THEN 0.2
WHEN position > 50 THEN 0.1
ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
END AS positionScore,
CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
WHEN 4 THEN 0.07
WHEN 5 THEN 0.05
WHEN 6 THEN 0.04
WHEN 7 THEN 0.03
WHEN 8 THEN 0.025
WHEN 9 THEN 0.02
WHEN 10 THEN 0.015
ELSE 0.01
END AS expectedCtr
FROM agg
),
gapped AS (
SELECT
*,
CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
FROM scored
)
SELECT
keyword, page, clicks, impressions, ctr, position,
CAST(ROUND(POWER(
POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
1.0 / ${totalW}
) * 100) AS DOUBLE) AS opportunityScore,
CAST(ROUND(impressions * (
CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
ELSE 0.10
END
)) AS DOUBLE) AS potentialClicks,
positionScore, impressionScore, ctrGapScore
FROM gapped
ORDER BY opportunityScore DESC, impressions DESC, keyword ASC, page ASC
${paginateClause({
				limit,
				offset: params.offset
			})}
`,
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/** Normalises SQL result rows; the three factor columns are nested under `factors`. */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		return {
			results: arr.map((r) => ({
				keyword: r.keyword == null ? "" : String(r.keyword),
				page: r.page == null ? null : String(r.page),
				clicks: num(r.clicks),
				impressions: num(r.impressions),
				ctr: num(r.ctr),
				position: num(r.position),
				opportunityScore: num(r.opportunityScore),
				potentialClicks: num(r.potentialClicks),
				factors: {
					positionScore: num(r.positionScore),
					impressionScore: num(r.impressionScore),
					ctrGapScore: num(r.ctrGapScore)
				}
			})),
			meta: { total: arr.length }
		};
	},
	/** Declares the keyword row set the row path needs. */
	buildRows(params) {
		return { keywords: keywordsQueryState(periodOf(params), params.limit) };
	},
	/**
	 * Scores pre-fetched keyword rows in memory, then sorts and paginates them.
	 * A non-array `rows` value is treated as an empty row set.
	 */
	reduceRows(rows, params) {
		const keywords = Array.isArray(rows) ? rows : [];
		const minImpressions = params.minImpressions ?? 100;
		const positionWeight = 1;
		const impressionsWeight = 1;
		const ctrGapWeight = 1;
		const sortBy = "opportunityScore";
		const results = [];
		for (const row of keywords) {
			const impressions = num(row.impressions);
			const position = num(row.position);
			const ctr = num(row.ctr);
			const clicks = num(row.clicks);
			if (impressions < minImpressions) continue;
			const positionScore = calculatePositionScore(position);
			const impressionScore = calculateImpressionScore(impressions);
			const ctrGapScore = calculateCtrGapScore(ctr, position);
			// Weighted geometric mean: any factor of 0 zeroes the whole score.
			const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
			const opportunityScore = Math.round(geometricMean * 100);
			// Potential clicks use the expected CTR at the current position, capped at position 3.
			const targetCtr = getExpectedCtr(Math.min(3, position));
			const potentialClicks = Math.round(impressions * targetCtr);
			results.push({
				keyword: row.query,
				page: row.page ?? null,
				clicks,
				impressions,
				ctr,
				position,
				opportunityScore,
				potentialClicks,
				factors: {
					positionScore,
					impressionScore,
					ctrGapScore
				}
			});
		}
		const sorted = sortResults$1(results, sortBy);
		const paged = paginateInMemory(sorted, {
			limit: params.limit,
			offset: params.offset
		});
		return {
			results: paged,
			meta: {
				total: sorted.length,
				returned: paged.length
			}
		};
	}
});
|
|
1268
|
-
/**
 * Coerces a value to a string, mapping `null` and `undefined` to "".
 *
 * @param {unknown} v - Any value.
 * @returns {string} `String(v)`, or the empty string for nullish input.
 */
function str(v) {
	if (v == null) return "";
	return String(v);
}
|
|
1271
|
-
/**
 * Interprets a value as a boolean flag.
 *
 * @param {unknown} v - Any value.
 * @returns {boolean} `true` only for the boolean `true`, the number `1`,
 *   or the exact string "true"; `false` for everything else.
 */
function bool(v) {
	return [true, 1, "true"].includes(v);
}
|
|
1274
|
-
/**
 * Coefficient of variation (population standard deviation divided by the
 * mean) of a list of numbers, capped at 1.
 *
 * @param {number[]} values - Samples.
 * @returns {number} 0 for an empty list or a zero mean, otherwise
 *   `min(stddev / mean, 1)`.
 */
function calculateCV(values) {
	const count = values.length;
	if (count === 0) return 0;
	let total = 0;
	values.forEach((v) => {
		total += v;
	});
	const mean = total / count;
	if (mean === 0) return 0;
	let squaredDeviation = 0;
	values.forEach((v) => {
		squaredDeviation += (v - mean) ** 2;
	});
	const stdDev = Math.sqrt(squaredDeviation / count);
	return Math.min(stdDev / mean, 1);
}
|
|
1281
|
-
/**
 * Aggregates daily rows into calendar months and flags seasonal patterns.
 *
 * A month is a peak when its total exceeds 1.5x the monthly average and a
 * trough when it is below 0.5x. `strength` is the capped coefficient of
 * variation of the monthly totals. `hasSeasonality` is true when any peak or
 * trough exists or `strength` exceeds .3. `insufficientData` is true when
 * fewer than 12 distinct months are present.
 *
 * @param {Array<{date: string, clicks?: unknown, impressions?: unknown}>} dates
 *   Daily rows; the first 7 characters of `date` are used as the month key.
 * @param {{metric?: string}} [options] - `metric: "impressions"` sums
 *   impressions; any other value sums clicks, as the SQL path of the
 *   seasonality analyzer does.
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[],
 *   troughMonths: string[], monthlyBreakdown: object[], insufficientData: boolean}}
 */
function analyzeSeasonality(dates, options = {}) {
	const { metric = "clicks" } = options;
	if (dates.length === 0) return {
		hasSeasonality: false,
		strength: 0,
		peakMonths: [],
		troughMonths: [],
		monthlyBreakdown: [],
		insufficientData: true
	};
	const useImpressions = metric === "impressions";
	const monthlyMap = /* @__PURE__ */ new Map();
	for (const row of dates) {
		const month = row.date.substring(0, 7);
		// Coerce with the file's shared `num` helper, as the other row reducers do,
		// so a missing or string-typed metric cannot poison the monthly sums.
		const value = num(useImpressions ? row.impressions : row.clicks);
		monthlyMap.set(month, (monthlyMap.get(month) || 0) + value);
	}
	const months = Array.from(monthlyMap.keys()).sort();
	const values = months.map((m) => monthlyMap.get(m) || 0);
	const insufficientData = months.length < 12;
	const totalValue = values.reduce((a, b) => a + b, 0);
	const avgValue = values.length > 0 ? totalValue / values.length : 0;
	const monthlyBreakdown = months.map((month, i) => {
		const value = values[i] ?? 0;
		const vsAverage = avgValue > 0 ? value / avgValue : 0;
		return {
			month,
			value,
			vsAverage,
			isPeak: vsAverage > 1.5,
			isTrough: vsAverage < .5
		};
	});
	// Month keys look like "YYYY-MM"; characters 5-6 are the month-of-year, de-duplicated across years.
	const peakMonths = [...new Set(monthlyBreakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
	const troughMonths = [...new Set(monthlyBreakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
	const strength = calculateCV(values);
	return {
		hasSeasonality: peakMonths.length > 0 || troughMonths.length > 0 || strength > .3,
		strength,
		peakMonths,
		troughMonths,
		monthlyBreakdown,
		insufficientData
	};
}
|
|
1325
|
-
/**
 * "seasonality" analyzer: monthly totals of clicks or impressions with
 * peak/trough flags and an overall strength figure.
 *
 * The SQL path aggregates the `pages` partitions by month; the row path
 * delegates to `analyzeSeasonality`. Both return the monthly breakdown in
 * `results` and report the same `meta` keys.
 */
defineAnalyzer({
	id: "seasonality",
	/**
	 * Builds the monthly aggregation query.
	 * `params.metric === "impressions"` sums impressions; anything else sums clicks.
	 */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		return {
			sql: `
WITH monthly AS (
SELECT
strftime(date, '%Y-%m') AS month,
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY month
),
stats AS (
SELECT
AVG(value) AS avg_val,
COALESCE(STDDEV_POP(value), 0.0) AS std_val,
CAST(COUNT(*) AS DOUBLE) AS month_count
FROM monthly
)
SELECT
m.month AS month,
m.value AS value,
CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
(s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
(s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
s.month_count AS monthCount
FROM monthly m, stats s
ORDER BY m.month
`,
			params: [startDate, endDate],
			current: {
				table: "pages",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/**
	 * Normalises SQL rows into the monthly breakdown.
	 * `strength` and `monthCount` are repeated on every row by the query, so
	 * they are read from the first row only.
	 */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		const breakdown = arr.map((r) => ({
			month: str(r.month),
			value: num(r.value),
			vsAverage: num(r.vsAverage),
			isPeak: bool(r.isPeak),
			isTrough: bool(r.isTrough)
		}));
		const first = arr[0];
		const strength = first ? num(first.strength) : 0;
		const monthCount = first ? num(first.monthCount) : 0;
		const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
		const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
		const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
		const insufficientData = monthCount < 12;
		return {
			results: breakdown,
			meta: {
				total: breakdown.length,
				hasSeasonality,
				strength,
				peakMonths,
				troughMonths,
				insufficientData
			}
		};
	},
	/** Declares the daily row set the row path needs. */
	buildRows(params) {
		return { dates: datesQueryState(periodOf(params), params.limit) };
	},
	/**
	 * Runs `analyzeSeasonality` over pre-fetched daily rows.
	 * A non-array `rows` value is treated as an empty row set.
	 */
	reduceRows(rows, params) {
		const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
		return {
			results: result.monthlyBreakdown,
			// Same keys as the SQL path so consumers see one meta shape per analyzer.
			meta: {
				total: result.monthlyBreakdown.length,
				hasSeasonality: result.hasSeasonality,
				strength: result.strength,
				peakMonths: result.peakMonths,
				troughMonths: result.troughMonths,
				insufficientData: result.insufficientData
			}
		};
	}
});
|
|
1403
|
-
/**
 * Sorter for striking-distance results. It is given an accessor that reads
 * the requested metric straight off the item, and "potentialClicks" as the
 * second argument.
 * NOTE(review): presumably the second argument is the default sort metric; confirm against createSorter.
 */
const sortResults = createSorter(function readMetric(item, metric) {
	return item[metric];
}, "potentialClicks");
|
|
1404
|
-
/**
 * Finds keywords within a position band that have enough impressions and a
 * low CTR.
 *
 * A row is kept when its position lies in `[minPosition, maxPosition]`, its
 * impressions are at least `minImpressions`, and its CTR does not exceed
 * `maxCtr`. `potentialClicks` is estimated as 15% of impressions, rounded.
 *
 * @param {Iterable<object>} keywords - Keyword rows (`query`, `page`, `clicks`, `impressions`, `ctr`, `position`).
 * @param {{minPosition?: number, maxPosition?: number, minImpressions?: number,
 *   maxCtr?: number, sortBy?: string, sortOrder?: string}} [options]
 * @returns {object[]} Matching rows as ordered by `sortResults`.
 */
function analyzeStrikingDistance(keywords, options = {}) {
	const { minPosition = 4, maxPosition = 20, minImpressions = 100, maxCtr = .05, sortBy = "potentialClicks", sortOrder = "desc" } = options;
	const results = [];
	for (const row of keywords) {
		const position = num(row.position);
		const impressions = num(row.impressions);
		const ctr = num(row.ctr);
		const clicks = num(row.clicks);
		const rejected = position < minPosition || position > maxPosition || impressions < minImpressions || ctr > maxCtr;
		if (rejected) continue;
		results.push({
			keyword: row.query,
			page: row.page ?? null,
			clicks,
			impressions,
			ctr,
			position,
			potentialClicks: Math.round(impressions * .15)
		});
	}
	return sortResults(results, sortBy, sortOrder);
}
|
|
1428
|
-
/**
 * Wraps the keywords query state for `period` and `limit` in `typedQuery`.
 *
 * @param {object} period - Period passed to `keywordsQueryState`.
 * @param {number} limit - Limit passed to `keywordsQueryState`.
 */
function keywordQuery(period, limit) {
	const state = keywordsQueryState(period, limit);
	return typedQuery(state);
}
|
|
1431
|
-
/**
 * Wraps the pages query state for `period` and `limit` in `typedQuery`.
 *
 * @param {object} period - Period passed to `pagesQueryState`.
 * @param {number} limit - Limit passed to `pagesQueryState`.
 */
function pageQuery(period, limit) {
	const state = pagesQueryState(period, limit);
	return typedQuery(state);
}
|
|
1434
|
-
/**
 * Wraps the dates query state for `period` and `limit` in `typedQuery`.
 *
 * @param {object} period - Period passed to `datesQueryState`.
 * @param {number} limit - Limit passed to `datesQueryState`.
 */
function dateQuery(period, limit) {
	const state = datesQueryState(period, limit);
	return typedQuery(state);
}
|
|
1437
|
-
/**
 * Identity helper for declaring a portable analyzer: the definition is
 * returned unchanged.
 *
 * @param {{requiredQueries: Function, run: Function}} definition
 * @returns {{requiredQueries: Function, run: Function}} The same object.
 */
function definePortableAnalyzer(definition) {
	const declared = definition;
	return declared;
}
|
|
1440
|
-
/**
 * Executes a portable analyzer against a query source.
 *
 * Asks the definition which queries it needs, runs all of them concurrently
 * through `queryRows`, and hands the rows (keyed like the requested queries)
 * to `definition.run`.
 *
 * @param {object} source - Query source passed to `queryRows`.
 * @param {{requiredQueries: Function, run: Function}} definition
 * @param {object} input - Period (or `{ current, previous }`) for `requiredQueries`.
 * @param {object} [options] - Forwarded to `definition.run`.
 * @param {number} [limit=25000] - Row limit forwarded to `requiredQueries`.
 * @returns {Promise<*>} Whatever `definition.run` returns.
 */
async function runPortableAnalyzer(source, definition, input, options, limit = 25e3) {
	const specs = definition.requiredQueries(input, limit);
	const pending = Object.entries(specs).map(async ([key, spec]) => {
		const rows = await queryRows(source, spec);
		return [key, rows];
	});
	const rowsByKey = Object.fromEntries(await Promise.all(pending));
	return definition.run(rowsByKey, options);
}
|
|
1446
|
-
/**
 * Registry of portable analyzers. Each entry declares the queries it needs
 * (`requiredQueries`) and how to turn the resulting rows into a result (`run`).
 */
const PORTABLE_ANALYZERS = (() => {
	// Shared query declarations for the single-period analyzers.
	const keywordsOnly = (period, limit) => ({ keywords: keywordQuery(period, limit) });
	const pagesOnly = (period, limit) => ({ pages: pageQuery(period, limit) });
	const datesOnly = (period, limit) => ({ dates: dateQuery(period, limit) });
	// Builds the current/previous declaration for the two-period analyzers.
	const comparing = (buildQuery) => (periods, limit) => ({
		current: buildQuery(periods.current, limit),
		previous: buildQuery(periods.previous, limit)
	});
	return {
		strikingDistance: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeStrikingDistance(keywords, options)
		}),
		opportunity: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => {
				// Reuses the opportunity analyzer's row reducer; only minImpressions is forwarded.
				const reduced = opportunityAnalyzer.rows.reduce(keywords, { params: {
					type: "opportunity",
					minImpressions: options?.minImpressions
				} });
				return reduced.results;
			}
		}),
		brandSegmentation: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeBrandSegmentation(keywords, options)
		}),
		pageConcentration: definePortableAnalyzer({
			requiredQueries: pagesOnly,
			run: ({ pages }, options) => analyzePageConcentration(pages, options)
		}),
		keywordConcentration: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeKeywordConcentration(keywords, options)
		}),
		clustering: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeClustering(keywords, options)
		}),
		seasonality: definePortableAnalyzer({
			requiredQueries: datesOnly,
			run: ({ dates }, options) => analyzeSeasonality(dates, options)
		}),
		decay: definePortableAnalyzer({
			requiredQueries: comparing(pageQuery),
			run: ({ current, previous }, options) => analyzeDecay({
				current,
				previous
			}, options)
		}),
		movers: definePortableAnalyzer({
			requiredQueries: comparing(keywordQuery),
			run: ({ current, previous }, options) => analyzeMovers({
				current,
				previous
			}, options)
		})
	};
})();
|
|
1502
|
-
/**
 * Fetches keyword, page and date rows for one period, concurrently.
 *
 * @param {object} source - Query source passed to `queryRows`.
 * @param {object} period - Period to query.
 * @param {{limit?: number}} [options] - `limit` defaults to 25000 per query.
 * @returns {Promise<{keywords: *, pages: *, dates: *}>}
 */
async function queryAnalyticsFromSource(source, period, options = {}) {
	const limit = options.limit ?? 25e3;
	const builders = [
		keywordQuery,
		pageQuery,
		dateQuery
	];
	const pending = builders.map((build) => queryRows(source, build(period, limit)));
	const [keywords, pages, dates] = await Promise.all(pending);
	return {
		keywords,
		pages,
		dates
	};
}
|
|
1515
|
-
/**
 * Fetches the analytics row sets for the current and previous periods,
 * concurrently.
 *
 * @param {object} source - Query source.
 * @param {{current: object, previous: object}} periods
 * @param {{limit?: number}} [options] - Forwarded to `queryAnalyticsFromSource`.
 * @returns {Promise<{current: object, previous: object}>}
 */
async function queryComparisonFromSource(source, periods, options = {}) {
	const currentPending = queryAnalyticsFromSource(source, periods.current, options);
	const previousPending = queryAnalyticsFromSource(source, periods.previous, options);
	const [current, previous] = await Promise.all([currentPending, previousPending]);
	return {
		current,
		previous
	};
}
|
|
1522
|
-
async function analyzeStrikingDistanceFromSource(source, period, options) {
|
|
1523
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.strikingDistance, period, options);
|
|
1524
|
-
}
|
|
1525
|
-
async function analyzeOpportunityFromSource(source, period, options) {
|
|
1526
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.opportunity, period, options);
|
|
1527
|
-
}
|
|
1528
|
-
async function analyzeBrandSegmentationFromSource(source, period, options) {
|
|
1529
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.brandSegmentation, period, options);
|
|
1530
|
-
}
|
|
1531
|
-
async function analyzePageConcentrationFromSource(source, period, options) {
|
|
1532
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.pageConcentration, period, options);
|
|
1533
|
-
}
|
|
1534
|
-
async function analyzeKeywordConcentrationFromSource(source, period, options) {
|
|
1535
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.keywordConcentration, period, options);
|
|
1536
|
-
}
|
|
1537
|
-
async function analyzeClusteringFromSource(source, period, options) {
|
|
1538
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.clustering, period, options);
|
|
1539
|
-
}
|
|
1540
|
-
async function analyzeSeasonalityFromSource(source, period, options) {
|
|
1541
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.seasonality, period, options);
|
|
1542
|
-
}
|
|
1543
|
-
async function analyzeDecayFromSource(source, periods, options) {
|
|
1544
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.decay, periods, options);
|
|
1545
|
-
}
|
|
1546
|
-
async function analyzeMoversFromSource(source, periods, options) {
|
|
1547
|
-
return runPortableAnalyzer(source, PORTABLE_ANALYZERS.movers, periods, options);
|
|
1548
|
-
}
|
|
1549
|
-
export { AnalyzerCapabilityError, IN_MEMORY_DEFAULT_CAPABILITIES, analyzeBrandSegmentationFromSource, analyzeClusteringFromSource, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentrationFromSource, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentrationFromSource, analyzeSeasonalityFromSource, analyzeStrikingDistanceFromSource, createCompositeSource, createInMemoryQuerySource, queryAnalyticsFromSource, queryComparisonFromSource };
|
|
36
|
+
export { IN_MEMORY_DEFAULT_CAPABILITIES, createCompositeSource, createInMemoryQuerySource };
|