@gscdump/analysis 0.9.2 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,20 @@
1
- import { AnalyzerCapabilityError, defineAnalyzer, runAnalyzerFromSource } from "@gscdump/engine/analyzer";
2
1
  import { canProxyToGsc } from "@gscdump/engine-gsc-api";
3
- import { between, date, extractDateRange, gsc, page, query } from "gscdump/query";
4
- import { queryRows, typedQuery } from "@gscdump/engine/source";
5
- import { num } from "@gscdump/engine/analysis-types";
6
- import { comparisonOf, periodOf } from "@gscdump/engine/period";
7
- import { enumeratePartitions } from "@gscdump/engine/planner";
8
- import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
9
- async function analyzeFromSource(source, params, registry) {
10
- return runAnalyzerFromSource(source, params, registry);
2
+ import { extractDateRange } from "gscdump/query";
3
+ function shouldRouteToLive(state, site) {
4
+ if (!canProxyToGsc(state)) return false;
5
+ const { startDate, endDate } = extractDateRange(state.filter);
6
+ if (!startDate || !endDate) return false;
7
+ if (!site.oldestDateSynced || !site.newestDateSynced) return true;
8
+ return startDate < site.oldestDateSynced || endDate > site.newestDateSynced;
11
9
  }
12
10
  function createCompositeSource(opts) {
13
11
  const { engine, live, site } = opts;
14
- function rangeCovered(state) {
15
- const { startDate, endDate } = extractDateRange(state.filter);
16
- return !!(startDate && endDate && site.oldestDateSynced && site.newestDateSynced && startDate >= site.oldestDateSynced && endDate <= site.newestDateSynced);
17
- }
18
12
  return {
13
+ ...engine,
19
14
  name: "composite-engine-live",
20
- capabilities: engine.capabilities,
21
15
  async queryRows(state) {
22
- if (!rangeCovered(state) && canProxyToGsc(state)) return live.queryRows(state);
23
- return engine.queryRows(state);
24
- },
25
- executeSql: engine.executeSql
16
+ return shouldRouteToLive(state, site) ? live.queryRows(state) : engine.queryRows(state);
17
+ }
26
18
  };
27
19
  }
28
20
  const IN_MEMORY_DEFAULT_CAPABILITIES = {
@@ -34,1516 +26,11 @@ const IN_MEMORY_DEFAULT_CAPABILITIES = {
34
26
  function createInMemoryQuerySource(options) {
35
27
  return {
36
28
  name: "memory",
29
+ kind: "in-memory",
37
30
  capabilities: options.capabilities ?? IN_MEMORY_DEFAULT_CAPABILITIES,
38
31
  async queryRows(state) {
39
32
  return await options.queryRows(state);
40
33
  }
41
34
  };
42
35
  }
43
- const DEFAULT_LIMIT$1 = 25e3;
44
- function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
45
- return gsc.select(query, page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
46
- }
47
- function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
48
- return gsc.select(page).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
49
- }
50
- function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
51
- return gsc.select(date).where(between(date, period.startDate, period.endDate)).limit(limit).getState();
52
- }
53
- function escapeRegexAlt(s) {
54
- return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
55
- }
56
- function str$5(v) {
57
- return v == null ? "" : String(v);
58
- }
59
- function analyzeBrandSegmentation(keywords, options) {
60
- const { brandTerms, minImpressions = 10 } = options;
61
- const lowerBrandTerms = brandTerms.map((t) => t.toLowerCase());
62
- const brand = [];
63
- const nonBrand = [];
64
- for (const row of keywords) {
65
- if (num(row.impressions) < minImpressions) continue;
66
- if (lowerBrandTerms.some((term) => row.query.toLowerCase().includes(term))) brand.push(row);
67
- else nonBrand.push(row);
68
- }
69
- const brandClicks = brand.reduce((sum, k) => sum + num(k.clicks), 0);
70
- const nonBrandClicks = nonBrand.reduce((sum, k) => sum + num(k.clicks), 0);
71
- const totalClicks = brandClicks + nonBrandClicks;
72
- return {
73
- brand,
74
- nonBrand,
75
- summary: {
76
- brandClicks,
77
- nonBrandClicks,
78
- brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
79
- brandImpressions: brand.reduce((sum, k) => sum + num(k.impressions), 0),
80
- nonBrandImpressions: nonBrand.reduce((sum, k) => sum + num(k.impressions), 0)
81
- }
82
- };
83
- }
84
- defineAnalyzer({
85
- id: "brand",
86
- buildSql(params) {
87
- if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
88
- const { startDate, endDate } = periodOf(params);
89
- const minImpressions = params.minImpressions ?? 10;
90
- const limit = params.limit ?? 1e4;
91
- const regex = `(${params.brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
92
- return {
93
- sql: `
94
- WITH agg AS (
95
- SELECT
96
- query,
97
- url AS page,
98
- ${METRIC_EXPR.clicks} AS clicks,
99
- ${METRIC_EXPR.impressions} AS impressions,
100
- ${METRIC_EXPR.ctr} AS ctr,
101
- ${METRIC_EXPR.position} AS position
102
- FROM read_parquet({{FILES}}, union_by_name = true)
103
- WHERE date >= ? AND date <= ?
104
- GROUP BY query, url
105
- HAVING SUM(impressions) >= ?
106
- )
107
- SELECT
108
- query, page, clicks, impressions, ctr, position,
109
- CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
110
- FROM agg
111
- ORDER BY clicks DESC
112
- LIMIT ${Number(limit)}
113
- `,
114
- params: [
115
- startDate,
116
- endDate,
117
- minImpressions,
118
- regex
119
- ],
120
- current: {
121
- table: "page_keywords",
122
- partitions: enumeratePartitions(startDate, endDate)
123
- }
124
- };
125
- },
126
- reduceSql(rows) {
127
- const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
128
- query: str$5(r.query),
129
- page: r.page == null ? void 0 : str$5(r.page),
130
- clicks: num(r.clicks),
131
- impressions: num(r.impressions),
132
- ctr: num(r.ctr),
133
- position: num(r.position),
134
- segment: str$5(r.segment)
135
- }));
136
- let brandClicks = 0;
137
- let nonBrandClicks = 0;
138
- let brandImpressions = 0;
139
- let nonBrandImpressions = 0;
140
- for (const r of normalized) if (r.segment === "brand") {
141
- brandClicks += r.clicks;
142
- brandImpressions += r.impressions;
143
- } else {
144
- nonBrandClicks += r.clicks;
145
- nonBrandImpressions += r.impressions;
146
- }
147
- const totalClicks = brandClicks + nonBrandClicks;
148
- return {
149
- results: normalized,
150
- meta: {
151
- total: normalized.length,
152
- summary: {
153
- brandClicks,
154
- nonBrandClicks,
155
- brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
156
- brandImpressions,
157
- nonBrandImpressions
158
- }
159
- }
160
- };
161
- },
162
- buildRows(params) {
163
- return { keywords: keywordsQueryState(periodOf(params), params.limit) };
164
- },
165
- reduceRows(rows, params) {
166
- if (!params.brandTerms?.length) throw new Error("Brand analysis requires brandTerms");
167
- const result = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
168
- brandTerms: params.brandTerms,
169
- minImpressions: params.minImpressions
170
- });
171
- return {
172
- results: [...result.brand.map((r) => ({
173
- ...r,
174
- segment: "brand"
175
- })), ...result.nonBrand.map((r) => ({
176
- ...r,
177
- segment: "non-brand"
178
- }))],
179
- meta: { summary: result.summary }
180
- };
181
- }
182
- });
183
- const INTENT_PREFIXES_REGEX = "^(how to|what is|what are|why is|why do|where to|when to|best|top|vs|versus|compare|review|buy|cheap|free|near me)(\\s|$)";
184
- const INTENT_PREFIXES = [
185
- "how to",
186
- "what is",
187
- "what are",
188
- "why is",
189
- "why do",
190
- "where to",
191
- "when to",
192
- "best",
193
- "top",
194
- "vs",
195
- "versus",
196
- "compare",
197
- "review",
198
- "buy",
199
- "cheap",
200
- "free",
201
- "near me"
202
- ];
203
- const WHITESPACE_RE = /\s+/;
204
- function str$4(v) {
205
- return v == null ? "" : String(v);
206
- }
207
- function parseJsonList$3(v) {
208
- if (Array.isArray(v)) return v;
209
- if (typeof v === "string" && v.length > 0) {
210
- const parsed = JSON.parse(v);
211
- return Array.isArray(parsed) ? parsed : [];
212
- }
213
- return [];
214
- }
215
- function extractIntentPrefix(keyword) {
216
- const lower = keyword.toLowerCase();
217
- for (const prefix of INTENT_PREFIXES) if (lower.startsWith(`${prefix} `) || lower.startsWith(prefix)) return prefix;
218
- return null;
219
- }
220
- function extractWordPrefix(keyword, wordCount = 2) {
221
- const words = keyword.toLowerCase().split(WHITESPACE_RE).filter(Boolean);
222
- if (words.length < wordCount + 1) return null;
223
- return words.slice(0, wordCount).join(" ");
224
- }
225
- function analyzeClustering(keywords, options = {}) {
226
- const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
227
- const filtered = keywords.filter((k) => num(k.impressions) >= minImpressions);
228
- const clusterMap = /* @__PURE__ */ new Map();
229
- const clusteredKeywords = /* @__PURE__ */ new Set();
230
- if (clusterBy === "intent" || clusterBy === "both") for (const kw of filtered) {
231
- const intent = extractIntentPrefix(kw.query);
232
- if (intent) {
233
- const existing = clusterMap.get(intent);
234
- if (existing) existing.keywords.push(kw);
235
- else clusterMap.set(intent, {
236
- type: "intent",
237
- keywords: [kw]
238
- });
239
- clusteredKeywords.add(kw.query);
240
- }
241
- }
242
- if (clusterBy === "prefix" || clusterBy === "both") {
243
- const unclustered = filtered.filter((kw) => !clusteredKeywords.has(kw.query));
244
- const prefixMap = /* @__PURE__ */ new Map();
245
- for (const kw of unclustered) {
246
- const prefix = extractWordPrefix(kw.query);
247
- if (prefix) {
248
- const existing = prefixMap.get(prefix);
249
- if (existing) existing.push(kw);
250
- else prefixMap.set(prefix, [kw]);
251
- }
252
- }
253
- for (const [prefix, kws] of prefixMap) if (kws.length >= minClusterSize) {
254
- clusterMap.set(prefix, {
255
- type: "prefix",
256
- keywords: kws
257
- });
258
- kws.forEach((kw) => clusteredKeywords.add(kw.query));
259
- }
260
- }
261
- const clusters = [];
262
- for (const [name, data] of clusterMap) {
263
- if (data.keywords.length < minClusterSize) continue;
264
- const totalClicks = data.keywords.reduce((sum, k) => sum + num(k.clicks), 0);
265
- const totalImpressions = data.keywords.reduce((sum, k) => sum + num(k.impressions), 0);
266
- const avgPosition = data.keywords.reduce((sum, k) => sum + num(k.position), 0) / data.keywords.length;
267
- clusters.push({
268
- clusterName: name,
269
- clusterType: data.type,
270
- keywords: data.keywords,
271
- totalClicks,
272
- totalImpressions,
273
- avgPosition,
274
- keywordCount: data.keywords.length
275
- });
276
- }
277
- clusters.sort((a, b) => b.totalClicks - a.totalClicks);
278
- return {
279
- clusters,
280
- unclustered: filtered.filter((kw) => !clusteredKeywords.has(kw.query))
281
- };
282
- }
283
- defineAnalyzer({
284
- id: "clustering",
285
- buildSql(params) {
286
- const { startDate, endDate } = periodOf(params);
287
- const minImpressions = params.minImpressions ?? 10;
288
- const minClusterSize = params.minClusterSize ?? 2;
289
- const clusterBy = params.clusterBy ?? "both";
290
- const doIntent = clusterBy === "intent" || clusterBy === "both";
291
- const doPrefix = clusterBy === "prefix" || clusterBy === "both";
292
- const intentExpr = doIntent ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')` : `CAST(NULL AS VARCHAR)`;
293
- const prefixExpr = doPrefix ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
294
- THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
295
- ELSE CAST(NULL AS VARCHAR) END` : `CAST(NULL AS VARCHAR)`;
296
- return {
297
- sql: `
298
- WITH agg AS (
299
- SELECT
300
- query,
301
- ${METRIC_EXPR.clicks} AS clicks,
302
- ${METRIC_EXPR.impressions} AS impressions,
303
- ${METRIC_EXPR.ctr} AS ctr,
304
- ${METRIC_EXPR.position} AS position
305
- FROM read_parquet({{FILES}}, union_by_name = true)
306
- WHERE date >= ? AND date <= ?
307
- GROUP BY query
308
- HAVING SUM(impressions) >= ?
309
- ),
310
- classified AS (
311
- SELECT
312
- query, clicks, impressions, ctr, position,
313
- ${intentExpr} AS intent_prefix,
314
- ${prefixExpr} AS word_prefix
315
- FROM agg
316
- ),
317
- keyed AS (
318
- SELECT
319
- query, clicks, impressions, ctr, position,
320
- COALESCE(intent_prefix, word_prefix) AS cluster_name,
321
- CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
322
- FROM classified
323
- WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
324
- )
325
- SELECT
326
- cluster_name AS clusterName,
327
- any_value(cluster_type) AS clusterType,
328
- CAST(COUNT(*) AS DOUBLE) AS keywordCount,
329
- ${METRIC_EXPR.clicks} AS totalClicks,
330
- ${METRIC_EXPR.impressions} AS totalImpressions,
331
- AVG(position) AS avgPosition,
332
- to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
333
- FROM keyed
334
- GROUP BY cluster_name
335
- HAVING COUNT(*) >= ?
336
- ORDER BY totalClicks DESC
337
- `,
338
- params: [
339
- startDate,
340
- endDate,
341
- minImpressions,
342
- minClusterSize
343
- ],
344
- current: {
345
- table: "keywords",
346
- partitions: enumeratePartitions(startDate, endDate)
347
- }
348
- };
349
- },
350
- reduceSql(rows) {
351
- const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
352
- clusterName: str$4(r.clusterName),
353
- clusterType: str$4(r.clusterType),
354
- keywordCount: num(r.keywordCount),
355
- totalClicks: num(r.totalClicks),
356
- totalImpressions: num(r.totalImpressions),
357
- avgPosition: num(r.avgPosition),
358
- keywords: parseJsonList$3(r.keywords).map((k) => ({
359
- query: str$4(k.query),
360
- clicks: num(k.clicks),
361
- impressions: num(k.impressions),
362
- ctr: num(k.ctr),
363
- position: num(k.position)
364
- }))
365
- }));
366
- return {
367
- results: clusters,
368
- meta: {
369
- total: clusters.length,
370
- totalClusters: clusters.length
371
- }
372
- };
373
- },
374
- buildRows(params) {
375
- return { keywords: keywordsQueryState(periodOf(params), params.limit) };
376
- },
377
- reduceRows(rows, params) {
378
- const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
379
- clusterBy: params.clusterBy,
380
- minClusterSize: params.minClusterSize,
381
- minImpressions: params.minImpressions
382
- });
383
- return {
384
- results: result.clusters,
385
- meta: { totalClusters: result.clusters.length }
386
- };
387
- }
388
- });
389
- function str$3(v) {
390
- return v == null ? "" : String(v);
391
- }
392
- function parseJsonList$2(v) {
393
- if (Array.isArray(v)) return v;
394
- if (typeof v === "string" && v.length > 0) {
395
- const parsed = JSON.parse(v);
396
- return Array.isArray(parsed) ? parsed : [];
397
- }
398
- return [];
399
- }
400
- function calculateGini(values) {
401
- if (values.length === 0) return 0;
402
- const sorted = [...values].sort((a, b) => a - b);
403
- const n = sorted.length;
404
- const sum = sorted.reduce((a, b) => a + b, 0);
405
- if (sum === 0) return 0;
406
- let weightedSum = 0;
407
- for (let i = 0; i < n; i++) weightedSum += (2 * (i + 1) - n - 1) * sorted[i];
408
- return weightedSum / (n * sum);
409
- }
410
- function calculateHHI(shares) {
411
- return shares.reduce((sum, share) => sum + (share * 100) ** 2, 0);
412
- }
413
- function analyzeConcentration(items, options = {}) {
414
- const { topN = 10 } = options;
415
- if (items.length === 0) return {
416
- giniCoefficient: 0,
417
- hhi: 0,
418
- topNConcentration: 0,
419
- topNItems: [],
420
- totalItems: 0,
421
- totalClicks: 0,
422
- riskLevel: "low"
423
- };
424
- const sorted = [...items].sort((a, b) => b.clicks - a.clicks);
425
- const totalClicks = sorted.reduce((sum, item) => sum + item.clicks, 0);
426
- const clickValues = sorted.map((i) => i.clicks);
427
- const shares = totalClicks > 0 ? sorted.map((i) => i.clicks / totalClicks) : [];
428
- const giniCoefficient = calculateGini(clickValues);
429
- const hhi = calculateHHI(shares);
430
- const topNItems = sorted.slice(0, topN).map((item) => ({
431
- key: item.key,
432
- clicks: item.clicks,
433
- share: totalClicks > 0 ? item.clicks / totalClicks : 0
434
- }));
435
- const topNClicks = topNItems.reduce((sum, item) => sum + item.clicks, 0);
436
- const topNConcentration = totalClicks > 0 ? topNClicks / totalClicks : 0;
437
- let riskLevel = "low";
438
- if (hhi > 2500) riskLevel = "high";
439
- else if (hhi > 1500) riskLevel = "medium";
440
- return {
441
- giniCoefficient,
442
- hhi,
443
- topNConcentration,
444
- topNItems,
445
- totalItems: items.length,
446
- totalClicks,
447
- riskLevel
448
- };
449
- }
450
- function analyzePageConcentration(pages, options) {
451
- return analyzeConcentration(pages.map((p) => ({
452
- key: p.page,
453
- clicks: num(p.clicks)
454
- })), options);
455
- }
456
- function analyzeKeywordConcentration(keywords, options) {
457
- return analyzeConcentration(keywords.map((k) => ({
458
- key: k.query,
459
- clicks: num(k.clicks)
460
- })), options);
461
- }
462
- defineAnalyzer({
463
- id: "concentration",
464
- buildSql(params) {
465
- const { startDate, endDate } = periodOf(params);
466
- const dim = params.dimension || "pages";
467
- const topN = params.topN ?? 10;
468
- const table = dim === "keywords" ? "keywords" : "pages";
469
- const keyCol = dim === "keywords" ? "query" : "url";
470
- return {
471
- sql: `
472
- WITH items AS (
473
- SELECT
474
- ${keyCol} AS key,
475
- ${METRIC_EXPR.clicks} AS clicks
476
- FROM read_parquet({{FILES}}, union_by_name = true)
477
- WHERE date >= ? AND date <= ?
478
- GROUP BY ${keyCol}
479
- HAVING SUM(clicks) > 0
480
- ),
481
- totals AS (
482
- SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
483
- ),
484
- ranked AS (
485
- SELECT
486
- i.key, i.clicks,
487
- i.clicks / NULLIF(t.total_clicks, 0) AS share,
488
- ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
489
- ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
490
- t.total_clicks AS tclicks,
491
- t.total_items AS titems
492
- FROM items i, totals t
493
- ),
494
- gini_num AS (
495
- SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
496
- ),
497
- hhi_calc AS (
498
- SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
499
- ),
500
- top_list AS (
501
- SELECT
502
- list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
503
- SUM(clicks) AS top_clicks
504
- FROM ranked WHERE rnk_desc <= ?
505
- )
506
- SELECT
507
- COALESCE(
508
- (SELECT weighted_sum FROM gini_num)
509
- / NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
510
- 0.0
511
- ) AS giniCoefficient,
512
- COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
513
- COALESCE(
514
- CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
515
- / NULLIF((SELECT total_clicks FROM totals), 0),
516
- 0.0
517
- ) AS topNConcentration,
518
- COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
519
- COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
520
- COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
521
- CASE
522
- WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
523
- WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
524
- ELSE 'low'
525
- END AS riskLevel
526
- `,
527
- params: [
528
- startDate,
529
- endDate,
530
- topN
531
- ],
532
- current: {
533
- table,
534
- partitions: enumeratePartitions(startDate, endDate)
535
- }
536
- };
537
- },
538
- reduceSql(rows, params) {
539
- const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
540
- const topRaw = parseJsonList$2(r.topNItems);
541
- return {
542
- results: [{
543
- giniCoefficient: num(r.giniCoefficient),
544
- hhi: num(r.hhi),
545
- topNConcentration: num(r.topNConcentration),
546
- topNItems: topRaw.map((t) => ({
547
- key: str$3(t.key),
548
- clicks: num(t.clicks),
549
- share: num(t.share)
550
- })),
551
- totalItems: num(r.totalItems),
552
- totalClicks: num(r.totalClicks),
553
- riskLevel: str$3(r.riskLevel)
554
- }],
555
- meta: {
556
- total: 1,
557
- dimension: params.dimension || "pages"
558
- }
559
- };
560
- },
561
- buildRows(params) {
562
- const dim = params.dimension || "pages";
563
- const period = periodOf(params);
564
- const out = {};
565
- if (dim === "pages") out.pages = pagesQueryState(period, params.limit);
566
- else out.keywords = keywordsQueryState(period, params.limit);
567
- return out;
568
- },
569
- reduceRows(rows, params) {
570
- const dim = params.dimension || "pages";
571
- const arr = Array.isArray(rows) ? rows : rows[dim] ?? [];
572
- return {
573
- results: [dim === "pages" ? analyzePageConcentration(arr, { topN: params.topN }) : analyzeKeywordConcentration(arr, { topN: params.topN })],
574
- meta: { dimension: dim }
575
- };
576
- }
577
- });
578
- function buildPeriodMap(rows, key, value, filter) {
579
- const out = /* @__PURE__ */ new Map();
580
- for (const row of rows) {
581
- if (filter && !filter(row)) continue;
582
- out.set(key(row), value(row));
583
- }
584
- return out;
585
- }
586
- function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
587
- return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
588
- const mult = sortOrder === "desc" ? -1 : 1;
589
- return [...items].sort((a, b) => (getValue(a, sortBy) - getValue(b, sortBy)) * mult);
590
- };
591
- }
592
- function createMetricSorter(defaultMetric, orderByMetric) {
593
- return (items, sortBy = defaultMetric) => {
594
- const mult = orderByMetric[sortBy] === "desc" ? -1 : 1;
595
- return [...items].sort((a, b) => (a[sortBy] - b[sortBy]) * mult);
596
- };
597
- }
598
- const sortResults$2 = createMetricSorter("lostClicks", {
599
- lostClicks: "desc",
600
- declinePercent: "desc",
601
- currentClicks: "asc"
602
- });
603
- function str$2(v) {
604
- return v == null ? "" : String(v);
605
- }
606
- function parseJsonList$1(v) {
607
- if (Array.isArray(v)) return v;
608
- if (typeof v === "string" && v.length > 0) {
609
- const parsed = JSON.parse(v);
610
- return Array.isArray(parsed) ? parsed : [];
611
- }
612
- return [];
613
- }
614
- function analyzeDecay(input, options = {}) {
615
- const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
616
- const currentMap = buildPeriodMap(input.current, (r) => r.page, (r) => ({
617
- clicks: num(r.clicks),
618
- position: num(r.position)
619
- }));
620
- const previousMap = buildPeriodMap(input.previous, (r) => r.page, (r) => ({
621
- clicks: num(r.clicks),
622
- position: num(r.position)
623
- }), (r) => num(r.clicks) >= minPreviousClicks);
624
- const results = [];
625
- for (const [page, prev] of previousMap) {
626
- const curr = currentMap.get(page) || {
627
- clicks: 0,
628
- position: 0
629
- };
630
- const lostClicks = prev.clicks - curr.clicks;
631
- const declinePercent = prev.clicks > 0 ? lostClicks / prev.clicks : 0;
632
- if (declinePercent >= threshold && lostClicks > 0) results.push({
633
- page,
634
- currentClicks: curr.clicks,
635
- previousClicks: prev.clicks,
636
- lostClicks,
637
- declinePercent,
638
- currentPosition: curr.position,
639
- previousPosition: prev.position,
640
- positionDrop: curr.position - prev.position
641
- });
642
- }
643
- return sortResults$2(results, sortBy);
644
- }
645
- defineAnalyzer({
646
- id: "decay",
647
- buildSql(params) {
648
- const { current: cur, previous: prev } = comparisonOf(params);
649
- const minPreviousClicks = params.minPreviousClicks ?? 50;
650
- const threshold = params.threshold ?? .2;
651
- const limit = params.limit ?? 2e3;
652
- return {
653
- sql: `
654
- WITH cur AS (
655
- SELECT
656
- url,
657
- ${METRIC_EXPR.clicks} AS clicks,
658
- ${METRIC_EXPR.position} AS position
659
- FROM read_parquet({{FILES}}, union_by_name = true)
660
- WHERE date >= ? AND date <= ?
661
- GROUP BY url
662
- ),
663
- prev AS (
664
- SELECT
665
- url,
666
- ${METRIC_EXPR.clicks} AS clicks,
667
- ${METRIC_EXPR.position} AS position
668
- FROM read_parquet({{FILES_PREV}}, union_by_name = true)
669
- WHERE date >= ? AND date <= ?
670
- GROUP BY url
671
- HAVING SUM(clicks) >= ?
672
- ),
673
- weekly AS (
674
- SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
675
- ${METRIC_EXPR.clicks} AS clicks,
676
- ${METRIC_EXPR.impressions} AS impressions
677
- FROM (
678
- SELECT url, date, clicks, impressions
679
- FROM read_parquet({{FILES}}, union_by_name = true)
680
- WHERE date >= ? AND date <= ?
681
- UNION ALL
682
- SELECT url, date, clicks, impressions
683
- FROM read_parquet({{FILES_PREV}}, union_by_name = true)
684
- WHERE date >= ? AND date <= ?
685
- )
686
- GROUP BY url, week
687
- ),
688
- series_by_url AS (
689
- SELECT url, to_json(list({
690
- 'week': strftime(week, '%Y-%m-%d'),
691
- 'clicks': clicks,
692
- 'impressions': impressions
693
- } ORDER BY week)) AS seriesJson
694
- FROM weekly GROUP BY url
695
- ),
696
- joined AS (
697
- SELECT
698
- p.url AS page,
699
- COALESCE(c.clicks, 0.0) AS currentClicks,
700
- p.clicks AS previousClicks,
701
- (p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
702
- (p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
703
- COALESCE(c.position, 0.0) AS currentPosition,
704
- p.position AS previousPosition,
705
- (COALESCE(c.position, 0.0) - p.position) AS positionDrop,
706
- s.seriesJson
707
- FROM prev p
708
- LEFT JOIN cur c ON p.url = c.url
709
- LEFT JOIN series_by_url s ON p.url = s.url
710
- )
711
- SELECT *
712
- FROM joined
713
- WHERE declinePercent >= ? AND lostClicks > 0
714
- ORDER BY lostClicks DESC
715
- LIMIT ${Number(limit)}
716
- `,
717
- params: [
718
- cur.startDate,
719
- cur.endDate,
720
- prev.startDate,
721
- prev.endDate,
722
- minPreviousClicks,
723
- cur.startDate,
724
- cur.endDate,
725
- prev.startDate,
726
- prev.endDate,
727
- threshold
728
- ],
729
- current: {
730
- table: "pages",
731
- partitions: enumeratePartitions(cur.startDate, cur.endDate)
732
- },
733
- previous: {
734
- table: "pages",
735
- partitions: enumeratePartitions(prev.startDate, prev.endDate)
736
- }
737
- };
738
- },
739
- reduceSql(rows) {
740
- const arr = Array.isArray(rows) ? rows : [];
741
- return {
742
- results: arr.map((r) => ({
743
- page: str$2(r.page),
744
- currentClicks: num(r.currentClicks),
745
- previousClicks: num(r.previousClicks),
746
- lostClicks: num(r.lostClicks),
747
- declinePercent: num(r.declinePercent),
748
- currentPosition: num(r.currentPosition),
749
- previousPosition: num(r.previousPosition),
750
- positionDrop: num(r.positionDrop),
751
- series: parseJsonList$1(r.seriesJson).map((s) => ({
752
- week: str$2(s.week),
753
- clicks: num(s.clicks),
754
- impressions: num(s.impressions)
755
- }))
756
- })),
757
- meta: { total: arr.length }
758
- };
759
- },
760
- buildRows(params) {
761
- const { current, previous } = comparisonOf(params);
762
- return {
763
- current: pagesQueryState(current, params.limit),
764
- previous: pagesQueryState(previous, params.limit)
765
- };
766
- },
767
- reduceRows(rows, params) {
768
- const map = rows && !Array.isArray(rows) ? rows : {
769
- current: [],
770
- previous: []
771
- };
772
- const results = analyzeDecay({
773
- current: map.current ?? [],
774
- previous: map.previous ?? []
775
- }, {
776
- minPreviousClicks: params.minPreviousClicks,
777
- threshold: params.threshold
778
- });
779
- return {
780
- results,
781
- meta: { total: results.length }
782
- };
783
- }
784
- });
785
- function percentDifference(current, previous) {
786
- if (previous === 0) return current > 0 ? 100 : 0;
787
- return (current - previous) / previous * 100;
788
- }
789
- function str$1(v) {
790
- return v == null ? "" : String(v);
791
- }
792
- function parseJsonList(v) {
793
- if (Array.isArray(v)) return v;
794
- if (typeof v === "string" && v.length > 0) {
795
- const parsed = JSON.parse(v);
796
- return Array.isArray(parsed) ? parsed : [];
797
- }
798
- return [];
799
- }
800
- function analyzeMovers(input, options = {}) {
801
- const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
802
- const normFactor = input.normalizationFactor ?? 1;
803
- const baselineMap = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
804
- clicks: num(r.clicks) / normFactor,
805
- impressions: num(r.impressions) / normFactor,
806
- position: num(r.position),
807
- page: r.page ?? null
808
- }));
809
- const pageMap = /* @__PURE__ */ new Map();
810
- for (const row of input.current) if (!pageMap.has(row.query) && row.page) pageMap.set(row.query, row.page);
811
- for (const row of input.previous) if (!pageMap.has(row.query) && row.page) pageMap.set(row.query, row.page);
812
- const rising = [];
813
- const declining = [];
814
- const stable = [];
815
- for (const row of input.current) {
816
- const impressions = num(row.impressions);
817
- const clicks = num(row.clicks);
818
- const position = num(row.position);
819
- if (impressions < minImpressions) continue;
820
- const baseline = baselineMap.get(row.query) || {
821
- clicks: 0,
822
- impressions: 0,
823
- position: 0,
824
- page: null
825
- };
826
- const clicksChangePercent = percentDifference(clicks, baseline.clicks);
827
- const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
828
- const data = {
829
- keyword: row.query,
830
- page: pageMap.get(row.query) ?? null,
831
- recentClicks: clicks,
832
- recentImpressions: impressions,
833
- recentPosition: position,
834
- baselineClicks: Math.round(baseline.clicks),
835
- baselineImpressions: Math.round(baseline.impressions),
836
- baselinePosition: baseline.position,
837
- clicksChange: clicks - Math.round(baseline.clicks),
838
- clicksChangePercent,
839
- impressionsChangePercent,
840
- positionChange: position - baseline.position
841
- };
842
- const absChange = Math.abs(clicksChangePercent / 100);
843
- if (clicksChangePercent > 0 && absChange >= changeThreshold) rising.push(data);
844
- else if (clicksChangePercent < 0 && absChange >= changeThreshold) declining.push(data);
845
- else stable.push(data);
846
- }
847
- const sortFn = (a, b) => {
848
- switch (sortBy) {
849
- case "clicks": return b.recentClicks - a.recentClicks;
850
- case "impressions": return b.recentImpressions - a.recentImpressions;
851
- case "clicksChange": return Math.abs(b.clicksChangePercent) - Math.abs(a.clicksChangePercent);
852
- case "impressionsChange": return Math.abs(b.impressionsChangePercent) - Math.abs(a.impressionsChangePercent);
853
- case "positionChange": return Math.abs(b.positionChange) - Math.abs(a.positionChange);
854
- default: return Math.abs(b.clicksChangePercent) - Math.abs(a.clicksChangePercent);
855
- }
856
- };
857
- rising.sort(sortFn);
858
- declining.sort(sortFn);
859
- stable.sort((a, b) => b.recentClicks - a.recentClicks);
860
- return {
861
- rising,
862
- declining,
863
- stable
864
- };
865
- }
866
- defineAnalyzer({
867
- id: "movers",
868
- buildSql(params) {
869
- const { current: cur, previous: prev } = comparisonOf(params);
870
- const minImpressions = params.minImpressions ?? 50;
871
- const changeThreshold = params.changeThreshold ?? .2;
872
- const limit = params.limit ?? 2e3;
873
- return {
874
- sql: `
875
- WITH cur AS (
876
- SELECT
877
- query, url,
878
- ${METRIC_EXPR.clicks} AS clicks,
879
- ${METRIC_EXPR.impressions} AS impressions,
880
- ${METRIC_EXPR.position} AS position
881
- FROM read_parquet({{FILES}}, union_by_name = true)
882
- WHERE date >= ? AND date <= ?
883
- GROUP BY query, url
884
- ),
885
- prev AS (
886
- SELECT
887
- query, url,
888
- ${METRIC_EXPR.clicks} AS clicks,
889
- ${METRIC_EXPR.impressions} AS impressions,
890
- ${METRIC_EXPR.position} AS position
891
- FROM read_parquet({{FILES_PREV}}, union_by_name = true)
892
- WHERE date >= ? AND date <= ?
893
- GROUP BY query, url
894
- ),
895
- weekly AS (
896
- SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
897
- ${METRIC_EXPR.clicks} AS clicks,
898
- ${METRIC_EXPR.impressions} AS impressions
899
- FROM (
900
- SELECT query, url, date, clicks, impressions
901
- FROM read_parquet({{FILES}}, union_by_name = true)
902
- WHERE date >= ? AND date <= ?
903
- UNION ALL
904
- SELECT query, url, date, clicks, impressions
905
- FROM read_parquet({{FILES_PREV}}, union_by_name = true)
906
- WHERE date >= ? AND date <= ?
907
- )
908
- GROUP BY query, url, week
909
- ),
910
- series_by_entity AS (
911
- SELECT query, url, to_json(list({
912
- 'week': strftime(week, '%Y-%m-%d'),
913
- 'clicks': clicks,
914
- 'impressions': impressions
915
- } ORDER BY week)) AS seriesJson
916
- FROM weekly GROUP BY query, url
917
- ),
918
- joined AS (
919
- SELECT
920
- c.query AS keyword,
921
- c.url AS page,
922
- c.clicks AS recentClicks,
923
- c.impressions AS recentImpressions,
924
- c.position AS recentPosition,
925
- COALESCE(p.clicks, 0.0) AS baselineClicks,
926
- COALESCE(p.impressions, 0.0) AS baselineImpressions,
927
- COALESCE(p.position, 0.0) AS baselinePosition,
928
- (c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
929
- CASE
930
- WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
931
- ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
932
- END AS clicksChangePercent,
933
- CASE
934
- WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
935
- ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
936
- END AS impressionsChangePercent,
937
- (c.position - COALESCE(p.position, 0.0)) AS positionChange,
938
- s.seriesJson
939
- FROM cur c
940
- LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
941
- LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
942
- WHERE c.impressions >= ?
943
- )
944
- SELECT *,
945
- CASE
946
- WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
947
- WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
948
- ELSE 'stable'
949
- END AS direction
950
- FROM joined
951
- ORDER BY ABS(clicksChangePercent) DESC
952
- LIMIT ${Number(limit)}
953
- `,
954
- params: [
955
- cur.startDate,
956
- cur.endDate,
957
- prev.startDate,
958
- prev.endDate,
959
- cur.startDate,
960
- cur.endDate,
961
- prev.startDate,
962
- prev.endDate,
963
- minImpressions,
964
- changeThreshold,
965
- changeThreshold
966
- ],
967
- current: {
968
- table: "page_keywords",
969
- partitions: enumeratePartitions(cur.startDate, cur.endDate)
970
- },
971
- previous: {
972
- table: "page_keywords",
973
- partitions: enumeratePartitions(prev.startDate, prev.endDate)
974
- }
975
- };
976
- },
977
- reduceSql(rows) {
978
- const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
979
- keyword: str$1(r.keyword),
980
- page: r.page == null ? null : str$1(r.page),
981
- recentClicks: num(r.recentClicks),
982
- recentImpressions: num(r.recentImpressions),
983
- recentPosition: num(r.recentPosition),
984
- baselineClicks: Math.round(num(r.baselineClicks)),
985
- baselineImpressions: Math.round(num(r.baselineImpressions)),
986
- baselinePosition: num(r.baselinePosition),
987
- clicksChange: num(r.clicksChange),
988
- clicksChangePercent: num(r.clicksChangePercent),
989
- impressionsChangePercent: num(r.impressionsChangePercent),
990
- positionChange: num(r.positionChange),
991
- direction: str$1(r.direction),
992
- series: parseJsonList(r.seriesJson).map((s) => ({
993
- week: str$1(s.week),
994
- clicks: num(s.clicks),
995
- impressions: num(s.impressions)
996
- }))
997
- }));
998
- const rising = normalized.filter((r) => r.direction === "rising");
999
- const declining = normalized.filter((r) => r.direction === "declining");
1000
- const stable = normalized.filter((r) => r.direction === "stable");
1001
- const combined = [...rising, ...declining];
1002
- return {
1003
- results: combined,
1004
- meta: {
1005
- total: combined.length,
1006
- rising: rising.length,
1007
- declining: declining.length,
1008
- stable: stable.length
1009
- }
1010
- };
1011
- },
1012
- buildRows(params) {
1013
- const { current, previous } = comparisonOf(params);
1014
- return {
1015
- current: keywordsQueryState(current, params.limit),
1016
- previous: keywordsQueryState(previous, params.limit)
1017
- };
1018
- },
1019
- reduceRows(rows, params) {
1020
- const map = rows && !Array.isArray(rows) ? rows : {
1021
- current: [],
1022
- previous: []
1023
- };
1024
- const result = analyzeMovers({
1025
- current: map.current ?? [],
1026
- previous: map.previous ?? []
1027
- }, {
1028
- changeThreshold: params.changeThreshold,
1029
- minImpressions: params.minImpressions
1030
- });
1031
- return {
1032
- results: [...result.rising.map((r) => ({
1033
- ...r,
1034
- direction: "rising"
1035
- })), ...result.declining.map((r) => ({
1036
- ...r,
1037
- direction: "declining"
1038
- }))],
1039
- meta: {
1040
- rising: result.rising.length,
1041
- declining: result.declining.length
1042
- }
1043
- };
1044
- }
1045
- });
1046
- const DEFAULT_LIMIT = 1e3;
1047
- const MAX_LIMIT = 5e4;
1048
- function clampLimit(limit, fallback = DEFAULT_LIMIT) {
1049
- const n = Number(limit ?? fallback);
1050
- if (!Number.isFinite(n) || n <= 0) return fallback;
1051
- return Math.min(n, MAX_LIMIT);
1052
- }
1053
- function clampOffset(offset) {
1054
- const n = Number(offset ?? 0);
1055
- if (!Number.isFinite(n) || n < 0) return 0;
1056
- return Math.floor(n);
1057
- }
1058
- function paginateClause(input) {
1059
- const l = clampLimit(input.limit);
1060
- const o = clampOffset(input.offset);
1061
- return o > 0 ? `LIMIT ${l} OFFSET ${o}` : `LIMIT ${l}`;
1062
- }
1063
- function paginateInMemory(rows, input) {
1064
- const l = clampLimit(input.limit, rows.length);
1065
- const o = clampOffset(input.offset);
1066
- return rows.slice(o, o + l);
1067
- }
1068
- const EXPECTED_CTR_BY_POSITION = {
1069
- 1: .3,
1070
- 2: .15,
1071
- 3: .1,
1072
- 4: .07,
1073
- 5: .05,
1074
- 6: .04,
1075
- 7: .03,
1076
- 8: .025,
1077
- 9: .02,
1078
- 10: .015
1079
- };
1080
- function getExpectedCtr(position) {
1081
- return EXPECTED_CTR_BY_POSITION[Math.round(Math.max(1, Math.min(position, 10)))] || .01;
1082
- }
1083
- function calculatePositionScore(position) {
1084
- if (position <= 3) return .2;
1085
- if (position > 50) return .1;
1086
- const distance = Math.abs(position - 11);
1087
- return Math.max(0, 1 - distance / 15);
1088
- }
1089
- function calculateImpressionScore(impressions) {
1090
- if (impressions <= 0) return 0;
1091
- return Math.min(Math.log10(impressions) / 5, 1);
1092
- }
1093
- function calculateCtrGapScore(actualCtr, position) {
1094
- const expectedCtr = getExpectedCtr(position);
1095
- if (actualCtr >= expectedCtr) return 0;
1096
- const gap = expectedCtr - actualCtr;
1097
- return Math.min(gap / expectedCtr, 1);
1098
- }
1099
- const sortResults$1 = createMetricSorter("opportunityScore", {
1100
- opportunityScore: "desc",
1101
- potentialClicks: "desc",
1102
- impressions: "desc",
1103
- position: "asc"
1104
- });
1105
- const opportunityAnalyzer = defineAnalyzer({
1106
- id: "opportunity",
1107
- buildSql(params) {
1108
- const { startDate, endDate } = periodOf(params);
1109
- const minImpressions = params.minImpressions ?? 100;
1110
- const w1 = 1;
1111
- const w2 = 1;
1112
- const w3 = 1;
1113
- const totalW = w1 + w2 + w3;
1114
- const limit = params.limit ?? 1e3;
1115
- return {
1116
- sql: `
1117
- WITH agg AS (
1118
- SELECT
1119
- query AS keyword,
1120
- url AS page,
1121
- ${METRIC_EXPR.clicks} AS clicks,
1122
- ${METRIC_EXPR.impressions} AS impressions,
1123
- ${METRIC_EXPR.ctr} AS ctr,
1124
- ${METRIC_EXPR.position} AS position
1125
- FROM read_parquet({{FILES}}, union_by_name = true)
1126
- WHERE date >= ? AND date <= ?
1127
- GROUP BY query, url
1128
- HAVING SUM(impressions) >= ?
1129
- ),
1130
- scored AS (
1131
- SELECT
1132
- keyword, page, clicks, impressions, ctr, position,
1133
- CASE
1134
- WHEN position <= 3 THEN 0.2
1135
- WHEN position > 50 THEN 0.1
1136
- ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
1137
- END AS positionScore,
1138
- CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
1139
- CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
1140
- WHEN 1 THEN 0.30
1141
- WHEN 2 THEN 0.15
1142
- WHEN 3 THEN 0.10
1143
- WHEN 4 THEN 0.07
1144
- WHEN 5 THEN 0.05
1145
- WHEN 6 THEN 0.04
1146
- WHEN 7 THEN 0.03
1147
- WHEN 8 THEN 0.025
1148
- WHEN 9 THEN 0.02
1149
- WHEN 10 THEN 0.015
1150
- ELSE 0.01
1151
- END AS expectedCtr
1152
- FROM agg
1153
- ),
1154
- gapped AS (
1155
- SELECT
1156
- *,
1157
- CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
1158
- FROM scored
1159
- )
1160
- SELECT
1161
- keyword, page, clicks, impressions, ctr, position,
1162
- CAST(ROUND(POWER(
1163
- POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
1164
- 1.0 / ${totalW}
1165
- ) * 100) AS DOUBLE) AS opportunityScore,
1166
- CAST(ROUND(impressions * (
1167
- CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
1168
- WHEN 1 THEN 0.30
1169
- WHEN 2 THEN 0.15
1170
- WHEN 3 THEN 0.10
1171
- ELSE 0.10
1172
- END
1173
- )) AS DOUBLE) AS potentialClicks,
1174
- positionScore, impressionScore, ctrGapScore
1175
- FROM gapped
1176
- ORDER BY opportunityScore DESC
1177
- ${paginateClause({
1178
- limit,
1179
- offset: params.offset
1180
- })}
1181
- `,
1182
- params: [
1183
- startDate,
1184
- endDate,
1185
- minImpressions
1186
- ],
1187
- current: {
1188
- table: "page_keywords",
1189
- partitions: enumeratePartitions(startDate, endDate)
1190
- }
1191
- };
1192
- },
1193
- reduceSql(rows) {
1194
- const arr = Array.isArray(rows) ? rows : [];
1195
- return {
1196
- results: arr.map((r) => ({
1197
- keyword: r.keyword == null ? "" : String(r.keyword),
1198
- page: r.page == null ? null : String(r.page),
1199
- clicks: num(r.clicks),
1200
- impressions: num(r.impressions),
1201
- ctr: num(r.ctr),
1202
- position: num(r.position),
1203
- opportunityScore: num(r.opportunityScore),
1204
- potentialClicks: num(r.potentialClicks),
1205
- factors: {
1206
- positionScore: num(r.positionScore),
1207
- impressionScore: num(r.impressionScore),
1208
- ctrGapScore: num(r.ctrGapScore)
1209
- }
1210
- })),
1211
- meta: { total: arr.length }
1212
- };
1213
- },
1214
- buildRows(params) {
1215
- return { keywords: keywordsQueryState(periodOf(params), params.limit) };
1216
- },
1217
- reduceRows(rows, params) {
1218
- const keywords = (Array.isArray(rows) ? rows : []) ?? [];
1219
- const minImpressions = params.minImpressions ?? 100;
1220
- const positionWeight = 1;
1221
- const impressionsWeight = 1;
1222
- const ctrGapWeight = 1;
1223
- const sortBy = "opportunityScore";
1224
- const results = [];
1225
- for (const row of keywords) {
1226
- const impressions = num(row.impressions);
1227
- const position = num(row.position);
1228
- const ctr = num(row.ctr);
1229
- const clicks = num(row.clicks);
1230
- if (impressions < minImpressions) continue;
1231
- const positionScore = calculatePositionScore(position);
1232
- const impressionScore = calculateImpressionScore(impressions);
1233
- const ctrGapScore = calculateCtrGapScore(ctr, position);
1234
- const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
1235
- const opportunityScore = Math.round(geometricMean * 100);
1236
- const targetCtr = getExpectedCtr(Math.min(3, position));
1237
- const potentialClicks = Math.round(impressions * targetCtr);
1238
- results.push({
1239
- keyword: row.query,
1240
- page: row.page ?? null,
1241
- clicks,
1242
- impressions,
1243
- ctr,
1244
- position,
1245
- opportunityScore,
1246
- potentialClicks,
1247
- factors: {
1248
- positionScore,
1249
- impressionScore,
1250
- ctrGapScore
1251
- }
1252
- });
1253
- }
1254
- const sorted = sortResults$1(results, sortBy);
1255
- const paged = paginateInMemory(sorted, {
1256
- limit: params.limit,
1257
- offset: params.offset
1258
- });
1259
- return {
1260
- results: paged,
1261
- meta: {
1262
- total: sorted.length,
1263
- returned: paged.length
1264
- }
1265
- };
1266
- }
1267
- });
1268
- function str(v) {
1269
- return v == null ? "" : String(v);
1270
- }
1271
- function bool(v) {
1272
- return v === true || v === 1 || v === "true";
1273
- }
1274
- function calculateCV(values) {
1275
- if (values.length === 0) return 0;
1276
- const mean = values.reduce((a, b) => a + b, 0) / values.length;
1277
- if (mean === 0) return 0;
1278
- const variance = values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
1279
- return Math.min(Math.sqrt(variance) / mean, 1);
1280
- }
1281
- function analyzeSeasonality(dates, options = {}) {
1282
- const { metric = "clicks" } = options;
1283
- if (dates.length === 0) return {
1284
- hasSeasonality: false,
1285
- strength: 0,
1286
- peakMonths: [],
1287
- troughMonths: [],
1288
- monthlyBreakdown: [],
1289
- insufficientData: true
1290
- };
1291
- const monthlyMap = /* @__PURE__ */ new Map();
1292
- for (const row of dates) {
1293
- const month = row.date.substring(0, 7);
1294
- const value = metric === "clicks" ? row.clicks : row.impressions;
1295
- monthlyMap.set(month, (monthlyMap.get(month) || 0) + value);
1296
- }
1297
- const months = Array.from(monthlyMap.keys()).sort();
1298
- const values = months.map((m) => monthlyMap.get(m) || 0);
1299
- const insufficientData = months.length < 12;
1300
- const totalValue = values.reduce((a, b) => a + b, 0);
1301
- const avgValue = values.length > 0 ? totalValue / values.length : 0;
1302
- const monthlyBreakdown = months.map((month, i) => {
1303
- const value = values[i] ?? 0;
1304
- const vsAverage = avgValue > 0 ? value / avgValue : 0;
1305
- return {
1306
- month,
1307
- value,
1308
- vsAverage,
1309
- isPeak: vsAverage > 1.5,
1310
- isTrough: vsAverage < .5
1311
- };
1312
- });
1313
- const peakMonths = [...new Set(monthlyBreakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
1314
- const troughMonths = [...new Set(monthlyBreakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
1315
- const strength = calculateCV(values);
1316
- return {
1317
- hasSeasonality: peakMonths.length > 0 || troughMonths.length > 0 || strength > .3,
1318
- strength,
1319
- peakMonths,
1320
- troughMonths,
1321
- monthlyBreakdown,
1322
- insufficientData
1323
- };
1324
- }
1325
- defineAnalyzer({
1326
- id: "seasonality",
1327
- buildSql(params) {
1328
- const { startDate, endDate } = periodOf(params);
1329
- return {
1330
- sql: `
1331
- WITH monthly AS (
1332
- SELECT
1333
- strftime(date, '%Y-%m') AS month,
1334
- CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
1335
- FROM read_parquet({{FILES}}, union_by_name = true)
1336
- WHERE date >= ? AND date <= ?
1337
- GROUP BY month
1338
- ),
1339
- stats AS (
1340
- SELECT
1341
- AVG(value) AS avg_val,
1342
- COALESCE(STDDEV_POP(value), 0.0) AS std_val,
1343
- CAST(COUNT(*) AS DOUBLE) AS month_count
1344
- FROM monthly
1345
- )
1346
- SELECT
1347
- m.month AS month,
1348
- m.value AS value,
1349
- CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
1350
- (s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
1351
- (s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
1352
- CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
1353
- s.month_count AS monthCount
1354
- FROM monthly m, stats s
1355
- ORDER BY m.month
1356
- `,
1357
- params: [startDate, endDate],
1358
- current: {
1359
- table: "pages",
1360
- partitions: enumeratePartitions(startDate, endDate)
1361
- }
1362
- };
1363
- },
1364
- reduceSql(rows) {
1365
- const arr = Array.isArray(rows) ? rows : [];
1366
- const breakdown = arr.map((r) => ({
1367
- month: str(r.month),
1368
- value: num(r.value),
1369
- vsAverage: num(r.vsAverage),
1370
- isPeak: bool(r.isPeak),
1371
- isTrough: bool(r.isTrough)
1372
- }));
1373
- const first = arr[0];
1374
- const strength = first ? num(first.strength) : 0;
1375
- const monthCount = first ? num(first.monthCount) : 0;
1376
- const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
1377
- const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
1378
- const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
1379
- const insufficientData = monthCount < 12;
1380
- return {
1381
- results: breakdown,
1382
- meta: {
1383
- total: breakdown.length,
1384
- hasSeasonality,
1385
- strength,
1386
- peakMonths,
1387
- troughMonths,
1388
- insufficientData
1389
- }
1390
- };
1391
- },
1392
- buildRows(params) {
1393
- return { dates: datesQueryState(periodOf(params), params.limit) };
1394
- },
1395
- reduceRows(rows, params) {
1396
- const result = analyzeSeasonality(Array.isArray(rows) ? rows : [], { metric: params.metric });
1397
- return {
1398
- results: result.monthlyBreakdown,
1399
- meta: { strength: result.strength }
1400
- };
1401
- }
1402
- });
1403
- const sortResults = createSorter((item, metric) => item[metric], "potentialClicks");
1404
- function analyzeStrikingDistance(keywords, options = {}) {
1405
- const { minPosition = 4, maxPosition = 20, minImpressions = 100, maxCtr = .05, sortBy = "potentialClicks", sortOrder = "desc" } = options;
1406
- const results = [];
1407
- for (const row of keywords) {
1408
- const position = num(row.position);
1409
- const impressions = num(row.impressions);
1410
- const ctr = num(row.ctr);
1411
- const clicks = num(row.clicks);
1412
- if (position < minPosition || position > maxPosition) continue;
1413
- if (impressions < minImpressions) continue;
1414
- if (ctr > maxCtr) continue;
1415
- const potentialClicks = Math.round(impressions * .15);
1416
- results.push({
1417
- keyword: row.query,
1418
- page: row.page ?? null,
1419
- clicks,
1420
- impressions,
1421
- ctr,
1422
- position,
1423
- potentialClicks
1424
- });
1425
- }
1426
- return sortResults(results, sortBy, sortOrder);
1427
- }
1428
- function keywordQuery(period, limit) {
1429
- return typedQuery(keywordsQueryState(period, limit));
1430
- }
1431
- function pageQuery(period, limit) {
1432
- return typedQuery(pagesQueryState(period, limit));
1433
- }
1434
- function dateQuery(period, limit) {
1435
- return typedQuery(datesQueryState(period, limit));
1436
- }
1437
- function definePortableAnalyzer(definition) {
1438
- return definition;
1439
- }
1440
- async function runPortableAnalyzer(source, definition, input, options, limit = 25e3) {
1441
- const requiredQueries = definition.requiredQueries(input, limit);
1442
- const entries = Object.entries(requiredQueries);
1443
- const resolvedRows = await Promise.all(entries.map(async ([key, spec]) => [key, await queryRows(source, spec)]));
1444
- return definition.run(Object.fromEntries(resolvedRows), options);
1445
- }
1446
- const PORTABLE_ANALYZERS = {
1447
- strikingDistance: definePortableAnalyzer({
1448
- requiredQueries: (period, limit) => ({ keywords: keywordQuery(period, limit) }),
1449
- run: ({ keywords }, options) => analyzeStrikingDistance(keywords, options)
1450
- }),
1451
- opportunity: definePortableAnalyzer({
1452
- requiredQueries: (period, limit) => ({ keywords: keywordQuery(period, limit) }),
1453
- run: ({ keywords }, options) => {
1454
- const { results } = opportunityAnalyzer.rows.reduce(keywords, { params: {
1455
- type: "opportunity",
1456
- minImpressions: options?.minImpressions
1457
- } });
1458
- return results;
1459
- }
1460
- }),
1461
- brandSegmentation: definePortableAnalyzer({
1462
- requiredQueries: (period, limit) => ({ keywords: keywordQuery(period, limit) }),
1463
- run: ({ keywords }, options) => analyzeBrandSegmentation(keywords, options)
1464
- }),
1465
- pageConcentration: definePortableAnalyzer({
1466
- requiredQueries: (period, limit) => ({ pages: pageQuery(period, limit) }),
1467
- run: ({ pages }, options) => analyzePageConcentration(pages, options)
1468
- }),
1469
- keywordConcentration: definePortableAnalyzer({
1470
- requiredQueries: (period, limit) => ({ keywords: keywordQuery(period, limit) }),
1471
- run: ({ keywords }, options) => analyzeKeywordConcentration(keywords, options)
1472
- }),
1473
- clustering: definePortableAnalyzer({
1474
- requiredQueries: (period, limit) => ({ keywords: keywordQuery(period, limit) }),
1475
- run: ({ keywords }, options) => analyzeClustering(keywords, options)
1476
- }),
1477
- seasonality: definePortableAnalyzer({
1478
- requiredQueries: (period, limit) => ({ dates: dateQuery(period, limit) }),
1479
- run: ({ dates }, options) => analyzeSeasonality(dates, options)
1480
- }),
1481
- decay: definePortableAnalyzer({
1482
- requiredQueries: (periods, limit) => ({
1483
- current: pageQuery(periods.current, limit),
1484
- previous: pageQuery(periods.previous, limit)
1485
- }),
1486
- run: ({ current, previous }, options) => analyzeDecay({
1487
- current,
1488
- previous
1489
- }, options)
1490
- }),
1491
- movers: definePortableAnalyzer({
1492
- requiredQueries: (periods, limit) => ({
1493
- current: keywordQuery(periods.current, limit),
1494
- previous: keywordQuery(periods.previous, limit)
1495
- }),
1496
- run: ({ current, previous }, options) => analyzeMovers({
1497
- current,
1498
- previous
1499
- }, options)
1500
- })
1501
- };
1502
- async function queryAnalyticsFromSource(source, period, options = {}) {
1503
- const limit = options.limit ?? 25e3;
1504
- const [keywords, pages, dates] = await Promise.all([
1505
- queryRows(source, keywordQuery(period, limit)),
1506
- queryRows(source, pageQuery(period, limit)),
1507
- queryRows(source, dateQuery(period, limit))
1508
- ]);
1509
- return {
1510
- keywords,
1511
- pages,
1512
- dates
1513
- };
1514
- }
1515
- async function queryComparisonFromSource(source, periods, options = {}) {
1516
- const [current, previous] = await Promise.all([queryAnalyticsFromSource(source, periods.current, options), queryAnalyticsFromSource(source, periods.previous, options)]);
1517
- return {
1518
- current,
1519
- previous
1520
- };
1521
- }
1522
- async function analyzeStrikingDistanceFromSource(source, period, options) {
1523
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.strikingDistance, period, options);
1524
- }
1525
- async function analyzeOpportunityFromSource(source, period, options) {
1526
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.opportunity, period, options);
1527
- }
1528
- async function analyzeBrandSegmentationFromSource(source, period, options) {
1529
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.brandSegmentation, period, options);
1530
- }
1531
- async function analyzePageConcentrationFromSource(source, period, options) {
1532
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.pageConcentration, period, options);
1533
- }
1534
- async function analyzeKeywordConcentrationFromSource(source, period, options) {
1535
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.keywordConcentration, period, options);
1536
- }
1537
- async function analyzeClusteringFromSource(source, period, options) {
1538
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.clustering, period, options);
1539
- }
1540
- async function analyzeSeasonalityFromSource(source, period, options) {
1541
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.seasonality, period, options);
1542
- }
1543
- async function analyzeDecayFromSource(source, periods, options) {
1544
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.decay, periods, options);
1545
- }
1546
- async function analyzeMoversFromSource(source, periods, options) {
1547
- return runPortableAnalyzer(source, PORTABLE_ANALYZERS.movers, periods, options);
1548
- }
1549
- export { AnalyzerCapabilityError, IN_MEMORY_DEFAULT_CAPABILITIES, analyzeBrandSegmentationFromSource, analyzeClusteringFromSource, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentrationFromSource, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentrationFromSource, analyzeSeasonalityFromSource, analyzeStrikingDistanceFromSource, createCompositeSource, createInMemoryQuerySource, queryAnalyticsFromSource, queryComparisonFromSource };
36
+ export { IN_MEMORY_DEFAULT_CAPABILITIES, createCompositeSource, createInMemoryQuerySource };