@gscdump/analysis 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1865 @@
1
+ import { createEngine as createBrowserQuerySource } from "@gscdump/engine-wasm";
2
+ import { assertDimensionsSupported, getDimensionFilters, getFilterDimensions, isSqlQuerySource, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, metricValue } from "@gscdump/engine/resolver";
3
+ import { buildLogicalPlan } from "gscdump/query/plan";
4
+ import { between, clicks, date, extractMetricFilters, extractSpecialOperatorFilters, gsc, page, query } from "gscdump/query";
5
+ import { enumeratePartitions } from "@gscdump/engine/planner";
6
+ import { METRIC_EXPR } from "@gscdump/engine/sql-fragments";
7
+ import { daysAgo } from "gscdump";
8
+ import { createEngine as createSqliteQuerySource } from "@gscdump/engine-sqlite";
9
/**
 * Error thrown when a query source cannot satisfy an analyzer's requirements.
 *
 * Instances expose `tool` (the analyzer id/type that was requested) and
 * `missing` (the capability names that were not available).
 */
var AnalyzerCapabilityError = class extends Error {
  /**
   * @param {string} tool - analyzer identifier used in the message
   * @param {string[]} missing - capability names the source lacks
   */
  constructor(tool, missing) {
    const wanted = missing.join(", ");
    super(`analyzer "${tool}" requires capabilities [${wanted}] not provided by source`);
    Object.assign(this, { tool, missing, name: "AnalyzerCapabilityError" });
  }
};
17
/**
 * Collect the capability names a query source provides.
 *
 * `executeSql` is detected from the presence of the method; every other name
 * is driven by a truthy flag on `source.capabilities` (note that the
 * `fileSets` flag is reported as "partitionedParquet").
 *
 * @param {object} source - query source with a `capabilities` object
 * @returns {Set<string>} capability names, in probe order
 */
function sourceCapabilities(source) {
  const flags = source.capabilities;
  const probes = [
    ["executeSql", source.executeSql],
    ["partitionedParquet", flags.fileSets],
    ["regex", flags.regex],
    ["windowTotals", flags.windowTotals],
    ["comparisonJoin", flags.comparisonJoin],
    ["attachedTables", flags.attachedTables],
  ];
  const enabled = probes.filter(([, on]) => on).map(([name]) => name);
  return new Set(enabled);
}
27
/**
 * Throw an AnalyzerCapabilityError when `caps` lacks any capability listed in
 * `analyzer.requires`; return normally otherwise.
 *
 * @param {{id: string, requires: string[]}} analyzer
 * @param {Set<string>} caps - capabilities offered by the source
 */
function assertSatisfies(analyzer, caps) {
  const missing = [];
  for (const capability of analyzer.requires) {
    if (!caps.has(capability)) missing.push(capability);
  }
  if (missing.length === 0) return;
  throw new AnalyzerCapabilityError(analyzer.id, missing);
}
31
/**
 * Resolve the analyzer for `params.type` from `registry`, verify the source
 * offers what it requires, build its plan and execute that plan.
 *
 * The second argument given to `registry.resolveAnalyzer` is true when the
 * source reports "executeSql" or "attachedTables".
 *
 * @throws {AnalyzerCapabilityError} when no analyzer resolves or a required
 *   capability is missing
 */
async function runAnalyzerFromSource(source, params, registry) {
  const caps = sourceCapabilities(source);
  const sqlCapable = caps.has("executeSql") || caps.has("attachedTables");
  const analyzer = registry.resolveAnalyzer(params.type, sqlCapable);
  if (!analyzer) {
    throw new AnalyzerCapabilityError(params.type, ["executeSql"]);
  }
  assertSatisfies(analyzer, caps);
  const plan = analyzer.build(params);
  const runPlan = plan.kind === "rows" ? runRowsPlanAgainstSource : runSqlPlanAgainstSource;
  return runPlan(source, analyzer, plan, params);
}
40
/**
 * Execute a "rows" plan: run every named query state through
 * `source.queryRows` concurrently, hand the name -> rows map to
 * `analyzer.reduce`, and tag the resulting meta with the tool name.
 *
 * @returns {Promise<{results: *, meta: object}>}
 */
async function runRowsPlanAgainstSource(source, analyzer, plan, params) {
  const names = Object.keys(plan.queries);
  const rowSets = await Promise.all(
    names.map(async (name) => source.queryRows(plan.queries[name].state)),
  );
  const rowMap = Object.fromEntries(names.map((name, index) => [name, rowSets[index]]));
  const reduced = analyzer.reduce(rowMap, { params });
  return {
    results: reduced.results,
    // Reducer-supplied meta is spread last, so it may override `tool`.
    meta: { tool: params.type, ...reduced.meta },
  };
}
53
/**
 * Build the placeholder -> file-set map for a SQL plan: `FILES` is always the
 * current set, `FILES_PREV` is added when the plan has a previous set, and each
 * `extraFiles` entry is exposed as `FILES_<key>`.
 */
function fileSetsFor(plan) {
  const previousPart = plan.previous ? { FILES_PREV: plan.previous } : {};
  const extraPart = Object.fromEntries(
    Object.entries(plan.extraFiles || {}).map(([key, fs]) => [`FILES_${key}`, fs]),
  );
  return { FILES: plan.current, ...previousPart, ...extraPart };
}
59
/**
 * Execute a "sql" plan: run the main statement, then each extra query in
 * order (sequentially), and pass the rows plus the named extras to
 * `analyzer.reduce`.
 *
 * File sets are only forwarded when the source advertises
 * `capabilities.fileSets`. Meta gains `source: "local"` when the source
 * advertises `capabilities.localSource`.
 *
 * @throws {AnalyzerCapabilityError} when the source has no `executeSql`, or the
 *   plan needs attached tables the source does not offer
 */
async function runSqlPlanAgainstSource(source, analyzer, plan, params) {
  const { capabilities } = source;
  if (!source.executeSql) {
    throw new AnalyzerCapabilityError(analyzer.id, ["executeSql"]);
  }
  if (plan.requiresAttachedTables && !capabilities.attachedTables) {
    throw new AnalyzerCapabilityError(analyzer.id, ["attachedTables"]);
  }

  const fileSets = capabilities.fileSets ? fileSetsFor(plan) : void 0;
  // A fresh options object is created for every statement.
  const execute = (sql, sqlParams) => source.executeSql(sql, sqlParams, fileSets ? { fileSets } : void 0);

  const rows = await execute(plan.sql, plan.params);
  const extras = {};
  for (const extra of plan.extraQueries || []) {
    extras[extra.name] = await execute(extra.sql, extra.params);
  }

  const { results, meta } = analyzer.reduce(rows, { params, extras });
  const origin = capabilities.localSource ? { source: "local" } : {};
  return {
    results,
    meta: { tool: params.type, ...origin, ...meta },
  };
}
83
/**
 * Alias of `runAnalyzerFromSource`: runs the analyzer selected by
 * `params.type` against `source` and resolves with its result.
 */
async function analyzeFromSource(source, params, registry) {
  const outcome = await runAnalyzerFromSource(source, params, registry);
  return outcome;
}
86
/**
 * Report whether `dim` names one of the four metrics
 * (clicks, impressions, ctr, position) rather than a dimension.
 */
function isMetricDimension$2(dim) {
  switch (dim) {
    case "clicks":
    case "impressions":
    case "ctr":
    case "position":
      return true;
    default:
      return false;
  }
}
94
// Query-level capability flags advertised by the engine-backed source.
const ENGINE_QUERY_CAPABILITIES = {
  regex: true,
  multiDataset: false,
  comparisonJoin: false,
  windowTotals: false,
};
// Source-level flags: the query flags above plus file-set support and the
// local-source marker.
const ENGINE_SOURCE_CAPABILITIES = Object.assign({}, ENGINE_QUERY_CAPABILITIES, {
  fileSets: true,
  localSource: true,
});
105
/**
 * Wrap an engine plus its context as a query source named "engine".
 *
 * - `queryRows(state)` validates the selected and filtered dimensions, rejects
 *   the derived `queryCanonical` dimension, and returns `engine.query(...).rows`.
 * - `executeSql(sql, params, opts)` requires `opts.fileSets.FILES` and forwards
 *   to `engine.runSQL`, using `FILES.table` as the table.
 *
 * @param {{engine: object, ctx: *}} options
 */
function createEngineQuerySource(options) {
  const { engine, ctx } = options;
  const derivedDimension = "queryCanonical";

  async function queryRows(state) {
    const filterDims = getFilterDimensions(state.filter, isMetricDimension$2);
    assertDimensionsSupported([...state.dimensions, ...filterDims], "stored", "engine query source");
    const usesDerived = state.dimensions.includes(derivedDimension) || filterDims.includes(derivedDimension);
    if (usesDerived) {
      throw new Error("engine query source does not support queryCanonical; use browser/sqlite query sources for derived dimensions");
    }
    const result = await engine.query(ctx, state);
    return result.rows;
  }

  async function executeSql(sql, params, opts) {
    const fileSets = opts == null ? void 0 : opts.fileSets;
    if (fileSets == null || !fileSets.FILES) {
      throw new Error("engine query source: executeSql requires opts.fileSets with a FILES entry");
    }
    const response = await engine.runSQL({
      ctx,
      table: fileSets.FILES.table,
      fileSets,
      sql,
      params: params ?? [],
    });
    return response.rows;
  }

  return {
    name: "engine",
    capabilities: ENGINE_SOURCE_CAPABILITIES,
    queryRows,
    executeSql,
  };
}
130
/**
 * Convenience wrapper: build an engine query source from `deps.engine` and
 * `ctx`, then run the analyzer selected by `params.type` against it.
 */
async function runAnalyzerWithEngine(deps, ctx, params, registry) {
  const { engine } = deps;
  const source = createEngineQuerySource({ engine, ctx });
  return runAnalyzerFromSource(source, params, registry);
}
136
/**
 * Drain an async iterable of row batches into one flat array.
 *
 * Rows are appended one at a time instead of with `push(...batch)`: spreading
 * passes every row as a separate call argument, which throws a RangeError once
 * a batch is larger than the engine's argument limit.
 *
 * @param {AsyncIterable<Iterable<*>>} gen - yields batches of rows
 * @returns {Promise<Array<*>>} every row, in arrival order
 */
async function collectRows(gen) {
  const out = [];
  for await (const batch of gen) {
    for (const row of batch) out.push(row);
  }
  return out;
}
141
/**
 * Query one dimension over `range` through `client` and return
 * `{ key, clicks, impressions, sum_position }` records.
 *
 * Rows whose dimension value is not a non-empty string are dropped. When the
 * query was not ordered by clicks, the records are sorted by clicks descending
 * locally. `sliceTop`, when numeric, caps the returned count.
 *
 * @param {object} opts - { client, siteUrl, dimension, range, orderByClicksDesc, limit, sliceTop }
 */
async function fetchGscTopN(opts) {
  const { client, siteUrl, dimension, range, orderByClicksDesc, limit, sliceTop } = opts;

  let builder = gsc.select(dimension).where(between(date, range.start, range.end));
  if (orderByClicksDesc) builder = builder.orderBy(clicks, "desc");
  if (typeof limit === "number") builder = builder.limit(limit);

  const fetched = await collectRows(client.query(siteUrl, builder));
  const ranked = [];
  for (const row of fetched) {
    const key = row[dimension.dimension];
    if (typeof key !== "string" || key === "") continue;
    const impressions = Number(row.impressions ?? 0);
    const position = Number(row.position ?? 0);
    ranked.push({
      key,
      clicks: Number(row.clicks ?? 0),
      impressions,
      // position is stored as an impression-weighted sum
      sum_position: position * impressions,
    });
  }

  if (!orderByClicksDesc) ranked.sort((a, b) => b.clicks - a.clicks);
  return typeof sliceTop === "number" ? ranked.slice(0, sliceTop) : ranked;
}
162
/**
 * Fetch per-date totals over `range` and return them sorted by date ascending.
 *
 * Each record is `{ date, clicks, impressions, sum_position,
 * anonymizedImpressionsPct }`, where `date` is the epoch-millisecond timestamp
 * of midnight UTC and `sum_position` is position multiplied by impressions.
 * `anonymizedImpressionsPct` is always 0 here.
 *
 * Metrics are coerced with `Number()` (as `fetchGscTopN` does) so string-typed
 * values cannot concatenate or poison arithmetic. Rows whose date cannot be
 * parsed are skipped, because a NaN timestamp makes the sort comparator
 * inconsistent.
 *
 * @param {object} opts - { client, siteUrl, range: { start, end } }
 */
async function fetchGscDaily(opts) {
  const { client, siteUrl, range } = opts;
  const builder = gsc.select(date).where(between(date, range.start, range.end));
  const rows = await collectRows(client.query(siteUrl, builder));

  const daily = [];
  for (const row of rows) {
    if (!row.date) continue;
    const timestamp = Date.parse(`${row.date}T00:00:00Z`);
    if (Number.isNaN(timestamp)) continue;
    const impressions = Number(row.impressions ?? 0);
    daily.push({
      date: timestamp,
      clicks: Number(row.clicks ?? 0),
      impressions,
      sum_position: Number(row.position ?? 0) * impressions,
      anonymizedImpressionsPct: 0,
    });
  }
  return daily.sort((a, b) => a.date - b.date);
}
178
// Names that identify metrics (as opposed to dimensions) in a filter.
const METRIC_NAMES = ["clicks", "impressions", "ctr", "position"];

/** Report whether `dim` is one of METRIC_NAMES. */
function isMetricDimension$1(dim) {
  return METRIC_NAMES.some((name) => name === dim);
}
187
/**
 * Apply a builder state's filters, ordering and paging to rows in memory.
 *
 * A row is kept when it matches every dimension filter and every metric
 * filter, and (if any special filter uses the "topLevel" operator) is a
 * top-level page. Rows are then sorted by `state.orderBy` (default: clicks,
 * descending; the "date" column compares as strings) and sliced by
 * `state.startRow` / `state.rowLimit`.
 *
 * @returns {Array<object>} a new array; `rows` is not mutated
 */
function applyBuilderStatePostProcessing(rows, state) {
  const dimensionFilters = getDimensionFilters(state.filter, isMetricDimension$1);
  const metricFilters = extractMetricFilters(state.filter);
  const specialFilters = extractSpecialOperatorFilters(state.filter);

  const keep = (row) => {
    for (const filter of dimensionFilters) {
      if (!matchesDimensionFilter(row, filter)) return false;
    }
    for (const filter of metricFilters) {
      if (!matchesMetricFilter(row, filter)) return false;
    }
    const wantsTopLevel = specialFilters.some((filter) => filter.operator === "topLevel");
    return !(wantsTopLevel && !matchesTopLevelPage(row));
  };

  const sortValue = (row, column) => (column === "date" ? String(row.date ?? "") : metricValue(row, column));
  const compare = (a, b) => {
    const column = state.orderBy?.column ?? "clicks";
    const ascending = (state.orderBy?.dir ?? "desc") === "asc";
    const left = sortValue(a, column);
    const right = sortValue(b, column);
    if (left === right) return 0;
    if (ascending) return left < right ? -1 : 1;
    return left > right ? -1 : 1;
  };

  const ordered = rows.filter(keep).sort(compare);
  const offset = Math.max(0, Number(state.startRow ?? 0));
  const limit = Math.max(0, Number((state.rowLimit ?? ordered.length) || 0));
  return ordered.slice(offset, offset + limit);
}
209
// Capability flags advertised by the GSC-API-backed query source.
const GSC_API_CAPABILITIES = {
  regex: true,
  multiDataset: false,
  comparisonJoin: false,
  windowTotals: false,
};

/** Report whether `dim` names a metric (clicks, impressions, ctr, position). */
function isMetricDimension(dim) {
  const metricNames = new Set(["clicks", "impressions", "ctr", "position"]);
  return metricNames.has(dim);
}
223
/**
 * Wrap an already-built query state in the minimal builder shape
 * (`{ getState }`) so it can be handed to code that expects a builder.
 */
function builderFromState(state) {
  const getState = () => state;
  return { getState };
}
226
/**
 * Create a query source named "gsc-api" that answers `queryRows` through
 * `client.query(siteUrl, builder)`.
 *
 * Before querying, the state is run through `buildLogicalPlan` against the API
 * capabilities (its return value is discarded) and the dimensions are checked
 * with `assertDimensionsSupported`. The fetched rows are then filtered, sorted
 * and paged locally.
 *
 * @param {{client: object, siteUrl: string}} options
 */
function createGscApiQuerySource(options) {
  const { client, siteUrl } = options;

  async function queryRows(state) {
    buildLogicalPlan(state, GSC_API_CAPABILITIES);
    const filterDims = getFilterDimensions(state.filter, isMetricDimension);
    assertDimensionsSupported([...state.dimensions, ...filterDims], "api", "gsc-api query source");
    const fetched = await collectRows(client.query(siteUrl, builderFromState(state)));
    return applyBuilderStatePostProcessing(fetched, state);
  }

  return {
    name: "gsc-api",
    capabilities: GSC_API_CAPABILITIES,
    queryRows,
  };
}
239
// Capabilities assumed for an in-memory source when the caller supplies none.
const IN_MEMORY_DEFAULT_CAPABILITIES = {
  regex: true,
  multiDataset: true,
  comparisonJoin: true,
  windowTotals: true,
};

/**
 * Create a query source named "memory" that delegates `queryRows` to
 * `options.queryRows`.
 *
 * @param {{queryRows: Function, capabilities?: object}} options
 */
function createInMemoryQuerySource(options) {
  const capabilities = options.capabilities ?? IN_MEMORY_DEFAULT_CAPABILITIES;
  // `options.queryRows` is looked up on every call, not captured up front.
  const queryRows = async (state) => options.queryRows(state);
  return { name: "memory", capabilities, queryRows };
}
254
// Row limit applied to the query-state builders below when none is given.
const DEFAULT_LIMIT$1 = 25000;

/** Query state selecting (query, page) rows within `period`, capped at `limit`. */
function keywordsQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(query, page);
  const bounded = selection.where(between(date, period.startDate, period.endDate));
  return bounded.limit(limit).getState();
}

/** Query state selecting page rows within `period`, capped at `limit`. */
function pagesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(page);
  const bounded = selection.where(between(date, period.startDate, period.endDate));
  return bounded.limit(limit).getState();
}

/** Query state selecting date rows within `period`, capped at `limit`. */
function datesQueryState(period, limit = DEFAULT_LIMIT$1) {
  const selection = gsc.select(date);
  const bounded = selection.where(between(date, period.startDate, period.endDate));
  return bounded.limit(limit).getState();
}
264
// Capabilities a SQL variant requires unless the definition overrides them.
const DEFAULT_SQL_REQUIRES = ["executeSql", "partitionedParquet"];

/**
 * Turn an analyzer definition into `{ id, sql, rows }`, where `sql` and `rows`
 * are the plan-building variants (or undefined when the matching builder is
 * absent).
 *
 * Each variant exposes `id`, `requires`, `build(params)` and
 * `reduce(rows, ctx)`. `reduceSql` / `reduceRows` take precedence over the
 * shared `reduce`. Before a reducer runs, a non-array `rows` object with exactly
 * one key is unwrapped to that key's value.
 *
 * @throws {Error} when a builder is supplied without any usable reducer
 */
function defineAnalyzer(opts) {
  const {
    id,
    reduce,
    reduceSql,
    reduceRows,
    buildSql,
    buildRows,
    sqlRequires = DEFAULT_SQL_REQUIRES,
    rowsRequires = [],
  } = opts;
  const sqlReducer = reduceSql ?? reduce;
  const rowsReducer = reduceRows ?? reduce;
  if (buildSql && !sqlReducer) throw new Error(`defineAnalyzer(${id}): buildSql requires reduce or reduceSql`);
  if (buildRows && !rowsReducer) throw new Error(`defineAnalyzer(${id}): buildRows requires reduce or reduceRows`);

  const unwrapRows = (rows) => (Array.isArray(rows) ? rows : pickSingle(rows) ?? rows);
  // Only `results` and `meta` of the reducer's return value are passed on.
  const runReducer = (reducer, rows, params, ctx) => {
    const { results, meta } = reducer(unwrapRows(rows), params, ctx);
    return { results, meta };
  };

  const makeSqlVariant = () => ({
    id,
    requires: sqlRequires,
    build(params) {
      const {
        sql,
        params: sqlParams,
        current,
        previous,
        extraFiles,
        extraQueries,
        requiresAttachedTables,
      } = buildSql(params);
      return {
        kind: "sql",
        sql,
        params: sqlParams,
        current,
        previous,
        extraFiles,
        extraQueries,
        requiresAttachedTables,
      };
    },
    reduce(rows, ctx) {
      return runReducer(sqlReducer, rows, ctx.params, { extras: ctx.extras });
    },
  });

  const makeRowsVariant = () => ({
    id,
    requires: rowsRequires,
    build(params) {
      const named = Object.entries(buildRows(params));
      return {
        kind: "rows",
        queries: Object.fromEntries(named.map(([name, state]) => [name, { state }])),
      };
    },
    reduce(rows, ctx) {
      return runReducer(rowsReducer, rows, ctx.params, {});
    },
  });

  return {
    id,
    sql: buildSql && sqlReducer ? makeSqlVariant() : void 0,
    rows: buildRows && rowsReducer ? makeRowsVariant() : void 0,
  };
}
320
/**
 * Return the value of the only own enumerable key of `rows`, or undefined when
 * the object has zero keys or more than one.
 */
function pickSingle(rows) {
  const [only, ...others] = Object.keys(rows);
  if (only === undefined || others.length > 0) return void 0;
  return rows[only];
}
324
/** Default period end: the value of `daysAgo(3)`. */
function defaultEndDate() {
  const lagDays = 3;
  return daysAgo(lagDays);
}

/** Default period start: the value of `daysAgo(31)`. */
function defaultStartDate() {
  const lagDays = 31;
  return daysAgo(lagDays);
}

/**
 * Resolve the analysis period from `params`, substituting the defaults for any
 * bound that is missing or otherwise falsy.
 *
 * @returns {{startDate: *, endDate: *}}
 */
function periodOf(params) {
  const { startDate, endDate } = params;
  return {
    startDate: startDate ? startDate : defaultStartDate(),
    endDate: endDate ? endDate : defaultEndDate(),
  };
}
336
/**
 * Resolve the current and previous periods for a comparison analysis.
 *
 * @returns {{current: object, previous: {startDate: *, endDate: *}}}
 * @throws {Error} when `prevStartDate` or `prevEndDate` is missing
 */
function comparisonOf(params) {
  const { prevStartDate, prevEndDate } = params;
  const hasPrevious = Boolean(prevStartDate) && Boolean(prevEndDate);
  if (!hasPrevious) {
    throw new Error(`${params.type} analysis requires prevStartDate and prevEndDate`);
  }
  const previous = { startDate: prevStartDate, endDate: prevEndDate };
  return { current: periodOf(params), previous };
}
346
/**
 * Coerce a loosely typed value to a number that is safe to aggregate.
 *
 * Numbers pass through, bigints are converted, and null/undefined become 0.
 * Anything that coerces to NaN (for example a non-numeric string) also becomes
 * 0, so one malformed cell cannot turn a whole sum into NaN or slip past
 * `< threshold` comparisons.
 *
 * @param {*} v
 * @returns {number} never NaN
 */
function num(v) {
  if (v == null) return 0;
  const coerced = typeof v === "number" ? v : Number(v);
  return Number.isNaN(coerced) ? 0 : coerced;
}
352
/**
 * Index rows into a Map using `key(row)` -> `value(row)`.
 *
 * Rows rejected by the optional `filter` predicate are skipped; when two rows
 * share a key, the later one wins.
 */
function buildPeriodMap(rows, key, value, filter) {
  const accept = filter ? filter : () => true;
  const byKey = new Map();
  for (const entry of rows) {
    if (accept(entry)) byKey.set(key(entry), value(entry));
  }
  return byKey;
}
360
/**
 * Build a sorter `(items, sortBy?, sortOrder?) => sortedCopy` that ranks items
 * by the number returned from `getValue(item, sortBy)`.
 *
 * Any `sortOrder` other than "desc" sorts ascending. The input is not mutated.
 */
function createSorter(getValue, defaultMetric, defaultOrder = "desc") {
  return (items, sortBy = defaultMetric, sortOrder = defaultOrder) => {
    const direction = sortOrder === "desc" ? -1 : 1;
    const compare = (left, right) => direction * (getValue(left, sortBy) - getValue(right, sortBy));
    const ranked = Array.from(items);
    ranked.sort(compare);
    return ranked;
  };
}
366
/**
 * Build a sorter `(items, sortBy?) => sortedCopy` that ranks items by their
 * numeric `sortBy` property, using the direction registered for that metric in
 * `orderByMetric` ("desc" sorts descending; anything else ascending).
 */
function createMetricSorter(defaultMetric, orderByMetric) {
  return (items, sortBy = defaultMetric) => {
    const direction = orderByMetric[sortBy] === "desc" ? -1 : 1;
    const ranked = Array.from(items);
    ranked.sort((left, right) => direction * (left[sortBy] - right[sortBy]));
    return ranked;
  };
}
372
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the text
 * can be embedded literally inside a pattern.
 */
function escapeRegexAlt(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (special) => `\\${special}`);
}
375
/** Stringify a value, mapping null and undefined to the empty string. */
function str$5(v) {
  if (v === null || v === undefined) return "";
  return String(v);
}
378
/**
 * Split keyword rows into brand and non-brand groups and summarise each.
 *
 * A row is "brand" when its lower-cased query contains any brand term
 * (case-insensitive substring match). Rows below `minImpressions` are ignored.
 *
 * Blank or whitespace-only brand terms are dropped: an empty string is a
 * substring of every query and would classify everything as brand. A missing
 * query is treated as the empty string rather than throwing.
 *
 * @param {Array<object>} keywords - rows with query / clicks / impressions
 * @param {{brandTerms: string[], minImpressions?: number}} options
 * @returns {{brand: object[], nonBrand: object[], summary: object}}
 */
function analyzeBrandSegmentation(keywords, options) {
  const { brandTerms, minImpressions = 10 } = options;
  const needles = brandTerms
    .map((term) => String(term ?? "").toLowerCase())
    .filter((term) => term.trim() !== "");

  const brand = [];
  const nonBrand = [];
  let brandClicks = 0;
  let nonBrandClicks = 0;
  let brandImpressions = 0;
  let nonBrandImpressions = 0;

  for (const row of keywords) {
    const impressions = num(row.impressions);
    if (impressions < minImpressions) continue;
    const haystack = String(row.query ?? "").toLowerCase();
    if (needles.some((term) => haystack.includes(term))) {
      brand.push(row);
      brandClicks += num(row.clicks);
      brandImpressions += impressions;
    } else {
      nonBrand.push(row);
      nonBrandClicks += num(row.clicks);
      nonBrandImpressions += impressions;
    }
  }

  const totalClicks = brandClicks + nonBrandClicks;
  return {
    brand,
    nonBrand,
    summary: {
      brandClicks,
      nonBrandClicks,
      brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
      brandImpressions,
      nonBrandImpressions,
    },
  };
}
403
/**
 * Keep only usable brand terms: strings that are not blank after trimming.
 * A blank term would add an empty alternative to the brand regex (and match
 * every query as a substring), labelling all rows as brand.
 */
function usableBrandTerms(terms) {
  return (terms ?? []).filter((term) => typeof term === "string" && term.trim() !== "");
}

/**
 * "brand" analyzer: labels each (query, page) row as brand or non-brand.
 *
 * - SQL path: aggregates by (query, url), keeps groups with enough impressions,
 *   and labels a row "brand" when the lower-cased query matches a regex built
 *   from the escaped brand terms.
 * - Rows path: fetches keyword rows and classifies them in memory with
 *   `analyzeBrandSegmentation`.
 *
 * Both paths throw when no usable brand term is supplied and both report
 * `meta.total` alongside `meta.summary`.
 */
defineAnalyzer({
  id: "brand",
  buildSql(params) {
    const brandTerms = usableBrandTerms(params.brandTerms);
    if (!brandTerms.length) throw new Error("Brand analysis requires brandTerms");
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const limit = params.limit ?? 1e4;
    // Terms are regex-escaped, so they match literally inside the alternation.
    const regex = `(${brandTerms.map((t) => escapeRegexAlt(t.toLowerCase())).join("|")})`;
    return {
      sql: `
        WITH agg AS (
          SELECT
            query,
            url AS page,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query, url
          HAVING SUM(impressions) >= ?
        )
        SELECT
          query, page, clicks, impressions, ctr, position,
          CASE WHEN regexp_matches(LOWER(query), ?) THEN 'brand' ELSE 'non-brand' END AS segment
        FROM agg
        ORDER BY clicks DESC
        LIMIT ${Number(limit)}
      `,
      params: [startDate, endDate, minImpressions, regex],
      current: {
        table: "page_keywords",
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows) {
    const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
      query: str$5(r.query),
      page: r.page == null ? void 0 : str$5(r.page),
      clicks: num(r.clicks),
      impressions: num(r.impressions),
      ctr: num(r.ctr),
      position: num(r.position),
      segment: str$5(r.segment),
    }));
    let brandClicks = 0;
    let nonBrandClicks = 0;
    let brandImpressions = 0;
    let nonBrandImpressions = 0;
    for (const r of normalized) {
      if (r.segment === "brand") {
        brandClicks += r.clicks;
        brandImpressions += r.impressions;
      } else {
        nonBrandClicks += r.clicks;
        nonBrandImpressions += r.impressions;
      }
    }
    const totalClicks = brandClicks + nonBrandClicks;
    return {
      results: normalized,
      meta: {
        total: normalized.length,
        summary: {
          brandClicks,
          nonBrandClicks,
          brandShare: totalClicks > 0 ? brandClicks / totalClicks : 0,
          brandImpressions,
          nonBrandImpressions,
        },
      },
    };
  },
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  reduceRows(rows, params) {
    const brandTerms = usableBrandTerms(params.brandTerms);
    if (!brandTerms.length) throw new Error("Brand analysis requires brandTerms");
    const result = analyzeBrandSegmentation(Array.isArray(rows) ? rows : [], {
      brandTerms,
      minImpressions: params.minImpressions,
    });
    const results = [
      ...result.brand.map((r) => ({ ...r, segment: "brand" })),
      ...result.nonBrand.map((r) => ({ ...r, segment: "non-brand" })),
    ];
    return {
      results,
      // `total` mirrors the SQL path so consumers can read it from either.
      meta: { total: results.length, summary: result.summary },
    };
  },
});
502
// Query prefixes treated as search-intent markers, in match-priority order.
const INTENT_PREFIXES = [
  "how to",
  "what is",
  "what are",
  "why is",
  "why do",
  "where to",
  "when to",
  "best",
  "top",
  "vs",
  "versus",
  "compare",
  "review",
  "buy",
  "cheap",
  "free",
  "near me",
];
// Regex source for the same prefixes, anchored at the start of the query and
// followed by whitespace or the end of the string. Derived from the list above
// so the two cannot drift apart.
const INTENT_PREFIXES_REGEX = `^(${INTENT_PREFIXES.join("|")})(\\s|$)`;
// Splits a query on runs of whitespace.
const WHITESPACE_RE = /\s+/;
523
/** Stringify a value; null and undefined become "". */
function str$4(v) {
  return v === null || v === undefined ? "" : String(v);
}

/**
 * Normalise a list that may arrive as an array or as a JSON-encoded string.
 * Arrays pass through; a non-empty string is parsed and returned if it decodes
 * to an array; everything else yields []. Malformed JSON throws.
 */
function parseJsonList$3(v) {
  if (Array.isArray(v)) return v;
  if (typeof v !== "string" || v === "") return [];
  const decoded = JSON.parse(v);
  return Array.isArray(decoded) ? decoded : [];
}
534
/**
 * Return the first intent prefix that `keyword` starts with, or null.
 *
 * The prefix must be followed by whitespace or the end of the keyword, the
 * same boundary the SQL path enforces with `(\s|$)`. A bare `startsWith` would
 * tag "bestiary" as "best" or "topology" as "top".
 *
 * @param {string} keyword
 * @returns {string|null} the matched prefix (lower case) or null
 */
function extractIntentPrefix(keyword) {
  const lower = keyword.toLowerCase();
  for (const prefix of INTENT_PREFIXES) {
    if (!lower.startsWith(prefix)) continue;
    const next = lower.charAt(prefix.length);
    if (next === "" || /\s/.test(next)) return prefix;
  }
  return null;
}
539
/**
 * Return the first `wordCount` lower-cased words of `keyword` joined by single
 * spaces, or null when the keyword does not have at least one more word than
 * that.
 */
function extractWordPrefix(keyword, wordCount = 2) {
  const tokens = [];
  for (const piece of keyword.toLowerCase().split(WHITESPACE_RE)) {
    if (piece) tokens.push(piece);
  }
  if (tokens.length < wordCount + 1) return null;
  const leading = tokens.slice(0, wordCount);
  return leading.join(" ");
}
544
/**
 * Group keywords into clusters by intent prefix and/or leading-word prefix.
 *
 * Keywords below `minImpressions` are ignored. With `clusterBy` "intent" or
 * "both", a keyword matching an intent prefix joins that intent cluster. With
 * "prefix" or "both", keywords that did not match an intent are grouped by
 * their leading words. Clusters smaller than `minClusterSize` are dropped and
 * the rest are returned sorted by total clicks, descending. `avgPosition` is
 * the unweighted mean of member positions.
 *
 * `unclustered` contains every eligible keyword that is not a member of a
 * returned cluster, including members of clusters dropped for being too small
 * (which would otherwise disappear from both lists). If a word-prefix cluster
 * has the same name as an existing cluster, its keywords are merged into that
 * cluster instead of replacing it.
 *
 * @param {Array<object>} keywords - rows with query / clicks / impressions / position
 * @param {{minClusterSize?: number, minImpressions?: number, clusterBy?: string}} [options]
 * @returns {{clusters: object[], unclustered: object[]}}
 */
function analyzeClustering(keywords, options = {}) {
  const { minClusterSize = 2, minImpressions = 10, clusterBy = "both" } = options;
  const filtered = keywords.filter((k) => num(k.impressions) >= minImpressions);
  const clusterMap = /* @__PURE__ */ new Map();
  // Queries claimed by an intent prefix; these never take part in prefix clustering.
  const intentAssigned = /* @__PURE__ */ new Set();

  if (clusterBy === "intent" || clusterBy === "both") {
    for (const kw of filtered) {
      const intent = extractIntentPrefix(kw.query);
      if (!intent) continue;
      const existing = clusterMap.get(intent);
      if (existing) existing.keywords.push(kw);
      else clusterMap.set(intent, { type: "intent", keywords: [kw] });
      intentAssigned.add(kw.query);
    }
  }

  if (clusterBy === "prefix" || clusterBy === "both") {
    const prefixMap = /* @__PURE__ */ new Map();
    for (const kw of filtered) {
      if (intentAssigned.has(kw.query)) continue;
      const prefix = extractWordPrefix(kw.query);
      if (!prefix) continue;
      const existing = prefixMap.get(prefix);
      if (existing) existing.push(kw);
      else prefixMap.set(prefix, [kw]);
    }
    for (const [prefix, kws] of prefixMap) {
      if (kws.length < minClusterSize) continue;
      const existing = clusterMap.get(prefix);
      if (existing) existing.keywords.push(...kws);
      else clusterMap.set(prefix, { type: "prefix", keywords: kws });
    }
  }

  const clusters = [];
  // Queries that belong to a cluster that is actually returned.
  const clusteredKeywords = /* @__PURE__ */ new Set();
  for (const [name, data] of clusterMap) {
    if (data.keywords.length < minClusterSize) continue;
    let totalClicks = 0;
    let totalImpressions = 0;
    let positionSum = 0;
    for (const kw of data.keywords) {
      totalClicks += num(kw.clicks);
      totalImpressions += num(kw.impressions);
      positionSum += num(kw.position);
      clusteredKeywords.add(kw.query);
    }
    clusters.push({
      clusterName: name,
      clusterType: data.type,
      keywords: data.keywords,
      totalClicks,
      totalImpressions,
      avgPosition: positionSum / data.keywords.length,
      keywordCount: data.keywords.length,
    });
  }
  clusters.sort((a, b) => b.totalClicks - a.totalClicks);

  return {
    clusters,
    unclustered: filtered.filter((kw) => !clusteredKeywords.has(kw.query)),
  };
}
602
/**
 * "clustering" analyzer: groups queries into intent and word-prefix clusters.
 *
 * - SQL path: aggregates per query, derives an intent prefix (via
 *   INTENT_PREFIXES_REGEX) and/or a two-word prefix (queries with at least
 *   three words), groups by the first non-null of the two, and keeps groups
 *   with at least `minClusterSize` members. Member keywords come back as JSON.
 * - Rows path: fetches keyword rows and clusters them with `analyzeClustering`.
 *
 * Both paths report `meta.total` and `meta.totalClusters`.
 */
defineAnalyzer({
  id: "clustering",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const minImpressions = params.minImpressions ?? 10;
    const minClusterSize = params.minClusterSize ?? 2;
    const clusterBy = params.clusterBy ?? "both";
    const doIntent = clusterBy === "intent" || clusterBy === "both";
    const doPrefix = clusterBy === "prefix" || clusterBy === "both";
    // A disabled strategy contributes a typed NULL so COALESCE falls through.
    const intentExpr = doIntent
      ? `NULLIF(regexp_extract(LOWER(query), '${INTENT_PREFIXES_REGEX}', 1), '')`
      : `CAST(NULL AS VARCHAR)`;
    const prefixExpr = doPrefix
      ? `CASE WHEN len(regexp_split_to_array(LOWER(query), '\\s+')) >= 3
          THEN array_to_string(list_slice(regexp_split_to_array(LOWER(query), '\\s+'), 1, 2), ' ')
          ELSE CAST(NULL AS VARCHAR) END`
      : `CAST(NULL AS VARCHAR)`;
    return {
      sql: `
        WITH agg AS (
          SELECT
            query,
            ${METRIC_EXPR.clicks} AS clicks,
            ${METRIC_EXPR.impressions} AS impressions,
            ${METRIC_EXPR.ctr} AS ctr,
            ${METRIC_EXPR.position} AS position
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY query
          HAVING SUM(impressions) >= ?
        ),
        classified AS (
          SELECT
            query, clicks, impressions, ctr, position,
            ${intentExpr} AS intent_prefix,
            ${prefixExpr} AS word_prefix
          FROM agg
        ),
        keyed AS (
          SELECT
            query, clicks, impressions, ctr, position,
            COALESCE(intent_prefix, word_prefix) AS cluster_name,
            CASE WHEN intent_prefix IS NOT NULL THEN 'intent' ELSE 'prefix' END AS cluster_type
          FROM classified
          WHERE COALESCE(intent_prefix, word_prefix) IS NOT NULL
        )
        SELECT
          cluster_name AS clusterName,
          any_value(cluster_type) AS clusterType,
          CAST(COUNT(*) AS DOUBLE) AS keywordCount,
          ${METRIC_EXPR.clicks} AS totalClicks,
          ${METRIC_EXPR.impressions} AS totalImpressions,
          AVG(position) AS avgPosition,
          to_json(list({ 'query': query, 'clicks': clicks, 'impressions': impressions, 'ctr': ctr, 'position': position })) AS keywords
        FROM keyed
        GROUP BY cluster_name
        HAVING COUNT(*) >= ?
        ORDER BY totalClicks DESC
      `,
      params: [startDate, endDate, minImpressions, minClusterSize],
      current: {
        table: "keywords",
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows) {
    const clusters = (Array.isArray(rows) ? rows : []).map((r) => ({
      clusterName: str$4(r.clusterName),
      clusterType: str$4(r.clusterType),
      keywordCount: num(r.keywordCount),
      totalClicks: num(r.totalClicks),
      totalImpressions: num(r.totalImpressions),
      avgPosition: num(r.avgPosition),
      keywords: parseJsonList$3(r.keywords).map((k) => ({
        query: str$4(k.query),
        clicks: num(k.clicks),
        impressions: num(k.impressions),
        ctr: num(k.ctr),
        position: num(k.position),
      })),
    }));
    return {
      results: clusters,
      meta: {
        total: clusters.length,
        totalClusters: clusters.length,
      },
    };
  },
  buildRows(params) {
    return { keywords: keywordsQueryState(periodOf(params), params.limit) };
  },
  reduceRows(rows, params) {
    const result = analyzeClustering(Array.isArray(rows) ? rows : [], {
      clusterBy: params.clusterBy,
      minClusterSize: params.minClusterSize,
      minImpressions: params.minImpressions,
    });
    return {
      results: result.clusters,
      // `total` mirrors the SQL path so consumers can read it from either.
      meta: {
        total: result.clusters.length,
        totalClusters: result.clusters.length,
      },
    };
  },
});
708
/** Convert a value to a string, treating null/undefined as "". */
function str$3(v) {
  if (v == null) return "";
  return String(v);
}

/**
 * Accept a list either as an array or as JSON text. Returns the array itself,
 * the parsed array, or [] for anything else (including JSON that is not an
 * array). Invalid JSON text throws.
 */
function parseJsonList$2(v) {
  if (Array.isArray(v)) return v;
  const isJsonText = typeof v === "string" && v.length > 0;
  if (!isJsonText) return [];
  const parsed = JSON.parse(v);
  if (!Array.isArray(parsed)) return [];
  return parsed;
}
719
/**
 * Gini coefficient of a list of numbers, computed from the values sorted
 * ascending as sum((2*rank - n - 1) * value) / (n * total), rank starting at 1.
 * Returns 0 for an empty list or when the values sum to 0.
 */
function calculateGini(values) {
  const n = values.length;
  if (n === 0) return 0;
  const ascending = Array.from(values).sort((x, y) => x - y);
  let total = 0;
  for (const value of ascending) total += value;
  if (total === 0) return 0;
  const weighted = ascending.reduce((acc, value, index) => acc + (2 * (index + 1) - n - 1) * value, 0);
  return weighted / (n * total);
}
729
/**
 * Herfindahl-Hirschman index: the sum of squared percentage shares, where each
 * input share is a fraction that is scaled by 100 before squaring.
 */
function calculateHHI(shares) {
  let index = 0;
  for (const share of shares) {
    index += (share * 100) ** 2;
  }
  return index;
}
732
/**
 * Measure how concentrated clicks are across `{ key, clicks }` items.
 *
 * Returns the Gini coefficient, the HHI, the click share held by the top `topN`
 * items (default 10), those top items with their individual shares, the item
 * and click totals, and a risk level derived from the HHI: above 2500 is
 * "high", above 1500 is "medium", otherwise "low".
 *
 * @param {Array<{key: *, clicks: number}>} items
 * @param {{topN?: number}} [options]
 */
function analyzeConcentration(items, options = {}) {
  const { topN = 10 } = options;
  if (items.length === 0) {
    return {
      giniCoefficient: 0,
      hhi: 0,
      topNConcentration: 0,
      topNItems: [],
      totalItems: 0,
      totalClicks: 0,
      riskLevel: "low",
    };
  }

  const ranked = Array.from(items).sort((a, b) => b.clicks - a.clicks);
  let totalClicks = 0;
  for (const item of ranked) totalClicks += item.clicks;
  const hasClicks = totalClicks > 0;

  const giniCoefficient = calculateGini(ranked.map((item) => item.clicks));
  const hhi = calculateHHI(hasClicks ? ranked.map((item) => item.clicks / totalClicks) : []);

  const topNItems = ranked.slice(0, topN).map(({ key, clicks }) => ({
    key,
    clicks,
    share: hasClicks ? clicks / totalClicks : 0,
  }));
  let topNClicks = 0;
  for (const item of topNItems) topNClicks += item.clicks;

  const riskLevel = hhi > 2500 ? "high" : hhi > 1500 ? "medium" : "low";
  return {
    giniCoefficient,
    hhi,
    topNConcentration: hasClicks ? topNClicks / totalClicks : 0,
    topNItems,
    totalItems: items.length,
    totalClicks,
    riskLevel,
  };
}
769
/** Click concentration across page rows, keyed by each row's `page`. */
function analyzePageConcentration(pages, options) {
  const items = pages.map(({ page, clicks }) => ({ key: page, clicks: num(clicks) }));
  return analyzeConcentration(items, options);
}

/** Click concentration across keyword rows, keyed by each row's `query`. */
function analyzeKeywordConcentration(keywords, options) {
  const items = keywords.map(({ query, clicks }) => ({ key: query, clicks: num(clicks) }));
  return analyzeConcentration(items, options);
}
781
/**
 * Resolve the dimension a concentration request targets. Only an explicit
 * "keywords" selects keywords; anything else (including a missing or
 * unrecognised value) means "pages". Every step of the analyzer uses this one
 * rule so the SQL path and the rows path cannot analyse different dimensions
 * for the same request.
 */
function concentrationDimension(params) {
  return params.dimension === "keywords" ? "keywords" : "pages";
}

/**
 * "concentration" analyzer: how concentrated clicks are across pages or
 * keywords (Gini, HHI, top-N share, risk level).
 *
 * - SQL path: computes every figure in one statement over items that have at
 *   least one click and returns a single row; the top-N list arrives as JSON.
 * - Rows path: fetches page or keyword rows and computes the same figures in
 *   memory.
 *
 * Both paths return one result object and report `meta.total` (always 1) and
 * `meta.dimension`.
 */
defineAnalyzer({
  id: "concentration",
  buildSql(params) {
    const { startDate, endDate } = periodOf(params);
    const dim = concentrationDimension(params);
    const topN = params.topN ?? 10;
    const table = dim === "keywords" ? "keywords" : "pages";
    const keyCol = dim === "keywords" ? "query" : "url";
    return {
      sql: `
        WITH items AS (
          SELECT
            ${keyCol} AS key,
            ${METRIC_EXPR.clicks} AS clicks
          FROM read_parquet({{FILES}}, union_by_name = true)
          WHERE date >= ? AND date <= ?
          GROUP BY ${keyCol}
          HAVING SUM(clicks) > 0
        ),
        totals AS (
          SELECT SUM(clicks) AS total_clicks, COUNT(*) AS total_items FROM items
        ),
        ranked AS (
          SELECT
            i.key, i.clicks,
            i.clicks / NULLIF(t.total_clicks, 0) AS share,
            ROW_NUMBER() OVER (ORDER BY i.clicks DESC, i.key ASC) AS rnk_desc,
            ROW_NUMBER() OVER (ORDER BY i.clicks ASC, i.key ASC) AS rnk_asc,
            t.total_clicks AS tclicks,
            t.total_items AS titems
          FROM items i, totals t
        ),
        gini_num AS (
          SELECT SUM((2.0 * rnk_asc - titems - 1) * clicks) AS weighted_sum FROM ranked
        ),
        hhi_calc AS (
          SELECT SUM(POWER(share * 100, 2)) AS hhi FROM ranked
        ),
        top_list AS (
          SELECT
            list({ 'key': key, 'clicks': clicks, 'share': share } ORDER BY clicks DESC, key ASC) AS items,
            SUM(clicks) AS top_clicks
          FROM ranked WHERE rnk_desc <= ?
        )
        SELECT
          COALESCE(
            (SELECT weighted_sum FROM gini_num)
              / NULLIF((SELECT total_items FROM totals) * (SELECT total_clicks FROM totals), 0),
            0.0
          ) AS giniCoefficient,
          COALESCE((SELECT hhi FROM hhi_calc), 0.0) AS hhi,
          COALESCE(
            CAST((SELECT top_clicks FROM top_list) AS DOUBLE)
              / NULLIF((SELECT total_clicks FROM totals), 0),
            0.0
          ) AS topNConcentration,
          COALESCE((SELECT to_json(items) FROM top_list), '[]') AS topNItems,
          COALESCE((SELECT total_items FROM totals), 0) AS totalItems,
          COALESCE((SELECT total_clicks FROM totals), 0.0) AS totalClicks,
          CASE
            WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 2500 THEN 'high'
            WHEN COALESCE((SELECT hhi FROM hhi_calc), 0.0) > 1500 THEN 'medium'
            ELSE 'low'
          END AS riskLevel
      `,
      params: [startDate, endDate, topN],
      current: {
        table,
        partitions: enumeratePartitions(startDate, endDate),
      },
    };
  },
  reduceSql(rows, params) {
    // The statement yields a single row; fall back to an empty object if absent.
    const r = (Array.isArray(rows) ? rows : [])[0] ?? {};
    const topRaw = parseJsonList$2(r.topNItems);
    return {
      results: [{
        giniCoefficient: num(r.giniCoefficient),
        hhi: num(r.hhi),
        topNConcentration: num(r.topNConcentration),
        topNItems: topRaw.map((t) => ({
          key: str$3(t.key),
          clicks: num(t.clicks),
          share: num(t.share),
        })),
        totalItems: num(r.totalItems),
        totalClicks: num(r.totalClicks),
        riskLevel: str$3(r.riskLevel),
      }],
      meta: {
        total: 1,
        dimension: concentrationDimension(params),
      },
    };
  },
  buildRows(params) {
    const period = periodOf(params);
    return concentrationDimension(params) === "pages"
      ? { pages: pagesQueryState(period, params.limit) }
      : { keywords: keywordsQueryState(period, params.limit) };
  },
  reduceRows(rows, params) {
    const dim = concentrationDimension(params);
    const arr = Array.isArray(rows) ? rows : rows[dim] ?? [];
    const analyze = dim === "pages" ? analyzePageConcentration : analyzeKeywordConcentration;
    return {
      results: [analyze(arr, { topN: params.topN })],
      // `total` mirrors the SQL path so consumers can read it from either.
      meta: { total: 1, dimension: dim },
    };
  },
});
897
/**
 * Sorter applied to decay results by `analyzeDecay`.
 * Built with "lostClicks" as the first argument and a per-metric direction
 * table as the second.
 * NOTE(review): presumably the first argument is the fallback sort metric —
 * confirm against `createMetricSorter`.
 */
const sortResults$2 = createMetricSorter("lostClicks", {
	"lostClicks": "desc",
	"declinePercent": "desc",
	"currentClicks": "asc"
});
902
/**
 * Coerces a value to a string; `null` and `undefined` become "".
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$2(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
905
/**
 * Normalizes a value that is either already an array or a JSON-encoded array.
 * Arrays are returned as-is; non-empty strings are parsed with `JSON.parse`
 * (which throws on malformed JSON); everything else yields an empty array.
 * @param {unknown} v - array, JSON string, or anything else
 * @returns {Array} the list, or [] when `v` does not describe one
 */
function parseJsonList$1(v) {
	if (Array.isArray(v)) return v;
	const isNonEmptyString = typeof v === "string" && v.length > 0;
	if (!isNonEmptyString) return [];
	const decoded = JSON.parse(v);
	if (Array.isArray(decoded)) return decoded;
	return [];
}
913
/**
 * Finds pages whose clicks dropped between a previous and a current period.
 *
 * A page is reported when it passes the `minPreviousClicks` filter on the
 * previous period, lost at least one click, and lost at least `threshold`
 * (a fraction of its previous clicks). Pages missing from the current
 * period are treated as having 0 clicks and position 0.
 *
 * @param {{current: Iterable, previous: Iterable}} input - per-page rows for both periods
 * @param {{minPreviousClicks?: number, threshold?: number, sortBy?: string}} [options]
 * @returns the decayed pages, ordered by `sortResults$2`
 */
function analyzeDecay(input, options = {}) {
	const { minPreviousClicks = 50, threshold = .2, sortBy = "lostClicks" } = options;
	const pageOf = (row) => row.page;
	const metricsOf = (row) => ({
		clicks: num(row.clicks),
		position: num(row.position)
	});
	const currentByPage = buildPeriodMap(input.current, pageOf, metricsOf);
	const previousByPage = buildPeriodMap(input.previous, pageOf, metricsOf, (row) => num(row.clicks) >= minPreviousClicks);
	const decayed = [];
	for (const [page, before] of previousByPage) {
		const after = currentByPage.get(page) || {
			clicks: 0,
			position: 0
		};
		const lostClicks = before.clicks - after.clicks;
		const declinePercent = before.clicks > 0 ? lostClicks / before.clicks : 0;
		const hasDecayed = lostClicks > 0 && declinePercent >= threshold;
		if (!hasDecayed) continue;
		decayed.push({
			page,
			currentClicks: after.clicks,
			previousClicks: before.clicks,
			lostClicks,
			declinePercent,
			currentPosition: after.position,
			previousPosition: before.position,
			positionDrop: after.position - before.position
		});
	}
	return sortResults$2(decayed, sortBy);
}
944
/**
 * Registers the "decay" analyzer: pages that lost clicks between the
 * previous and current comparison periods.
 *
 * `buildSql`/`reduceSql` form the SQL path, `buildRows`/`reduceRows` the
 * row path that delegates to `analyzeDecay`.
 * NOTE(review): `{{FILES}}` / `{{FILES_PREV}}` are presumably substituted by
 * the engine with the parquet files of `current` / `previous` — confirm.
 */
defineAnalyzer({
	id: "decay",
	/** Builds the SQL text, its positional parameters and the partitions to read. */
	buildSql(params) {
		const periods = comparisonOf(params);
		const cur = periods.current;
		const prev = periods.previous;
		const minPreviousClicks = params.minPreviousClicks ?? 50;
		const threshold = params.threshold ?? .2;
		const limit = params.limit ?? 2e3;
		const sql = `
WITH cur AS (
SELECT
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY url
),
prev AS (
SELECT
url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY url
HAVING SUM(clicks) >= ?
),
weekly AS (
SELECT url, date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions
FROM (
SELECT url, date, clicks, impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
UNION ALL
SELECT url, date, clicks, impressions
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
)
GROUP BY url, week
),
series_by_url AS (
SELECT url, to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM weekly GROUP BY url
),
joined AS (
SELECT
p.url AS page,
COALESCE(c.clicks, 0.0) AS currentClicks,
p.clicks AS previousClicks,
(p.clicks - COALESCE(c.clicks, 0.0)) AS lostClicks,
(p.clicks - COALESCE(c.clicks, 0.0)) / NULLIF(p.clicks, 0) AS declinePercent,
COALESCE(c.position, 0.0) AS currentPosition,
p.position AS previousPosition,
(COALESCE(c.position, 0.0) - p.position) AS positionDrop,
s.seriesJson
FROM prev p
LEFT JOIN cur c ON p.url = c.url
LEFT JOIN series_by_url s ON p.url = s.url
)
SELECT *
FROM joined
WHERE declinePercent >= ? AND lostClicks > 0
ORDER BY lostClicks DESC
LIMIT ${Number(limit)}
`;
		// Positional parameters follow the order of the `?` markers above:
		// cur, prev (+ HAVING), then the weekly CTE's cur/prev, then the final filter.
		const curRange = [cur.startDate, cur.endDate];
		const prevRange = [prev.startDate, prev.endDate];
		return {
			sql,
			params: [
				...curRange,
				...prevRange,
				minPreviousClicks,
				...curRange,
				...prevRange,
				threshold
			],
			current: {
				table: "pages",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "pages",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	/** Normalizes SQL result rows (including the JSON weekly series) into result objects. */
	reduceSql(rows) {
		const list = Array.isArray(rows) ? rows : [];
		const toSeriesPoint = (point) => ({
			week: str$2(point.week),
			clicks: num(point.clicks),
			impressions: num(point.impressions)
		});
		const toResult = (row) => ({
			page: str$2(row.page),
			currentClicks: num(row.currentClicks),
			previousClicks: num(row.previousClicks),
			lostClicks: num(row.lostClicks),
			declinePercent: num(row.declinePercent),
			currentPosition: num(row.currentPosition),
			previousPosition: num(row.previousPosition),
			positionDrop: num(row.positionDrop),
			series: parseJsonList$1(row.seriesJson).map(toSeriesPoint)
		});
		return {
			results: list.map(toResult),
			meta: { total: list.length }
		};
	},
	/** Declares the page queries needed for both periods on the row path. */
	buildRows(params) {
		const periods = comparisonOf(params);
		return {
			current: pagesQueryState(periods.current, params.limit),
			previous: pagesQueryState(periods.previous, params.limit)
		};
	},
	/** Runs `analyzeDecay` over a `{ current, previous }` row map; anything else counts as empty. */
	reduceRows(rows, params) {
		const isRowMap = rows && !Array.isArray(rows);
		const byPeriod = isRowMap ? rows : {
			current: [],
			previous: []
		};
		const decayInput = {
			current: byPeriod.current ?? [],
			previous: byPeriod.previous ?? []
		};
		const results = analyzeDecay(decayInput, {
			minPreviousClicks: params.minPreviousClicks,
			threshold: params.threshold
		});
		return {
			results,
			meta: { total: results.length }
		};
	}
});
1084
/**
 * Percentage change from `previous` to `current`.
 * With a zero baseline the result is 100 when `current` is positive and 0 otherwise.
 * @param {number} current
 * @param {number} previous
 * @returns {number} change in percent (e.g. 50 for +50%)
 */
function percentDifference(current, previous) {
	if (previous !== 0) return (current - previous) / previous * 100;
	return current > 0 ? 100 : 0;
}
1088
/**
 * Coerces a value to a string; `null` and `undefined` become "".
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str$1(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
1091
/**
 * Normalizes a value that is either already an array or a JSON-encoded array.
 * Arrays are returned as-is; non-empty strings are parsed with `JSON.parse`
 * (which throws on malformed JSON); everything else yields an empty array.
 * @param {unknown} v - array, JSON string, or anything else
 * @returns {Array} the list, or [] when `v` does not describe one
 */
function parseJsonList(v) {
	if (Array.isArray(v)) return v;
	const isNonEmptyString = typeof v === "string" && v.length > 0;
	if (!isNonEmptyString) return [];
	const decoded = JSON.parse(v);
	if (Array.isArray(decoded)) return decoded;
	return [];
}
1099
/**
 * Classifies keywords of the current period as rising, declining or stable
 * relative to a baseline period.
 *
 * Baseline clicks and impressions are divided by `input.normalizationFactor`
 * (default 1). Rows with fewer than `minImpressions` impressions are skipped.
 * A keyword is rising/declining when its click change, as a fraction, is at
 * least `changeThreshold` in that direction; otherwise it is stable.
 *
 * @param {{current: Iterable, previous: Iterable, normalizationFactor?: number}} input
 * @param {{changeThreshold?: number, minImpressions?: number, sortBy?: string}} [options]
 * @returns {{rising: Array, declining: Array, stable: Array}}
 */
function analyzeMovers(input, options = {}) {
	const { changeThreshold = .2, minImpressions = 50, sortBy = "clicksChange" } = options;
	const normFactor = input.normalizationFactor ?? 1;
	const baselineByQuery = buildPeriodMap(input.previous, (r) => r.query, (r) => ({
		clicks: num(r.clicks) / normFactor,
		impressions: num(r.impressions) / normFactor,
		position: num(r.position),
		page: r.page ?? null
	}));
	// First page seen per query wins; current rows take precedence over previous ones.
	const pageByQuery = /* @__PURE__ */ new Map();
	const rememberFirstPage = (rows) => {
		for (const row of rows) {
			if (row.page && !pageByQuery.has(row.query)) pageByQuery.set(row.query, row.page);
		}
	};
	rememberFirstPage(input.current);
	rememberFirstPage(input.previous);
	const emptyBaseline = {
		clicks: 0,
		impressions: 0,
		position: 0,
		page: null
	};
	const rising = [];
	const declining = [];
	const stable = [];
	for (const row of input.current) {
		const impressions = num(row.impressions);
		const clicks = num(row.clicks);
		const position = num(row.position);
		if (impressions < minImpressions) continue;
		const baseline = baselineByQuery.get(row.query) || emptyBaseline;
		const clicksChangePercent = percentDifference(clicks, baseline.clicks);
		const impressionsChangePercent = percentDifference(impressions, baseline.impressions);
		const roundedBaselineClicks = Math.round(baseline.clicks);
		const entry = {
			keyword: row.query,
			page: pageByQuery.get(row.query) ?? null,
			recentClicks: clicks,
			recentImpressions: impressions,
			recentPosition: position,
			baselineClicks: roundedBaselineClicks,
			baselineImpressions: Math.round(baseline.impressions),
			baselinePosition: baseline.position,
			clicksChange: clicks - roundedBaselineClicks,
			clicksChangePercent,
			impressionsChangePercent,
			positionChange: position - baseline.position
		};
		const isSignificant = Math.abs(clicksChangePercent / 100) >= changeThreshold;
		if (isSignificant && clicksChangePercent > 0) rising.push(entry);
		else if (isSignificant && clicksChangePercent < 0) declining.push(entry);
		else stable.push(entry);
	}
	// Comparators keyed by `sortBy`; unknown keys fall back to the click-change ordering.
	const descendingBy = (field) => (a, b) => b[field] - a[field];
	const descendingByMagnitude = (field) => (a, b) => Math.abs(b[field]) - Math.abs(a[field]);
	const comparators = /* @__PURE__ */ new Map([
		["clicks", descendingBy("recentClicks")],
		["impressions", descendingBy("recentImpressions")],
		["clicksChange", descendingByMagnitude("clicksChangePercent")],
		["impressionsChange", descendingByMagnitude("impressionsChangePercent")],
		["positionChange", descendingByMagnitude("positionChange")]
	]);
	const compare = comparators.get(sortBy) ?? comparators.get("clicksChange");
	rising.sort(compare);
	declining.sort(compare);
	stable.sort(descendingBy("recentClicks"));
	return {
		rising,
		declining,
		stable
	};
}
1165
/**
 * Registers the "movers" analyzer: keywords whose clicks rose or declined
 * between the previous and current comparison periods.
 *
 * `buildSql`/`reduceSql` form the SQL path (grouped by query and url);
 * `buildRows`/`reduceRows` form the row path that delegates to `analyzeMovers`.
 * Both paths return only rising and declining entries in `results` and
 * report the same `meta` keys: total, rising, declining, stable.
 * NOTE(review): `{{FILES}}` / `{{FILES_PREV}}` are presumably substituted by
 * the engine with the parquet files of `current` / `previous` — confirm.
 */
defineAnalyzer({
	id: "movers",
	/** Builds the SQL text, its positional parameters and the partitions to read. */
	buildSql(params) {
		const { current: cur, previous: prev } = comparisonOf(params);
		const minImpressions = params.minImpressions ?? 50;
		const changeThreshold = params.changeThreshold ?? .2;
		const limit = params.limit ?? 2e3;
		return {
			sql: `
WITH cur AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
prev AS (
SELECT
query, url,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
),
weekly AS (
SELECT query, url, date_trunc('week', CAST(date AS DATE)) AS week,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions
FROM (
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
UNION ALL
SELECT query, url, date, clicks, impressions
FROM read_parquet({{FILES_PREV}}, union_by_name = true)
WHERE date >= ? AND date <= ?
)
GROUP BY query, url, week
),
series_by_entity AS (
SELECT query, url, to_json(list({
'week': strftime(week, '%Y-%m-%d'),
'clicks': clicks,
'impressions': impressions
} ORDER BY week)) AS seriesJson
FROM weekly GROUP BY query, url
),
joined AS (
SELECT
c.query AS keyword,
c.url AS page,
c.clicks AS recentClicks,
c.impressions AS recentImpressions,
c.position AS recentPosition,
COALESCE(p.clicks, 0.0) AS baselineClicks,
COALESCE(p.impressions, 0.0) AS baselineImpressions,
COALESCE(p.position, 0.0) AS baselinePosition,
(c.clicks - COALESCE(p.clicks, 0.0)) AS clicksChange,
CASE
WHEN COALESCE(p.clicks, 0.0) = 0 THEN CASE WHEN c.clicks > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.clicks - p.clicks) * 100.0 / p.clicks
END AS clicksChangePercent,
CASE
WHEN COALESCE(p.impressions, 0.0) = 0 THEN CASE WHEN c.impressions > 0 THEN 100.0 ELSE 0.0 END
ELSE (c.impressions - p.impressions) * 100.0 / p.impressions
END AS impressionsChangePercent,
(c.position - COALESCE(p.position, 0.0)) AS positionChange,
s.seriesJson
FROM cur c
LEFT JOIN prev p ON c.query = p.query AND c.url = p.url
LEFT JOIN series_by_entity s ON c.query = s.query AND c.url = s.url
WHERE c.impressions >= ?
)
SELECT *,
CASE
WHEN clicksChangePercent > 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'rising'
WHEN clicksChangePercent < 0 AND ABS(clicksChangePercent) / 100.0 >= ? THEN 'declining'
ELSE 'stable'
END AS direction
FROM joined
ORDER BY ABS(clicksChangePercent) DESC
LIMIT ${Number(limit)}
`,
			// Positional parameters follow the order of the `?` markers above.
			params: [
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				cur.startDate,
				cur.endDate,
				prev.startDate,
				prev.endDate,
				minImpressions,
				changeThreshold,
				changeThreshold
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(cur.startDate, cur.endDate)
			},
			previous: {
				table: "page_keywords",
				partitions: enumeratePartitions(prev.startDate, prev.endDate)
			}
		};
	},
	/** Normalizes SQL rows and splits them by the `direction` column computed in SQL. */
	reduceSql(rows) {
		const normalized = (Array.isArray(rows) ? rows : []).map((r) => ({
			keyword: str$1(r.keyword),
			page: r.page == null ? null : str$1(r.page),
			recentClicks: num(r.recentClicks),
			recentImpressions: num(r.recentImpressions),
			recentPosition: num(r.recentPosition),
			baselineClicks: Math.round(num(r.baselineClicks)),
			baselineImpressions: Math.round(num(r.baselineImpressions)),
			baselinePosition: num(r.baselinePosition),
			clicksChange: num(r.clicksChange),
			clicksChangePercent: num(r.clicksChangePercent),
			impressionsChangePercent: num(r.impressionsChangePercent),
			positionChange: num(r.positionChange),
			direction: str$1(r.direction),
			series: parseJsonList(r.seriesJson).map((s) => ({
				week: str$1(s.week),
				clicks: num(s.clicks),
				impressions: num(s.impressions)
			}))
		}));
		const rising = normalized.filter((r) => r.direction === "rising");
		const declining = normalized.filter((r) => r.direction === "declining");
		const stable = normalized.filter((r) => r.direction === "stable");
		const combined = [...rising, ...declining];
		return {
			results: combined,
			meta: {
				total: combined.length,
				rising: rising.length,
				declining: declining.length,
				stable: stable.length
			}
		};
	},
	/** Declares the keyword queries needed for both periods on the row path. */
	buildRows(params) {
		const { current, previous } = comparisonOf(params);
		return {
			current: keywordsQueryState(current, params.limit),
			previous: keywordsQueryState(previous, params.limit)
		};
	},
	/** Runs `analyzeMovers` over a `{ current, previous }` row map; anything else counts as empty. */
	reduceRows(rows, params) {
		const map = rows && !Array.isArray(rows) ? rows : {
			current: [],
			previous: []
		};
		const result = analyzeMovers({
			current: map.current ?? [],
			previous: map.previous ?? []
		}, {
			changeThreshold: params.changeThreshold,
			minImpressions: params.minImpressions
		});
		const results = [...result.rising.map((r) => ({
			...r,
			direction: "rising"
		})), ...result.declining.map((r) => ({
			...r,
			direction: "declining"
		}))];
		return {
			results,
			// Same meta keys as reduceSql so callers see one shape on both paths.
			meta: {
				total: results.length,
				rising: result.rising.length,
				declining: result.declining.length,
				stable: result.stable.length
			}
		};
	}
});
1345
/** Limit used when the caller supplies none. */
const DEFAULT_LIMIT = 1e3;
/** Upper bound applied to every requested limit. */
const MAX_LIMIT = 5e4;
/**
 * Sanitizes a requested row limit.
 *
 * The value is coerced to a number and floored, so a fractional limit can
 * never reach a SQL `LIMIT` clause (this matches `clampOffset`, which also
 * floors). Non-finite values and values below 1 after flooring yield
 * `fallback`; everything else is capped at `MAX_LIMIT`.
 *
 * @param {unknown} limit - requested limit; null/undefined means "use fallback"
 * @param {number} [fallback=DEFAULT_LIMIT] - value returned for unusable input
 * @returns {number} an integer limit no larger than MAX_LIMIT, or `fallback`
 */
function clampLimit(limit, fallback = DEFAULT_LIMIT) {
	const n = Math.floor(Number(limit ?? fallback));
	if (!Number.isFinite(n) || n <= 0) return fallback;
	return Math.min(n, MAX_LIMIT);
}
1352
/**
 * Sanitizes a requested row offset.
 * Null/undefined, non-finite and negative values become 0; other values are floored.
 * @param {unknown} offset - requested offset
 * @returns {number} a non-negative integer offset
 */
function clampOffset(offset) {
	const parsed = Number(offset ?? 0);
	const isUsable = Number.isFinite(parsed) && parsed >= 0;
	return isUsable ? Math.floor(parsed) : 0;
}
1357
/**
 * Renders a SQL pagination clause from `{ limit, offset }`.
 * Both values go through `clampLimit` / `clampOffset`; the `OFFSET` part is
 * emitted only when the clamped offset is greater than 0.
 * @param {{limit?: unknown, offset?: unknown}} input
 * @returns {string} `LIMIT n` or `LIMIT n OFFSET m`
 */
function paginateClause(input) {
	const limit = clampLimit(input.limit);
	const offset = clampOffset(input.offset);
	const parts = [`LIMIT ${limit}`];
	if (offset > 0) parts.push(`OFFSET ${offset}`);
	return parts.join(" ");
}
1362
/**
 * Applies `{ limit, offset }` pagination to an in-memory array.
 * When no usable limit is given, the limit falls back to `rows.length`.
 * @param {Array} rows - full result set
 * @param {{limit?: unknown, offset?: unknown}} input
 * @returns {Array} the requested slice (a new array)
 */
function paginateInMemory(rows, input) {
	const start = clampOffset(input.offset);
	const size = clampLimit(input.limit, rows.length);
	return rows.slice(start, start + size);
}
1367
/** Expected click-through rate for each of the ten integer positions 1..10. */
const EXPECTED_CTR_BY_POSITION = {
	1: .3,
	2: .15,
	3: .1,
	4: .07,
	5: .05,
	6: .04,
	7: .03,
	8: .025,
	9: .02,
	10: .015
};
/**
 * Looks up the expected CTR for a (possibly fractional) position.
 * The position is clamped to [1, 10] and rounded before the lookup;
 * a missing entry (e.g. NaN input) yields 0.01.
 * @param {number} position - average search position
 * @returns {number} expected CTR as a fraction
 */
function getExpectedCtr(position) {
	const clamped = Math.max(1, Math.min(position, 10));
	const bucket = Math.round(clamped);
	return EXPECTED_CTR_BY_POSITION[bucket] || .01;
}
1382
/**
 * Scores how promising a position is for improvement.
 * Positions up to 3 score 0.2 and positions beyond 50 score 0.1; in between,
 * the score peaks at 1 for position 11 and falls off linearly, reaching 0
 * at a distance of 15.
 * @param {number} position - average search position
 * @returns {number} score in [0, 1]
 */
function calculatePositionScore(position) {
	if (position <= 3) return .2;
	if (position > 50) return .1;
	const distanceFromEleven = Math.abs(position - 11);
	const linearScore = 1 - distanceFromEleven / 15;
	return Math.max(0, linearScore);
}
1388
/**
 * Scores impression volume on a log scale: log10(impressions) / 5, capped at 1.
 * Zero or negative impressions score 0.
 * @param {number} impressions
 * @returns {number} score (1 is reached at 100,000 impressions)
 */
function calculateImpressionScore(impressions) {
	return impressions <= 0 ? 0 : Math.min(Math.log10(impressions) / 5, 1);
}
1392
/**
 * Scores how far the actual CTR falls short of the CTR expected at `position`.
 * Returns 0 when the actual CTR meets or exceeds the expectation, otherwise
 * the shortfall as a fraction of the expected CTR, capped at 1.
 * @param {number} actualCtr - observed CTR as a fraction
 * @param {number} position - average search position
 * @returns {number} gap score
 */
function calculateCtrGapScore(actualCtr, position) {
	const expected = getExpectedCtr(position);
	if (actualCtr >= expected) return 0;
	const shortfall = expected - actualCtr;
	return Math.min(shortfall / expected, 1);
}
1398
/**
 * Sorter applied to opportunity results on the row path.
 * NOTE(review): presumably the first argument is the fallback sort metric and
 * the object maps metric to direction — confirm against `createMetricSorter`.
 */
const sortResults$1 = createMetricSorter("opportunityScore", {
	opportunityScore: "desc",
	potentialClicks: "desc",
	impressions: "desc",
	position: "asc"
});
/**
 * The "opportunity" analyzer: ranks keyword/page pairs by an opportunity
 * score, the geometric mean of a position score, an impression score and a
 * CTR-gap score (all weighted 1), scaled to 0..100.
 *
 * `buildSql`/`reduceSql` form the SQL path; `buildRows`/`reduceRows` form the
 * row path, which computes the same scores with the JS helpers.
 * NOTE(review): `{{FILES}}` is presumably substituted by the engine with the
 * parquet files of `current` — confirm.
 */
const opportunityAnalyzer = defineAnalyzer({
	id: "opportunity",
	/** Builds the SQL text, its positional parameters and the partitions to read. */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		const minImpressions = params.minImpressions ?? 100;
		// Factor weights; all equal, so the score is a plain geometric mean.
		const w1 = 1;
		const w2 = 1;
		const w3 = 1;
		const totalW = w1 + w2 + w3;
		const limit = params.limit ?? 1e3;
		return {
			sql: `
WITH agg AS (
SELECT
query AS keyword,
url AS page,
${METRIC_EXPR.clicks} AS clicks,
${METRIC_EXPR.impressions} AS impressions,
${METRIC_EXPR.ctr} AS ctr,
${METRIC_EXPR.position} AS position
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY query, url
HAVING SUM(impressions) >= ?
),
scored AS (
SELECT
keyword, page, clicks, impressions, ctr, position,
CASE
WHEN position <= 3 THEN 0.2
WHEN position > 50 THEN 0.1
ELSE GREATEST(0.0, 1.0 - ABS(position - 11.0) / 15.0)
END AS positionScore,
CASE WHEN impressions <= 0 THEN 0.0 ELSE LEAST(LOG10(impressions) / 5.0, 1.0) END AS impressionScore,
CASE CAST(ROUND(GREATEST(LEAST(position, 10.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
WHEN 4 THEN 0.07
WHEN 5 THEN 0.05
WHEN 6 THEN 0.04
WHEN 7 THEN 0.03
WHEN 8 THEN 0.025
WHEN 9 THEN 0.02
WHEN 10 THEN 0.015
ELSE 0.01
END AS expectedCtr
FROM agg
),
gapped AS (
SELECT
*,
CASE WHEN ctr >= expectedCtr THEN 0.0 ELSE LEAST((expectedCtr - ctr) / expectedCtr, 1.0) END AS ctrGapScore
FROM scored
)
SELECT
keyword, page, clicks, impressions, ctr, position,
CAST(ROUND(POWER(
POWER(positionScore, ${w1}) * POWER(impressionScore, ${w2}) * POWER(ctrGapScore, ${w3}),
1.0 / ${totalW}
) * 100) AS DOUBLE) AS opportunityScore,
CAST(ROUND(impressions * (
CASE CAST(ROUND(GREATEST(LEAST(position, 3.0), 1.0)) AS INTEGER)
WHEN 1 THEN 0.30
WHEN 2 THEN 0.15
WHEN 3 THEN 0.10
ELSE 0.10
END
)) AS DOUBLE) AS potentialClicks,
positionScore, impressionScore, ctrGapScore
FROM gapped
ORDER BY opportunityScore DESC
${paginateClause({
				limit,
				offset: params.offset
			})}
`,
			params: [
				startDate,
				endDate,
				minImpressions
			],
			current: {
				table: "page_keywords",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/** Normalizes SQL rows into result objects with the three factor scores nested under `factors`. */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		return {
			results: arr.map((r) => ({
				keyword: r.keyword == null ? "" : String(r.keyword),
				page: r.page == null ? null : String(r.page),
				clicks: num(r.clicks),
				impressions: num(r.impressions),
				ctr: num(r.ctr),
				position: num(r.position),
				opportunityScore: num(r.opportunityScore),
				potentialClicks: num(r.potentialClicks),
				factors: {
					positionScore: num(r.positionScore),
					impressionScore: num(r.impressionScore),
					ctrGapScore: num(r.ctrGapScore)
				}
			})),
			meta: { total: arr.length }
		};
	},
	/** Declares the single keyword query needed on the row path. */
	buildRows(params) {
		return { keywords: keywordsQueryState(periodOf(params), params.limit) };
	},
	/**
	 * Scores keyword rows in memory, then sorts and paginates them.
	 * Accepts a bare array of keyword rows or a map keyed like `buildRows`
	 * (`{ keywords: [...] }`); anything else counts as empty.
	 */
	reduceRows(rows, params) {
		let keywords = [];
		if (Array.isArray(rows)) keywords = rows;
		else if (Array.isArray(rows?.keywords)) keywords = rows.keywords;
		const minImpressions = params.minImpressions ?? 100;
		const positionWeight = 1;
		const impressionsWeight = 1;
		const ctrGapWeight = 1;
		const sortBy = "opportunityScore";
		const results = [];
		for (const row of keywords) {
			const impressions = num(row.impressions);
			const position = num(row.position);
			const ctr = num(row.ctr);
			const clicks = num(row.clicks);
			if (impressions < minImpressions) continue;
			const positionScore = calculatePositionScore(position);
			const impressionScore = calculateImpressionScore(impressions);
			const ctrGapScore = calculateCtrGapScore(ctr, position);
			const geometricMean = (positionScore ** positionWeight * impressionScore ** impressionsWeight * ctrGapScore ** ctrGapWeight) ** (1 / (positionWeight + impressionsWeight + ctrGapWeight));
			const opportunityScore = Math.round(geometricMean * 100);
			// Potential clicks use the expected CTR of the current position capped at 3.
			const targetCtr = getExpectedCtr(Math.min(3, position));
			const potentialClicks = Math.round(impressions * targetCtr);
			results.push({
				keyword: row.query,
				page: row.page ?? null,
				clicks,
				impressions,
				ctr,
				position,
				opportunityScore,
				potentialClicks,
				factors: {
					positionScore,
					impressionScore,
					ctrGapScore
				}
			});
		}
		const sorted = sortResults$1(results, sortBy);
		const paged = paginateInMemory(sorted, {
			limit: params.limit,
			offset: params.offset
		});
		return {
			results: paged,
			meta: {
				total: sorted.length,
				returned: paged.length
			}
		};
	}
});
1567
/**
 * Coerces a value to a string; `null` and `undefined` become "".
 * @param {unknown} v - value to stringify
 * @returns {string}
 */
function str(v) {
	if (v === null || v === undefined) return "";
	return String(v);
}
1570
/**
 * Interprets a loosely-typed flag: only `true`, `1` and the string "true" count as true.
 * @param {unknown} v
 * @returns {boolean}
 */
function bool(v) {
	return [true, 1, "true"].includes(v);
}
1573
+ function calculateCV(values) {
1574
+ if (values.length === 0) return 0;
1575
+ const mean = values.reduce((a, b) => a + b, 0) / values.length;
1576
+ if (mean === 0) return 0;
1577
+ const variance = values.reduce((sum, v) => sum + (v - mean) ** 2, 0) / values.length;
1578
+ return Math.min(Math.sqrt(variance) / mean, 1);
1579
+ }
1580
/**
 * Aggregates daily rows into calendar months and flags seasonal patterns.
 *
 * Months are keyed by the first 7 characters of `row.date` ("YYYY-MM").
 * A month is a peak when its total exceeds 1.5x the monthly average and a
 * trough when it is below 0.5x. `strength` is the capped coefficient of
 * variation of the monthly totals. `insufficientData` is true when fewer
 * than 12 distinct months are present.
 *
 * @param {Array<{date: string, clicks: number, impressions: number}>} dates - daily rows
 * @param {{metric?: string}} [options] - "clicks" (default); any other value reads impressions
 * @returns {{hasSeasonality: boolean, strength: number, peakMonths: string[], troughMonths: string[], monthlyBreakdown: Array, insufficientData: boolean}}
 */
function analyzeSeasonality(dates, options = {}) {
	const { metric = "clicks" } = options;
	if (dates.length === 0) {
		return {
			hasSeasonality: false,
			strength: 0,
			peakMonths: [],
			troughMonths: [],
			monthlyBreakdown: [],
			insufficientData: true
		};
	}
	const totalsByMonth = /* @__PURE__ */ new Map();
	for (const row of dates) {
		const monthKey = row.date.substring(0, 7);
		const amount = metric === "clicks" ? row.clicks : row.impressions;
		const runningTotal = totalsByMonth.get(monthKey) || 0;
		totalsByMonth.set(monthKey, runningTotal + amount);
	}
	const months = [...totalsByMonth.keys()].sort();
	const values = months.map((monthKey) => totalsByMonth.get(monthKey) || 0);
	const totalValue = values.reduce((acc, v) => acc + v, 0);
	const avgValue = values.length > 0 ? totalValue / values.length : 0;
	const monthlyBreakdown = months.map((month, index) => {
		const value = values[index] ?? 0;
		const vsAverage = avgValue > 0 ? value / avgValue : 0;
		return {
			month,
			value,
			vsAverage,
			isPeak: vsAverage > 1.5,
			isTrough: vsAverage < .5
		};
	});
	// Collapse flagged "YYYY-MM" entries to distinct two-digit month numbers.
	const distinctMonthNumbers = (isFlagged) => {
		const monthNumbers = monthlyBreakdown.filter(isFlagged).map((entry) => entry.month.substring(5, 7));
		return [...new Set(monthNumbers)];
	};
	const peakMonths = distinctMonthNumbers((entry) => entry.isPeak);
	const troughMonths = distinctMonthNumbers((entry) => entry.isTrough);
	const strength = calculateCV(values);
	const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
	return {
		hasSeasonality,
		strength,
		peakMonths,
		troughMonths,
		monthlyBreakdown,
		insufficientData: months.length < 12
	};
}
1624
/**
 * Registers the "seasonality" analyzer: a monthly breakdown of clicks or
 * impressions with peak/trough flags and an overall strength figure.
 *
 * `buildSql`/`reduceSql` form the SQL path; `buildRows`/`reduceRows` form the
 * row path that delegates to `analyzeSeasonality`. Both paths report the same
 * `meta` keys: total, hasSeasonality, strength, peakMonths, troughMonths,
 * insufficientData.
 * NOTE(review): `{{FILES}}` is presumably substituted by the engine with the
 * parquet files of `current` — confirm.
 */
defineAnalyzer({
	id: "seasonality",
	/** Builds the SQL text, its positional parameters and the partitions to read. */
	buildSql(params) {
		const { startDate, endDate } = periodOf(params);
		// Only two column names can ever be interpolated, so this is not user-controlled SQL.
		const metricColumn = params.metric === "impressions" ? "impressions" : "clicks";
		return {
			// `date` is cast to DATE before strftime, as the weekly CTEs of the
			// decay and movers analyzers do, so the query does not depend on
			// how the column is stored.
			sql: `
WITH monthly AS (
SELECT
strftime(CAST(date AS DATE), '%Y-%m') AS month,
CAST(SUM(${metricColumn}) AS DOUBLE) AS value
FROM read_parquet({{FILES}}, union_by_name = true)
WHERE date >= ? AND date <= ?
GROUP BY month
),
stats AS (
SELECT
AVG(value) AS avg_val,
COALESCE(STDDEV_POP(value), 0.0) AS std_val,
CAST(COUNT(*) AS DOUBLE) AS month_count
FROM monthly
)
SELECT
m.month AS month,
m.value AS value,
CASE WHEN s.avg_val > 0 THEN m.value / s.avg_val ELSE 0.0 END AS vsAverage,
(s.avg_val > 0 AND m.value / s.avg_val > 1.5) AS isPeak,
(s.avg_val > 0 AND m.value / s.avg_val < 0.5) AS isTrough,
CASE WHEN s.avg_val > 0 THEN LEAST(s.std_val / s.avg_val, 1.0) ELSE 0.0 END AS strength,
s.month_count AS monthCount
FROM monthly m, stats s
ORDER BY m.month
`,
			params: [startDate, endDate],
			current: {
				table: "pages",
				partitions: enumeratePartitions(startDate, endDate)
			}
		};
	},
	/** Normalizes SQL rows; `strength` and `monthCount` repeat on every row, so the first row is read. */
	reduceSql(rows) {
		const arr = Array.isArray(rows) ? rows : [];
		const breakdown = arr.map((r) => ({
			month: str(r.month),
			value: num(r.value),
			vsAverage: num(r.vsAverage),
			isPeak: bool(r.isPeak),
			isTrough: bool(r.isTrough)
		}));
		const first = arr[0];
		const strength = first ? num(first.strength) : 0;
		const monthCount = first ? num(first.monthCount) : 0;
		const peakMonths = [...new Set(breakdown.filter((m) => m.isPeak).map((m) => m.month.substring(5, 7)))];
		const troughMonths = [...new Set(breakdown.filter((m) => m.isTrough).map((m) => m.month.substring(5, 7)))];
		const hasSeasonality = peakMonths.length > 0 || troughMonths.length > 0 || strength > .3;
		const insufficientData = monthCount < 12;
		return {
			results: breakdown,
			meta: {
				total: breakdown.length,
				hasSeasonality,
				strength,
				peakMonths,
				troughMonths,
				insufficientData
			}
		};
	},
	/** Declares the single per-date query needed on the row path. */
	buildRows(params) {
		return { dates: datesQueryState(periodOf(params), params.limit) };
	},
	/**
	 * Runs `analyzeSeasonality` over daily rows.
	 * Accepts a bare array of rows or a map keyed like `buildRows`
	 * (`{ dates: [...] }`); anything else counts as empty.
	 */
	reduceRows(rows, params) {
		let dates = [];
		if (Array.isArray(rows)) dates = rows;
		else if (Array.isArray(rows?.dates)) dates = rows.dates;
		const result = analyzeSeasonality(dates, { metric: params.metric });
		return {
			results: result.monthlyBreakdown,
			// Same meta keys as reduceSql so callers see one shape on both paths.
			meta: {
				total: result.monthlyBreakdown.length,
				hasSeasonality: result.hasSeasonality,
				strength: result.strength,
				peakMonths: result.peakMonths,
				troughMonths: result.troughMonths,
				insufficientData: result.insufficientData
			}
		};
	}
});
1702
/**
 * Sorter for striking-distance results; reads the metric straight off the item.
 * NOTE(review): presumably "potentialClicks" is the fallback sort metric —
 * confirm against `createSorter`.
 */
const sortResults = createSorter((item, metric) => item[metric], "potentialClicks");
/**
 * Finds keywords within "striking distance": ranked between `minPosition` and
 * `maxPosition`, with at least `minImpressions` impressions and a CTR no
 * higher than `maxCtr`. Potential clicks are estimated as 15% of impressions.
 *
 * @param {Iterable} keywords - keyword rows (query, page, clicks, impressions, ctr, position)
 * @param {{minPosition?: number, maxPosition?: number, minImpressions?: number, maxCtr?: number, sortBy?: string, sortOrder?: string}} [options]
 * @returns the matching keywords, ordered by `sortResults`
 */
function analyzeStrikingDistance(keywords, options = {}) {
	const {
		minPosition = 4,
		maxPosition = 20,
		minImpressions = 100,
		maxCtr = .05,
		sortBy = "potentialClicks",
		sortOrder = "desc"
	} = options;
	const candidates = [];
	for (const row of keywords) {
		const position = num(row.position);
		const impressions = num(row.impressions);
		const ctr = num(row.ctr);
		const clicks = num(row.clicks);
		const isOutOfRange = position < minPosition || position > maxPosition;
		const isTooSmall = impressions < minImpressions;
		const isAlreadyClicked = ctr > maxCtr;
		if (isOutOfRange || isTooSmall || isAlreadyClicked) continue;
		candidates.push({
			keyword: row.query,
			page: row.page ?? null,
			clicks,
			impressions,
			ctr,
			position,
			potentialClicks: Math.round(impressions * .15)
		});
	}
	return sortResults(candidates, sortBy, sortOrder);
}
1727
/**
 * Wraps a query state in a `{ state }` envelope.
 * @param state - the query state to wrap
 * @returns {{state: *}}
 */
function typedQuery(state) {
	const wrapped = { state };
	return wrapped;
}
1730
/**
 * Tells whether `value` is a `{ state }` envelope as produced by `typedQuery`.
 *
 * The `in` operator throws a TypeError when its right-hand side is not an
 * object, so null, undefined and primitives are rejected up front and
 * reported as "not a typed query" instead of crashing the caller.
 *
 * @param {unknown} value - a typed query, a raw query state, or anything else
 * @returns {boolean} true when `value` is an object (or function) with a `state` property
 */
function isTypedQuery(value) {
	if (value === null || value === undefined) return false;
	const kind = typeof value;
	if (kind !== "object" && kind !== "function") return false;
	return "state" in value;
}
1733
/**
 * Runs a query against `source.queryRows`, unwrapping a typed-query envelope first.
 * @param source - object exposing `queryRows(state)`
 * @param query - a `{ state }` envelope or a raw query state
 * @returns {Promise} whatever `source.queryRows` resolves to
 */
async function queryRows(source, query) {
	if (isTypedQuery(query)) return await source.queryRows(query.state);
	return await source.queryRows(query);
}
1737
/**
 * Runs the current-period and previous-period queries concurrently.
 * @param source - object exposing `queryRows(state)`
 * @param current - query for the current period
 * @param previous - query for the previous period
 * @returns {Promise<{current: *, previous: *}>} the rows of each period
 */
async function queryComparisonRows(source, current, previous) {
	const pending = [current, previous].map((periodQuery) => queryRows(source, periodQuery));
	const [currentRows, previousRows] = await Promise.all(pending);
	return {
		current: currentRows,
		previous: previousRows
	};
}
1744
/** Builds a typed-query envelope around `keywordsQueryState(period, limit)`. */
function keywordQuery(period, limit) {
	const state = keywordsQueryState(period, limit);
	return typedQuery(state);
}
1747
/** Builds a typed-query envelope around `pagesQueryState(period, limit)`. */
function pageQuery(period, limit) {
	const state = pagesQueryState(period, limit);
	return typedQuery(state);
}
1750
/** Builds a typed-query envelope around `datesQueryState(period, limit)`. */
function dateQuery(period, limit) {
	const state = datesQueryState(period, limit);
	return typedQuery(state);
}
1753
/**
 * Identity helper for declaring a portable analyzer; returns the definition
 * object it was given, unchanged.
 * @param definition - object with `requiredQueries` and `run`
 * @returns the same `definition` reference
 */
function definePortableAnalyzer(definition) {
	const analyzer = definition;
	return analyzer;
}
1756
/**
 * Executes a portable analyzer against a row source.
 * Asks the definition which queries it needs, runs them all concurrently,
 * and passes the rows (keyed like the queries) to `definition.run`.
 *
 * @param source - object exposing `queryRows(state)`
 * @param definition - portable analyzer with `requiredQueries(input, limit)` and `run(rows, options)`
 * @param input - period (or periods) forwarded to `requiredQueries`
 * @param options - analyzer options forwarded to `run`
 * @param {number} [limit=25e3] - row limit forwarded to `requiredQueries`
 * @returns {Promise} the value returned by `definition.run`
 */
async function runPortableAnalyzer(source, definition, input, options, limit = 25e3) {
	const specs = definition.requiredQueries(input, limit);
	const keys = Object.keys(specs);
	const rowSets = await Promise.all(keys.map((key) => queryRows(source, specs[key])));
	const rowsByKey = Object.fromEntries(keys.map((key, index) => [key, rowSets[index]]));
	return definition.run(rowsByKey, options);
}
1762
/**
 * Registry of portable analyzers keyed by name.
 * Each entry declares the queries it needs (`requiredQueries`) and how to
 * turn the resulting rows into a result (`run`). Single-period analyzers
 * receive one period; `decay` and `movers` receive `{ current, previous }`.
 */
const PORTABLE_ANALYZERS = (() => {
	// Query declarations shared by several entries.
	const keywordsOnly = (period, limit) => ({ keywords: keywordQuery(period, limit) });
	const pagesOnly = (period, limit) => ({ pages: pageQuery(period, limit) });
	const datesOnly = (period, limit) => ({ dates: dateQuery(period, limit) });
	const bothPeriods = (makeQuery) => (periods, limit) => ({
		current: makeQuery(periods.current, limit),
		previous: makeQuery(periods.previous, limit)
	});
	return {
		strikingDistance: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeStrikingDistance(keywords, options)
		}),
		opportunity: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => {
				// Reuses the row-path reducer of the registered opportunity analyzer.
				const reduced = opportunityAnalyzer.rows.reduce(keywords, { params: {
					type: "opportunity",
					minImpressions: options?.minImpressions
				} });
				return reduced.results;
			}
		}),
		brandSegmentation: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeBrandSegmentation(keywords, options)
		}),
		pageConcentration: definePortableAnalyzer({
			requiredQueries: pagesOnly,
			run: ({ pages }, options) => analyzePageConcentration(pages, options)
		}),
		keywordConcentration: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeKeywordConcentration(keywords, options)
		}),
		clustering: definePortableAnalyzer({
			requiredQueries: keywordsOnly,
			run: ({ keywords }, options) => analyzeClustering(keywords, options)
		}),
		seasonality: definePortableAnalyzer({
			requiredQueries: datesOnly,
			run: ({ dates }, options) => analyzeSeasonality(dates, options)
		}),
		decay: definePortableAnalyzer({
			requiredQueries: bothPeriods(pageQuery),
			run: ({ current, previous }, options) => analyzeDecay({
				current,
				previous
			}, options)
		}),
		movers: definePortableAnalyzer({
			requiredQueries: bothPeriods(keywordQuery),
			run: ({ current, previous }, options) => analyzeMovers({
				current,
				previous
			}, options)
		})
	};
})();
1818
/**
 * Fetches keyword, page and date rows for one period, all three in parallel.
 *
 * @param {object} source - Query source passed to `queryRows`.
 * @param {*} period - Period forwarded to the keyword/page/date query builders.
 * @param {{limit?: number}|null} [options] - Optional settings. `limit` is
 *   forwarded to the query builders and falls back to 25000 when it is
 *   `null`/`undefined`. A `null` options object is tolerated (the parameter
 *   default only covers `undefined`, so it is read with optional chaining).
 * @returns {Promise<{keywords: *, pages: *, dates: *}>} Rows per query kind.
 */
async function queryAnalyticsFromSource(source, period, options = {}) {
  // `??` (not `||`) so that an explicit limit of 0 is still forwarded.
  const limit = options?.limit ?? 25e3;
  const [keywords, pages, dates] = await Promise.all([
    queryRows(source, keywordQuery(period, limit)),
    queryRows(source, pageQuery(period, limit)),
    queryRows(source, dateQuery(period, limit)),
  ]);
  return {
    keywords,
    pages,
    dates,
  };
}
1831
/**
 * Runs `queryAnalyticsFromSource` for `periods.current` and
 * `periods.previous` concurrently and returns both result sets.
 *
 * @param {object} source - Query source.
 * @param {{current: *, previous: *}} periods - The two periods to query.
 * @param {object} [options={}] - Passed unchanged to both underlying calls.
 * @returns {Promise<{current: *, previous: *}>} Analytics for both periods.
 */
async function queryComparisonFromSource(source, periods, options = {}) {
  const pendingCurrent = queryAnalyticsFromSource(source, periods.current, options);
  const pendingPrevious = queryAnalyticsFromSource(source, periods.previous, options);
  const [current, previous] = await Promise.all([pendingCurrent, pendingPrevious]);
  return { current, previous };
}
1838
/**
 * Runs the `strikingDistance` entry of `PORTABLE_ANALYZERS` against `source`
 * via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeStrikingDistanceFromSource(source, period, options) {
  const { strikingDistance } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, strikingDistance, period, options);
}
1841
/**
 * Runs the `opportunity` entry of `PORTABLE_ANALYZERS` against `source`
 * via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeOpportunityFromSource(source, period, options) {
  const { opportunity } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, opportunity, period, options);
}
1844
/**
 * Runs the `brandSegmentation` entry of `PORTABLE_ANALYZERS` against
 * `source` via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeBrandSegmentationFromSource(source, period, options) {
  const { brandSegmentation } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, brandSegmentation, period, options);
}
1847
/**
 * Runs the `pageConcentration` entry of `PORTABLE_ANALYZERS` against
 * `source` via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzePageConcentrationFromSource(source, period, options) {
  const { pageConcentration } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, pageConcentration, period, options);
}
1850
/**
 * Runs the `keywordConcentration` entry of `PORTABLE_ANALYZERS` against
 * `source` via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeKeywordConcentrationFromSource(source, period, options) {
  const { keywordConcentration } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, keywordConcentration, period, options);
}
1853
/**
 * Runs the `clustering` entry of `PORTABLE_ANALYZERS` against `source`
 * via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeClusteringFromSource(source, period, options) {
  const { clustering } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, clustering, period, options);
}
1856
/**
 * Runs the `seasonality` entry of `PORTABLE_ANALYZERS` against `source`
 * via `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {*} period - Input handed to the analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeSeasonalityFromSource(source, period, options) {
  const { seasonality } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, seasonality, period, options);
}
1859
/**
 * Runs the `decay` entry of `PORTABLE_ANALYZERS` against `source` via
 * `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {{current: *, previous: *}} periods - Period pair handed to the
 *   analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeDecayFromSource(source, periods, options) {
  const { decay } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, decay, periods, options);
}
1862
/**
 * Runs the `movers` entry of `PORTABLE_ANALYZERS` against `source` via
 * `runPortableAnalyzer` (default row limit).
 *
 * @param {object} source - Query source.
 * @param {{current: *, previous: *}} periods - Period pair handed to the
 *   analyzer's `requiredQueries`.
 * @param {*} options - Options handed to the analyzer's `run`.
 * @returns {Promise<*>} The analyzer's result.
 */
async function analyzeMoversFromSource(source, periods, options) {
  const { movers } = PORTABLE_ANALYZERS;
  return runPortableAnalyzer(source, movers, periods, options);
}
1865
+ export { AnalyzerCapabilityError, ENGINE_QUERY_CAPABILITIES, GSC_API_CAPABILITIES, IN_MEMORY_DEFAULT_CAPABILITIES, analyzeBrandSegmentationFromSource, analyzeClusteringFromSource, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentrationFromSource, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentrationFromSource, analyzeSeasonalityFromSource, analyzeStrikingDistanceFromSource, collectRows as collectGscRows, createBrowserQuerySource, createEngineQuerySource, createGscApiQuerySource, createInMemoryQuerySource, createSqliteQuerySource, fetchGscDaily, fetchGscTopN, isSqlQuerySource, queryAnalyticsFromSource, queryComparisonFromSource, queryComparisonRows, queryRows, runAnalyzerWithEngine, typedQuery };