@gscdump/analysis 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,427 @@
1
+ import { BrowserQueryRunner, createEngine as createBrowserQuerySource } from "@gscdump/engine-wasm";
2
+ import { AnalysisQuerySource, AnalysisQuerySource as AnalysisQuerySource$1, FileSet, QueryRow, QueryRow as QueryRow$1, RowQuerySource, SourceCapabilities, SqlQuerySource, isSqlQuerySource } from "@gscdump/engine/resolver";
3
+ import { PlannerCapabilities } from "gscdump/query/plan";
4
+ import { BuilderState, Column, Dimension } from "gscdump/query";
5
+ import { GoogleSearchConsoleClient } from "gscdump";
6
+ import { EngineConfig, SqliteQueryExecutor, createEngine as createSqliteQuerySource } from "@gscdump/engine-sqlite";
7
+ import { Row, StorageEngine, TenantCtx } from "@gscdump/engine/contracts";
8
+ import { AnalysisParams, AnalysisResult } from "gscdump/contracts";
9
+ /** Sort direction: ascending or descending. */ type SortOrder = 'asc' | 'desc';
10
+ /** Base search metrics; extended by the keyword, page, and date row shapes. */
11
+ interface BaseMetrics {
12
+ clicks: number;
13
+ impressions: number;
14
+ ctr: number;
15
+ position: number;
16
+ }
17
+ /** Keyword row from query: base metrics plus the `query` string and an optional `page`. */
18
+ interface KeywordRow extends BaseMetrics {
19
+ query: string;
20
+ page?: string;
21
+ }
22
+ /** Page row from query: base metrics plus the `page` string. */
23
+ interface PageRow extends BaseMetrics {
24
+ page: string;
25
+ }
26
+ /** Date row from query: base metrics plus a `date` string (format not specified in this file). */
27
+ interface DateRow extends BaseMetrics {
28
+ date: string;
29
+ }
30
+ /**
31
+ * Capabilities a Plan may require of its host. A dispatcher matches these
32
+ * against a source's declared capabilities and rejects mismatches. Analyzers list them in `Analyzer.requires`; `AnalyzerCapabilityError.missing` is typed as a list of them.
33
+ */
34
+ type Capability = 'executeSql' | 'partitionedParquet' | 'attachedTables' | 'regex' | 'windowTotals' | 'comparisonJoin';
35
+ /** Named follow-up SQL query (element type of `SqlPlan.extraQueries`): SQL text plus its parameter values. */ interface SqlExtraQuery {
36
+ name: string; // `ReduceContext.extras` is documented as keyed by this name
37
+ sql: string;
38
+ params: unknown[];
39
+ }
40
+ /**
41
+ * SQL-native plan: SQL string + placeholders, with optional extra file sets
42
+ * and follow-up queries. Mirrors the existing `AnalyzerSpec` shape but
43
+ * renamed for clarity under the unified contract.
44
+ */
45
+ interface SqlPlan {
46
+ kind: 'sql'; // discriminant within the `Plan` union
47
+ sql: string;
48
+ params: unknown[]; // values for the placeholders in `sql`
49
+ current: FileSet; // NOTE(review): presumably the file set for the current period — confirm
50
+ previous?: FileSet; // NOTE(review): presumably the comparison-period file set — confirm
51
+ extraFiles?: Record<string, FileSet>;
52
+ extraQueries?: SqlExtraQuery[];
53
+ /** Emits direct table refs (browser-only). Dispatcher rejects for manifest path. */
54
+ requiresAttachedTables?: boolean;
55
+ }
56
+ /** A single `BuilderState` query with an optional `rowType` tag; value type of `RowQueriesPlan.queries`. */ interface TypedRowQuery<T extends Row = Row> {
57
+ state: BuilderState;
58
+ /** Optional type tag for downstream narrowing. Typed as a `Row` → `T` function. */
59
+ rowType?: (row: Row) => T;
60
+ }
61
+ /**
62
+ * Row-queries plan: a named set of typed `BuilderState` queries. A portable
63
+ * dispatcher runs each against a source's `queryRows` and hands the row
64
+ * collection to `reduce`.
65
+ */
66
+ interface RowQueriesPlan {
67
+ kind: 'rows'; // discriminant within the `Plan` union
68
+ queries: Record<string, TypedRowQuery>; // keyed by query name
69
+ }
70
+ /** Return type of `Analyzer.build`; discriminate on `kind` (`'sql'` or `'rows'`). */ type Plan = SqlPlan | RowQueriesPlan;
71
+ /** Second argument of `Analyzer.reduce`: the analysis params plus any extra-query results. */ interface ReduceContext<TRow extends Row = Row> {
72
+ params: AnalysisParams;
73
+ /** Extra SQL-query results keyed by `SqlExtraQuery.name`. */
74
+ extras?: Record<string, TRow[]>;
75
+ }
76
+ /**
77
+ * Unified analyzer contract. `TRow` lets authors narrow from the default
78
+ * `Row = Record<string, unknown>` to a typed row shape (e.g. `KeywordRow`)
79
+ * when their reducer assumes specific columns exist — catches drift between
80
+ * `build` (SELECT list) and `reduce` (column access) at compile time.
81
+ */
82
+ interface Analyzer<P extends AnalysisParams = AnalysisParams, R = unknown, TRow extends Row = Row> {
83
+ /** Stable tool id (e.g. `striking-distance`, `opportunity`). */
84
+ id: string;
85
+ /** Capabilities a host source must provide. */
86
+ requires: readonly Capability[];
87
+ /** Pure: params → plan. Snapshot-testable. */
88
+ build: (params: P) => Plan;
89
+ /** Pure: rows + context → typed result + meta. `rows` is either a flat array or a record of arrays keyed by name. */
90
+ reduce: (rows: TRow[] | Record<string, TRow[]>, ctx: ReduceContext<TRow>) => {
91
+ results: R;
92
+ meta?: Record<string, unknown>;
93
+ };
94
+ }
95
+ /** Up to two implementations of one analyzer. NOTE(review): the `sql` / `rows` keys presumably correspond to the two `Plan` kinds — confirm. */ interface AnalyzerVariants {
96
+ sql?: Analyzer;
97
+ rows?: Analyzer;
98
+ }
99
+ /** Lookup surface for registered analyzers. The by-id lookups may return `undefined`. */ interface AnalyzerRegistry {
100
+ listAnalyzerIds: () => readonly string[];
101
+ getAnalyzerVariants: (id: string) => AnalyzerVariants | undefined;
102
+ resolveAnalyzer: (id: string, sourceSupportsSql: boolean) => Analyzer | undefined;
103
+ listAnalyzersFor: (sourceSupportsSql: boolean) => readonly Analyzer[];
104
+ listAnalyzerIdsFor: (source: { // unlike the methods above, takes a source-like object rather than a boolean
105
+ executeSql?: unknown;
106
+ }) => readonly string[];
107
+ }
108
+ /** A `BuilderState` paired with a row type parameter. */ interface TypedQuery<TRow> {
109
+ state: BuilderState;
110
+ readonly __row?: TRow; // NOTE(review): presumably a phantom field used only to carry `TRow` for inference — confirm
111
+ }
112
+ /** Wraps a `BuilderState` as a `TypedQuery<TRow>`. `TRow` appears only in the return type, so callers supply it explicitly. */ declare function typedQuery<TRow>(state: BuilderState): TypedQuery<TRow>;
113
+ /** Takes a source and one query (raw `BuilderState` or `TypedQuery`) and resolves to `TRow[]`; `TRow` defaults to `QueryRow`. */ declare function queryRows<TRow = QueryRow$1>(source: AnalysisQuerySource$1, query: BuilderState | TypedQuery<TRow>): Promise<TRow[]>;
114
+ /** Two-query counterpart of `queryRows`: takes a `current` and a `previous` query and resolves to both row arrays. */ declare function queryComparisonRows<TRow = QueryRow$1>(source: AnalysisQuerySource$1, current: BuilderState | TypedQuery<TRow>, previous: BuilderState | TypedQuery<TRow>): Promise<{
115
+ current: TRow[];
116
+ previous: TRow[];
117
+ }>;
118
+ /** Error carrying a tool id and a list of missing capabilities. NOTE(review): presumably thrown by the dispatcher on a capability mismatch — confirm. */ declare class AnalyzerCapabilityError extends Error {
119
+ readonly tool: string;
120
+ readonly missing: readonly Capability[];
121
+ constructor(tool: string, missing: readonly Capability[]);
122
+ }
123
+ /** Takes a query source, analysis params, and an analyzer registry; resolves to an `AnalysisResult`. */ declare function analyzeFromSource(source: AnalysisQuerySource, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
124
+ /**
125
+ * Capabilities the engine query path honors. Matches what the DuckDB compiler
126
+ * passes to `buildLogicalPlan` (see `gscdump/analytics/compiler`; not imported in this file): regex
127
+ * pushes down; comparison joins and multi-dataset queries belong to the
128
+ * analyzer dispatcher, not the engine's builder-state query path.
129
+ */
130
+ declare const ENGINE_QUERY_CAPABILITIES: PlannerCapabilities;
131
+ /** Options for `createEngineQuerySource`: a storage engine and a tenant context. */ interface EngineQuerySourceOptions {
132
+ engine: StorageEngine;
133
+ ctx: TenantCtx;
134
+ }
135
+ /**
136
+ * Wraps a storage engine as a {@link SqlQuerySource}. `queryRows` runs typed
137
+ * builder-state queries; `executeSql` delegates to `engine.runSQL` and
138
+ * requires `opts.fileSets` (with a `FILES` entry so the target table can be
139
+ * resolved for partition lookup). NOTE(review): `opts` presumably means the `executeSql` call options, not this factory's `options` — confirm.
140
+ */
141
+ declare function createEngineQuerySource(options: EngineQuerySourceOptions): SqlQuerySource;
142
+ /**
143
+ * Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
144
+ * Presumably equivalent to the call below (this module declares `analyzeFromSource`, not `runAnalyzerFromSource` — confirm):
145
+ * `analyzeFromSource(createEngineQuerySource({ engine, ctx }), params, registry)`.
146
+ */
147
+ declare function runAnalyzerWithEngine(deps: {
148
+ engine: StorageEngine;
149
+ }, ctx: TenantCtx, params: AnalysisParams, registry: AnalyzerRegistry): Promise<AnalysisResult>;
150
+ /**
151
+ * Capabilities the live GSC API can satisfy. Regex pushes down via the
152
+ * `INCLUDING_REGEX` / `EXCLUDING_REGEX` filter types. Comparison joins and
153
+ * cross-dataset queries do not exist on the wire, and the API does not
154
+ * expose window aggregations. Metric filters and ordering are instead applied by
155
+ * the source-layer post-process pass after row collection.
156
+ */
157
+ declare const GSC_API_CAPABILITIES: PlannerCapabilities;
158
+ /** Options for `createGscApiQuerySource`: a GSC client and a site URL. */ interface GscApiQuerySourceOptions {
159
+ client: GoogleSearchConsoleClient;
160
+ siteUrl: string;
161
+ }
162
+ /** Builds a query source from a GSC client and site URL. Returns a `RowQuerySource`, not a `SqlQuerySource`. */ declare function createGscApiQuerySource(options: GscApiQuerySourceOptions): RowQuerySource;
163
+ /** Takes an async generator yielding `T[]` batches and resolves to a single `T[]` (presumably all batches concatenated — confirm). Exported as `collectGscRows`. */ declare function collectRows<T>(gen: AsyncGenerator<T[]>): Promise<T[]>;
164
+ /** Range with `start` and `end` strings. NOTE(review): date format and end inclusivity are not stated in this file — confirm. */ interface GscRange {
165
+ start: string;
166
+ end: string;
167
+ }
168
+ /** Row type resolved by `fetchGscTopN`. */ interface GscTopNRow {
169
+ key: string;
170
+ clicks: number;
171
+ impressions: number;
172
+ sum_position: number; // NOTE(review): a sum rather than an average; presumably divided downstream to get average position — confirm
173
+ }
174
+ /** Options for `fetchGscTopN`, generic over the dimension `D` of the requested column. */ interface FetchTopNOptions<D extends Dimension> {
175
+ client: GoogleSearchConsoleClient;
176
+ siteUrl: string;
177
+ dimension: Column<D>;
178
+ range: GscRange;
179
+ /**
180
+ * Ask the GSC API to order by clicks desc. Skip for dimensions where GSC
181
+ * already returns sensibly ranked rows (e.g. country).
182
+ */
183
+ orderByClicksDesc?: boolean;
184
+ /** Forwarded to the GSC builder. */
185
+ limit?: number;
186
+ /** Trim after the fact (e.g. country has no server-side limit). */
187
+ sliceTop?: number;
188
+ }
189
+ /** Takes `FetchTopNOptions` for one dimension and resolves to `GscTopNRow[]`. */ declare function fetchGscTopN<D extends Dimension>(opts: FetchTopNOptions<D>): Promise<GscTopNRow[]>;
190
+ /** Row type resolved by `fetchGscDaily`. */ interface GscDailyRow {
191
+ date: number; // NOTE(review): numeric here, whereas `DateRow.date` is a string — confirm the representation (e.g. epoch vs. encoded day)
192
+ clicks: number;
193
+ impressions: number;
194
+ sum_position: number;
195
+ anonymizedImpressionsPct: number; // NOTE(review): scale (0–1 vs. 0–100) is not stated in this file — confirm
196
+ }
197
+ /** Takes a GSC client, site URL, and range; resolves to `GscDailyRow[]`. */ declare function fetchGscDaily(opts: {
198
+ client: GoogleSearchConsoleClient;
199
+ siteUrl: string;
200
+ range: GscRange;
201
+ }): Promise<GscDailyRow[]>;
202
+ /**
203
+ * Permissive defaults: in-memory sources are usually test doubles, so they
204
+ * advertise every capability unless the test explicitly narrows them (presumably via `InMemoryQuerySourceOptions.capabilities` — confirm).
205
+ */
206
+ declare const IN_MEMORY_DEFAULT_CAPABILITIES: PlannerCapabilities;
207
+ /** Options for `createInMemoryQuerySource`. */ interface InMemoryQuerySourceOptions {
208
+ queryRows: (state: BuilderState) => Promise<QueryRow[]> | QueryRow[]; // may be sync or async
209
+ capabilities?: PlannerCapabilities;
210
+ }
211
+ /** Builds a `RowQuerySource` from a caller-supplied `queryRows` callback and optional capabilities. */ declare function createInMemoryQuerySource(options: InMemoryQuerySourceOptions): RowQuerySource;
212
+ /** Options for `analyzeBrandSegmentationFromSource`; `brandTerms` is required. */ interface BrandSegmentationOptions {
213
+ /** Brand terms to match against keywords (case-insensitive) */
214
+ brandTerms: string[];
215
+ /** Minimum impressions for a keyword to be included. Default: 10 */
216
+ minImpressions?: number;
217
+ }
218
+ /** Click and impression totals for the brand and non-brand groups. */ interface BrandSummary {
219
+ brandClicks: number;
220
+ nonBrandClicks: number;
221
+ brandShare: number; // NOTE(review): basis (clicks vs. impressions) and scale (0–1 vs. percent) are not stated here — confirm
222
+ brandImpressions: number;
223
+ nonBrandImpressions: number;
224
+ }
225
+ /** Keyword rows split into `brand` and `nonBrand` lists, plus a `BrandSummary`. */ interface BrandSegmentationResult {
226
+ brand: KeywordRow[];
227
+ nonBrand: KeywordRow[];
228
+ summary: BrandSummary;
229
+ }
230
+ /** Accepted values for `ClusteringOptions.clusterBy`. `'both'` is option-only: `KeywordCluster.clusterType` is `'prefix' | 'intent'`. */ type ClusterType = 'prefix' | 'intent' | 'both';
231
+ /** Options for `analyzeClusteringFromSource`; every field is optional. */ interface ClusteringOptions {
232
+ /** Minimum keywords for a cluster to be reported. Default: 2 */
233
+ minClusterSize?: number;
234
+ /** Minimum impressions for a keyword to be included. Default: 10 */
235
+ minImpressions?: number;
236
+ /** Clustering method. Default: 'both' */
237
+ clusterBy?: ClusterType;
238
+ }
239
+ /** One cluster: its name, type, member keyword rows, and aggregate metrics. */ interface KeywordCluster {
240
+ clusterName: string;
241
+ clusterType: 'prefix' | 'intent';
242
+ keywords: KeywordRow[];
243
+ totalClicks: number;
244
+ totalImpressions: number;
245
+ avgPosition: number;
246
+ keywordCount: number;
247
+ }
248
+ /** Result of clustering: the clusters found plus the keyword rows left unclustered. */ interface ClusteringResult {
249
+ clusters: KeywordCluster[];
250
+ unclustered: KeywordRow[];
251
+ }
252
+ /** Risk buckets; type of `ConcentrationResult.riskLevel`. */ type ConcentrationRiskLevel = 'low' | 'medium' | 'high';
253
+ /** Options accepted by both the page and keyword concentration functions. */ interface ConcentrationOptions {
254
+ /** Number of top items to report. Default: 10 */
255
+ topN?: number;
256
+ }
257
+ /** Element type of `ConcentrationResult.topNItems`. */ interface ConcentrationItem {
258
+ key: string;
259
+ clicks: number;
260
+ share: number; // NOTE(review): scale (0–1 vs. percent) is not stated in this file — confirm
261
+ }
262
+ /** Result type of both `analyzePageConcentrationFromSource` and `analyzeKeywordConcentrationFromSource`. */ interface ConcentrationResult {
263
+ /** Gini coefficient: 0 = equal distribution, 1 = fully concentrated */
264
+ giniCoefficient: number;
265
+ /** Herfindahl-Hirschman Index: 0-10000, >2500 = highly concentrated */
266
+ hhi: number;
267
+ /** Percentage of total clicks from top N items */
268
+ topNConcentration: number;
269
+ topNItems: ConcentrationItem[];
270
+ totalItems: number;
271
+ totalClicks: number;
272
+ /** Risk level derived from HHI: <1500 low, 1500-2500 medium, >2500 high */
273
+ riskLevel: ConcentrationRiskLevel;
274
+ }
275
+ /** Accepted values for `DecayOptions.sortBy`; each is also a numeric field name on `DecayResult`. */ type DecaySortMetric = 'lostClicks' | 'declinePercent' | 'currentClicks';
276
+ /** Options for `analyzeDecayFromSource`; every field is optional. */ interface DecayOptions {
277
+ /** Minimum clicks in previous period to consider. Default: 50 */
278
+ minPreviousClicks?: number;
279
+ /** Minimum decline percentage (0-1). Default: 0.2 (20%) */
280
+ threshold?: number;
281
+ /** Metric to sort results by. Default: lostClicks */
282
+ sortBy?: DecaySortMetric;
283
+ }
284
+ /** Element type of the optional `DecayResult.series`. */ interface DecaySeriesPoint {
285
+ week: string; // week label; format is not specified in this file
286
+ clicks: number;
287
+ impressions: number;
288
+ }
289
+ /** Per-page decay record; `analyzeDecayFromSource` resolves to an array of these. */ interface DecayResult {
290
+ page: string;
291
+ currentClicks: number;
292
+ previousClicks: number;
293
+ lostClicks: number;
294
+ declinePercent: number;
295
+ currentPosition: number;
296
+ previousPosition: number;
297
+ positionDrop: number;
298
+ series?: DecaySeriesPoint[];
299
+ }
300
+ /** Accepted values for `MoversOptions.sortBy`. NOTE(review): `MoverData` has `clicksChange` and `positionChange` fields but none named `clicks`, `impressions`, or `impressionsChange` — confirm how those keys map. */ type MoversSortMetric = 'clicks' | 'impressions' | 'clicksChange' | 'impressionsChange' | 'positionChange';
301
+ /** Options for `analyzeMoversFromSource`; every field is optional. */ interface MoversOptions {
302
+ /** Minimum change threshold to flag. Default: 0.2 (20%) */
303
+ changeThreshold?: number;
304
+ /** Minimum impressions in recent period. Default: 50 */
305
+ minImpressions?: number;
306
+ /** Metric to sort results by. Default: clicksChange */
307
+ sortBy?: MoversSortMetric;
308
+ }
309
+ /** One keyword's recent vs. baseline metrics and the changes between them; element type of the `MoversResult` lists. */ interface MoverData {
310
+ keyword: string;
311
+ page: string | null; // nullable rather than optional
312
+ recentClicks: number;
313
+ recentImpressions: number;
314
+ recentPosition: number;
315
+ baselineClicks: number;
316
+ baselineImpressions: number;
317
+ baselinePosition: number;
318
+ clicksChange: number;
319
+ clicksChangePercent: number;
320
+ impressionsChangePercent: number;
321
+ positionChange: number;
322
+ }
323
+ /** Movers grouped into `rising`, `declining`, and `stable` lists. */ interface MoversResult {
324
+ rising: MoverData[];
325
+ declining: MoverData[];
326
+ stable: MoverData[];
327
+ }
328
+ /** Component scores exposed on `OpportunityResult.factors`. */ interface OpportunityFactors {
329
+ positionScore: number;
330
+ impressionScore: number;
331
+ ctrGapScore: number;
332
+ }
333
+ /** Per-keyword opportunity record; `analyzeOpportunityFromSource` resolves to an array of these. */ interface OpportunityResult {
334
+ keyword: string;
335
+ page: string | null; // nullable rather than optional
336
+ clicks: number;
337
+ impressions: number;
338
+ ctr: number;
339
+ position: number;
340
+ opportunityScore: number;
341
+ potentialClicks: number;
342
+ factors: OpportunityFactors;
343
+ }
344
+ /** Accepted values for `SeasonalityOptions.metric`. */ type SeasonalityMetric = 'clicks' | 'impressions';
345
+ /** Options for `analyzeSeasonalityFromSource`. */ interface SeasonalityOptions {
346
+ /** Metric to analyze for seasonality. Default: clicks */
347
+ metric?: SeasonalityMetric;
348
+ }
349
+ /** Element type of `SeasonalityResult.monthlyBreakdown`. */ interface MonthlyData {
350
+ month: string;
351
+ value: number;
352
+ vsAverage: number; // NOTE(review): whether this is a ratio or a difference against the average is not stated here — confirm
353
+ isPeak: boolean;
354
+ isTrough: boolean;
355
+ }
356
+ /** Result type of `analyzeSeasonalityFromSource`. */ interface SeasonalityResult {
357
+ hasSeasonality: boolean;
358
+ /** Coefficient of variation: std dev / mean. Higher = more seasonal. */
359
+ strength: number;
360
+ peakMonths: string[];
361
+ troughMonths: string[];
362
+ monthlyBreakdown: MonthlyData[];
363
+ insufficientData: boolean;
364
+ }
365
+ /** A single period given as `startDate` / `endDate` strings (format and inclusivity not stated in this file). */ interface AnalysisPeriod {
366
+ startDate: string;
367
+ endDate: string;
368
+ }
369
+ /** A pair of periods, `current` and `previous`; taken by the comparison, decay, and movers functions. */ interface ComparisonPeriod {
370
+ current: AnalysisPeriod;
371
+ previous: AnalysisPeriod;
372
+ }
373
+ /** Per-keyword striking-distance record; `analyzeStrikingDistanceFromSource` resolves to an array of these. */ interface StrikingDistanceResult {
374
+ keyword: string;
375
+ page: string | null; // nullable rather than optional
376
+ clicks: number;
377
+ impressions: number;
378
+ ctr: number;
379
+ position: number;
380
+ /** Estimated clicks at ~15% CTR (the average for positions 1–3). */
381
+ potentialClicks: number;
382
+ }
383
+ /** Accepted values for `StrikingDistanceOptions.sortBy`; each is also a numeric field name on `StrikingDistanceResult`. */ type StrikingDistanceSortMetric = 'clicks' | 'impressions' | 'ctr' | 'position' | 'potentialClicks';
384
+ /** Options for `analyzeStrikingDistanceFromSource`; every field is optional. */ interface StrikingDistanceOptions {
385
+ /** Minimum position (inclusive). Default: 4 */
386
+ minPosition?: number;
387
+ /** Maximum position (inclusive). Default: 20 */
388
+ maxPosition?: number;
389
+ /** Minimum impressions. Default: 100 */
390
+ minImpressions?: number;
391
+ /** Maximum CTR (queries with low CTR have more potential). Default: 0.05 (5%) */
392
+ maxCtr?: number;
393
+ /** Sort metric. Default: potentialClicks */
394
+ sortBy?: StrikingDistanceSortMetric;
395
+ /** Sort order. Default: desc */
396
+ sortOrder?: SortOrder;
397
+ }
398
+ /** Accepted values for `QueryOptions.dimension`; the same three names are the keys of `QueryResult`. */ type QueryDimension = 'keywords' | 'pages' | 'dates';
399
+ /** Options for `queryAnalyticsFromSource` and `queryComparisonFromSource`; defaults are not documented in this file. */ interface QueryOptions {
400
+ dimension?: QueryDimension;
401
+ limit?: number;
402
+ }
403
+ /** Result of `queryAnalyticsFromSource`: one row list per dimension; all three keys are required. */ interface QueryResult {
404
+ keywords: KeywordRow[];
405
+ pages: PageRow[];
406
+ dates: DateRow[];
407
+ }
408
+ /** Result of `queryComparisonFromSource`: a `QueryResult` for each of the two periods. */ interface ComparisonQueryResult {
409
+ current: QueryResult;
410
+ previous: QueryResult;
411
+ }
412
+ /** Options for `analyzeOpportunityFromSource`. */ interface OpportunityOptions {
413
+ minImpressions?: number; // default is not documented in this file
414
+ }
415
+ /** Takes a source, one period, and optional `QueryOptions`; resolves to a `QueryResult`. */ declare function queryAnalyticsFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: QueryOptions): Promise<QueryResult>;
416
+ /** Two-period counterpart of `queryAnalyticsFromSource`; resolves to a `ComparisonQueryResult`. */ declare function queryComparisonFromSource(source: AnalysisQuerySource, periods: ComparisonPeriod, options?: QueryOptions): Promise<ComparisonQueryResult>;
417
+ /** Striking-distance analysis over one period; resolves to `StrikingDistanceResult[]`. Defaults are documented on `StrikingDistanceOptions`. */ declare function analyzeStrikingDistanceFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: StrikingDistanceOptions): Promise<StrikingDistanceResult[]>;
418
+ /** Opportunity analysis over one period; resolves to `OpportunityResult[]`. */ declare function analyzeOpportunityFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: OpportunityOptions): Promise<OpportunityResult[]>;
419
+ /** Brand segmentation over one period; resolves to a `BrandSegmentationResult`. Unlike the sibling analyzers, `options` is required. */ declare function analyzeBrandSegmentationFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options: BrandSegmentationOptions): Promise<BrandSegmentationResult>;
420
+ /** Page-level concentration analysis over one period; resolves to a `ConcentrationResult`. */ declare function analyzePageConcentrationFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: ConcentrationOptions): Promise<ConcentrationResult>;
421
+ /** Keyword-level concentration analysis over one period; resolves to a `ConcentrationResult`. */ declare function analyzeKeywordConcentrationFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: ConcentrationOptions): Promise<ConcentrationResult>;
422
+ /** Keyword clustering over one period; resolves to a `ClusteringResult`. */ declare function analyzeClusteringFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: ClusteringOptions): Promise<ClusteringResult>;
423
+ /** Seasonality analysis over one period; resolves to a `SeasonalityResult`. */ declare function analyzeSeasonalityFromSource(source: AnalysisQuerySource, period: AnalysisPeriod, options?: SeasonalityOptions): Promise<SeasonalityResult>;
424
+ /** Decay analysis across a current/previous period pair; resolves to `DecayResult[]`. */ declare function analyzeDecayFromSource(source: AnalysisQuerySource, periods: ComparisonPeriod, options?: DecayOptions): Promise<DecayResult[]>;
425
+ /** Movers analysis across a current/previous period pair; resolves to a `MoversResult`. */ declare function analyzeMoversFromSource(source: AnalysisQuerySource, periods: ComparisonPeriod, options?: MoversOptions): Promise<MoversResult>;
426
+ /** Alias of `EngineConfig` from `@gscdump/engine-sqlite`. */ type SqliteQuerySourceOptions = EngineConfig;
427
+ export { type AnalysisQuerySource, AnalyzerCapabilityError, type BrowserQueryRunner, type ComparisonQueryResult, ENGINE_QUERY_CAPABILITIES, type EngineQuerySourceOptions, type FetchTopNOptions, GSC_API_CAPABILITIES, type GscApiQuerySourceOptions, type GscDailyRow, type GscRange, type GscTopNRow, IN_MEMORY_DEFAULT_CAPABILITIES, type InMemoryQuerySourceOptions, type QueryDimension, type QueryOptions, type QueryResult, type QueryRow, type RowQuerySource, type SourceCapabilities, type SqlQuerySource, type SqliteQueryExecutor, type SqliteQuerySourceOptions, type TypedQuery, analyzeBrandSegmentationFromSource, analyzeClusteringFromSource, analyzeDecayFromSource, analyzeFromSource, analyzeKeywordConcentrationFromSource, analyzeMoversFromSource, analyzeOpportunityFromSource, analyzePageConcentrationFromSource, analyzeSeasonalityFromSource, analyzeStrikingDistanceFromSource, collectRows as collectGscRows, createBrowserQuerySource, createEngineQuerySource, createGscApiQuerySource, createInMemoryQuerySource, createSqliteQuerySource, fetchGscDaily, fetchGscTopN, isSqlQuerySource, queryAnalyticsFromSource, queryComparisonFromSource, queryComparisonRows, queryRows, runAnalyzerWithEngine, typedQuery };